diff --git a/.env.example b/.env.example
index a45fdd5..64c31d3 100644
--- a/.env.example
+++ b/.env.example
@@ -80,6 +80,16 @@ AGENTIC_CHAT_MAX_ITERATIONS=6
 # LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
 # LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180
 
+# ── Unified search translation model (optional) ─────────────────────────
+# /photos/search/unified runs one small LLM call to translate a natural-
+# language query into structured filters + a semantic term, then CLIP-ranks.
+# That step needs an LLM AND CLIP available at once. On a tight VRAM budget a
+# large chat model can't co-reside with CLIP, so pin a small, fast model here
+# (it can stay loaded alongside CLIP and the chat model). Precedence:
+# UNIFIED_SEARCH_MODEL > the client's selected model > the configured default.
+# Runs on the configured backend (LLM_BACKEND); local only — no hybrid.
+# UNIFIED_SEARCH_MODEL=qwen3-0.6b
+
 # ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ───────────────────
 # TTS routes through the same llama-swap proxy (a Chatterbox model id), so it
 # only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp.
@@ -139,3 +149,31 @@ CLIP_REQUEST_TIMEOUT_SEC=60
 # ── RAG / search ────────────────────────────────────────────────────────
 # Set to `1` to enable cross-encoder reranking on /search results.
 SEARCH_RAG_RERANK=0
+
+# ── Nightly reel pre-generation (Phase 3+) ──────────────────────────────
+# Set to `1` to enable the scheduler. Disabled by default.
+# REEL_PREGEN_ENABLED=1
+# Hour (0-23) when the nightly batch fires. Default 3 AM.
+# REEL_PREGEN_HOUR=3
+# Day of week for weekly reels (0=Sun, 1=Mon, …). Default Monday.
+# REEL_PREGEN_WEEK_DOW=1
+# Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to
+# the server's local timezone.
+# REEL_PREGEN_TZ_OFFSET_MINUTES=
+# Fixed timezone offset — overrides auto-detect to avoid DST shifts.
+# When set, both the DB fallback and env fallback use this value.
+# REEL_PREGEN_TZ_FIXED_MINUTES=-480
+# Voice ID for narration (e.g., "grandma"). Falls back to the value
+# stored in the user_ai_prefs DB row when set.
+# REEL_PREGEN_VOICE=
+# Library filter: a library id (e.g. "1") or "all" for every library.
+# REEL_PREGEN_LIBRARY=all
+# Max agentic tool iterations for pre-gen scripter. Default 8.
+# REEL_PREGEN_MAX_TOOL_ITERS=8
+#
+# On-disk reel cache sweep (runs every 24h, independent of pre-gen). Removes
+# reel MP4s with no ledger row + no live job that are older than the max age —
+# i.e. the on-demand cache, which otherwise grows forever. Set to 0 to disable.
+# REEL_CACHE_SWEEP_ENABLED=1
+# Age (days) before an unreferenced reel MP4 is swept. Default 7.
+# REEL_CACHE_MAX_AGE_DAYS=7
diff --git a/Cargo.lock b/Cargo.lock
index a35a7d2..9455f5c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2051,7 +2051,7 @@ dependencies = [
 
 [[package]]
 name = "image-api"
-version = "1.3.0"
+version = "1.4.0"
 dependencies = [
  "actix",
  "actix-cors",
diff --git a/Cargo.toml b/Cargo.toml
index 3b3a08a..860e6ae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "image-api"
-version = "1.3.0"
+version = "1.4.0"
 authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
 edition = "2024"
 
diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/down.sql b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql
new file mode 100644
index 0000000..91863c2
--- /dev/null
+++ b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql
@@ -0,0 +1,2 @@
+DROP INDEX IF EXISTS idx_precomputed_reels_span_library;
+DROP TABLE IF EXISTS precomputed_reels;
diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/up.sql b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql
new file mode 100644
index 0000000..ba49b72
--- /dev/null
+++ b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql
@@ -0,0 +1,14 @@
+CREATE TABLE precomputed_reels (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    span TEXT NOT NULL,
+    library_key TEXT NOT NULL,
+    cache_key TEXT NOT NULL,
+    output_path TEXT NOT NULL,
+    title TEXT NOT NULL,
+    media_count INT NOT NULL,
+    render_version INT NOT NULL DEFAULT 1,
+    tz_offset_minutes INT NOT NULL,
+    voice TEXT,
+    generated_at BIGINT NOT NULL
+);
+CREATE INDEX idx_precomputed_reels_span_library ON precomputed_reels(span, library_key, generated_at DESC);
diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql
new file mode 100644
index 0000000..83b82a3
--- /dev/null
+++ b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS user_ai_prefs;
diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql
new file mode 100644
index 0000000..fd8f6f2
--- /dev/null
+++ b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql
@@ -0,0 +1,7 @@
+CREATE TABLE user_ai_prefs (
+    id INTEGER PRIMARY KEY CHECK(id=1),
+    voice TEXT,
+    tz_offset_minutes INTEGER,
+    library TEXT,
+    updated_at BIGINT NOT NULL
+);
diff --git a/src/ai/backend.rs b/src/ai/backend.rs
index 0515f1c..dfcdd03 100644
--- a/src/ai/backend.rs
+++ b/src/ai/backend.rs
@@ -41,6 +41,10 @@ pub struct SamplingOverrides {
     pub top_p: Option<f32>,
     pub top_k: Option<i32>,
     pub min_p: Option<f32>,
+    /// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as
+    /// `chat_template_kwargs.enable_thinking`); other backends ignore it.
+    /// `None` leaves the model/template default in place.
+    pub enable_thinking: Option<bool>,
 }
 
 impl SamplingOverrides {
@@ -124,6 +128,7 @@ mod tests {
             top_p: None,
             top_k: None,
             min_p: None,
+            enable_thinking: None,
         };
         assert!(!empty.has_sampling());
 
@@ -134,6 +139,7 @@ mod tests {
             top_p: None,
             top_k: None,
             min_p: None,
+            enable_thinking: None,
         };
         assert!(with_temp.has_sampling());
     }
diff --git a/src/ai/clip_client.rs b/src/ai/clip_client.rs
index 85c66a7..3519e8b 100644
--- a/src/ai/clip_client.rs
+++ b/src/ai/clip_client.rs
@@ -191,11 +191,13 @@ impl ClipClient {
         let resp = match self.client.post(&url).json(&body).send().await {
             Ok(r) => r,
             Err(e) if e.is_timeout() || e.is_connect() => {
+                log::warn!("clip encode_text network error to {url}: {e}");
                 return Err(ClipError::Transient(anyhow::anyhow!(
                     "clip client network: {e}"
                 )));
             }
             Err(e) => {
+                log::warn!("clip encode_text request error to {url}: {e}");
                 return Err(ClipError::Transient(anyhow::anyhow!(
                     "clip client request: {e}"
                 )));
@@ -210,6 +212,7 @@ impl ClipClient {
             return Ok(body);
         }
         let body_text = resp.text().await.unwrap_or_default();
+        log::warn!("clip encode_text HTTP {status} from {url}: {body_text}");
         Err(classify_error_response(status.as_u16(), &body_text))
     }
 
diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs
index cb21b14..ae9f300 100644
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -40,6 +40,12 @@ pub struct GeneratePhotoInsightRequest {
     pub top_k: Option<i32>,
     #[serde(default)]
     pub min_p: Option<f32>,
+    /// Reasoning toggle for thinking-capable models. Forwarded to the
+    /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored
+    /// by other backends and the non-agentic (Ollama) path. Only the agentic
+    /// endpoint routes through llama.cpp. None defers to the template default.
+    #[serde(default)]
+    pub enable_thinking: Option<bool>,
     /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
     /// OpenRouter chat). Only respected by the agentic endpoint.
     #[serde(default)]
@@ -120,7 +126,7 @@ pub async fn generation_status_handler(
     }
 
     if let Some(ref fp) = query.path {
-        let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
+        let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
             .ok()
             .flatten()
             .unwrap_or_else(|| app_state.primary_library());
@@ -218,10 +224,11 @@ pub async fn cancel_generation_handler(
     }
 
     if let Some(ref fp) = request.file_path {
-        let library = libraries::resolve_library_param(&app_state, request.library.as_deref())
-            .ok()
-            .flatten()
-            .unwrap_or_else(|| app_state.primary_library());
+        let library =
+            libraries::resolve_library_param_state(&app_state, request.library.as_deref())
+                .ok()
+                .flatten()
+                .unwrap_or_else(|| app_state.primary_library());
         let normalized = normalize_path(fp);
 
         // Get active job ids first, then cancel in DB, then abort tasks
@@ -580,7 +587,7 @@ pub async fn get_insight_handler(
 
     // Expand to rel_paths sharing content so an insight generated under
     // library 1 still shows when the same photo is viewed from library 2.
-    let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
+    let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
         .ok()
         .flatten()
         .unwrap_or_else(|| app_state.primary_library());
@@ -867,6 +874,7 @@ pub async fn generate_agentic_insight_handler(
                     request.top_p,
                     request.top_k,
                     request.min_p,
+                    request.enable_thinking,
                     max_iterations,
                     request.backend.clone(),
                     fewshot_examples,
@@ -1168,6 +1176,11 @@ pub struct ChatTurnHttpRequest {
     pub top_k: Option<i32>,
     #[serde(default)]
     pub min_p: Option<f32>,
+    /// Reasoning toggle for thinking-capable models. Forwarded to the
+    /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored
+    /// by other backends. None defers to the model/template default.
+    #[serde(default)]
+    pub enable_thinking: Option<bool>,
     #[serde(default)]
     pub max_iterations: Option<usize>,
     /// Per-turn system-prompt override. Ephemeral in append mode,
@@ -1218,15 +1231,16 @@ pub async fn chat_turn_handler(
     let mut span = tracer.start_with_context("http.insights.chat", &parent_context);
     span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));
 
-    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
-        Ok(Some(lib)) => lib,
-        Ok(None) => app_state.primary_library(),
-        Err(e) => {
-            return HttpResponse::BadRequest().json(serde_json::json!({
-                "error": format!("invalid library: {}", e)
-            }));
-        }
-    };
+    let library =
+        match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) {
+            Ok(Some(lib)) => lib,
+            Ok(None) => app_state.primary_library(),
+            Err(e) => {
+                return HttpResponse::BadRequest().json(serde_json::json!({
+                    "error": format!("invalid library: {}", e)
+                }));
+            }
+        };
 
     // Service-token claims (sub: "service:apollo") fall through to
     // user_id=1 — the operator convention. Mobile/web clients have a
@@ -1245,6 +1259,7 @@ pub async fn chat_turn_handler(
         top_p: request.top_p,
         top_k: request.top_k,
         min_p: request.min_p,
+        enable_thinking: request.enable_thinking,
         max_iterations: request.max_iterations,
         system_prompt: request.system_prompt.clone(),
         persona_id: request.persona_id.clone(),
@@ -1344,15 +1359,16 @@ pub async fn chat_rewind_handler(
     request: web::Json<ChatRewindHttpRequest>,
     app_state: web::Data<AppState>,
 ) -> impl Responder {
-    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
-        Ok(Some(lib)) => lib,
-        Ok(None) => app_state.primary_library(),
-        Err(e) => {
-            return HttpResponse::BadRequest().json(serde_json::json!({
-                "error": format!("invalid library: {}", e)
-            }));
-        }
-    };
+    let library =
+        match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) {
+            Ok(Some(lib)) => lib,
+            Ok(None) => app_state.primary_library(),
+            Err(e) => {
+                return HttpResponse::BadRequest().json(serde_json::json!({
+                    "error": format!("invalid library: {}", e)
+                }));
+            }
+        };
 
     match app_state
         .insight_chat
@@ -1393,7 +1409,7 @@ pub async fn chat_history_handler(
     // cross-library lookup when the scoped one misses, so a photo
     // with no insight in this library but one in another still
     // surfaces (the "show this photo's primary insight" merge case).
-    let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
+    let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
         .ok()
         .flatten()
         .unwrap_or_else(|| app_state.primary_library());
@@ -1444,15 +1460,16 @@ pub async fn chat_stream_handler(
     request: web::Json<ChatTurnHttpRequest>,
     app_state: web::Data<AppState>,
 ) -> HttpResponse {
-    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
-        Ok(Some(lib)) => lib,
-        Ok(None) => app_state.primary_library(),
-        Err(e) => {
-            return HttpResponse::BadRequest().json(serde_json::json!({
-                "error": format!("invalid library: {}", e)
-            }));
-        }
-    };
+    let library =
+        match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) {
+            Ok(Some(lib)) => lib,
+            Ok(None) => app_state.primary_library(),
+            Err(e) => {
+                return HttpResponse::BadRequest().json(serde_json::json!({
+                    "error": format!("invalid library: {}", e)
+                }));
+            }
+        };
 
     // Service-token sub falls through to user_id=1 (see chat_turn_handler).
     let user_id = claims.sub.parse::<i32>().unwrap_or(1);
@@ -1469,6 +1486,7 @@ pub async fn chat_stream_handler(
         top_p: request.top_p,
         top_k: request.top_k,
         min_p: request.min_p,
+        enable_thinking: request.enable_thinking,
         max_iterations: request.max_iterations,
         system_prompt: request.system_prompt.clone(),
         persona_id: request.persona_id.clone(),
@@ -1589,15 +1607,16 @@ pub async fn turn_async_handler(
     let mut span = tracer.start_with_context("http.insights.chat_turn_async", &parent_context);
     span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));
 
-    let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
-        Ok(Some(lib)) => lib,
-        Ok(None) => app_state.primary_library(),
-        Err(e) => {
-            return HttpResponse::BadRequest().json(serde_json::json!({
-                "error": format!("invalid library: {}", e)
-            }));
-        }
-    };
+    let library =
+        match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) {
+            Ok(Some(lib)) => lib,
+            Ok(None) => app_state.primary_library(),
+            Err(e) => {
+                return HttpResponse::BadRequest().json(serde_json::json!({
+                    "error": format!("invalid library: {}", e)
+                }));
+            }
+        };
 
     let user_id = claims.sub.parse::<i32>().unwrap_or(1);
 
@@ -1613,6 +1632,7 @@ pub async fn turn_async_handler(
         top_p: request.top_p,
         top_k: request.top_k,
         min_p: request.min_p,
+        enable_thinking: request.enable_thinking,
         max_iterations: request.max_iterations,
         system_prompt: request.system_prompt.clone(),
         persona_id: request.persona_id.clone(),
diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index 84f2b32..af00731 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -70,6 +70,10 @@ pub struct ChatTurnRequest {
     pub top_p: Option<f32>,
     pub top_k: Option<i32>,
     pub min_p: Option<f32>,
+    /// Reasoning toggle for thinking-capable models. Forwarded to the
+    /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored
+    /// by other backends. None defers to the model/template default.
+    pub enable_thinking: Option<bool>,
     pub max_iterations: Option<usize>,
     /// Per-turn system-prompt override. In append mode (default), applied
     /// ephemerally — original system message restored before persistence.
@@ -344,6 +348,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
@@ -847,6 +852,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
@@ -1017,6 +1023,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
@@ -1425,6 +1432,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
@@ -1607,6 +1615,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs
index 3673c43..d45fa55 100644
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -217,6 +217,13 @@ impl InsightGenerator {
         &self.insight_dao
     }
 
+    /// Accessor for the EXIF DAO (used by the reel scheduler to resolve
+    /// GPS enrichment without creating a separate DB connection).
+    #[allow(dead_code)]
+    pub fn exif_dao(&self) -> &Arc<Mutex<Box<dyn ExifDao>>> {
+        &self.exif_dao
+    }
+
     /// Whether the optional Apollo Places integration is wired up. Drives
     /// tool-definition gating (no point offering `get_personal_place_at`
     /// when Apollo is unreachable) — exposed publicly so `insight_chat`
@@ -3926,6 +3933,7 @@ Return ONLY the summary, nothing else."#,
             if let Some(ctx) = overrides.num_ctx {
                 c.set_num_ctx(Some(ctx));
             }
+            c.set_enable_thinking(overrides.enable_thinking);
             Box::new(c)
         } else {
             // Pure Ollama local.
@@ -4057,6 +4065,7 @@ Return ONLY the summary, nothing else."#,
         top_p: Option<f32>,
         top_k: Option<i32>,
         min_p: Option<f32>,
+        enable_thinking: Option<bool>,
         max_iterations: usize,
         backend: Option<String>,
         fewshot_examples: Vec<Vec<ChatMessage>>,
@@ -4084,6 +4093,7 @@ Return ONLY the summary, nothing else."#,
             top_p,
             top_k,
             min_p,
+            enable_thinking,
         };
         let backend = self.resolve_backend(kind, &overrides).await?;
         span.set_attribute(KeyValue::new("model", backend.model().to_string()));
@@ -4497,6 +4507,110 @@ Return ONLY the summary, nothing else."#,
         ))
     }
 
+    /// A read-only agentic tool loop: chat with tools until the model stops
+    /// calling them, then return the final content.
+    ///
+    /// This mirrors the tool loop inside `generate_agentic_insight_for_photo`,
+    /// extracted so the reel-scripter can reuse it without the photo-specific
+    /// context (image_base64, file_path, persona_id). The photo insight loop
+    /// keeps its own copy because it threads image/file context through
+    /// `execute_tool`; here `execute_tool` gets empty file/image context,
+    /// which the enabled tools never read.
+    ///
+    /// If no non-empty answer is produced within `max_iter` iterations, one
+    /// tool-free follow-up request asks for the final answer as a JSON object.
+    ///
+    /// Only used by the `reels` module (compiled in `main.rs`, not `lib.rs`),
+    /// so the `#[allow(dead_code)]` suppresses the lib-target warning.
+    #[allow(dead_code)]
+    pub(crate) async fn run_readonly_tool_loop(
+        &self,
+        backend: &ResolvedBackend,
+        mut messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+        max_iter: usize,
+    ) -> Result<String> {
+        let mut final_content = String::new();
+
+        for iteration in 0..max_iter {
+            log::info!("Agentic iteration {}/{}", iteration + 1, max_iter);
+
+            let (response, _prompt_tokens, _eval_tokens) = backend
+                .chat()
+                .chat_with_tools(messages.clone(), tools.clone())
+                .await?;
+
+            // Sanitize tool call arguments before pushing back into history.
+            // Some models occasionally return non-object arguments (bool,
+            // string, null) which Ollama rejects when they are re-sent in
+            // a subsequent request.
+            let mut response = response;
+            if let Some(ref mut tool_calls) = response.tool_calls {
+                for tc in tool_calls.iter_mut() {
+                    if !tc.function.arguments.is_object() {
+                        log::warn!(
+                            "Tool '{}' returned non-object arguments ({:?}), normalising to {{}}",
+                            tc.function.name,
+                            tc.function.arguments
+                        );
+                        tc.function.arguments = serde_json::Value::Object(Default::default());
+                    }
+                }
+            }
+
+            messages.push(response.clone());
+
+            if let Some(ref tool_calls) = response.tool_calls
+                && !tool_calls.is_empty()
+            {
+                for tool_call in tool_calls {
+                    log::info!(
+                        "Agentic tool call [{}]: {} {}",
+                        iteration,
+                        tool_call.function.name,
+                        tool_call.function.arguments
+                    );
+                    let result = self
+                        .execute_tool(
+                            &tool_call.function.name,
+                            &tool_call.function.arguments,
+                            backend,
+                            &None,
+                            "",
+                            0,
+                            "",
+                            &opentelemetry::Context::new(),
+                        )
+                        .await;
+                    messages.push(ChatMessage::tool_result(result));
+                }
+                continue;
+            }
+
+            // No tool calls — this is the final answer
+            final_content = response.content;
+            break;
+        }
+
+        // No answer yet (iterations exhausted or an empty reply): ask for one
+        if final_content.is_empty() {
+            log::info!(
+                "Agentic loop exhausted after {} iterations, requesting final answer",
+                max_iter
+            );
+            messages.push(ChatMessage::user(
+                "Based on the context gathered, please write the final answer. Return ONLY the JSON object, no prose or code fences.",
+            ));
+            // `messages` is not read after this request, so the history is
+            // moved into the call rather than cloned; the empty tool list
+            // means no tools are offered on this final request.
+            let (final_response, _, _) = backend.chat().chat_with_tools(messages, vec![]).await?;
+            final_content = final_response.content;
+        }
+
+        Ok(final_content)
+    }
+
     /// Reverse geocode GPS coordinates to human-readable place names
     async fn reverse_geocode(&self, lat: f64, lon: f64) -> Option<String> {
         let url = format!(
diff --git a/src/ai/llamacpp.rs b/src/ai/llamacpp.rs
index 8a7c898..77e7f63 100644
--- a/src/ai/llamacpp.rs
+++ b/src/ai/llamacpp.rs
@@ -64,6 +64,12 @@ pub struct LlamaCppClient {
     top_p: Option<f32>,
     top_k: Option<i32>,
     min_p: Option<f32>,
+    /// When `Some`, forwarded to llama-server as
+    /// `chat_template_kwargs: {"enable_thinking": <bool>}`. The Jinja chat
+    /// template (e.g. Qwen3) reads this to gate its reasoning block. `None`
+    /// omits the key entirely, leaving the template's own default. Templates
+    /// that don't reference the key ignore it, so sending it is harmless.
+    enable_thinking: Option<bool>,
 }
 
 impl LlamaCppClient {
@@ -89,6 +95,7 @@ impl LlamaCppClient {
             top_p: None,
             top_k: None,
             min_p: None,
+            enable_thinking: None,
         }
     }
 
@@ -104,6 +111,12 @@ impl LlamaCppClient {
         self.num_ctx = num_ctx;
     }
 
+    /// Set the reasoning toggle forwarded as `chat_template_kwargs.enable_thinking`.
+    /// `None` leaves the chat template's own default in place.
+    pub fn set_enable_thinking(&mut self, enable_thinking: Option<bool>) {
+        self.enable_thinking = enable_thinking;
+    }
+
     pub fn set_sampling_params(
         &mut self,
         temperature: Option<f32>,
@@ -458,6 +471,12 @@ impl LlamaCppClient {
         // via -c, so we silently drop the override here. The config.yaml
         // entry is the source of truth for context size.
         let _ = self.num_ctx;
+        // Reasoning toggle for thinking-capable templates (Qwen3 et al.).
+        // llama-server forwards chat_template_kwargs into the Jinja render
+        // (requires --jinja); templates that ignore the key are unaffected.
+        if let Some(think) = self.enable_thinking {
+            v.push(("chat_template_kwargs", json!({ "enable_thinking": think })));
+        }
         v
     }
 
diff --git a/src/ai/mod.rs b/src/ai/mod.rs
index c5302fb..7d0802e 100644
--- a/src/ai/mod.rs
+++ b/src/ai/mod.rs
@@ -10,6 +10,7 @@ pub mod insight_generator;
 pub mod llamacpp;
 pub mod llm_client;
 pub mod local_llm;
+pub mod nl_query;
 pub mod ollama;
 pub mod openrouter;
 pub mod pronunciation;
diff --git a/src/ai/nl_query.rs b/src/ai/nl_query.rs
new file mode 100644
index 0000000..d709322
--- /dev/null
+++ b/src/ai/nl_query.rs
@@ -0,0 +1,408 @@
+//! Natural-language → structured-query translation for unified photo search.
+//!
+//! The unified search endpoint (`/photos/search/unified`, Phase 2) needs to
+//! turn a free-text query like *"sunset photos in Italy from last summer"*
+//! into the structured filter the existing `/photos` engine understands plus
+//! a semantic term for CLIP ranking. That translation is a single grounded
+//! LLM call, isolated here so it can be unit-tested without a network or the
+//! full `InsightGenerator`.
+//!
+//! Two-stage design:
+//!  1. The LLM emits a [`RawNlQuery`] — references are by *name* (tags) and
+//!     dates as ISO strings, never numeric ids it could hallucinate.
+//!  2. [`resolve_raw_query`] maps names against the real tag vocabulary and
+//!     converts ISO dates to unix seconds, producing a [`StructuredQuery`].
+//!     A tag the model invents that isn't in the vocab is surfaced in
+//!     `unmatched_tags` (the caller folds it back into the semantic term)
+//!     rather than silently dropped — this is the anti-noise guard.
+//!
+//! Geocoding of `place` and person filtering are intentionally *not* handled
+//! here: `place` stays as text for the caller to forward-geocode (async, see
+//! `geo::forward_geocode`), and person filtering is deferred until a
+//! person→photos resolver exists.
+
+use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks};
+use anyhow::{Result, anyhow};
+use serde::{Deserialize, Serialize};
+
+/// Raw query object as emitted by the LLM. Tag references are by name
+/// (resolved against the real vocab in Rust); dates are ISO `YYYY-MM-DD`.
+/// Every field is optional: a missing key or a JSON `null` yields `None`
+/// (serde's built-in `Option` handling) or an empty list (`null_as_empty`).
+#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
+pub struct RawNlQuery {
+    /// Visual/scene description handed to CLIP for ranking. The descriptive
+    /// remainder after structured filters are peeled off.
+    pub semantic: Option<String>,
+    /// Tag names the photos must have. Matched case-insensitively against
+    /// the supplied vocabulary; non-matches land in `unmatched_tags`.
+    #[serde(default, deserialize_with = "null_as_empty")]
+    pub tags: Vec<String>,
+    /// Tag names the photos must NOT have.
+    #[serde(default, deserialize_with = "null_as_empty")]
+    pub exclude_tags: Vec<String>,
+    pub camera_make: Option<String>,
+    pub camera_model: Option<String>,
+    pub lens_model: Option<String>,
+    /// Free-text place/location name to forward-geocode (e.g. "Italy").
+    pub place: Option<String>,
+    /// Inclusive start date, ISO `YYYY-MM-DD`.
+    pub date_from: Option<String>,
+    /// Inclusive end date, ISO `YYYY-MM-DD`.
+    pub date_to: Option<String>,
+    /// "photo" | "video" — normalized in [`resolve_raw_query`].
+    pub media_type: Option<String>,
+}
+
+/// Deserialize a tag list, reading an explicit JSON `null` as an empty list.
+/// `#[serde(default)]` alone only covers a *missing* key, and the system
+/// prompt tells the model it may answer "null / empty array", so `null` has
+/// to decode instead of failing the whole response.
+fn null_as_empty<'de, D: serde::Deserializer<'de>>(de: D) -> Result<Vec<String>, D::Error> {
+    Ok(Option::<Vec<String>>::deserialize(de)?.unwrap_or_default())
+}
+
+/// Resolved structured query: tag names mapped to ids against the real
+/// vocab, ISO dates converted to UTC unix seconds. `place` stays as text for
+/// the caller to forward-geocode into a gps circle. Serializable so the
+/// endpoint can echo it back to the client as "this is how I read your
+/// query" (editable filter chips).
+#[derive(Debug, Clone, Default, PartialEq, Serialize)]
+pub struct StructuredQuery {
+    pub semantic: Option<String>,
+    pub tag_ids: Vec<i32>,
+    pub exclude_tag_ids: Vec<i32>,
+    /// Names from `tags` (not `exclude_tags`) that don't exist in the vocabulary.
+    /// The caller folds these back into the semantic term so the concept
+    /// isn't lost — and surfacing them keeps a hallucinated tag from
+    /// silently filtering the whole library to nothing.
+    pub unmatched_tags: Vec<String>,
+    pub camera_make: Option<String>,
+    pub camera_model: Option<String>,
+    pub lens_model: Option<String>,
+    /// Raw place name awaiting forward-geocoding by the caller.
+    pub place: Option<String>,
+    pub date_from: Option<i64>,
+    pub date_to: Option<i64>,
+    /// Normalized to "photo" | "video"; `None` means no media-type filter.
+    pub media_type: Option<String>,
+}
+
+/// Turn an ISO `YYYY-MM-DD` date into a unix timestamp in seconds, reading
+/// the date as UTC. Surrounding whitespace is ignored.
+///
+/// With `end_of_day` the result is 23:59:59 of that day, so a `date_to` bound
+/// covers the whole day; otherwise it is 00:00:00. Unparseable input returns
+/// `None` (the filter is simply omitted rather than erroring).
+pub fn iso_to_unix(date: &str, end_of_day: bool) -> Option<i64> {
+    let day = chrono::NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok()?;
+    // Inclusive upper bounds point at the last second of the day.
+    let (hour, minute, second) = if end_of_day { (23, 59, 59) } else { (0, 0, 0) };
+    let moment = day.and_hms_opt(hour, minute, second)?;
+    Some(moment.and_utc().timestamp())
+}
+
+/// Map a free-form media-type word onto the engine's vocabulary: "photo" or
+/// "video". Input is trimmed and lower-cased first.
+/// Anything else — including "all" — is `None`, meaning no filter.
+fn normalize_media_type(raw: &str) -> Option<String> {
+    const PHOTO: [&str; 6] = ["photo", "photos", "image", "images", "picture", "pictures"];
+    const VIDEO: [&str; 6] = ["video", "videos", "movie", "movies", "clip", "clips"];
+    let word = raw.trim().to_lowercase();
+    if PHOTO.contains(&word.as_str()) {
+        return Some("photo".to_string());
+    }
+    VIDEO.contains(&word.as_str()).then(|| "video".to_string())
+}
+
+/// Resolve a raw LLM query against the real tag vocabulary, producing the
+/// structured filter. Pure — no network, no LLM — so it carries the
+/// correctness-critical mapping logic under unit test.
+///
+/// `tag_vocab` is `(tag_id, tag_name)` pairs (the shape `TagDao::get_all_tags`
+/// yields once the count is dropped). Matching is case-insensitive and exact
+/// on the trimmed name; a name repeated in any spelling is collected once.
+pub fn resolve_raw_query(raw: RawNlQuery, tag_vocab: &[(i32, String)]) -> StructuredQuery {
+    // Case-insensitive name → id lookup. Built once per call.
+    let lookup: std::collections::HashMap<String, i32> = tag_vocab
+        .iter()
+        .map(|(id, name)| (name.trim().to_lowercase(), *id))
+        .collect();
+
+    let resolve_names = |names: &[String], ids: &mut Vec<i32>, unmatched: &mut Vec<String>| {
+        for name in names {
+            let key = name.trim().to_lowercase();
+            if key.is_empty() {
+                continue;
+            }
+            match lookup.get(&key) {
+                Some(id) if !ids.contains(id) => ids.push(*id),
+                Some(_) => {} // duplicate, already collected
+                None => {
+                    if !unmatched.iter().any(|u| u.to_lowercase() == key) {
+                        unmatched.push(name.trim().to_string());
+                    }
+                }
+            }
+        }
+    };
+
+    let mut tag_ids = Vec::new();
+    let mut unmatched_tags = Vec::new();
+    resolve_names(&raw.tags, &mut tag_ids, &mut unmatched_tags);
+
+    // Excluded tags that don't match a real tag are simply ignored — you
+    // can't exclude a tag that doesn't exist, and folding them into
+    // `semantic` would make no sense.
+    let mut exclude_tag_ids = Vec::new();
+    let mut exclude_unmatched = Vec::new();
+    resolve_names(
+        &raw.exclude_tags,
+        &mut exclude_tag_ids,
+        &mut exclude_unmatched,
+    );
+
+    let clean = |s: Option<String>| s.map(|v| v.trim().to_string()).filter(|v| !v.is_empty());
+
+    StructuredQuery {
+        semantic: clean(raw.semantic),
+        tag_ids,
+        exclude_tag_ids,
+        unmatched_tags,
+        camera_make: clean(raw.camera_make),
+        camera_model: clean(raw.camera_model),
+        lens_model: clean(raw.lens_model),
+        place: clean(raw.place),
+        date_from: raw.date_from.as_deref().and_then(|d| iso_to_unix(d, false)),
+        date_to: raw.date_to.as_deref().and_then(|d| iso_to_unix(d, true)),
+        media_type: raw.media_type.as_deref().and_then(normalize_media_type),
+    }
+}
+
+/// Build the grounded system prompt. The model is told the current date (so
+/// "last summer" resolves) and the tag vocabulary — deduplicated, sorted, capped
+/// at `MAX_TAGS` — so it uses real tags or routes the concept to `semantic`.
+fn build_system_prompt(tag_vocab: &[(i32, String)], today: chrono::NaiveDate) -> String {
+    // Cap the vocab dump so a huge library doesn't blow the context window.
+    // NOTE(review): the cut follows the alphabetical sort, not tag usage — confirm intent.
+    const MAX_TAGS: usize = 400;
+    let mut names: Vec<&str> = tag_vocab.iter().map(|(_, n)| n.as_str()).collect();
+    names.sort_unstable();
+    names.dedup();
+    let shown = names.len().min(MAX_TAGS);
+    let vocab = names[..shown].join(", ");
+    let truncation = if names.len() > MAX_TAGS {
+        format!(" (showing {MAX_TAGS} of {} tags)", names.len())
+    } else {
+        String::new()
+    };
+
+    format!(
+        "You translate a user's natural-language photo-search request into a JSON \
+filter. Today's date is {today}. Respond with ONLY a JSON object, no prose, no \
+code fences.\n\n\
+Schema (all fields optional):\n\
+{{\n  \
+\"semantic\": string|null,        // visual scene/subject for image similarity search\n  \
+\"tags\": string[],               // ONLY names from the tag list below\n  \
+\"exclude_tags\": string[],       // ONLY names from the tag list below\n  \
+\"camera_make\": string|null,\n  \
+\"camera_model\": string|null,\n  \
+\"lens_model\": string|null,\n  \
+\"place\": string|null,           // a location name to look up (city, country, landmark)\n  \
+\"date_from\": \"YYYY-MM-DD\"|null,  // inclusive\n  \
+\"date_to\": \"YYYY-MM-DD\"|null,    // inclusive\n  \
+\"media_type\": \"photo\"|\"video\"|null\n\
+}}\n\n\
+Rules:\n\
+- Put descriptive/visual concepts (\"sunset\", \"crowded beach\", \"red car\") in \"semantic\".\n\
+- Only use \"tags\"/\"exclude_tags\" values that appear EXACTLY in the tag list. If a \
+concept isn't a listed tag, put it in \"semantic\" instead — never invent a tag.\n\
+- Resolve relative dates against today's date (\"last summer\", \"2023\", \"last month\").\n\
+- Put place/location names in \"place\" (not \"semantic\").\n\
+- Omit (use null / empty array) anything the request doesn't mention.\n\n\
+Available tags{truncation}: {vocab}"
+    )
+}
+
+/// Pull the JSON object out of a model response that may carry a leading
+/// `<think>` block, code fences, or trailing prose. The think block is
+/// stripped first (so braces inside the reasoning can't fool the scan); the
+/// result is the span from the first `{` through the last `}` inclusive.
+/// When there is no such span (a brace is missing, or the last `}` precedes
+/// the first `{`) the trimmed text is returned as-is, which then fails to
+/// parse with a clear error.
+fn extract_json(raw: &str) -> String {
+    let cleaned = strip_think_blocks(raw);
+    let span = cleaned.find('{').zip(cleaned.rfind('}'));
+    match span.filter(|(open, close)| close >= open) {
+        Some((open, close)) => cleaned[open..=close].to_string(),
+        None => cleaned.trim().to_string(),
+    }
+}
+
+/// Decode a model response into a [`StructuredQuery`], resolving tag names against
+/// `tag_vocab`. Split from the LLM call so it can be unit-tested without a network.
+pub fn parse_response(response: &str, tag_vocab: &[(i32, String)]) -> Result<StructuredQuery> {
+    match serde_json::from_str::<RawNlQuery>(&extract_json(response)) {
+        Ok(raw) => Ok(resolve_raw_query(raw, tag_vocab)),
+        Err(e) => Err(anyhow!("failed to parse NL query JSON: {e}; raw response: {response:?}")),
+    }
+}
+
+/// Translate a natural-language query into a [`StructuredQuery`] with a single grounded
+/// LLM call on `client` (any configured backend). `tag_vocab` grounds the tag mapping and
+/// `today` anchors relative dates; the reply is decoded by [`parse_response`].
+pub async fn translate_nl_query(
+    client: &dyn LlmClient,
+    nl: &str,
+    tag_vocab: &[(i32, String)],
+    today: chrono::NaiveDate,
+) -> Result<StructuredQuery> {
+    let prompt = build_system_prompt(tag_vocab, today);
+    let conversation = vec![ChatMessage::system(prompt), ChatMessage::user(nl)];
+    let no_tools: Vec<Tool> = Vec::new();
+    let reply = client.chat_with_tools(conversation, no_tools).await?.0;
+    parse_response(&reply.content, tag_vocab)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn vocab() -> Vec<(i32, String)> {
+        vec![
+            (1, "beach".to_string()),
+            (2, "Sunset".to_string()), // mixed case to exercise case-insensitivity
+            (3, "family".to_string()),
+        ]
+    }
+
+    #[test]
+    fn iso_to_unix_start_and_end_of_day() {
+        // 2023-01-01 UTC midnight = 1672531200.
+        assert_eq!(iso_to_unix("2023-01-01", false), Some(1_672_531_200));
+        // End of that day is 86399 seconds later.
+        assert_eq!(
+            iso_to_unix("2023-01-01", true),
+            Some(1_672_531_200 + 86_399)
+        );
+    }
+
+    #[test]
+    fn iso_to_unix_rejects_garbage() {
+        assert_eq!(iso_to_unix("last summer", false), None);
+        assert_eq!(iso_to_unix("2023-13-99", false), None);
+        assert_eq!(iso_to_unix("", false), None);
+    }
+
+    #[test]
+    fn resolve_matches_tags_case_insensitively() {
+        let raw = RawNlQuery {
+            tags: vec!["BEACH".to_string(), "sunset".to_string()],
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.tag_ids, vec![1, 2]);
+        assert!(q.unmatched_tags.is_empty());
+    }
+
+    #[test]
+    fn resolve_surfaces_unmatched_tags_not_silently_dropped() {
+        // A hallucinated / non-vocab tag must be surfaced so the caller can
+        // fold it into semantic — never silently used as a hard filter.
+        let raw = RawNlQuery {
+            tags: vec!["beach".to_string(), "golden hour".to_string()],
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.tag_ids, vec![1]);
+        assert_eq!(q.unmatched_tags, vec!["golden hour".to_string()]);
+    }
+
+    #[test]
+    fn resolve_dedups_repeated_tags() {
+        let raw = RawNlQuery {
+            tags: vec![
+                "beach".to_string(),
+                "Beach".to_string(),
+                "beach".to_string(),
+            ],
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.tag_ids, vec![1]);
+    }
+
+    #[test]
+    fn resolve_normalizes_media_type_and_dates() {
+        let raw = RawNlQuery {
+            media_type: Some("Videos".to_string()),
+            date_from: Some("2023-06-01".to_string()),
+            date_to: Some("2023-06-30".to_string()),
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.media_type.as_deref(), Some("video"));
+        assert_eq!(q.date_from, iso_to_unix("2023-06-01", false));
+        assert_eq!(q.date_to, iso_to_unix("2023-06-30", true));
+    }
+
+    #[test]
+    fn resolve_media_type_all_is_no_filter() {
+        let raw = RawNlQuery {
+            media_type: Some("all".to_string()),
+            ..Default::default()
+        };
+        assert_eq!(resolve_raw_query(raw, &vocab()).media_type, None);
+    }
+
+    #[test]
+    fn resolve_trims_and_empties_to_none() {
+        let raw = RawNlQuery {
+            semantic: Some("   ".to_string()),
+            camera_make: Some("  Fujifilm  ".to_string()),
+            place: Some("".to_string()),
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.semantic, None);
+        assert_eq!(q.camera_make.as_deref(), Some("Fujifilm"));
+        assert_eq!(q.place, None);
+    }
+
+    #[test]
+    fn parse_response_handles_code_fences_and_prose() {
+        let resp = "Here is the filter:\n```json\n{\"semantic\":\"sunset\",\"tags\":[\"beach\"]}\n```\nDone.";
+        let q = parse_response(resp, &vocab()).expect("parse");
+        assert_eq!(q.semantic.as_deref(), Some("sunset"));
+        assert_eq!(q.tag_ids, vec![1]);
+    }
+
+    #[test]
+    fn parse_response_handles_think_block_then_json() {
+        let resp = "<think>user wants beach sunsets</think>{\"tags\":[\"beach\",\"sunset\"]}";
+        let q = parse_response(resp, &vocab()).expect("parse");
+        assert_eq!(q.tag_ids, vec![1, 2]);
+    }
+
+    #[test]
+    fn parse_response_errors_on_non_json() {
+        assert!(parse_response("I cannot help with that.", &vocab()).is_err());
+    }
+
+    #[test]
+    fn build_system_prompt_includes_date_and_vocab() {
+        let today = chrono::NaiveDate::from_ymd_opt(2026, 6, 14).unwrap();
+        let prompt = build_system_prompt(&vocab(), today);
+        assert!(
+            prompt.contains("2026-06-14"),
+            "prompt should state today's date"
+        );
+        assert!(prompt.contains("beach"), "prompt should list the vocab");
+        assert!(
+            prompt.contains("never invent a tag"),
+            "prompt should warn against inventing tags"
+        );
+    }
+}
diff --git a/src/ai/tts.rs b/src/ai/tts.rs
index 08d9dcd..d6ef89d 100644
--- a/src/ai/tts.rs
+++ b/src/ai/tts.rs
@@ -23,6 +23,7 @@ use std::time::{Duration, Instant};
 use tokio::sync::Semaphore;
 use uuid::Uuid;
 
+use crate::ai::llamacpp::LlamaCppClient;
 use crate::data::Claims;
 use crate::file_types::{is_audio_file, is_video_file};
 use crate::files::is_valid_full_path;
@@ -473,6 +474,40 @@ pub struct TtsJobStatusResponse {
     pub error: Option<String>,
 }
 
+/// Synthesize speech honoring the global single-GPU serialization
+/// (`TTS_PERMIT`) and the GPU write lease, exactly as the speech-job path does.
+/// Queues on the permit rather than fast-failing, so callers wait their turn.
+/// Text goes through the same cleanup + pronunciation pipeline as the HTTP
+/// handlers (`prepare_for_tts`). Errors when nothing is left to synthesize, the
+/// permit is closed, or the backend call fails. Reused by the memory-reel
+/// pipeline to narrate segments without racing a user's TTS on the Chatterbox GPU.
+pub async fn synthesize_serialized(
+    client: &LlamaCppClient,
+    text: &str,
+    voice: Option<&str>,
+    format: &str,
+    exaggeration: Option<f32>,
+) -> anyhow::Result<Vec<u8>> {
+    let prepared = prepare_for_tts(text);
+    if prepared.is_empty() {
+        anyhow::bail!("nothing to synthesize after cleanup");
+    }
+    // Clamp to Chatterbox's documented range, matching the HTTP handlers
+    // (which clamp before forwarding; this path bypasses them).
+    let exaggeration = exaggeration.map(|x| x.clamp(0.25, 2.0));
+    // Queue rather than fast-fail (mirrors create_speech_job_handler).
+    let _permit = TTS_PERMIT
+        .acquire()
+        .await
+        .map_err(|_| anyhow::anyhow!("TTS permit closed"))?;
+    // Wait for the LLM side to release the GPU before the request timeout
+    // starts (see ai::gpu).
+    let _gpu = crate::ai::gpu::tts_lease().await;
+    client
+        .text_to_speech(&prepared, voice, format, exaggeration, None, None)
+        .await
+}
+
 /// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses.
 /// Returns 202 + a job id immediately; the synth queues on the single GPU
 /// permit (instead of fast-failing 429) and the client polls the job until
@@ -985,7 +1020,7 @@ pub async fn create_voice_from_library_handler(
     let voice_name =
         append_ref_window(&voice_name, ref_start, ref_duration.round().max(1.0) as u32);
 
-    let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) {
+    let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
         Ok(Some(l)) => l,
         Ok(None) => app_state.primary_library(),
         Err(msg) => {
diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs
index 71f2f8a..396eddc 100644
--- a/src/bin/populate_knowledge.rs
+++ b/src/bin/populate_knowledge.rs
@@ -336,6 +336,7 @@ async fn main() -> anyhow::Result<()> {
                 args.top_p,
                 args.top_k,
                 args.min_p,
+                None, // enable_thinking: leave model/template default
                 args.max_iterations,
                 None,
                 Vec::new(),
diff --git a/src/clip_search.rs b/src/clip_search.rs
index 98ea96e..7b4510e 100644
--- a/src/clip_search.rs
+++ b/src/clip_search.rs
@@ -124,65 +124,161 @@ fn dot(a: &[f32], b: &[f32]) -> f32 {
     a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
 }
 
-pub async fn search_photos(
-    state: web::Data<AppState>,
-    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
-    query: web::Query<SearchQuery>,
-) -> ActixResult<HttpResponse> {
-    let q_text = query.q.trim().to_string();
-    if q_text.is_empty() {
-        return Ok(HttpResponse::BadRequest().json(SearchError {
-            error: "query parameter `q` is required".into(),
-        }));
-    }
+/// Failure modes of [`score_photos`], kept distinct so each caller can map them to an HTTP
+/// status (CLIP down → 502, feature disabled → 503, rejected query → 400, DB failure → 500).
+#[derive(Debug)]
+pub enum ScoreError {
+    /// CLIP search isn't configured at all (no Apollo endpoint).
+    Disabled,
+    /// The query was rejected by the encoder (client error).
+    Rejected(String),
+    /// The CLIP service is transiently unavailable (upstream error).
+    Unavailable(String),
+    /// The encoder returned an embedding we couldn't decode.
+    MalformedEmbedding,
+    /// A database / index load failure.
+    Internal(String),
+}
+
+/// Result of scoring the whole library against a query embedding: the resolved model
+/// version, how many embeddings were considered, and every `(score, content_hash)` at or
+/// above threshold, sorted by descending score. Pagination and path resolution are the
+/// caller's job (see [`resolve_hits`]) so this core serves both the plain search endpoint
+/// and the unified endpoint (which filters by hash before paginating).
+#[derive(Debug)]
+pub struct ScoredPhotos {
+    pub model_version: String,
+    pub considered: usize,
+    /// `(cosine_score, content_hash)` pairs, descending by score.
+    pub hits: Vec<(f32, String)>,
+}
+
+/// Encode `q_text` via CLIP and score it against every stored embedding in
+/// the given library scope. Returns all matches at or above `threshold`,
+/// sorted by descending similarity (NaN scores are skipped). Free of HTTP
+/// concerns so it's shared by `search_photos` and the unified search endpoint.
+pub async fn score_photos(
+    state: &AppState,
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    q_text: &str,
+    library_ids: &[i32],
+    threshold: f32,
+    model_version: Option<&str>,
+) -> Result<ScoredPhotos, ScoreError> {
     if !state.clip_client.is_enabled() {
-        return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
-            error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(),
-        }));
+        return Err(ScoreError::Disabled);
     }
 
-    let limit = query.limit.clamp(1, 200);
-    let offset = query.offset;
-    let threshold = query.threshold.clamp(-1.0, 1.0);
-
-    // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms
-    // on CPU. Bail with a clear error message if Apollo's down so the
-    // user sees "service unavailable" rather than empty results.
-    let query_resp = match state.clip_client.encode_text(&q_text).await {
+    // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms on CPU.
+    let query_resp = match state.clip_client.encode_text(q_text).await {
         Ok(r) => r,
-        Err(ClipError::Permanent(e)) => {
-            return Ok(HttpResponse::BadRequest().json(SearchError {
-                error: format!("query rejected: {e}"),
-            }));
-        }
-        Err(ClipError::Transient(e)) => {
-            return Ok(HttpResponse::BadGateway().json(SearchError {
-                error: format!("CLIP service unavailable: {e}"),
-            }));
-        }
-        Err(ClipError::Disabled) => {
-            return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
-                error: "CLIP service disabled".into(),
-            }));
-        }
+        Err(ClipError::Permanent(e)) => return Err(ScoreError::Rejected(e.to_string())),
+        Err(ClipError::Transient(e)) => return Err(ScoreError::Unavailable(e.to_string())),
+        Err(ClipError::Disabled) => return Err(ScoreError::Disabled),
     };
     // decode_embedding works on raw bytes; the wire format is b64.
     let query_bytes = base64::engine::general_purpose::STANDARD
         .decode(query_resp.embedding.as_bytes())
         .unwrap_or_default();
-    let query_vec = match decode_embedding(&query_bytes) {
-        Some(v) => v,
-        None => {
-            return Ok(HttpResponse::BadGateway().json(SearchError {
-                error: "CLIP service returned a malformed query embedding".into(),
-            }));
-        }
-    };
+    let query_vec = decode_embedding(&query_bytes).ok_or(ScoreError::MalformedEmbedding)?;
 
-    // 2. Decide which library scope to search. `library_ids` (multi)
-    // wins over the legacy `library` (single) when both are present;
-    // either / both empty falls back to "every enabled library".
-    let library_ids: Vec<i32> = if let Some(raw) = query.library_ids.as_deref() {
+    // 2. Pull the (hash, embedding) matrix under the dao lock, release
+    // before scoring. The caller-supplied `model_version` (or the live
+    // engine's) forces a strict join so a mid-flight model swap can't mix
+    // geometries.
+    let ctx = opentelemetry::Context::current();
+    let rows: Vec<(String, Vec<u8>)> = {
+        let mut dao = exif_dao.lock().expect("exif dao");
+        dao.list_clip_index(
+            &ctx,
+            library_ids,
+            model_version.or(Some(&query_resp.model_version)),
+        )
+        .map_err(|e| {
+            log::warn!("clip_search: list_clip_index failed: {:?}", e);
+            ScoreError::Internal("failed to load search index".into())
+        })?
+    };
+    let considered = rows.len();
+
+    // 3. Score. NaN sims (corrupt blobs) are dropped so the final sort has a total order.
+    let mut hits: Vec<(f32, String)> = Vec::with_capacity(considered);
+    for (hash, blob) in rows {
+        let Some(emb) = decode_embedding(&blob) else {
+            continue;
+        };
+        if emb.len() != query_vec.len() {
+            continue;
+        }
+        let sim = dot(&emb, &query_vec);
+        if sim.is_nan() || sim < threshold {
+            continue;
+        }
+        hits.push((sim, hash));
+    }
+    hits.sort_by(|a, b| b.0.total_cmp(&a.0));
+
+    Ok(ScoredPhotos {
+        model_version: query_resp.model_version,
+        considered,
+        hits,
+    })
+}
+
+/// Turn a page of `(score, content_hash)` pairs into [`SearchHit`]s carrying
+/// `library_id` + `rel_path`, in the order given. A hash whose row is missing
+/// or fails to load is skipped (load failures are logged). The dao lock is
+/// held for the whole call. Shared by both endpoints.
+pub fn resolve_hits(
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    scored: &[(f32, String)],
+) -> Vec<SearchHit> {
+    if scored.is_empty() {
+        return Vec::new();
+    }
+    let ctx = opentelemetry::Context::current();
+    let wanted: Vec<String> = scored.iter().map(|pair| pair.1.clone()).collect();
+    let mut dao = exif_dao.lock().expect("exif dao");
+    let paths_by_hash = match dao.get_rel_paths_for_hashes(&ctx, &wanted) {
+        Ok(map) => map,
+        Err(e) => {
+            log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
+            std::collections::HashMap::new()
+        }
+    };
+
+    // One row lookup per hit; output order follows `scored`.
+    scored
+        .iter()
+        .filter_map(|(score, hash)| {
+            let row = match dao.find_by_content_hash(&ctx, hash) {
+                Ok(found) => found?,
+                Err(e) => {
+                    log::warn!("clip_search: find_by_content_hash failed for {hash}: {e:?}");
+                    return None;
+                }
+            };
+            // Prefer the first path reported for this hash; the ImageExif
+            // row's own path is the fallback.
+            let mapped = paths_by_hash.get(hash).and_then(|paths| paths.first().cloned());
+            Some(SearchHit {
+                library_id: row.library_id,
+                rel_path: mapped.unwrap_or(row.file_path),
+                content_hash: hash.clone(),
+                score: *score,
+            })
+        })
+        .collect()
+}
+
+/// Parse the `library_ids` (multi) / `library` (single) scope params into a
+/// deduped id list. Empty = "every enabled library". Shared so the unified
+/// endpoint scopes CLIP identically.
+pub fn parse_library_scope(
+    library_ids: Option<&str>,
+    library: Option<i32>,
+) -> Result<Vec<i32>, String> {
+    if let Some(raw) = library_ids {
         let mut out: Vec<i32> = Vec::new();
         for piece in raw.split(',') {
             let trimmed = piece.trim();
@@ -195,158 +291,92 @@ pub async fn search_photos(
                         out.push(id);
                     }
                 }
-                Err(_) => {
-                    return Ok(HttpResponse::BadRequest().json(SearchError {
-                        error: format!("invalid library_ids entry: {trimmed:?}"),
-                    }));
-                }
+                Err(_) => return Err(format!("invalid library_ids entry: {trimmed:?}")),
             }
         }
-        out
-    } else if let Some(id) = query.library {
-        vec![id]
+        Ok(out)
+    } else if let Some(id) = library {
+        Ok(vec![id])
     } else {
-        Vec::new()
-    };
+        Ok(Vec::new())
+    }
+}
 
-    // 3. Pull the (hash, embedding) matrix. Lock contention here is
-    // bounded — one big SELECT under a mutex Arc<Mutex<dyn ExifDao>>
-    // and then we release before scoring. If this becomes a hotspot
-    // we'll cache the decoded matrix in AppState with TTL.
-    let ctx = opentelemetry::Context::current();
-    let rows: Vec<(String, Vec<u8>)> = {
-        let mut dao = exif_dao.lock().expect("exif dao");
-        match dao.list_clip_index(
-            &ctx,
-            &library_ids,
-            query
-                .model_version
-                .as_deref()
-                .or(Some(&query_resp.model_version)),
-        ) {
-            Ok(r) => r,
-            Err(e) => {
-                log::warn!("clip_search: list_clip_index failed: {:?}", e);
-                return Ok(HttpResponse::InternalServerError().json(SearchError {
-                    error: "failed to load search index".into(),
-                }));
-            }
-        }
-    };
-    let considered = rows.len();
-    if considered == 0 {
-        return Ok(HttpResponse::Ok().json(SearchResponse {
-            query: q_text,
-            model_version: query_resp.model_version,
-            threshold,
-            considered,
-            total_matching: 0,
-            offset,
-            results: Vec::new(),
+pub async fn search_photos(
+    state: web::Data<AppState>,
+    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
+    query: web::Query<SearchQuery>,
+) -> ActixResult<HttpResponse> {
+    let q_text = query.q.trim().to_string();
+    if q_text.is_empty() {
+        return Ok(HttpResponse::BadRequest().json(SearchError {
+            error: "query parameter `q` is required".into(),
         }));
     }
 
-    // 4. Score. Cap the loop's transient allocation; we keep all scores
-    // and sort at the end. With ~14k entries the sort is microseconds.
-    let mut scored: Vec<(f32, String)> = Vec::with_capacity(considered);
-    for (hash, blob) in rows {
-        let Some(emb) = decode_embedding(&blob) else {
-            continue;
-        };
-        if emb.len() != query_vec.len() {
-            continue;
-        }
-        let sim = dot(&emb, &query_vec);
-        if sim < threshold {
-            continue;
-        }
-        scored.push((sim, hash));
-    }
-    scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
-    let total_matching = scored.len();
-    // Pagination — slice the sorted list at `[offset, offset+limit)`.
-    // Offsets past the end produce empty pages rather than an error so
-    // the client can stop fetching naturally on "load more" past the end.
-    let scored: Vec<(f32, String)> = if offset >= total_matching {
+    let limit = query.limit.clamp(1, 200);
+    let offset = query.offset;
+    let threshold = query.threshold.clamp(-1.0, 1.0);
+
+    let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
+        Ok(ids) => ids,
+        Err(msg) => return Ok(HttpResponse::BadRequest().json(SearchError { error: msg })),
+    };
+
+    let scored = match score_photos(
+        &state,
+        &exif_dao,
+        &q_text,
+        &library_ids,
+        threshold,
+        query.model_version.as_deref(),
+    )
+    .await
+    {
+        Ok(s) => s,
+        Err(e) => return Ok(score_error_response(e)),
+    };
+
+    let total_matching = scored.hits.len();
+    // Pagination — slice the sorted list at `[offset, offset+limit)`. Offsets
+    // past the end produce empty pages so "load more" stops naturally.
+    let page: Vec<(f32, String)> = if offset >= total_matching {
         Vec::new()
     } else {
         let end = (offset + limit).min(total_matching);
-        scored[offset..end].to_vec()
+        scored.hits[offset..end].to_vec()
     };
-
-    if scored.is_empty() {
-        return Ok(HttpResponse::Ok().json(SearchResponse {
-            query: q_text,
-            model_version: query_resp.model_version,
-            threshold,
-            considered,
-            total_matching,
-            offset,
-            results: Vec::new(),
-        }));
-    }
-
-    // 5. Resolve each surviving hash back to a `(library_id, rel_path)`.
-    // `get_rel_paths_by_hash` returns every rel_path; we pick the first
-    // one for the result. Apollo / the UI can fetch alternatives via
-    // /image/metadata when needed.
-    let hashes: Vec<String> = scored.iter().map(|(_, h)| h.clone()).collect();
-    let path_map = {
-        let mut dao = exif_dao.lock().expect("exif dao");
-        match dao.get_rel_paths_for_hashes(&ctx, &hashes) {
-            Ok(m) => m,
-            Err(e) => {
-                log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
-                return Ok(HttpResponse::InternalServerError().json(SearchError {
-                    error: "failed to resolve photo paths".into(),
-                }));
-            }
-        }
-    };
-
-    // We need (library_id, rel_path) — get_rel_paths_for_hashes only
-    // returns rel_paths. Cross-reference via find_by_content_hash to
-    // pick the library too. Single call per surviving hash; cheap at
-    // top-20.
-    let mut results = Vec::with_capacity(scored.len());
-    {
-        let mut dao = exif_dao.lock().expect("exif dao");
-        for (score, hash) in scored {
-            let row = match dao.find_by_content_hash(&ctx, &hash) {
-                Ok(Some(r)) => r,
-                Ok(None) => continue,
-                Err(e) => {
-                    log::warn!(
-                        "clip_search: find_by_content_hash failed for {}: {:?}",
-                        hash,
-                        e
-                    );
-                    continue;
-                }
-            };
-            // Prefer get_rel_paths_for_hashes's first entry if it
-            // exists (it shares semantics with `image_exif`'s natural
-            // order), falling back to the ImageExif row.
-            let rel_path = path_map
-                .get(&hash)
-                .and_then(|paths| paths.first().cloned())
-                .unwrap_or(row.file_path);
-            results.push(SearchHit {
-                library_id: row.library_id,
-                rel_path,
-                content_hash: hash,
-                score,
-            });
-        }
-    }
+    let results = resolve_hits(&exif_dao, &page);
 
     Ok(HttpResponse::Ok().json(SearchResponse {
         query: q_text,
-        model_version: query_resp.model_version,
+        model_version: scored.model_version,
         threshold,
-        considered,
+        considered: scored.considered,
         total_matching,
         offset,
         results,
     }))
 }
+
+/// Translate a [`ScoreError`] into the HTTP response `search_photos` has
+/// historically produced for that failure mode. Shared with the unified endpoint.
+pub fn score_error_response(e: ScoreError) -> HttpResponse {
+    let (mut builder, error) = match e {
+        ScoreError::Disabled => (
+            HttpResponse::ServiceUnavailable(),
+            String::from("CLIP search is disabled (no Apollo CLIP endpoint configured)"),
+        ),
+        ScoreError::Rejected(msg) => (HttpResponse::BadRequest(), format!("query rejected: {msg}")),
+        ScoreError::Unavailable(msg) => (
+            HttpResponse::BadGateway(),
+            format!("CLIP service unavailable: {msg}"),
+        ),
+        ScoreError::MalformedEmbedding => (
+            HttpResponse::BadGateway(),
+            String::from("CLIP service returned a malformed query embedding"),
+        ),
+        ScoreError::Internal(msg) => (HttpResponse::InternalServerError(), msg),
+    };
+    builder.json(SearchError { error })
+}
diff --git a/src/database/mod.rs b/src/database/mod.rs
index d063bd0..981f6a4 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -51,10 +51,12 @@ pub mod knowledge_dao;
 pub mod location_dao;
 pub mod models;
 pub mod persona_dao;
+pub mod precomputed_reel_dao;
 pub mod preview_dao;
 pub mod reconcile;
 pub mod schema;
 pub mod search_dao;
+pub mod user_ai_prefs_dao;
 
 pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
 pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
@@ -66,8 +68,10 @@ pub use knowledge_dao::{
 };
 pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
 pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};
+pub use precomputed_reel_dao::{PrecomputedReelDao, SqlitePrecomputedReelDao};
 pub use preview_dao::{PreviewDao, SqlitePreviewDao};
 pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao};
+pub use user_ai_prefs_dao::{SqliteUserAiPrefsDao, UserAiPrefsDao};
 
 pub trait UserDao {
     fn create_user(&mut self, user: &str, password: &str) -> Option<User>;
diff --git a/src/database/models.rs b/src/database/models.rs
index 62274e2..d3d5440 100644
--- a/src/database/models.rs
+++ b/src/database/models.rs
@@ -1,6 +1,7 @@
 use crate::database::schema::{
     entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs,
-    libraries, personas, photo_insights, users, video_preview_clips,
+    libraries, personas, photo_insights, precomputed_reels, user_ai_prefs, users,
+    video_preview_clips,
 };
 use serde::Serialize;
 
@@ -505,3 +506,56 @@ pub struct InsightGenerationJob {
     pub result_insight_id: Option<i32>,
     pub error_message: Option<String>,
 }
+
+// --- Precomputed reels -------------------------------------------------------
+/// Insert payload for one `precomputed_reels` row; carries no `id` field.
+#[derive(Insertable)]
+#[diesel(table_name = precomputed_reels)]
+pub struct InsertablePrecomputedReel {
+    pub span: String, // span label; the DAO tests use "day" and "week"
+    pub library_key: String, // library scope; DAO lookups filter on (span, library_key)
+    pub cache_key: String, // enumerated by `all_cache_keys` for the cache sweep
+    pub output_path: String, // output file; callers may unlink it after pruning
+    pub title: String,
+    pub media_count: i32,
+    pub render_version: i32, // `exists_fresh` matches this by equality
+    pub tz_offset_minutes: i32, // presumably minutes from UTC — TODO confirm
+    pub voice: Option<String>, // presumably the narration voice id — TODO confirm
+    pub generated_at: i64, // newest-first sort key; time unit not visible here
+}
+
+#[derive(Serialize, Queryable, Clone, Debug)]
+pub struct PrecomputedReel {
+    pub id: i32, // Queryable maps columns by position: keep field order in sync with schema.rs
+    pub span: String, // span label; the DAO tests use "day" and "week"
+    pub library_key: String, // library scope; DAO lookups filter on (span, library_key)
+    pub cache_key: String, // enumerated by `all_cache_keys` for the cache sweep
+    pub output_path: String, // output file; callers may unlink it after pruning
+    pub title: String,
+    pub media_count: i32,
+    pub render_version: i32, // `exists_fresh` matches this by equality
+    pub tz_offset_minutes: i32, // presumably minutes from UTC — TODO confirm
+    pub voice: Option<String>, // presumably the narration voice id — TODO confirm
+    pub generated_at: i64, // newest-first sort key; time unit not visible here
+}
+
+// --- User AI preferences (Section E) ----------------------------------------
+/// The single `user_ai_prefs` row; also insertable as-is (includes `id`).
+#[derive(Queryable, Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)]
+#[diesel(table_name = user_ai_prefs)]
+pub struct UserAiPrefs {
+    pub id: i32, // the DAO docs state the single row's id is always 1
+    pub voice: Option<String>, // None when no value is stored
+    pub tz_offset_minutes: Option<i32>, // presumably minutes from UTC — TODO confirm
+    pub library: Option<String>,
+    pub updated_at: i64, // presumably a last-write timestamp; unit not visible here
+}
+
+#[derive(Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)]
+#[diesel(table_name = user_ai_prefs)]
+pub struct UpsertUserAiPrefs {
+    pub voice: Option<String>, // write payload: the `UserAiPrefs` columns minus `id`
+    pub tz_offset_minutes: Option<i32>, // presumably minutes from UTC — TODO confirm
+    pub library: Option<String>,
+    pub updated_at: i64, // presumably a last-write timestamp; unit not visible here
+}
diff --git a/src/database/precomputed_reel_dao.rs b/src/database/precomputed_reel_dao.rs
new file mode 100644
index 0000000..b66573b
--- /dev/null
+++ b/src/database/precomputed_reel_dao.rs
@@ -0,0 +1,439 @@
+use diesel::prelude::*;
+use diesel::sqlite::SqliteConnection;
+use std::ops::DerefMut;
+use std::sync::{Arc, Mutex};
+
+use crate::database::models::{InsertablePrecomputedReel, PrecomputedReel};
+use crate::database::schema;
+use crate::database::{DbError, DbErrorKind, connect};
+use crate::otel::trace_db_call;
+
+/// Ledger for precomputed memory reels. The nightly agentic job writes a
+/// row after each successful render; the `GET /reels/precomputed` handler
+/// reads it to gate on freshness and serve the cached MP4.
+pub trait PrecomputedReelDao: Sync + Send {
+    /// Insert a precomputed reel row and return the new row's id. Written by
+    /// the nightly agentic job (Section D). Failures surface as `DbError`.
+    #[allow(dead_code)]
+    fn record_reel(
+        &mut self,
+        context: &opentelemetry::Context,
+        row: &InsertablePrecomputedReel,
+    ) -> Result<i32, DbError>;
+
+    /// Newest row (by `generated_at`) for (span, library_key); `None` if absent.
+    fn latest_for(
+        &mut self,
+        context: &opentelemetry::Context,
+        span: &str,
+        library_key: &str,
+    ) -> Result<Option<PrecomputedReel>, DbError>;
+
+    /// Return true when at least one reel for the given (span, library_key,
+    /// render_version) has `generated_at >= min_generated_at` (the bound is
+    /// inclusive). Meant as a fast existence gate to check before fetching
+    /// the row itself with `latest_for`.
+    fn exists_fresh(
+        &mut self,
+        context: &opentelemetry::Context,
+        span: &str,
+        library_key: &str,
+        render_version: i32,
+        min_generated_at: i64,
+    ) -> Result<bool, DbError>;
+
+    /// Delete all but the newest `keep` rows for (span, library_key), returning
+    /// the deleted rows so the caller can unlink their output files. Used by the
+    /// nightly job to retire superseded reels (e.g. yesterday's daily).
+    #[allow(dead_code)]
+    fn prune_superseded(
+        &mut self,
+        context: &opentelemetry::Context,
+        span: &str,
+        library_key: &str,
+        keep: usize,
+    ) -> Result<Vec<PrecomputedReel>, DbError>;
+
+    /// Every cache_key currently in the ledger. Used by the on-disk cache sweep
+    /// to protect files a ledger row still points at.
+    #[allow(dead_code)]
+    fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError>;
+}
+
+/// SQLite-backed [`PrecomputedReelDao`] holding one mutex-guarded connection.
+pub struct SqlitePrecomputedReelDao {
+    connection: Arc<Mutex<SqliteConnection>>,
+}
+
+impl Default for SqlitePrecomputedReelDao {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SqlitePrecomputedReelDao {
+    pub fn new() -> Self {
+        let connection = Arc::new(Mutex::new(connect()));
+        Self { connection }
+    }
+
+    #[cfg(test)]
+    pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
+        Self { connection: conn }
+    }
+}
+
+impl PrecomputedReelDao for SqlitePrecomputedReelDao {
+    fn record_reel(
+        &mut self,
+        context: &opentelemetry::Context,
+        row: &InsertablePrecomputedReel,
+    ) -> Result<i32, DbError> {
+        trace_db_call(context, "insert", "record_reel", |_span| {
+            use schema::precomputed_reels::dsl;
+
+            let mut guard = self
+                .connection
+                .lock()
+                .expect("Unable to lock PrecomputedReelDao");
+            let conn = guard.deref_mut();
+            diesel::insert_into(dsl::precomputed_reels)
+                .values(row)
+                .execute(&mut *conn)
+                .map_err(|e| anyhow::anyhow!("Failed to insert reel: {}", e))?;
+            // The new row's id is read back as the largest id currently in the table.
+            dsl::precomputed_reels
+                .select(dsl::id)
+                .order(dsl::id.desc())
+                .first::<i32>(conn)
+                .map_err(|e| anyhow::anyhow!("Failed to get reel id: {}", e))
+        })
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+    }
+
+    fn latest_for(
+        &mut self,
+        context: &opentelemetry::Context,
+        span: &str,
+        library_key: &str,
+    ) -> Result<Option<PrecomputedReel>, DbError> {
+        trace_db_call(context, "query", "latest_for", |_span| {
+            use schema::precomputed_reels::dsl;
+
+            let mut connection = self
+                .connection
+                .lock()
+                .expect("Unable to lock PrecomputedReelDao");
+            // Newest first; ties on generated_at resolve to the highest id (deterministic).
+            dsl::precomputed_reels
+                .filter(dsl::span.eq(span))
+                .filter(dsl::library_key.eq(library_key))
+                .order((dsl::generated_at.desc(), dsl::id.desc()))
+                .first::<PrecomputedReel>(connection.deref_mut())
+                .optional()
+                .map_err(|e| anyhow::anyhow!("Failed to get latest reel: {}", e))
+        })
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+    }
+
+    fn exists_fresh(
+        &mut self,
+        context: &opentelemetry::Context,
+        span: &str,
+        library_key: &str,
+        render_version: i32,
+        min_generated_at: i64,
+    ) -> Result<bool, DbError> {
+        trace_db_call(context, "query", "exists_fresh", |_span| {
+            use schema::precomputed_reels::dsl;
+
+            let mut guard = self
+                .connection
+                .lock()
+                .expect("Unable to lock PrecomputedReelDao");
+            // Count the rows matching every criterion (the timestamp bound is
+            // inclusive); any non-zero count means a fresh reel exists.
+            dsl::precomputed_reels
+                .filter(dsl::span.eq(span))
+                .filter(dsl::library_key.eq(library_key))
+                .filter(dsl::render_version.eq(render_version))
+                .filter(dsl::generated_at.ge(min_generated_at))
+                .count()
+                .get_result::<i64>(guard.deref_mut())
+                .map(|matching| matching > 0)
+                .map_err(|e| anyhow::anyhow!("Failed to check fresh reel: {}", e))
+        })
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+    }
+
+    fn prune_superseded(
+        &mut self,
+        context: &opentelemetry::Context,
+        span: &str,
+        library_key: &str,
+        keep: usize,
+    ) -> Result<Vec<PrecomputedReel>, DbError> {
+        trace_db_call(context, "delete", "prune_superseded", |_span| {
+            use schema::precomputed_reels::dsl;
+
+            let mut connection = self
+                .connection
+                .lock()
+                .expect("Unable to lock PrecomputedReelDao");
+
+            // Newest first, with id as a tie-break so equal timestamps sort the same
+            // way on every run; everything past `keep` is superseded. The table holds
+            // at most a handful of rows per (span, library), so we slice in Rust.
+            let mut rows: Vec<PrecomputedReel> = dsl::precomputed_reels
+                .filter(dsl::span.eq(span))
+                .filter(dsl::library_key.eq(library_key))
+                .order((dsl::generated_at.desc(), dsl::id.desc()))
+                .load::<PrecomputedReel>(connection.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Failed to load reels for prune: {}", e))?;
+
+            let stale = rows.split_off(rows.len().min(keep));
+            if !stale.is_empty() {
+                let ids: Vec<i32> = stale.iter().map(|r| r.id).collect();
+                diesel::delete(dsl::precomputed_reels.filter(dsl::id.eq_any(ids)))
+                    .execute(connection.deref_mut())
+                    .map_err(|e| anyhow::anyhow!("Failed to delete superseded reels: {}", e))?;
+            }
+            Ok(stale)
+        })
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+    }
+
+    fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError> {
+        trace_db_call(context, "query", "all_cache_keys", |_span| {
+            use schema::precomputed_reels::dsl;
+
+            let mut guard = self
+                .connection
+                .lock()
+                .expect("Unable to lock PrecomputedReelDao");
+            // Unfiltered projection: one cache_key per ledger row.
+            dsl::precomputed_reels
+                .select(dsl::cache_key)
+                .get_results::<String>(guard.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Failed to load cache keys: {}", e))
+        })
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use diesel::Connection;
+    use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
+
+    const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
+
+    fn setup_dao() -> SqlitePrecomputedReelDao {
+        let mut conn = SqliteConnection::establish(":memory:")
+            .expect("Unable to create in-memory db connection");
+        conn.run_pending_migrations(DB_MIGRATIONS)
+            .expect("Failure running DB migrations");
+        SqlitePrecomputedReelDao::from_connection(Arc::new(Mutex::new(conn)))
+    }
+
+    fn ctx() -> opentelemetry::Context {
+        opentelemetry::Context::new()
+    }
+
+    fn sample_row() -> InsertablePrecomputedReel {
+        InsertablePrecomputedReel {
+            span: "day".to_string(),
+            library_key: "1".to_string(),
+            cache_key: "abc123".to_string(),
+            output_path: "/tmp/reel.mp4".to_string(),
+            title: "Test Reel".to_string(),
+            media_count: 10,
+            render_version: 1,
+            tz_offset_minutes: 0,
+            voice: Some("default".to_string()),
+            generated_at: 1_000_000,
+        }
+    }
+
+    #[test]
+    fn record_reel_inserts_and_returns_id() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+        let row = sample_row();
+
+        let id = dao.record_reel(&ctx, &row).unwrap();
+        assert!(id > 0, "should return a positive id");
+    }
+
+    #[test]
+    fn record_reel_returns_increasing_ids() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+        let row = sample_row();
+
+        let id1 = dao.record_reel(&ctx, &row).unwrap();
+        let id2 = dao.record_reel(&ctx, &row).unwrap();
+        assert!(id2 > id1, "each insert should get a higher id");
+    }
+
+    #[test]
+    fn latest_for_returns_latest() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+
+        let older = InsertablePrecomputedReel {
+            generated_at: 1_000_000,
+            ..sample_row()
+        };
+        let newer = InsertablePrecomputedReel {
+            generated_at: 2_000_000,
+            ..sample_row()
+        };
+        // Insert the newer row first so insertion order (id) cannot fake the result.
+        dao.record_reel(&ctx, &newer).unwrap();
+        dao.record_reel(&ctx, &older).unwrap();
+
+        let latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap();
+        assert_eq!(latest.generated_at, 2_000_000);
+    }
+
+    #[test]
+    fn latest_for_scoped_by_span_and_library() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+
+        let day_row = InsertablePrecomputedReel {
+            span: "day".to_string(),
+            library_key: "1".to_string(),
+            generated_at: 1_000_000,
+            ..sample_row()
+        };
+        let week_row = InsertablePrecomputedReel {
+            span: "week".to_string(),
+            library_key: "1".to_string(),
+            generated_at: 2_000_000,
+            ..sample_row()
+        };
+
+        dao.record_reel(&ctx, &day_row).unwrap();
+        dao.record_reel(&ctx, &week_row).unwrap();
+
+        let day_latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap();
+        assert_eq!(day_latest.span, "day");
+
+        let week_latest = dao.latest_for(&ctx, "week", "1").unwrap().unwrap();
+        assert_eq!(week_latest.span, "week");
+
+        // Different library returns None
+        let missing = dao.latest_for(&ctx, "day", "99").unwrap();
+        assert!(missing.is_none());
+    }
+
+    #[test]
+    fn latest_for_returns_none_when_no_rows() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+
+        let result = dao.latest_for(&ctx, "day", "1").unwrap();
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn exists_fresh_returns_true_when_present() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+
+        dao.record_reel(&ctx, &sample_row()).unwrap();
+
+        let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap();
+        assert!(exists, "should find the row we just inserted");
+    }
+
+    #[test]
+    fn exists_fresh_returns_false_when_missing() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+
+        let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap();
+        assert!(!exists, "should not find anything in empty table");
+    }
+
+    #[test]
+    fn exists_fresh_respects_min_generated_at() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+
+        // sample_row() is stamped generated_at = 1_000_000.
+        dao.record_reel(&ctx, &sample_row()).unwrap();
+
+        // Cutoff before the row's timestamp — fresh.
+        assert!(dao.exists_fresh(&ctx, "day", "1", 1, 500_000).unwrap());
+        // Cutoff equal to the row's timestamp — still fresh (the gate is `>=`).
+        assert!(dao.exists_fresh(&ctx, "day", "1", 1, 1_000_000).unwrap());
+        // Cutoff after the row's timestamp — stale.
+        assert!(!dao.exists_fresh(&ctx, "day", "1", 1, 2_000_000).unwrap());
+    }
+
+    #[test]
+    fn exists_fresh_respects_render_version() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+
+        let row_v1 = InsertablePrecomputedReel {
+            render_version: 1,
+            ..sample_row()
+        };
+        dao.record_reel(&ctx, &row_v1).unwrap();
+
+        assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap());
+        assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap());
+    }
+
+    #[test]
+    fn prune_superseded_keeps_newest_and_returns_deleted() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+        // Three day/lib1 reels at increasing timestamps, plus an unrelated one.
+        for (i, key) in ["k1", "k2", "k3"].iter().enumerate() {
+            dao.record_reel(
+                &ctx,
+                &InsertablePrecomputedReel {
+                    cache_key: key.to_string(),
+                    generated_at: 1_000_000 + i as i64 * 1000,
+                    ..sample_row()
+                },
+            )
+            .unwrap();
+        }
+        let other = InsertablePrecomputedReel {
+            library_key: "2".to_string(),
+            cache_key: "other".to_string(),
+            ..sample_row()
+        };
+        dao.record_reel(&ctx, &other).unwrap();
+
+        // Keep the newest 2 of (day, "1"); k1 (oldest) is superseded.
+        let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
+        assert_eq!(deleted.len(), 1);
+        assert_eq!(deleted[0].cache_key, "k1");
+
+        // The newest 2 survive; the other-library row is untouched.
+        let keys = dao.all_cache_keys(&ctx).unwrap();
+        assert_eq!(keys.len(), 3);
+        assert!(keys.contains(&"k2".to_string()));
+        assert!(keys.contains(&"k3".to_string()));
+        assert!(keys.contains(&"other".to_string()));
+        assert!(!keys.contains(&"k1".to_string()));
+    }
+
+    #[test]
+    fn prune_superseded_noop_when_within_keep() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+        dao.record_reel(&ctx, &sample_row()).unwrap();
+        let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
+        assert!(deleted.is_empty());
+        assert_eq!(dao.all_cache_keys(&ctx).unwrap().len(), 1);
+    }
+}
diff --git a/src/database/schema.rs b/src/database/schema.rs
index bf5791b..846542d 100644
--- a/src/database/schema.rs
+++ b/src/database/schema.rs
@@ -266,6 +266,16 @@ diesel::table! {
     }
 }
 
+diesel::table! {
+    user_ai_prefs (id) {
+        id -> Integer,
+        voice -> Nullable<Text>,
+        tz_offset_minutes -> Nullable<Integer>,
+        library -> Nullable<Text>,
+        updated_at -> BigInt,
+    }
+}
+
 diesel::table! {
     video_preview_clips (id) {
         id -> Integer,
@@ -294,6 +304,22 @@ diesel::table! {
     }
 }
 
+diesel::table! {
+    precomputed_reels (id) {
+        id -> Integer,
+        span -> Text,
+        library_key -> Text,
+        cache_key -> Text,
+        output_path -> Text,
+        title -> Text,
+        media_count -> Integer,
+        render_version -> Integer,
+        tz_offset_minutes -> Integer,
+        voice -> Nullable<Text>,
+        generated_at -> BigInt,
+    }
+}
+
 diesel::joinable!(entity_facts -> photo_insights (source_insight_id));
 diesel::joinable!(entity_photo_links -> entities (entity_id));
 diesel::joinable!(entity_photo_links -> libraries (library_id));
@@ -322,9 +348,11 @@ diesel::allow_tables_to_appear_in_same_query!(
     personas,
     persons,
     photo_insights,
+    precomputed_reels,
     search_history,
     tagged_photo,
     tags,
+    user_ai_prefs,
     users,
     video_preview_clips,
 );
diff --git a/src/database/user_ai_prefs_dao.rs b/src/database/user_ai_prefs_dao.rs
new file mode 100644
index 0000000..129ef0c
--- /dev/null
+++ b/src/database/user_ai_prefs_dao.rs
@@ -0,0 +1,206 @@
+use diesel::prelude::*;
+use diesel::sqlite::SqliteConnection;
+use std::ops::DerefMut;
+use std::sync::{Arc, Mutex};
+
+use crate::database::models::{UpsertUserAiPrefs, UserAiPrefs};
+use crate::database::schema;
+use crate::database::{DbError, DbErrorKind, connect};
+use crate::otel::trace_db_call;
+
+/// Persistence for the single-row `user_ai_prefs` table, which passively
+/// mirrors the latest client AI request parameters (voice, timezone, library).
+/// Read by the nightly pre-generation scheduler (Section D) to pick up prefs.
+pub trait UserAiPrefsDao: Sync + Send {
+    /// Read the single row; `Ok(None)` when it hasn't been populated yet.
+    fn get_prefs(
+        &mut self,
+        context: &opentelemetry::Context,
+    ) -> Result<Option<UserAiPrefs>, DbError>;
+
+    /// Upsert the single row (id is always 1), overwriting every column.
+    #[allow(dead_code)] // NOTE(review): presumably no non-test caller yet — confirm
+    fn upsert_prefs(
+        &mut self,
+        context: &opentelemetry::Context,
+        prefs: &UpsertUserAiPrefs,
+    ) -> Result<(), DbError>;
+}
+
+pub struct SqliteUserAiPrefsDao {
+    connection: Arc<Mutex<SqliteConnection>>, // locked for the duration of each DAO call
+}
+
+impl Default for SqliteUserAiPrefsDao {
+    fn default() -> Self {
+        SqliteUserAiPrefsDao::new() // same as `new()`, so this also opens a connection
+    }
+}
+
+impl SqliteUserAiPrefsDao {
+    /// Open a connection via `connect()` and wrap it for shared, locked access.
+    pub fn new() -> Self {
+        let connection = Arc::new(Mutex::new(connect()));
+        Self { connection }
+    }
+
+    #[cfg(test)]
+    pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
+        Self { connection: conn }
+    }
+}
+
+impl UserAiPrefsDao for SqliteUserAiPrefsDao {
+    fn get_prefs(
+        &mut self,
+        context: &opentelemetry::Context,
+    ) -> Result<Option<UserAiPrefs>, DbError> {
+        // The table holds at most one row, so the first row (if any) is the answer.
+        let traced = trace_db_call(context, "query", "get_prefs", |_span| {
+            use schema::user_ai_prefs::dsl;
+
+            let mut guard = self
+                .connection
+                .lock()
+                .expect("Unable to lock UserAiPrefsDao");
+            let lookup = dsl::user_ai_prefs
+                .first::<UserAiPrefs>(guard.deref_mut())
+                .optional();
+            lookup.map_err(|err| anyhow::anyhow!("Failed to get prefs: {}", err))
+        });
+        traced.map_err(|err| DbError::log(DbErrorKind::QueryError, err))
+    }
+
+    fn upsert_prefs(
+        &mut self,
+        context: &opentelemetry::Context,
+        prefs: &UpsertUserAiPrefs,
+    ) -> Result<(), DbError> {
+        let traced = trace_db_call(context, "upsert", "upsert_prefs", |_span| {
+            use schema::user_ai_prefs::dsl;
+
+            let mut guard = self
+                .connection
+                .lock()
+                .expect("Unable to lock UserAiPrefsDao");
+
+            // The table only ever holds the row with id=1, so insert with that
+            // id pinned and let the conflict on `id` turn every later call into
+            // an in-place update (a second row would trip the CHECK(id=1)
+            // constraint). Doing it as one statement also means a genuine
+            // insert failure is reported rather than masked by a fallback
+            // update. Each column is assigned explicitly instead of through
+            // AsChangeset so that `None` writes NULL: the latest request wins.
+            diesel::insert_into(dsl::user_ai_prefs)
+                .values((dsl::id.eq(1), prefs))
+                .on_conflict(dsl::id)
+                .do_update()
+                .set((
+                    dsl::voice.eq(&prefs.voice),
+                    dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes),
+                    dsl::library.eq(&prefs.library),
+                    dsl::updated_at.eq(&prefs.updated_at),
+                ))
+                .execute(guard.deref_mut())
+                .map(|_rows_written| ())
+                .map_err(|err| anyhow::anyhow!("Failed to upsert prefs: {}", err))
+        });
+        traced.map_err(|err| DbError::log(DbErrorKind::InsertError, err))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use diesel::Connection;
+    use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
+
+    const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
+
+    fn setup_dao() -> SqliteUserAiPrefsDao {
+        let mut db = SqliteConnection::establish(":memory:")
+            .expect("Unable to create in-memory db connection");
+        db.run_pending_migrations(DB_MIGRATIONS)
+            .expect("Failure running DB migrations");
+        SqliteUserAiPrefsDao::from_connection(Arc::new(Mutex::new(db)))
+    }
+
+    fn ctx() -> opentelemetry::Context {
+        opentelemetry::Context::new()
+    }
+
+    #[test]
+    fn get_prefs_returns_none_when_empty() {
+        // Nothing has been upserted yet, so there is no row to return.
+        let stored = setup_dao().get_prefs(&ctx()).unwrap();
+        assert!(stored.is_none());
+    }
+
+    #[test]
+    fn upsert_prefs_inserts_row() {
+        let mut dao = setup_dao();
+        let stamp = 1_700_000_000i64;
+        let wanted = UpsertUserAiPrefs {
+            voice: Some(String::from("grandma")),
+            tz_offset_minutes: Some(-480),
+            library: Some(String::from("1")),
+            updated_at: stamp,
+        };
+        dao.upsert_prefs(&ctx(), &wanted).unwrap();
+
+        let stored = dao.get_prefs(&ctx()).unwrap().unwrap();
+        assert_eq!(stored.id, 1);
+        assert_eq!(stored.voice.as_deref(), Some("grandma"));
+        assert_eq!(stored.tz_offset_minutes, Some(-480));
+        assert_eq!(stored.library.as_deref(), Some("1"));
+        assert_eq!(stored.updated_at, stamp);
+    }
+
+    #[test]
+    fn upsert_prefs_replaces_existing() {
+        let mut dao = setup_dao();
+        let (first_stamp, second_stamp) = (1_700_000_000i64, 1_800_000_000i64);
+
+        let initial = UpsertUserAiPrefs {
+            voice: Some(String::from("grandma")),
+            tz_offset_minutes: Some(-480),
+            library: Some(String::from("1")),
+            updated_at: first_stamp,
+        };
+        dao.upsert_prefs(&ctx(), &initial).unwrap();
+
+        // The second write changes every column and clears `library`.
+        let replacement = UpsertUserAiPrefs {
+            voice: Some(String::from("dad")),
+            tz_offset_minutes: Some(-300),
+            library: None,
+            updated_at: second_stamp,
+        };
+        dao.upsert_prefs(&ctx(), &replacement).unwrap();
+
+        let stored = dao.get_prefs(&ctx()).unwrap().unwrap();
+        assert_eq!(stored.voice.as_deref(), Some("dad"));
+        assert_eq!(stored.tz_offset_minutes, Some(-300));
+        assert_eq!(stored.library, None);
+        assert_eq!(stored.updated_at, second_stamp);
+    }
+
+    #[test]
+    fn upsert_partial_fields() {
+        let mut dao = setup_dao();
+
+        // Only the timezone is supplied; both optional text columns stay NULL.
+        let tz_only = UpsertUserAiPrefs {
+            voice: None,
+            tz_offset_minutes: Some(-480),
+            library: None,
+            updated_at: 1_700_000_000i64,
+        };
+        dao.upsert_prefs(&ctx(), &tz_only).unwrap();
+
+        let stored = dao.get_prefs(&ctx()).unwrap().unwrap();
+        assert_eq!(stored.tz_offset_minutes, Some(-480));
+        assert!(stored.voice.is_none());
+        assert!(stored.library.is_none());
+    }
+}
diff --git a/src/duplicates.rs b/src/duplicates.rs
index 372415b..32ed92b 100644
--- a/src/duplicates.rs
+++ b/src/duplicates.rs
@@ -234,7 +234,7 @@ async fn list_exact_handler(
     let span = global_tracer().start_with_context("duplicates.list_exact", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
 
-    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
         .ok()
         .flatten()
         .map(|l| l.id);
@@ -265,7 +265,7 @@ async fn list_perceptual_handler(
     let span = global_tracer().start_with_context("duplicates.list_perceptual", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
 
-    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
         .ok()
         .flatten()
         .map(|l| l.id);
@@ -449,7 +449,7 @@ async fn list_folder_pairs_handler(
     let span = global_tracer().start_with_context("duplicates.list_folder_pairs", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
 
-    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
         .ok()
         .flatten()
         .map(|l| l.id);
diff --git a/src/faces.rs b/src/faces.rs
index 3288aa3..f619966 100644
--- a/src/faces.rs
+++ b/src/faces.rs
@@ -1755,7 +1755,7 @@ async fn stats_handler<D: FaceDao>(
     let span = global_tracer().start_with_context("faces.stats", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
 
-    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
         .ok()
         .flatten()
         .map(|l| l.id);
@@ -1782,11 +1782,12 @@ async fn list_faces_handler<D: FaceDao>(
     let normalized_path = normalize_path(&query.path);
     // resolve_library_param returns Option<&Library>; clone so the result
     // is owned (matching the primary_library fallback's type).
-    let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref())
-        .ok()
-        .flatten()
-        .cloned()
-        .unwrap_or_else(|| app_state.primary_library().clone());
+    let library: Library =
+        libraries::resolve_library_param_state(&app_state, query.library.as_deref())
+            .ok()
+            .flatten()
+            .cloned()
+            .unwrap_or_else(|| app_state.primary_library().clone());
 
     let mut dao = face_dao.lock().expect("face dao lock");
     let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
@@ -1870,7 +1871,7 @@ async fn create_face_handler<D: FaceDao>(
     }
 
     let normalized_path = normalize_path(&body.path);
-    let library: Library = match libraries::resolve_library_param(
+    let library: Library = match libraries::resolve_library_param_state(
         &app_state,
         body.library.as_ref().map(|i| i.to_string()).as_deref(),
     ) {
@@ -2192,7 +2193,7 @@ async fn list_persons_handler<D: FaceDao>(
     let span = global_tracer().start_with_context("persons.list", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
 
-    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
         .ok()
         .flatten()
         .map(|l| l.id);
@@ -2345,7 +2346,7 @@ async fn person_faces_handler<D: FaceDao>(
     let context = extract_context_from_request(&request);
     let span = global_tracer().start_with_context("persons.faces", &context);
     let span_context = opentelemetry::Context::current_with_span(span);
-    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
         .ok()
         .flatten()
         .map(|l| l.id);
diff --git a/src/files.rs b/src/files.rs
index 59cd49e..920540e 100644
--- a/src/files.rs
+++ b/src/files.rs
@@ -275,14 +275,14 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
     // Resolve the optional library filter. Unknown values return 400. A
     // `None` result means "union across all libraries" and downstream
     // walks iterate every configured library root.
-    let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref())
-    {
-        Ok(lib) => lib,
-        Err(msg) => {
-            log::warn!("Rejecting /photos request: {}", msg);
-            return HttpResponse::BadRequest().body(msg);
-        }
-    };
+    let library =
+        match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
+            Ok(lib) => lib,
+            Err(msg) => {
+                log::warn!("Rejecting /photos request: {}", msg);
+                return HttpResponse::BadRequest().body(msg);
+            }
+        };
 
     let span_context = opentelemetry::Context::current_with_span(span);
 
@@ -1238,7 +1238,7 @@ pub async fn list_exif_summary(
     // Resolve the library filter up front so a bad id/name 400s before we
     // ever take the DAO mutex. None == union across all libraries.
     let library_filter =
-        match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) {
+        match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
             Ok(lib) => lib.map(|l| l.id),
             Err(msg) => {
                 span.set_status(Status::error(msg.clone()));
diff --git a/src/geo.rs b/src/geo.rs
index 46cc1dc..b54f609 100644
--- a/src/geo.rs
+++ b/src/geo.rs
@@ -1,4 +1,5 @@
 /// Geographic calculation utilities for GPS-based search
+use serde::Deserialize;
 use std::f64;
 
 /// Calculate distance between two GPS coordinates using the Haversine formula.
@@ -61,6 +62,140 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f
     )
 }
 
+/// A place resolved from a free-text query via forward geocoding.
+///
+/// The filter pipeline searches a *circle* (`gps_lat`/`gps_lon`/
+/// `gps_radius_km`), but a place can be anything from a single address to
+/// a whole country. Nominatim's bounding box is collapsed into a circle
+/// that covers it (see [`bbox_to_circle`]) so "Portland" and "Italy" both
+/// map onto the existing circle filter without a schema change.
+#[derive(Debug, Clone, PartialEq)]
+pub struct GeoPlace {
+    /// Nominatim's `display_name` for the top match (e.g. "Italia").
+    pub display_name: String,
+    /// Circle-centre latitude in degrees: bbox midpoint, else Nominatim's own point.
+    pub lat: f64,
+    /// Circle-centre longitude in degrees: bbox midpoint, else Nominatim's own point.
+    pub lon: f64,
+    /// Radius (km) of a circle centred on (`lat`, `lon`) that covers the
+    /// matched area. Floored to [`MIN_PLACE_RADIUS_KM`] so a point result
+    /// (whose bounding box is microscopic) still yields a usable circle.
+    pub radius_km: f64,
+}
+
+/// Minimum radius, in kilometres, for a geocoded place. Point results (a
+/// street address) come back with a near-zero bounding box; `forward_geocode`
+/// clamps up to this value so the circle filter still has area to match in.
+pub const MIN_PLACE_RADIUS_KM: f64 = 0.5;
+
+/// Collapse a bounding box into a centre point plus a covering radius (km).
+///
+/// Input is Nominatim's `boundingbox` order: `(south_lat, north_lat,
+/// west_lon, east_lon)`; `west > east` is read as crossing the antimeridian.
+/// The radius is the *largest* great-circle distance from the centre to any
+/// of the four corners — they aren't equidistant, because meridians converge
+/// toward the poles. Returns `(center_lat, center_lon, radius_km)`, exact and
+/// unfloored; callers apply [`MIN_PLACE_RADIUS_KM`] when building a filter.
+pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) {
+    let center_lat = (south + north) / 2.0;
+    // A wrapped box (`west > east`) is averaged with `east` unwrapped by 360°,
+    // then the midpoint is folded back so it lands on the box's side of the globe.
+    let east_unwrapped = if west > east { east + 360.0 } else { east };
+    let mid = (west + east_unwrapped) / 2.0;
+    let center_lon = if mid > 180.0 { mid - 360.0 } else { mid };
+    let radius_km = [(south, west), (south, east), (north, west), (north, east)]
+        .iter()
+        .map(|(clat, clon)| haversine_distance(center_lat, center_lon, *clat, *clon))
+        .fold(0.0_f64, f64::max);
+    (center_lat, center_lon, radius_km)
+}
+
+/// One element of the JSON array returned by Nominatim `/search`. `lat`/`lon`
+/// arrive as strings and `boundingbox` as `[south, north, west, east]` strings.
+#[derive(Deserialize)]
+struct NominatimSearchResult {
+    lat: String,
+    lon: String,
+    display_name: String,
+    boundingbox: Option<[String; 4]>, // optional: a missing box falls back to lat/lon
+}
+
+/// Forward-geocode a free-text place name to a [`GeoPlace`] via the public
+/// OpenStreetMap Nominatim `/search` endpoint. Blank queries return `None`.
+///
+/// Mirrors `InsightGenerator::reverse_geocode`'s error posture: any network,
+/// HTTP, timeout (10 s), or parse failure returns `None` rather than
+/// propagating, so a flaky geocoder degrades the query to "no location filter".
+///
+/// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1
+/// request/second; callers doing this interactively should cache results.
+pub async fn forward_geocode(query: &str) -> Option<GeoPlace> {
+    let q = query.trim();
+    if q.is_empty() {
+        return None;
+    }
+
+    let client = reqwest::Client::new();
+    let response = match client
+        .get("https://nominatim.openstreetmap.org/search")
+        .query(&[("format", "json"), ("limit", "1"), ("q", q)])
+        .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent
+        // Cap the whole exchange so a hung geocoder yields `None`, not a hung search.
+        .timeout(std::time::Duration::from_secs(10))
+        .send()
+        .await
+    {
+        Ok(resp) => resp,
+        Err(e) => {
+            log::warn!("Forward geocoding network error for {q:?}: {e}");
+            return None;
+        }
+    };
+
+    let status = response.status();
+    if !status.is_success() {
+        log::warn!("Forward geocoding HTTP error for {q:?}: {status}");
+        return None;
+    }
+
+    let results: Vec<NominatimSearchResult> = match response.json().await {
+        Ok(r) => r,
+        Err(e) => {
+            log::warn!("Forward geocoding JSON parse error for {q:?}: {e}");
+            return None;
+        }
+    };
+
+    let top = results.into_iter().next()?;
+    let lat: f64 = top.lat.parse().ok()?;
+    let lon: f64 = top.lon.parse().ok()?;
+
+    // Prefer the bounding box (handles large places); fall back to a
+    // point + floor radius when Nominatim omits it or it fails to parse.
+    let (center_lat, center_lon, radius_km) = match &top.boundingbox {
+        Some([s, n, w, e]) => match (s.parse(), n.parse(), w.parse(), e.parse()) {
+            (Ok(s), Ok(n), Ok(w), Ok(e)) => bbox_to_circle(s, n, w, e),
+            _ => (lat, lon, 0.0),
+        },
+        None => (lat, lon, 0.0),
+    };
+
+    let place = GeoPlace {
+        display_name: top.display_name,
+        lat: center_lat,
+        lon: center_lon,
+        radius_km: radius_km.max(MIN_PLACE_RADIUS_KM),
+    };
+    log::info!(
+        "Forward geocoded {q:?} -> {} ({:.4}, {:.4}, r={:.1}km)",
+        place.display_name,
+        place.lat,
+        place.lon,
+        place.radius_km
+    );
+    Some(place)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -118,4 +253,41 @@ mod tests {
             distance
         );
     }
+
+    #[test]
+    fn test_bbox_to_circle_centroid() {
+        // A 2°×2° box centred on (10, 20): the centre must come back exactly.
+        let (lat, lon, radius) = bbox_to_circle(9.0, 11.0, 19.0, 21.0);
+        assert!(f64::abs(lat - 10.0) < 1e-9, "centroid lat, got {lat}");
+        assert!(f64::abs(lon - 20.0) < 1e-9, "centroid lon, got {lon}");
+        assert!(radius > 0.0, "radius should be positive, got {radius}");
+    }
+
+    #[test]
+    fn test_bbox_to_circle_covers_corner() {
+        // Every corner must lie within the returned radius (only the farthest touches it).
+        let (south, north, west, east) = (40.0, 42.0, -74.0, -72.0);
+        let (lat, lon, radius) = bbox_to_circle(south, north, west, east);
+        for clat in [south, north] {
+            for clon in [west, east] {
+                let d = haversine_distance(lat, lon, clat, clon);
+                assert!(
+                    d <= radius + 1e-6,
+                    "corner ({clat},{clon}) at {d}km should be within radius {radius}km"
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_bbox_to_circle_country_vs_city_scale() {
+        // The radius must track the size of the place: a country-scale box
+        // should come out more than ten times wider than a city-scale one.
+        let country = bbox_to_circle(35.5, 47.1, 6.6, 18.5).2; // ~Italy
+        let city = bbox_to_circle(45.4, 45.6, -122.8, -122.5).2; // ~Portland
+        assert!(
+            country > city * 10.0,
+            "country radius {country}km should dwarf city radius {city}km"
+        );
+    }
 }
diff --git a/src/handlers/image.rs b/src/handlers/image.rs
index f0d2310..923fff3 100644
--- a/src/handlers/image.rs
+++ b/src/handlers/image.rs
@@ -53,7 +53,7 @@ pub async fn get_image(
 
     // Resolve library from query param; default to primary so clients that
     // don't yet send `library=` continue to work.
-    let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) {
+    let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
         Ok(Some(lib)) => lib,
         Ok(None) => app_state.primary_library(),
         Err(msg) => {
@@ -492,7 +492,7 @@ pub async fn get_file_metadata(
     let span_context =
         opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
 
-    let library = libraries::resolve_library_param(&app_state, path.library.as_deref())
+    let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref())
         .ok()
         .flatten()
         .unwrap_or_else(|| app_state.primary_library());
@@ -580,7 +580,7 @@ pub async fn set_image_gps(
     let span_context =
         opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
 
-    let library = libraries::resolve_library_param(&app_state, body.library.as_deref())
+    let library = libraries::resolve_library_param_state(&app_state, body.library.as_deref())
         .ok()
         .flatten()
         .unwrap_or_else(|| app_state.primary_library());
@@ -746,7 +746,7 @@ pub async fn get_full_exif(
     let context = extract_context_from_request(&request);
     let mut span = tracer.start_with_context("get_full_exif", &context);
 
-    let library = libraries::resolve_library_param(&app_state, path.library.as_deref())
+    let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref())
         .ok()
         .flatten()
         .unwrap_or_else(|| app_state.primary_library());
@@ -888,7 +888,8 @@ pub async fn set_image_date(
     let span_context =
         opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
 
-    let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) {
+    let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref())
+    {
         Ok(Some(lib)) => lib,
         Ok(None) => app_state.primary_library(),
         Err(msg) => {
@@ -941,7 +942,8 @@ pub async fn clear_image_date(
     let span_context =
         opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
 
-    let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) {
+    let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref())
+    {
         Ok(Some(lib)) => lib,
         Ok(None) => app_state.primary_library(),
         Err(msg) => {
@@ -1001,7 +1003,7 @@ pub async fn upload_image(
     // Resolve the optional library selector. Absent → primary library
     // (backwards-compatible with clients that don't yet send `library=`).
     let target_library =
-        match libraries::resolve_library_param(&app_state, query.library.as_deref()) {
+        match libraries::resolve_library_param_state(&app_state, query.library.as_deref()) {
             Ok(Some(lib)) => lib,
             Ok(None) => app_state.primary_library(),
             Err(msg) => {
diff --git a/src/handlers/video.rs b/src/handlers/video.rs
index f9f4e64..b56a67e 100644
--- a/src/handlers/video.rs
+++ b/src/handlers/video.rs
@@ -67,10 +67,11 @@ pub async fn generate_video(
     let context = extract_context_from_request(&request);
     let mut span = tracer.start_with_context("generate_video", &context);
 
-    let preferred_library = libraries::resolve_library_param(&app_state, body.library.as_deref())
-        .ok()
-        .flatten()
-        .unwrap_or_else(|| app_state.primary_library());
+    let preferred_library =
+        libraries::resolve_library_param_state(&app_state, body.library.as_deref())
+            .ok()
+            .flatten()
+            .unwrap_or_else(|| app_state.primary_library());
 
     // Try the resolved library first, then fall back to any other library
     // that actually contains the file — handles union-mode requests where
diff --git a/src/lib.rs b/src/lib.rs
index 0ea7ddb..a228472 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -35,6 +35,7 @@ pub mod tags;
 #[cfg(test)]
 pub mod testhelpers;
 pub mod thumbnails;
+pub mod unified_search;
 pub mod utils;
 pub mod video;
 
diff --git a/src/libraries.rs b/src/libraries.rs
index 55bf5c1..377b442 100644
--- a/src/libraries.rs
+++ b/src/libraries.rs
@@ -291,11 +291,11 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) {
 }
 
 /// Resolve a library request parameter (accepts numeric id as string or name)
-/// against the configured libraries. Returns `Ok(None)` when the param is
+/// against a list of libraries. Returns `Ok(None)` when the param is
 /// absent, meaning "span all libraries". Returns `Err` when a value is
 /// provided but does not match any library.
 pub fn resolve_library_param<'a>(
-    state: &'a AppState,
+    libs: &'a [Library],
     param: Option<&str>,
 ) -> Result<Option<&'a Library>, String> {
     let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else {
@@ -303,18 +303,29 @@ pub fn resolve_library_param<'a>(
     };
 
     if let Ok(id) = raw.parse::<i32>() {
-        return state
-            .library_by_id(id)
+        return libs
+            .iter()
+            .find(|l| l.id == id)
             .map(Some)
             .ok_or_else(|| format!("unknown library id: {}", id));
     }
 
-    state
-        .library_by_name(raw)
+    libs.iter()
+        .find(|l| l.name == raw)
         .map(Some)
         .ok_or_else(|| format!("unknown library name: {}", raw))
 }
 
+/// [`resolve_library_param`] run against the libraries held by `AppState`.
+/// `Ok(None)` means the param was absent or blank ("span all libraries");
+/// `Err` carries a message when the value matches no library id or name.
+pub fn resolve_library_param_state<'a>(
+    state: &'a AppState,
+    param: Option<&str>,
+) -> Result<Option<&'a Library>, String> {
+    resolve_library_param(state.libraries.as_slice(), param)
+}
+
 /// Health of a library at a point in time. Probed at the top of each
 /// file-watcher tick. The `Stale` state is the "be conservative" signal:
 /// destructive paths (ingest writes, future move-handoff and orphan GC in
@@ -662,12 +673,6 @@ mod tests {
         assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg"));
     }
 
-    fn state_with_libraries(libs: Vec<Library>) -> AppState {
-        let mut state = AppState::test_state();
-        state.libraries = libs;
-        state
-    }
-
     fn sample_libraries() -> Vec<Library> {
         vec![
             Library {
@@ -687,52 +692,52 @@ mod tests {
         ]
     }
 
-    #[actix_rt::test]
-    async fn resolve_library_param_absent_is_union() {
-        let state = state_with_libraries(sample_libraries());
-        assert!(matches!(resolve_library_param(&state, None), Ok(None)));
+    #[test]
+    fn resolve_library_param_absent_is_union() {
+        let libs = sample_libraries();
+        assert!(matches!(resolve_library_param(&libs, None), Ok(None)));
     }
 
-    #[actix_rt::test]
-    async fn resolve_library_param_empty_or_whitespace_is_union() {
-        let state = state_with_libraries(sample_libraries());
-        assert!(matches!(resolve_library_param(&state, Some("")), Ok(None)));
+    #[test]
+    fn resolve_library_param_empty_or_whitespace_is_union() {
+        let libs = sample_libraries();
+        assert!(matches!(resolve_library_param(&libs, Some("")), Ok(None)));
         assert!(matches!(
-            resolve_library_param(&state, Some("   ")),
+            resolve_library_param(&libs, Some("   ")),
             Ok(None)
         ));
     }
 
-    #[actix_rt::test]
-    async fn resolve_library_param_numeric_id_matches() {
-        let state = state_with_libraries(sample_libraries());
-        let lib = resolve_library_param(&state, Some("7"))
+    #[test]
+    fn resolve_library_param_numeric_id_matches() {
+        let libs = sample_libraries();
+        let lib = resolve_library_param(&libs, Some("7"))
             .expect("valid id")
             .expect("some library");
         assert_eq!(lib.id, 7);
         assert_eq!(lib.name, "archive");
     }
 
-    #[actix_rt::test]
-    async fn resolve_library_param_name_matches() {
-        let state = state_with_libraries(sample_libraries());
-        let lib = resolve_library_param(&state, Some("main"))
+    #[test]
+    fn resolve_library_param_name_matches() {
+        let libs = sample_libraries();
+        let lib = resolve_library_param(&libs, Some("main"))
             .expect("valid name")
             .expect("some library");
         assert_eq!(lib.id, 1);
     }
 
-    #[actix_rt::test]
-    async fn resolve_library_param_unknown_id_errs() {
-        let state = state_with_libraries(sample_libraries());
-        let err = resolve_library_param(&state, Some("999")).unwrap_err();
+    #[test]
+    fn resolve_library_param_unknown_id_errs() {
+        let libs = sample_libraries();
+        let err = resolve_library_param(&libs, Some("999")).unwrap_err();
         assert!(err.contains("unknown library id"));
     }
 
-    #[actix_rt::test]
-    async fn resolve_library_param_unknown_name_errs() {
-        let state = state_with_libraries(sample_libraries());
-        let err = resolve_library_param(&state, Some("missing")).unwrap_err();
+    #[test]
+    fn resolve_library_param_unknown_name_errs() {
+        let libs = sample_libraries();
+        let err = resolve_library_param(&libs, Some("missing")).unwrap_err();
         assert!(err.contains("unknown library name"));
     }
 
diff --git a/src/main.rs b/src/main.rs
index 8b56efd..7faa959 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -54,6 +54,7 @@ mod perceptual_hash;
 mod state;
 mod tags;
 mod thumbnails;
+mod unified_search;
 mod utils;
 mod video;
 mod watcher;
@@ -62,6 +63,7 @@ mod knowledge;
 mod memories;
 mod otel;
 mod personas;
+mod reels;
 mod service;
 #[cfg(test)]
 mod testhelpers;
@@ -266,6 +268,11 @@ fn main() -> std::io::Result<()> {
             }
         }
 
+        // Spawn the nightly pre-generation scheduler (Section D).
+        reels::spawn_pregen_scheduler(app_state.clone()).await;
+        // Spawn the on-disk reel-cache sweeper (bounds pre-gen + on-demand reels).
+        reels::spawn_reel_cache_sweeper(app_state.clone()).await;
+
         HttpServer::new(move || {
             let user_dao = SqliteUserDao::new();
             let favorites_dao = SqliteFavoriteDao::new();
@@ -327,6 +334,13 @@ fn main() -> std::io::Result<()> {
                     web::resource("/photos/search")
                         .route(web::get().to(clip_search::search_photos)),
                 )
+                .service(
+                    // Unified natural-language search: LLM translates the
+                    // query into structured filters + a semantic term, then
+                    // filters constrain and CLIP ranks. See src/unified_search.rs.
+                    web::resource("/photos/search/unified")
+                        .route(web::get().to(unified_search::unified_search::<SqliteTagDao>)),
+                )
                 .service(web::resource("/file/move").post(move_file::<RealFileSystem>))
                 .service(handlers::image::get_image)
                 .service(handlers::image::upload_image)
@@ -344,6 +358,11 @@ fn main() -> std::io::Result<()> {
                 .service(handlers::image::clear_image_date)
                 .service(handlers::image::get_full_exif)
                 .service(memories::list_memories)
+                .service(reels::create_reel_handler)
+                .service(reels::reel_status_handler)
+                .service(reels::reel_video_handler)
+                .service(reels::precomputed_reel_handler)
+                .service(reels::precomputed_video_handler)
                 .service(ai::generate_insight_handler)
                 .service(ai::generate_agentic_insight_handler)
                 .service(ai::generation_status_handler)
diff --git a/src/memories.rs b/src/memories.rs
index 4b1682b..2b1f473 100644
--- a/src/memories.rs
+++ b/src/memories.rs
@@ -349,12 +349,6 @@ pub async fn list_memories(
         opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
 
     let span_mode = q.span.unwrap_or(MemoriesSpan::Day);
-    let span_token = match span_mode {
-        MemoriesSpan::Day => "day",
-        MemoriesSpan::Week => "week",
-        MemoriesSpan::Month => "month",
-    };
-    let years_back: i32 = DEFAULT_YEARS_BACK;
 
     // The SQL filter expects a signed offset in minutes from UTC; default
     // 0 (UTC) when the client didn't send a hint. We also keep a chrono
@@ -366,18 +360,66 @@ pub async fn list_memories(
         .timezone_offset_minutes
         .and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60));
 
-    debug!(
-        "list_memories: span={:?} tz_offset_min={} years_back={}",
-        span_mode, tz_offset_minutes, years_back
-    );
-
-    let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) {
-        Ok(lib) => lib,
+    let items = match gather_memory_items(
+        &app_state,
+        &exif_dao,
+        &span_context,
+        span_mode,
+        tz_offset_minutes,
+        client_timezone,
+        q.library.as_deref(),
+    ) {
+        Ok(items) => items,
         Err(msg) => {
             warn!("Rejecting /memories request: {}", msg);
             return HttpResponse::BadRequest().body(msg);
         }
     };
+
+    span.add_event(
+        "memories_scanned",
+        vec![
+            KeyValue::new("span", format!("{:?}", span_mode)),
+            KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()),
+            KeyValue::new("result_count", items.len().to_string()),
+            KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
+            KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
+        ],
+    );
+    span.set_status(Status::Ok);
+
+    HttpResponse::Ok().json(MemoriesResponse { items })
+}
+
+/// Resolve an "on this day/week/month across past years" window into an
+/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the
+/// memory-reel selector so both honour the same library resolution, per-library
+/// exclusions, timezone handling, and sort order. Returns `Err(message)` only
+/// when the `library` param is invalid (callers map that to 400); per-library
+/// query/lock failures are logged and skipped, matching the handler's
+/// best-effort behaviour.
+pub fn gather_memory_items(
+    app_state: &AppState,
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    span_context: &opentelemetry::Context,
+    span_mode: MemoriesSpan,
+    tz_offset_minutes: i32,
+    client_timezone: Option<FixedOffset>,
+    library_param: Option<&str>,
+) -> Result<Vec<MemoryItem>, String> {
+    let span_token = match span_mode {
+        MemoriesSpan::Day => "day",
+        MemoriesSpan::Week => "week",
+        MemoriesSpan::Month => "month",
+    };
+    let years_back: i32 = DEFAULT_YEARS_BACK;
+
+    debug!(
+        "gather_memory_items: span={:?} tz_offset_min={} years_back={}",
+        span_mode, tz_offset_minutes, years_back
+    );
+
+    let library = crate::libraries::resolve_library_param_state(app_state, library_param)?;
     let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
         Some(lib) => vec![lib],
         None => app_state.libraries.iter().collect(),
@@ -394,7 +436,7 @@ pub async fn list_memories(
 
         let rows = match exif_dao.lock() {
             Ok(mut dao) => match dao.get_memories_in_window(
-                &span_context,
+                span_context,
                 lib.id,
                 span_token,
                 years_back,
@@ -469,21 +511,7 @@ pub async fn list_memories(
         }
     }
 
-    let items: Vec<MemoryItem> = memories_with_dates.into_iter().map(|(m, _)| m).collect();
-
-    span.add_event(
-        "memories_scanned",
-        vec![
-            KeyValue::new("span", format!("{:?}", span_mode)),
-            KeyValue::new("years_back", years_back.to_string()),
-            KeyValue::new("result_count", items.len().to_string()),
-            KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
-            KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
-        ],
-    );
-    span.set_status(Status::Ok);
-
-    HttpResponse::Ok().json(MemoriesResponse { items })
+    Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect())
 }
 
 #[cfg(test)]
diff --git a/src/reels/mod.rs b/src/reels/mod.rs
new file mode 100644
index 0000000..afe2ced
--- /dev/null
+++ b/src/reels/mod.rs
@@ -0,0 +1,1568 @@
+//! Memory reels: render an MP4 slideshow of a selection of photos with an
+//! LLM-written, voice-cloned narration over it.
+//!
+//! Pipeline: a [`selector`] resolves *which* photos (and the reel metadata),
+//! the [`script`] module writes per-photo narration via the LLM, each line is
+//! synthesized to speech, and [`render`] assembles the stills + narration into
+//! one MP4. Jobs run in the background (mirroring the TTS speech-job registry)
+//! because a reel takes minutes; the finished MP4 is cached on disk keyed by
+//! the selection so a repeat request is instant.
+//!
+//! Phase 1 is on-demand and photos-only. The segment model is media-typed so a
+//! video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in
+//! without reworking the pipeline.
+
+pub mod render;
+pub mod script;
+pub mod selector;
+
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::sync::{LazyLock, Mutex, Mutex as StdMutex};
+use std::time::{Duration, Instant};
+
+use actix_files::NamedFile;
+use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web};
+use anyhow::{Context, anyhow};
+use chrono::{DateTime, Datelike, Timelike};
+use serde::{Deserialize, Serialize};
+use serde_json::json;
+use uuid::Uuid;
+
+use crate::data::Claims;
+use crate::database::{ExifDao, InsightDao};
+use crate::libraries::{Library, resolve_library_param};
+use crate::memories::MemoriesSpan;
+use crate::otel::extract_context_from_request;
+use crate::state::AppState;
+use selector::ReelSelector;
+
+// --- Precomputed reel age limits (hours) -------------------------------------
+
+/// Maximum age for a precomputed day reel before it's considered stale.
+const REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS: u64 = 26;
+/// Maximum age for a precomputed week reel.
+const REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS: u64 = 192;
+/// Maximum age for a precomputed month reel.
+const REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS: u64 = 768;
+
+/// How many precomputed reels to keep per (span, library). The newest is the
+/// one served; one extra is a grace window so a regen mid-flight (or a client
+/// that started a fetch just before the swap) isn't left without a file.
+const PREGEN_KEEP_PER_SCOPE: usize = 2;
+
+/// On-disk reel cache sweep: an unreferenced reel MP4 older than this is
+/// removed. Catches the on-demand cache (which has no ledger row and otherwise
+/// grows forever) and any pre-gen orphans. Tunable via `REEL_CACHE_MAX_AGE_DAYS`.
+const REEL_CACHE_MAX_AGE_DAYS_DEFAULT: u64 = 7;
+/// Interval between on-disk cache sweeps.
+const REEL_CACHE_SWEEP_INTERVAL_SECS: u64 = 24 * 3600;
+/// Transient render artifacts (`.mp4.tmp`, `.concat.txt`, orphaned sidecars)
+/// older than this are leftovers from a crashed render and safe to remove.
+const REEL_TMP_MAX_AGE_SECS: u64 = 3600;
+
+/// Turn a `library` request parameter into a stable key string: the matched
+/// library's id rendered as text, or `"all"` when the parameter is absent or
+/// the lookup fails.
+pub fn normalize_library_key(libs: &[Library], param: Option<&str>) -> String {
+    resolve_library_param(libs, param)
+        .ok()
+        .flatten()
+        .map_or_else(|| "all".to_string(), |lib| lib.id.to_string())
+}
+
+/// Best-effort: mirror the latest client reel params into `user_ai_prefs`
+/// so the nightly pre-gen scheduler can pick them up. Clock, lock and DB
+/// failures all surface as `Err` (never a panic) for the caller to discard.
+fn capture_prefs(
+    app_state: &AppState,
+    req: &web::Json<CreateReelRequest>,
+    library_param: Option<&str>,
+) -> Result<(), anyhow::Error> {
+    use crate::database::models::UpsertUserAiPrefs;
+    let now = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .context("system clock is before the Unix epoch")?
+        .as_secs();
+    let library = match library_param.filter(|p| !p.is_empty()) {
+        Some(p) => normalize_library_key(&app_state.libraries, Some(p)),
+        None => "all".to_string(),
+    };
+    let mut dao = app_state
+        .user_ai_prefs_dao
+        .lock()
+        .map_err(|_| anyhow!("user_ai_prefs_dao lock poisoned"))?;
+    dao.upsert_prefs(
+        &opentelemetry::Context::new(),
+        &UpsertUserAiPrefs {
+            voice: req.voice.clone().filter(|s| !s.is_empty()),
+            // `local_minus_utc()` is in seconds; this field is in minutes.
+            tz_offset_minutes: Some(
+                req.timezone_offset_minutes
+                    .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc() / 60),
+            ),
+            library: Some(library),
+            updated_at: now as i64,
+        },
+    )
+    .map_err(|e| anyhow::anyhow!("failed to upsert user_ai_prefs: {e}"))
+}
+
+/// Which scripting strategy to use for the reel narration.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ScripterMode {
+    /// Fast path: single LLM call via the direct client.
+    Fast,
+    /// Agentic path: resolves the backend through the InsightGenerator
+    /// (honouring LLM_BACKEND, model overrides, etc.). The pipeline retries
+    /// with the Fast path when the agentic call returns an error.
+    Agentic,
+}
+
+/// Progress callback type — receives a static-stage label.
+pub type ProgressFn<'a> = dyn Fn(&'static str) + Send + Sync + 'a;
+
+/// The media behind one shot: either a still photo or a short section of a
+/// source video (played with its live audio ducked under the narration). Each
+/// variant stores only a library-relative path and its library id; the
+/// renderer applies fixed clip framing (start/length) from constants.
+#[derive(Debug, Clone)]
+pub enum SegmentMedia {
+    Photo { rel_path: String, library_id: i32 },
+    Clip { rel_path: String, library_id: i32 },
+}
+
+impl SegmentMedia {
+    /// Library-relative path of the underlying file. Both variants carry one,
+    /// so the or-pattern below is irrefutable.
+    fn rel_path(&self) -> &str {
+        let (Self::Photo { rel_path, .. } | Self::Clip { rel_path, .. }) = self;
+        rel_path
+    }
+
+    /// Id of the library this media belongs to, whichever the variant.
+    fn library_id(&self) -> i32 {
+        let (Self::Photo { library_id, .. } | Self::Clip { library_id, .. }) = self;
+        *library_id
+    }
+}
+
+/// A beat: one narration line over its media. A photo beat holds one still (a
+/// held shot) or several (a quick burst that flashes through moments of an
+/// event while the line is read). A clip beat holds a single video clip. Either
+/// way one narration line covers the whole beat, so a week/month reel can
+/// *show* everything it spans without a narration line — and the seconds that
+/// come with it — per item.
+#[derive(Debug, Clone)]
+pub struct PlannedBeat {
+    pub media: Vec<SegmentMedia>,
+    pub date: Option<i64>,
+    pub insight_title: Option<String>,
+    pub insight_summary: Option<String>,
+    /// GPS coordinates of the lead media item, when available.
+    pub gps: Option<(f64, f64)>,
+}
+
+impl PlannedBeat {
+    /// Prompt-friendly date such as "June 12, 2019"; `None` for an undated beat.
+    pub fn date_label(&self) -> Option<String> {
+        self.date
+            .and_then(|secs| DateTime::from_timestamp(secs, 0))
+            .map(|when| when.format("%B %-d, %Y").to_string())
+    }
+
+    /// Whether the beat holds exactly one media item and that item is a clip.
+    pub fn is_clip(&self) -> bool {
+        matches!(&self.media[..], [SegmentMedia::Clip { .. }])
+    }
+}
+
+/// Reel-wide metadata the scripter uses for framing.
+#[derive(Debug, Clone)]
+pub struct ReelMeta {
+    pub span: MemoriesSpan,
+    pub years: Vec<i32>,
+}
+
+impl ReelMeta {
+    /// Natural-language phrase for the span, e.g. "on this day".
+    pub fn span_phrase(&self) -> &'static str {
+        match self.span {
+            MemoriesSpan::Day => "on this day",
+            MemoriesSpan::Week => "this week",
+            MemoriesSpan::Month => "this month",
+        }
+    }
+}
+
+// --- Job registry ------------------------------------------------------------
+//
+// In-memory, same shape as the TTS speech-job registry: a reel takes minutes,
+// too long to hold one HTTP request from a phone. POST /reels returns a job id;
+// the client polls GET /reels/{id} until the video URL appears. The heavy
+// artifact (the MP4) lives on disk, not in this map — jobs only carry status +
+// the output path. State is intentionally not durable across restarts; the
+// on-disk cache is what makes a repeat request cheap, not the registry.
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ReelJobStatus {
+    Queued,
+    Running,
+    Done,
+    Error,
+}
+
+impl ReelJobStatus {
+    fn is_terminal(self) -> bool {
+        matches!(self, Self::Done | Self::Error)
+    }
+}
+
+struct ReelJob {
+    status: ReelJobStatus,
+    /// Coarse progress label for the client ("scripting", "narrating", …).
+    stage: &'static str,
+    title: Option<String>,
+    output_path: Option<PathBuf>,
+    error: Option<String>,
+    created_at: Instant,
+    finished_at: Option<Instant>,
+    abort: Option<tokio::task::AbortHandle>,
+}
+
+/// Finished jobs linger so a client that lost connectivity can still collect
+/// the result; anything older than MAX_AGE is dropped (aborted first if somehow
+/// still running). Swept lazily on each create.
+const REEL_JOB_RESULT_TTL: Duration = Duration::from_secs(30 * 60);
+const REEL_JOB_MAX_AGE: Duration = Duration::from_secs(60 * 60);
+
+static REEL_JOBS: LazyLock<StdMutex<HashMap<Uuid, ReelJob>>> =
+    LazyLock::new(|| StdMutex::new(HashMap::new()));
+
+/// Evict jobs whose result TTL has lapsed or that exceeded the max age; an
+/// over-age job's task is aborted before its entry is dropped.
+fn sweep_stale_jobs(jobs: &mut HashMap<Uuid, ReelJob>, now: Instant) {
+    jobs.retain(|_, job| {
+        let over_age = now.duration_since(job.created_at) >= REEL_JOB_MAX_AGE;
+        if over_age && let Some(task) = job.abort.take() {
+            task.abort();
+        }
+        let finished_for = job.finished_at.map(|t| now.duration_since(t));
+        !over_age && finished_for.is_none_or(|d| d < REEL_JOB_RESULT_TTL)
+    });
+}
+
+fn with_job<R>(id: Uuid, f: impl FnOnce(&mut ReelJob) -> R) -> Option<R> {
+    REEL_JOBS.lock().unwrap().get_mut(&id).map(f)
+}
+
+/// Mark a live job as running at `stage`; terminal jobs are left untouched.
+fn set_stage(id: Uuid, stage: &'static str) {
+    with_job(id, |job| {
+        if !job.status.is_terminal() {
+            (job.status, job.stage) = (ReelJobStatus::Running, stage);
+        }
+    });
+}
+
+/// Move a job to a terminal state (first terminal write wins).
+fn finish_job(
+    id: Uuid,
+    status: ReelJobStatus,
+    title: Option<String>,
+    output_path: Option<PathBuf>,
+    error: Option<String>,
+) {
+    with_job(id, |job| {
+        if job.status.is_terminal() {
+            return;
+        }
+        job.status = status;
+        job.stage = match status {
+            ReelJobStatus::Done => "done",
+            _ => "error",
+        };
+        job.title = title;
+        job.output_path = output_path;
+        job.error = error;
+        job.finished_at = Some(Instant::now());
+        job.abort = None;
+    });
+}
+
+// --- On-disk cache -----------------------------------------------------------
+
+/// Render version: bump to invalidate every cached reel after a rendering /
+/// scripting change that should produce a fresh result.
+const RENDER_VERSION: u32 = 7;
+
+/// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump
+/// over the ~0.5 default warms up otherwise-flat narration without over-acting;
+/// tune via `REEL_TTS_EXAGGERATION` (0.25–2.0; anything else falls back to 0.6).
+fn reel_tts_exaggeration() -> f32 {
+    std::env::var("REEL_TTS_EXAGGERATION")
+        .ok()
+        .and_then(|s| s.trim().parse::<f32>().ok())
+        .filter(|x| (0.25_f32..=2.0).contains(x))
+        .unwrap_or(0.6)
+}
+
+/// Derive the on-disk cache key from everything that fixes *which* media is
+/// shown and *how* it is voiced. The (non-deterministic) narration text is
+/// deliberately excluded, so identical inputs map to the same MP4. The key is
+/// a blake3 hex digest, which is safe to use as a file name.
+fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str>) -> String {
+    let header = format!(
+        "v{}|{}|voice={}|",
+        RENDER_VERSION,
+        selector.descriptor(),
+        voice.unwrap_or("default")
+    );
+    // The P/C tag keeps a path used as a still distinct from the same path
+    // used as a video clip.
+    let entries = media.iter().map(|item| match item {
+        SegmentMedia::Photo { rel_path, library_id } => format!("P{library_id}:{rel_path}|"),
+        SegmentMedia::Clip { rel_path, library_id } => format!("C{library_id}:{rel_path}|"),
+    });
+    // Header first, then one entry per media item, in their given order.
+    let material: String = std::iter::once(header).chain(entries).collect();
+    blake3::hash(material.as_bytes()).to_hex().to_string()
+}
+
+fn reel_mp4_path(app_state: &AppState, key: &str) -> PathBuf {
+    Path::new(&app_state.reels_path).join(format!("{key}.mp4"))
+}
+
+fn reel_sidecar_path(app_state: &AppState, key: &str) -> PathBuf {
+    Path::new(&app_state.reels_path).join(format!("{key}.json"))
+}
+
+#[derive(Serialize, Deserialize)]
+struct ReelSidecar {
+    title: String,
+}
+
+// --- HTTP types --------------------------------------------------------------
+
+#[derive(Debug, Deserialize)]
+pub struct CreateReelRequest {
+    #[serde(default)]
+    pub span: Option<MemoriesSpan>,
+    #[serde(default)]
+    pub timezone_offset_minutes: Option<i32>,
+    #[serde(default)]
+    pub library: Option<String>,
+    /// Cloned TTS voice for the narration; server default when omitted.
+    #[serde(default)]
+    pub voice: Option<String>,
+    /// Cap on photos in the reel (clamped server-side).
+    #[serde(default)]
+    pub max_segments: Option<usize>,
+}
+
+#[derive(Debug, Serialize)]
+pub struct ReelJobCreatedResponse {
+    pub job_id: String,
+    pub status: ReelJobStatus,
+}
+
+#[derive(Debug, Serialize)]
+pub struct ReelStatusResponse {
+    pub job_id: String,
+    pub status: ReelJobStatus,
+    pub stage: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub title: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub video_url: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+}
+
+/// Response shape for `GET /reels/precomputed`.
+#[derive(Debug, Serialize)]
+pub struct PrecomputedReelResponse {
+    pub video_url: String,
+    pub title: String,
+}
+
+// --- Handlers ----------------------------------------------------------------
+
+/// POST /reels — start (or instantly serve from cache) a memory reel for the
+/// requested span. Returns 202 + a job id; the client polls GET /reels/{id}.
+#[post("/reels")]
+pub async fn create_reel_handler(
+    http_request: HttpRequest,
+    _claims: Claims,
+    req: web::Json<CreateReelRequest>,
+    app_state: web::Data<AppState>,
+    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
+    insight_dao: web::Data<Mutex<Box<dyn InsightDao>>>,
+) -> impl Responder {
+    let span_context = extract_context_from_request(&http_request);
+
+    if app_state.llamacpp.is_none() {
+        return HttpResponse::ServiceUnavailable().json(json!({
+            "error": "Reel narration needs the LLM/TTS backend (set LLAMA_SWAP_URL)"
+        }));
+    }
+
+    let span = req.span.unwrap_or(MemoriesSpan::Day);
+    let max_segments = req.max_segments.unwrap_or(selector::DEFAULT_MAX_SEGMENTS);
+    let selector = ReelSelector::Memories {
+        span,
+        tz_offset_minutes: req.timezone_offset_minutes.unwrap_or(0),
+        library: req.library.clone(),
+        max_segments,
+    };
+
+    // Cheap pass: resolve the media set for the cache key and the emptiness
+    // check. Insight enrichment + scripting happen in the background job.
+    let (planned, meta) = match selector::resolve(&app_state, &exif_dao, &span_context, &selector) {
+        Ok(r) => r,
+        Err(msg) => return HttpResponse::BadRequest().body(msg),
+    };
+    if planned.is_empty() {
+        return HttpResponse::UnprocessableEntity().json(json!({
+            "error": "No photo memories found for this span"
+        }));
+    }
+
+    // Flatten every media item across beats (in order) into the cache key — the
+    // key tracks exactly which photos/clips appear and in what sequence.
+    let media: Vec<SegmentMedia> = planned.iter().flat_map(|b| b.media.clone()).collect();
+    let voice = req.voice.clone().filter(|s| !s.is_empty());
+    let key = cache_key(&selector, &media, voice.as_deref());
+
+    let job_id = Uuid::new_v4();
+    log::info!(
+        "reel {job_id}: request span={:?} → {} beats, {} photos",
+        span,
+        planned.len(),
+        media.len()
+    );
+
+    // Cache hit: register an already-Done job pointing at the existing MP4 so
+    // the client's first poll returns the video URL immediately.
+    let mp4 = reel_mp4_path(&app_state, &key);
+    if mp4.exists() {
+        log::info!("reel {job_id}: cache hit, serving existing reel");
+        let title = std::fs::read(reel_sidecar_path(&app_state, &key))
+            .ok()
+            .and_then(|b| serde_json::from_slice::<ReelSidecar>(&b).ok())
+            .map(|s| s.title);
+        let mut jobs = REEL_JOBS.lock().unwrap();
+        sweep_stale_jobs(&mut jobs, Instant::now());
+        jobs.insert(
+            job_id,
+            ReelJob {
+                status: ReelJobStatus::Done,
+                stage: "done",
+                title,
+                output_path: Some(mp4),
+                error: None,
+                created_at: Instant::now(),
+                finished_at: Some(Instant::now()),
+                abort: None,
+            },
+        );
+        // Capture params for passive prefs mirror (best-effort, never fails).
+        let _ = capture_prefs(&app_state, &req, req.library.as_deref());
+        return HttpResponse::Accepted().json(ReelJobCreatedResponse {
+            job_id: job_id.to_string(),
+            status: ReelJobStatus::Done,
+        });
+    }
+
+    {
+        let mut jobs = REEL_JOBS.lock().unwrap();
+        sweep_stale_jobs(&mut jobs, Instant::now());
+        jobs.insert(
+            job_id,
+            ReelJob {
+                status: ReelJobStatus::Queued,
+                stage: "queued",
+                title: None,
+                output_path: None,
+                error: None,
+                created_at: Instant::now(),
+                finished_at: None,
+                abort: None,
+            },
+        );
+    }
+    log::info!("reel {job_id}: queued for generation");
+
+    let state = app_state.clone();
+    let insight_dao = insight_dao.clone();
+    let exif_dao = exif_dao.clone();
+    let handle = tokio::spawn(async move {
+        match run_reel_job(
+            &state,
+            &insight_dao,
+            &exif_dao,
+            job_id,
+            planned,
+            meta,
+            voice,
+            &key,
+        )
+        .await
+        {
+            Ok((title, path)) => {
+                finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None)
+            }
+            Err(e) => {
+                log::error!("reel job {job_id} failed: {e:?}");
+                finish_job(
+                    job_id,
+                    ReelJobStatus::Error,
+                    None,
+                    None,
+                    Some(format!("{e}")),
+                )
+            }
+        }
+    });
+    with_job(job_id, |job| job.abort = Some(handle.abort_handle()));
+
+    // Capture params for passive prefs mirror (best-effort, never fails).
+    let _ = capture_prefs(&app_state, &req, req.library.as_deref());
+
+    HttpResponse::Accepted().json(ReelJobCreatedResponse {
+        job_id: job_id.to_string(),
+        status: ReelJobStatus::Queued,
+    })
+}
+
+/// GET /reels/{id} — poll a reel job. Done jobs carry a `video_url`.
+#[get("/reels/{id}")]
+pub async fn reel_status_handler(_claims: Claims, path: web::Path<String>) -> impl Responder {
+    let id_str = path.into_inner();
+    let Ok(id) = Uuid::parse_str(&id_str) else {
+        return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" }));
+    };
+    let resp = with_job(id, |job| ReelStatusResponse {
+        job_id: id_str.clone(),
+        status: job.status,
+        stage: job.stage.to_string(),
+        title: job.title.clone(),
+        video_url: matches!(job.status, ReelJobStatus::Done)
+            .then(|| format!("/reels/{id_str}/video")),
+        error: job.error.clone(),
+    });
+    match resp {
+        Some(r) => HttpResponse::Ok().json(r),
+        None => HttpResponse::NotFound().json(json!({ "error": "job not found or expired" })),
+    }
+}
+
+/// GET /reels/{id}/video — stream the finished MP4 (supports range requests via
+/// NamedFile, so the mobile player can seek).
+#[get("/reels/{id}/video")]
+pub async fn reel_video_handler(
+    _claims: Claims,
+    request: HttpRequest,
+    path: web::Path<String>,
+) -> impl Responder {
+    let id_str = path.into_inner();
+    let Ok(id) = Uuid::parse_str(&id_str) else {
+        return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" }));
+    };
+    let output = with_job(id, |job| job.output_path.clone()).flatten();
+    let Some(path) = output else {
+        return HttpResponse::NotFound().json(json!({ "error": "reel not ready" }));
+    };
+    match NamedFile::open(&path) {
+        Ok(file) => file.into_response(&request),
+        Err(e) => {
+            log::error!("opening reel mp4 {path:?} failed: {e:?}");
+            HttpResponse::NotFound().json(json!({ "error": "reel file missing" }))
+        }
+    }
+}
+
+/// GET /reels/precomputed?span=&library=
+///
+/// Look up the latest precomputed reel for the given span and library key.
+/// Validity gate (all must hold, else 404):
+///   1. `render_version == RENDER_VERSION`
+///   2. `output_path` exists on disk
+///   3. age <= max_age(span) (Day 26h, Week 8d, Month 32d)
+///
+/// Returns `{ video_url: "/reels/by-key/{cache_key}/video", title }`.
+#[get("/reels/precomputed")]
+pub async fn precomputed_reel_handler(
+    _claims: Claims,
+    query: web::Query<HashMap<String, String>>,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    let span = query.get("span").map(|s| s.as_str()).unwrap_or("day");
+    let library_key = normalize_library_key(
+        &app_state.libraries,
+        query.get("library").map(|s| s.as_str()),
+    );
+
+    let now = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .expect("Time went backwards")
+        .as_secs() as i64;
+
+    let max_age_hours = match span {
+        "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS as i64,
+        "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS as i64,
+        _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS as i64,
+    };
+    let min_generated_at = now - (max_age_hours * 3600);
+
+    let ctx = opentelemetry::Context::new();
+    let mut dao = app_state
+        .precomputed_reel_dao
+        .lock()
+        .expect("Unable to lock PrecomputedReelDao");
+
+    // Fast existence gate: is there a fresh row at all?
+    if !dao
+        .exists_fresh(
+            &ctx,
+            span,
+            &library_key,
+            RENDER_VERSION as i32,
+            min_generated_at,
+        )
+        .unwrap_or(false)
+    {
+        return HttpResponse::NotFound().json(json!({ "error": "no precomputed reel found" }));
+    }
+
+    // Fetch the latest row for full validity checks.
+    let reel = match dao.latest_for(&ctx, span, &library_key) {
+        Ok(Some(r)) => r,
+        _ => {
+            return HttpResponse::NotFound().json(json!({ "error": "no precomputed reel found" }));
+        }
+    };
+
+    // Validity gate 1: render version must match.
+    if reel.render_version != RENDER_VERSION as i32 {
+        return HttpResponse::NotFound()
+            .json(json!({ "error": "precomputed reel is stale (render version mismatch)" }));
+    }
+
+    // Validity gate 2: output_path must exist.
+    let output = std::path::Path::new(&reel.output_path);
+    if !output.exists() {
+        return HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" }));
+    }
+
+    // Validity gate 3: age <= max_age (re-checked via min_generated_at).
+    if reel.generated_at < min_generated_at {
+        return HttpResponse::NotFound().json(json!({ "error": "precomputed reel has expired" }));
+    }
+
+    HttpResponse::Ok().json(PrecomputedReelResponse {
+        video_url: format!("/reels/by-key/{}/video", reel.cache_key),
+        title: reel.title,
+    })
+}
+
+/// GET /reels/by-key/{key}/video — stream a precomputed reel (404 unless `key` is a plain token).
+#[get("/reels/by-key/{key}/video")]
+pub async fn precomputed_video_handler(
+    _claims: Claims,
+    request: HttpRequest,
+    path: web::Path<String>,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    let key = path.into_inner(); // percent-decoded and client-chosen: may hold `../`
+    let plain = key.bytes().all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_'));
+    match plain.then(|| NamedFile::open(reel_mp4_path(&app_state, &key))) {
+        Some(Ok(file)) => file.into_response(&request),
+        miss => {
+            log::error!("opening precomputed reel {key} failed: {:?}", miss.and_then(Result::err));
+            HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" }))
+        }
+    }
+}
+
+// --- Pipeline ----------------------------------------------------------------
+
+/// Run the full reel pipeline: enrich → script → narrate → render → concat,
+/// then publish the MP4 into the cache. Returns (title, mp4_path).
+///
+/// The `scripter` parameter controls which narration-generation strategy is
+/// used (fast single-call vs. agentic backend resolution). On scripting
+/// failure in Agentic mode the pipeline falls back to the fast path so a
+/// single LLM failure never sinks a reel.
+///
+/// * `planned` — the beats to render; enriched in place before scripting.
+/// * `voice` — optional TTS voice id; `None` lets the TTS call pick its default.
+/// * `key` — cache key; determines the published MP4 and sidecar file names.
+/// * `progress` — optional callback that receives coarse stage labels
+///   (`"scripting"`, `"narrating"`, `"rendering"`).
+///
+/// # Errors
+///
+/// Fails when no llama.cpp client is configured, when scripting fails (after
+/// the fast-path fallback in Agentic mode), when no beat renders successfully,
+/// or when the final concat/publish step fails. Per-beat TTS, audio-write and
+/// render failures are logged and that beat is skipped. A sidecar write
+/// failure is logged but does not fail the reel.
+pub(crate) async fn produce_reel(
+    app_state: &AppState,
+    insight_dao: &Mutex<Box<dyn InsightDao>>,
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    mut planned: Vec<PlannedBeat>,
+    meta: ReelMeta,
+    voice: Option<String>,
+    key: &str,
+    scripter: ScripterMode,
+    progress: Option<&ProgressFn<'_>>,
+) -> anyhow::Result<(String, PathBuf)> {
+    let started = Instant::now();
+    let total_photos: usize = planned.iter().map(|b| b.media.len()).sum();
+    log::info!(
+        "reel produce_reel: starting — span {:?}, {} beats, {} photos, voice={}",
+        meta.span,
+        planned.len(),
+        total_photos,
+        voice.as_deref().unwrap_or("default")
+    );
+
+    // The same client is used for the fast scripter and for TTS below.
+    let client = app_state
+        .llamacpp
+        .as_ref()
+        .ok_or_else(|| anyhow::anyhow!("TTS/LLM backend not configured"))?
+        .clone();
+
+    // 1. Enrich each beat with its lead photo's cached insight, then script
+    //    (one LLM call → one narration line per beat).
+    emit_progress(progress, "scripting");
+    log::info!("reel produce_reel: scripting narration via LLM…");
+    let span_context = opentelemetry::Context::new();
+    selector::enrich(insight_dao, exif_dao, &span_context, &mut planned);
+    let script = match scripter {
+        ScripterMode::Fast => script::generate_script(&client, &meta, &planned).await?,
+        ScripterMode::Agentic => {
+            match script::generate_script_agentic(&app_state.insight_generator, &meta, &planned)
+                .await
+            {
+                Ok(s) => s,
+                Err(e) => {
+                    log::warn!(
+                        "reel produce_reel: agentic script failed, falling back to fast: {e}"
+                    );
+                    script::generate_script(&client, &meta, &planned).await?
+                }
+            }
+        }
+    };
+    log::info!(
+        "reel produce_reel: scripted \"{}\" ({} lines)",
+        script.title,
+        script.lines.len()
+    );
+    // Beats and lines are paired positionally with `zip` below, which stops at
+    // the shorter side. Make a count mismatch visible instead of silently
+    // dropping the unmatched tail.
+    if script.lines.len() != planned.len() {
+        log::warn!(
+            "reel produce_reel: script has {} lines for {} beats; unmatched beats will be dropped",
+            script.lines.len(),
+            planned.len()
+        );
+    }
+
+    // 2. Narrate each beat's line and 3. render the beat (its photos shown in
+    //    sequence under that one narration). A beat whose audio or render fails
+    //    is skipped (logged) rather than sinking the whole reel — handles an
+    //    odd HEIC/corrupt file gracefully.
+    emit_progress(progress, "narrating");
+    let work = tempfile::tempdir().context("creating reel work dir")?;
+    let nvenc = render::is_nvenc_available().await;
+    log::info!(
+        "reel produce_reel: narrating + rendering {} beats (encoder: {})",
+        planned.len(),
+        if nvenc { "nvenc" } else { "cpu" }
+    );
+    let opts = render::SegmentOpts {
+        nvenc,
+        ..Default::default()
+    };
+
+    let beat_total = planned.len();
+    let mut beat_files: Vec<String> = Vec::new();
+    for (i, (beat, line)) in planned.iter().zip(script.lines.iter()).enumerate() {
+        // Resolve the beat's media to absolute paths; drop any that don't
+        // resolve. An empty beat is skipped.
+        let paths: Vec<PathBuf> = beat
+            .media
+            .iter()
+            .filter_map(|m| resolve_media_path(app_state, m))
+            .collect();
+        if paths.is_empty() {
+            log::warn!("reel produce_reel: skipping beat {i}, no media paths resolved");
+            continue;
+        }
+
+        let audio_bytes = match crate::ai::tts::synthesize_serialized(
+            &client,
+            line,
+            voice.as_deref(),
+            "wav",
+            Some(reel_tts_exaggeration()),
+        )
+        .await
+        {
+            Ok(b) => b,
+            Err(e) => {
+                log::warn!("reel produce_reel: skipping beat {i}, TTS failed: {e}");
+                continue;
+            }
+        };
+        let audio_path = work.path().join(format!("narration_{i:03}.wav"));
+        if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await {
+            log::warn!("reel produce_reel: skipping beat {i}, writing audio failed: {e}");
+            continue;
+        }
+
+        // If the narration length can't be probed, fall back to the minimum
+        // segment length rather than failing the beat.
+        let narration_secs =
+            crate::video::ffmpeg::get_duration_seconds(&audio_path.to_string_lossy())
+                .await
+                .ok()
+                .flatten()
+                .unwrap_or(render::MIN_SEGMENT_SECONDS);
+
+        emit_progress(progress, "rendering");
+        let beat_out = work.path().join(format!("beat_{i:03}.mp4"));
+        let render_result = if beat.is_clip() {
+            log::info!(
+                "reel produce_reel: beat {}/{} — video clip, narration {:.1}s",
+                i + 1,
+                beat_total,
+                narration_secs
+            );
+            render::render_clip_beat(&paths[0], &audio_path, &beat_out, narration_secs, &opts).await
+        } else {
+            log::info!(
+                "reel produce_reel: beat {}/{} — {} photo(s), narration {:.1}s",
+                i + 1,
+                beat_total,
+                paths.len(),
+                narration_secs
+            );
+            render::render_beat(&paths, &audio_path, &beat_out, narration_secs, &opts).await
+        };
+        if let Err(e) = render_result {
+            log::warn!("reel produce_reel: skipping beat {i}, render failed: {e}");
+            continue;
+        }
+        beat_files.push(beat_out.to_string_lossy().to_string());
+    }
+
+    let segment_files = beat_files;
+    if segment_files.is_empty() {
+        return Err(anyhow!("no beats rendered successfully"));
+    }
+
+    // 4. Concat into the cache. Write to a temp name in the reels dir, then
+    // rename atomically (same filesystem) so a reader never sees a partial.
+    // Filesystem calls go through tokio::fs (as the audio write above does) so
+    // they don't block the async executor.
+    emit_progress(progress, "rendering");
+    log::info!(
+        "reel produce_reel: joining {} rendered beats into the final reel",
+        segment_files.len()
+    );
+    tokio::fs::create_dir_all(&app_state.reels_path)
+        .await
+        .context("creating reels dir")?;
+    let final_path = reel_mp4_path(app_state, key);
+    let tmp_path = final_path.with_extension("mp4.tmp");
+    render::concat_segments(&segment_files, &tmp_path).await?;
+    tokio::fs::rename(&tmp_path, &final_path)
+        .await
+        .context("publishing reel mp4")?;
+
+    // Sidecar carries the title so a future cache hit can return it without
+    // re-running the pipeline. Best-effort: the MP4 is already published, so a
+    // failed sidecar write is logged rather than failing the reel.
+    let sidecar = serde_json::to_vec(&ReelSidecar {
+        title: script.title.clone(),
+    })
+    .context("serializing reel sidecar")?;
+    if let Err(e) = tokio::fs::write(reel_sidecar_path(app_state, key), sidecar).await {
+        log::warn!("reel produce_reel: writing sidecar for {key} failed: {e}");
+    }
+
+    log::info!(
+        "reel produce_reel: done in {:.1}s — {} beats → {}",
+        started.elapsed().as_secs_f64(),
+        segment_files.len(),
+        final_path.display()
+    );
+    Ok((script.title, final_path))
+}
+
+/// Forward `stage` to the progress callback when one was supplied; a `None`
+/// callback makes this a no-op.
+fn emit_progress(progress: Option<&ProgressFn<'_>>, stage: &'static str) {
+    match progress {
+        Some(report) => {
+            report(stage);
+        }
+        None => {}
+    }
+}
+
+/// Job-tracked entry point for the reel pipeline.
+///
+/// Delegates to [`produce_reel`] with the fast scripter, wiring a progress
+/// callback that records each stage label against `job_id` via `set_stage`.
+/// Returns whatever `produce_reel` returns: `(title, mp4_path)` or its error.
+async fn run_reel_job(
+    app_state: &AppState,
+    insight_dao: &Mutex<Box<dyn InsightDao>>,
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    job_id: Uuid,
+    planned: Vec<PlannedBeat>,
+    meta: ReelMeta,
+    voice: Option<String>,
+    key: &str,
+) -> anyhow::Result<(String, PathBuf)> {
+    // Each stage label reported by the pipeline is stored on this job.
+    let report_stage = move |label: &'static str| {
+        set_stage(job_id, label);
+    };
+    let scripter = ScripterMode::Fast;
+    produce_reel(
+        app_state,
+        insight_dao,
+        exif_dao,
+        planned,
+        meta,
+        voice,
+        key,
+        scripter,
+        Some(&report_stage),
+    )
+    .await
+}
+
+/// Turn a media item (photo or clip) into an absolute path under its library
+/// root. Yields `None` when the library id is unknown or when
+/// `is_valid_full_path` rejects the library-relative path.
+fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option<PathBuf> {
+    app_state
+        .library_by_id(media.library_id())
+        .and_then(|library| {
+            let relative = media.rel_path().to_string();
+            crate::files::is_valid_full_path(&library.root_path, &relative, false)
+        })
+}
+
+// --- Nightly pre-generation scheduler (Section D) ----------------------------
+
+/// Hour of day (0-23) at which the nightly pre-gen batch fires, read from
+/// `REEL_PREGEN_HOUR`. Surrounding whitespace is ignored. An unset,
+/// unparsable, or out-of-range value (> 23) falls back to the default of 3 —
+/// out-of-range values are rejected, not clamped.
+fn pregen_run_hour() -> u32 {
+    const DEFAULT_HOUR: u32 = 3;
+    match std::env::var("REEL_PREGEN_HOUR") {
+        Ok(raw) => match raw.trim().parse::<u32>() {
+            Ok(hour) if hour <= 23 => hour,
+            _ => DEFAULT_HOUR,
+        },
+        Err(_) => DEFAULT_HOUR,
+    }
+}
+
+/// Day of week on which the weekly pre-gen runs (0=Sun, 1=Mon, ... 6=Sat),
+/// read from `REEL_PREGEN_WEEK_DOW`. Surrounding whitespace is ignored. An
+/// unset, unparsable, or out-of-range value (> 6) falls back to the default of
+/// 1 (Monday) — out-of-range values are rejected, not clamped.
+fn pregen_week_dow() -> u32 {
+    let configured = std::env::var("REEL_PREGEN_WEEK_DOW")
+        .ok()
+        .and_then(|raw| raw.trim().parse::<u32>().ok());
+    match configured {
+        Some(dow) if dow <= 6 => dow,
+        _ => 1,
+    }
+}
+
+/// Pure: number of seconds from `now` to the next `run_hour:00:00` that lies
+/// strictly in the future.
+///
+/// Works at second resolution from `now`'s wall-clock hour/minute/second. If
+/// today's target is still ahead, the result is the gap to it; if `now` is at
+/// or past the target, the result wraps to the same hour tomorrow. That wrap is
+/// what stops the scheduler from busy-looping (sleeping 0s repeatedly) when a
+/// batch finishes inside the run hour; the cost is that a boot at or after
+/// `run_hour` waits until the next day. Callers recompute this each loop
+/// iteration from `Local::now()`, so DST shifts are absorbed.
+pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime<chrono::Local>, run_hour: u32) -> u64 {
+    const SECS_PER_DAY: u32 = 86_400;
+    let elapsed_today = now.hour() * 3600 + now.minute() * 60 + now.second();
+    let target = run_hour * 3600;
+    let wait = match target.checked_sub(elapsed_today) {
+        // Target is still ahead of us today.
+        Some(remaining) if remaining > 0 => remaining,
+        // At or past the target: aim for the same hour tomorrow.
+        _ => SECS_PER_DAY - elapsed_today + target,
+    };
+    u64::from(wait)
+}
+
+/// Load pre-gen parameters as `(tz_offset_minutes, voice, library)`.
+///
+/// Resolution order:
+/// 1. The `user_ai_prefs` DB row, when the DAO lock can be taken and a row
+///    exists. Missing fields fall back to [`fixed_tz_offset`] for the offset
+///    and `"all"` for the library.
+/// 2. Environment variables `REEL_PREGEN_TZ_OFFSET_MINUTES`,
+///    `REEL_PREGEN_VOICE`, `REEL_PREGEN_LIBRARY`. Values are trimmed, and a
+///    blank value counts as unset, so `REEL_PREGEN_VOICE=` means "no voice"
+///    rather than an empty voice id.
+/// 3. Defaults: [`fixed_tz_offset`] for the offset, no voice, library `"all"`.
+fn load_pregen_params(app_state: &AppState) -> (i32, Option<String>, String) {
+    // Try DB row first
+    if let Ok(mut dao) = app_state.user_ai_prefs_dao.lock() {
+        let ctx = opentelemetry::Context::new();
+        if let Ok(Some(prefs)) = dao.get_prefs(&ctx) {
+            let tz = prefs.tz_offset_minutes.unwrap_or_else(fixed_tz_offset);
+            let voice = prefs.voice;
+            let library = prefs.library.unwrap_or_else(|| "all".to_string());
+            return (tz, voice, library);
+        }
+    }
+
+    // Read an env var, trimmed; unset or blank yields `None`.
+    let non_blank_env = |name: &str| -> Option<String> {
+        std::env::var(name)
+            .ok()
+            .map(|v| v.trim().to_string())
+            .filter(|v| !v.is_empty())
+    };
+
+    // Fall back to env (explicit offset overrides auto-detect)
+    let tz = non_blank_env("REEL_PREGEN_TZ_OFFSET_MINUTES")
+        .and_then(|v| v.parse().ok())
+        .unwrap_or_else(fixed_tz_offset);
+    let voice = non_blank_env("REEL_PREGEN_VOICE");
+    let library = non_blank_env("REEL_PREGEN_LIBRARY").unwrap_or_else(|| "all".to_string());
+    (tz, voice, library)
+}
+
+/// Timezone offset in **minutes** east of UTC used for pre-generation.
+///
+/// Reads `REEL_PREGEN_TZ_FIXED_MINUTES` (e.g. "-480" for UTC-8, US Pacific
+/// standard time) when set and parsable, falling back to the system's current
+/// local offset. Using a fixed offset avoids DST shifts changing the pre-gen
+/// schedule halfway through the year.
+fn fixed_tz_offset() -> i32 {
+    std::env::var("REEL_PREGEN_TZ_FIXED_MINUTES")
+        .ok()
+        .and_then(|v| v.trim().parse().ok())
+        // `local_minus_utc()` is in seconds; every consumer of this function
+        // expects minutes, so convert.
+        .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc() / 60)
+}
+
+/// Start the nightly pre-generation loop as a detached tokio task.
+///
+/// Does nothing (beyond logging) unless `REEL_PREGEN_ENABLED` is exactly `1`.
+/// The run hour is read once at startup. Each iteration sleeps until the next
+/// occurrence of that hour, runs one batch, and logs (without stopping the
+/// loop) if the batch fails.
+pub(crate) async fn spawn_pregen_scheduler(app_state: web::Data<AppState>) {
+    let enabled = std::env::var("REEL_PREGEN_ENABLED").ok().as_deref() == Some("1");
+    if !enabled {
+        log::info!("Reel pre-generation scheduler disabled (REEL_PREGEN_ENABLED != 1)");
+        return;
+    }
+
+    let run_hour = pregen_run_hour();
+    log::info!(
+        "Reel pre-generation scheduler enabled, running at hour {} local",
+        run_hour
+    );
+
+    tokio::spawn(async move {
+        loop {
+            // Recomputed from the wall clock every pass.
+            let wait_secs = secs_until_next_run_hour(chrono::Local::now(), run_hour);
+            log::debug!("Next pre-gen run in {}s", wait_secs);
+            tokio::time::sleep(std::time::Duration::from_secs(wait_secs)).await;
+
+            match run_pregen_batch(&app_state).await {
+                Ok(()) => {}
+                Err(e) => log::error!("Reel pre-generation batch failed: {}", e),
+            }
+        }
+    });
+}
+
+/// Run one pre-generation pass.
+///
+/// The daily reel is always attempted; the weekly reel is added when today's
+/// local weekday matches [`pregen_week_dow`], and the monthly reel when today
+/// is the 1st. A failure for one span is logged and the remaining spans still
+/// run, so this currently always returns `Ok(())`.
+async fn run_pregen_batch(app_state: &AppState) -> anyhow::Result<()> {
+    let today = chrono::Local::now();
+    // num_days_from_sunday: 0=Sun, 1=Mon, ...
+    let weekly_due = today.weekday().num_days_from_sunday() == pregen_week_dow();
+    let monthly_due = today.day() == 1;
+
+    let spans: Vec<&str> = [("day", true), ("week", weekly_due), ("month", monthly_due)]
+        .iter()
+        .filter(|(_, due)| *due)
+        .map(|(name, _)| *name)
+        .collect();
+
+    let (tz, voice, library) = load_pregen_params(app_state);
+
+    for span in spans {
+        match pregen_one(app_state, span, tz, voice.clone(), &library).await {
+            Ok(()) => {}
+            Err(e) => log::error!("Pre-gen failed for span={}: {}", span, e),
+        }
+    }
+
+    Ok(())
+}
+
+/// Pre-generate a single reel for the given span.
+///
+/// * `span` — `"day"`, `"week"` or `"month"`; any other value is treated as
+///   `"day"`.
+/// * `tz` — timezone offset in minutes, passed to the selector and recorded in
+///   the ledger.
+/// * `voice` — optional TTS voice id; part of the cache key.
+/// * `library` — library key, where `"all"` means no library filter.
+///
+/// Returns `Ok(())` without rendering when selection fails, when there are no
+/// beats, or when a fresh ledger row already points at this exact cache key
+/// and its MP4 still exists.
+///
+/// # Errors
+///
+/// Propagates failures from [`produce_reel`] and from recording the ledger
+/// row. A failure while pruning superseded rows is logged and ignored.
+async fn pregen_one(
+    app_state: &AppState,
+    span: &str,
+    tz: i32,
+    voice: Option<String>,
+    library: &str,
+) -> anyhow::Result<()> {
+    let memories_span = match span {
+        "day" => MemoriesSpan::Day,
+        "week" => MemoriesSpan::Week,
+        "month" => MemoriesSpan::Month,
+        _ => MemoriesSpan::Day,
+    };
+
+    let selector = ReelSelector::Memories {
+        span: memories_span,
+        tz_offset_minutes: tz,
+        library: if library == "all" {
+            None
+        } else {
+            Some(library.to_string())
+        },
+        // Must match the on-demand default (create_reel_handler) so the cache
+        // key — which encodes the raw max_segments — lines up and the on-demand
+        // cache-hit path serves this pre-generated reel. The client sends no
+        // max_segments, so it defaults to DEFAULT_MAX_SEGMENTS there too.
+        max_segments: selector::DEFAULT_MAX_SEGMENTS,
+    };
+
+    let exif_dao = app_state.insight_generator.exif_dao();
+    let insight_dao = app_state.insight_generator.insight_dao();
+    let ctx = opentelemetry::Context::new();
+    let (planned, reel_meta) = match selector::resolve(app_state, exif_dao, &ctx, &selector) {
+        Ok((p, m)) => (p, m),
+        Err(e) => {
+            log::warn!("Pre-gen resolve failed for span={}: {}", span, e);
+            return Ok(());
+        }
+    };
+
+    if planned.is_empty() {
+        log::info!("No beats for span={}, skipping", span);
+        return Ok(());
+    }
+
+    // Flatten every media item across beats (in order) into the cache key.
+    let media: Vec<SegmentMedia> = planned.iter().flat_map(|b| b.media.clone()).collect();
+    let key = cache_key(&selector, &media, voice.as_deref());
+    // Total media items shown (photos + clips), not beat count.
+    let media_count = media.len() as i32;
+
+    // Dedup: check if fresh ledger row exists
+    let now = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .expect("Time went backwards")
+        .as_secs() as i64;
+
+    let max_age_hours = match span {
+        "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS,
+        "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS,
+        _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS,
+    };
+    let min_generated_at = now - (max_age_hours as i64 * 3600);
+
+    // Skip only when a fresh ledger row points at THIS exact cache key (same
+    // media, params, render version) and its file still exists. Comparing the
+    // stored cache_key — not just (span, library) — means a key change from
+    // selection-logic/params drift that doesn't bump RENDER_VERSION still forces
+    // a regen within the freshness window, instead of leaving a stale row that
+    // points at an orphaned reel.
+    let already_current = {
+        let mut dao = app_state.precomputed_reel_dao.lock().expect("lock");
+        matches!(
+            dao.latest_for(&ctx, span, library),
+            Ok(Some(row))
+                if row.cache_key == key
+                    && row.render_version == RENDER_VERSION as i32
+                    && row.generated_at >= min_generated_at
+        ) && reel_mp4_path(app_state, &key).exists()
+    };
+
+    if already_current {
+        log::info!(
+            "Fresh precomputed reel already current for span={} key={}, skipping",
+            span,
+            key
+        );
+        return Ok(());
+    }
+
+    // Past the key-aware dedup above, any MP4 already at this key was NOT
+    // pre-generated by us (it has no matching ledger row) — most likely an
+    // on-demand fast-scripted reel that happens to share the key. Don't adopt
+    // it: regenerate so the precomputed reel is the agentic one. produce_reel
+    // publishes atomically, overwriting whatever is there. (The narrow
+    // render-succeeded-but-ledger-write-failed crash window just costs one
+    // redundant re-render next run.)
+    log::info!("Generating precomputed reel for span={}, key={}", span, key);
+    let (title, mp4) = produce_reel(
+        app_state,
+        insight_dao,
+        exif_dao,
+        planned,
+        reel_meta,
+        voice.clone(),
+        &key,
+        ScripterMode::Agentic,
+        None,
+    )
+    .await?;
+
+    // Record to ledger, then retire superseded reels for this (span, library)
+    // — yesterday's daily, an older render-version, etc. — keeping a small
+    // grace window. Done under one lock so the prune sees the row we just wrote.
+    let superseded = {
+        let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock");
+        reel_dao.record_reel(
+            &ctx,
+            &crate::database::models::InsertablePrecomputedReel {
+                span: span.to_string(),
+                library_key: library.to_string(),
+                cache_key: key.clone(),
+                output_path: mp4.to_string_lossy().to_string(),
+                title,
+                media_count,
+                render_version: RENDER_VERSION as i32,
+                tz_offset_minutes: tz,
+                voice: voice.clone(),
+                generated_at: now,
+            },
+        )?;
+        // Pruning is housekeeping: a failure must not fail the reel we just
+        // produced, but it should be visible in the logs.
+        match reel_dao.prune_superseded(&ctx, span, library, PREGEN_KEEP_PER_SCOPE) {
+            Ok(rows) => rows,
+            Err(e) => {
+                log::warn!("Pre-gen prune failed for span={}: {:?}", span, e);
+                Default::default()
+            }
+        }
+    };
+    for row in &superseded {
+        // An older row can point at the very file we just published (same
+        // cache key regenerated after it aged out or its file went missing).
+        // Deleting it would remove the fresh reel, so leave that file alone.
+        if Path::new(&row.output_path) == mp4.as_path() {
+            continue;
+        }
+        delete_reel_files(&row.output_path);
+    }
+    if !superseded.is_empty() {
+        log::info!(
+            "Pruned {} superseded precomputed reel(s) for span={}",
+            superseded.len(),
+            span
+        );
+    }
+
+    log::info!("Precomputed reel generated for span={}, key={}", span, key);
+    Ok(())
+}
+
+// --- On-disk cache sweep -----------------------------------------------------
+
+/// Remove a reel's MP4 and the `.json` sidecar that sits next to it (same
+/// path, extension swapped). Best-effort: errors such as a missing file are
+/// ignored.
+fn delete_reel_files(mp4_output_path: &str) {
+    let video = Path::new(mp4_output_path);
+    let sidecar = video.with_extension("json");
+    for target in [video, sidecar.as_path()] {
+        let _ = std::fs::remove_file(target);
+    }
+}
+
+/// Max age (seconds) before an unreferenced reel MP4 is swept.
+///
+/// Reads `REEL_CACHE_MAX_AGE_DAYS` (whitespace-trimmed, must be a positive
+/// integer); anything else falls back to `REEL_CACHE_MAX_AGE_DAYS_DEFAULT`.
+/// The days→seconds conversion saturates at `u64::MAX`: an absurdly large
+/// setting means "never expire" instead of overflowing (which would wrap to a
+/// small age in release builds and make the sweeper delete recent reels).
+fn reel_cache_max_age_secs() -> u64 {
+    std::env::var("REEL_CACHE_MAX_AGE_DAYS")
+        .ok()
+        .and_then(|v| v.trim().parse::<u64>().ok())
+        .filter(|d| *d > 0)
+        .unwrap_or(REEL_CACHE_MAX_AGE_DAYS_DEFAULT)
+        .saturating_mul(86_400)
+}
+
+/// Start the periodic on-disk reel-cache sweeper as a detached tokio task.
+///
+/// Independent of the pre-gen scheduler, because the on-demand cache grows
+/// whether or not pre-gen is enabled. Setting `REEL_CACHE_SWEEP_ENABLED=0`
+/// turns it off; any other value (or no value) leaves it on. The task waits
+/// 300 seconds after startup, then sweeps every
+/// `REEL_CACHE_SWEEP_INTERVAL_SECS` seconds.
+pub(crate) async fn spawn_reel_cache_sweeper(app_state: web::Data<AppState>) {
+    let opted_out = matches!(
+        std::env::var("REEL_CACHE_SWEEP_ENABLED").as_deref(),
+        Ok("0")
+    );
+    if opted_out {
+        log::info!("Reel cache sweeper disabled (REEL_CACHE_SWEEP_ENABLED=0)");
+        return;
+    }
+    tokio::spawn(async move {
+        // Let startup settle before the first pass.
+        tokio::time::sleep(Duration::from_secs(300)).await;
+        loop {
+            match sweep_reel_cache(&app_state) {
+                0 => {}
+                removed => log::info!("Reel cache sweep removed {removed} stale file(s)"),
+            }
+            tokio::time::sleep(Duration::from_secs(REEL_CACHE_SWEEP_INTERVAL_SECS)).await;
+        }
+    });
+}
+
+/// One sweep of `reels_path`. Removes: stale render artifacts (`.mp4.tmp`,
+/// `.concat.txt`, orphaned sidecars) from crashed runs; and reel MP4s that no
+/// ledger row references, that no live job points at, and that are older than
+/// the cache max age (the on-demand cache, which has no ledger row). Returns the
+/// number of files removed. Best-effort — any IO error on one entry is skipped.
+///
+/// Fail-safe: if the ledger's cache keys cannot be read, no reel MP4 is
+/// deleted during this pass (only transient artifacts and orphaned sidecars
+/// are), because without the ledger a pre-generated reel cannot be told apart
+/// from an unreferenced one.
+fn sweep_reel_cache(app_state: &AppState) -> usize {
+    let dir = Path::new(&app_state.reels_path);
+    let read_dir = match std::fs::read_dir(dir) {
+        Ok(rd) => rd,
+        Err(_) => return 0, // dir not created yet → nothing to sweep
+    };
+
+    // Files a ledger row still points at (current pre-gen reels). `None` means
+    // the ledger could not be read, in which case MP4 deletion is skipped.
+    let protected: Option<std::collections::HashSet<String>> = {
+        let ctx = opentelemetry::Context::new();
+        let mut dao = app_state.precomputed_reel_dao.lock().expect("lock");
+        match dao.all_cache_keys(&ctx) {
+            Ok(keys) => Some(keys.into_iter().collect()),
+            Err(e) => {
+                log::warn!(
+                    "Reel cache sweep: ledger lookup failed ({e:?}); leaving reel MP4s untouched this pass"
+                );
+                None
+            }
+        }
+    };
+    // Outputs of live in-memory jobs (a Done reel a client may still be fetching).
+    let active: std::collections::HashSet<String> = {
+        let jobs = REEL_JOBS.lock().unwrap();
+        jobs.values()
+            .filter_map(|j| j.output_path.as_ref())
+            .map(|p| p.to_string_lossy().to_string())
+            .collect()
+    };
+
+    let now = std::time::SystemTime::now();
+    let max_age = Duration::from_secs(reel_cache_max_age_secs());
+    let tmp_max_age = Duration::from_secs(REEL_TMP_MAX_AGE_SECS);
+    let mut removed = 0usize;
+
+    for entry in read_dir.flatten() {
+        let path = entry.path();
+        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
+            continue;
+        };
+        // Unknown age (metadata error, or an mtime in the future) counts as
+        // zero, so such a file is never considered old enough to delete.
+        let age = entry
+            .metadata()
+            .and_then(|m| m.modified())
+            .ok()
+            .and_then(|t| now.duration_since(t).ok())
+            .unwrap_or_default();
+
+        // Transient render artifacts from a crashed run.
+        if name.ends_with(".mp4.tmp") || name.ends_with(".concat.txt") {
+            if age > tmp_max_age && std::fs::remove_file(&path).is_ok() {
+                removed += 1;
+            }
+            continue;
+        }
+
+        // Reel MP4: keep if referenced (ledger or live job) or still recent.
+        if let Some(key) = name.strip_suffix(".mp4") {
+            // Ledger unreadable → can't prove this reel is unreferenced.
+            let Some(protected) = protected.as_ref() else {
+                continue;
+            };
+            let p = path.to_string_lossy().to_string();
+            if protected.contains(key) || active.contains(&p) || age < max_age {
+                continue;
+            }
+            if std::fs::remove_file(&path).is_ok() {
+                // The sidecar goes with its MP4; counted as one removal.
+                let _ = std::fs::remove_file(path.with_extension("json"));
+                removed += 1;
+            }
+            continue;
+        }
+
+        // Orphaned sidecar (its MP4 is gone).
+        if name.ends_with(".json")
+            && !path.with_extension("mp4").exists()
+            && age > tmp_max_age
+            && std::fs::remove_file(&path).is_ok()
+        {
+            removed += 1;
+        }
+    }
+    removed
+}
+
+// Unit tests for the pure pieces of this module: cache-key derivation, beat
+// helpers, library-key normalization, and the scheduler's sleep arithmetic.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::libraries::Library;
+    use chrono::TimeZone;
+
+    /// Build a photo media item for `p` in library `lib`.
+    fn photo(p: &str, lib: i32) -> SegmentMedia {
+        SegmentMedia::Photo {
+            rel_path: p.to_string(),
+            library_id: lib,
+        }
+    }
+
+    /// Build a video-clip media item for `p` in library `lib`.
+    fn clip(p: &str, lib: i32) -> SegmentMedia {
+        SegmentMedia::Clip {
+            rel_path: p.to_string(),
+            library_id: lib,
+        }
+    }
+
+    /// Baseline selector shared by the cache-key tests: Day span, zero tz
+    /// offset, no library filter, 24 segments.
+    fn day_selector() -> ReelSelector {
+        ReelSelector::Memories {
+            span: MemoriesSpan::Day,
+            tz_offset_minutes: 0,
+            library: None,
+            max_segments: 24,
+        }
+    }
+
+    #[test]
+    fn cache_key_is_stable_for_same_inputs() {
+        let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)];
+        let k1 = cache_key(&day_selector(), &media, Some("grandma"));
+        let k2 = cache_key(&day_selector(), &media, Some("grandma"));
+        assert_eq!(k1, k2);
+        // 64-hex blake3.
+        assert_eq!(k1.len(), 64);
+        assert!(k1.chars().all(|c| c.is_ascii_hexdigit()));
+    }
+
+    #[test]
+    fn cache_key_changes_with_media_order_voice_and_selector() {
+        let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)];
+        let reordered = vec![photo("b.jpg", 1), photo("a.jpg", 1)];
+        let base = cache_key(&day_selector(), &media, Some("grandma"));
+        // Order matters (the reel sequence differs).
+        assert_ne!(
+            base,
+            cache_key(&day_selector(), &reordered, Some("grandma"))
+        );
+        // Voice matters.
+        assert_ne!(base, cache_key(&day_selector(), &media, Some("dad")));
+        assert_ne!(base, cache_key(&day_selector(), &media, None));
+        // Span matters.
+        let week = ReelSelector::Memories {
+            span: MemoriesSpan::Week,
+            tz_offset_minutes: 0,
+            library: None,
+            max_segments: 24,
+        };
+        assert_ne!(base, cache_key(&week, &media, Some("grandma")));
+    }
+
+    #[test]
+    fn cache_key_distinguishes_photo_from_clip() {
+        // Same path/library used as a still vs a video clip must differ.
+        let as_photo = vec![photo("v.mp4", 1)];
+        let as_clip = vec![clip("v.mp4", 1)];
+        assert_ne!(
+            cache_key(&day_selector(), &as_photo, None),
+            cache_key(&day_selector(), &as_clip, None)
+        );
+    }
+
+    #[test]
+    fn is_clip_only_for_single_clip_beat() {
+        let clip_beat = PlannedBeat {
+            media: vec![clip("v.mp4", 1)],
+            date: None,
+            insight_title: None,
+            insight_summary: None,
+            gps: None,
+        };
+        let photo_beat = PlannedBeat {
+            media: vec![photo("a.jpg", 1), photo("b.jpg", 1)],
+            date: None,
+            insight_title: None,
+            insight_summary: None,
+            gps: None,
+        };
+        assert!(clip_beat.is_clip());
+        assert!(!photo_beat.is_clip());
+    }
+
+    #[test]
+    fn span_phrase_maps_each_span() {
+        let mk = |span| ReelMeta {
+            span,
+            years: vec![],
+        };
+        assert_eq!(mk(MemoriesSpan::Day).span_phrase(), "on this day");
+        assert_eq!(mk(MemoriesSpan::Week).span_phrase(), "this week");
+        assert_eq!(mk(MemoriesSpan::Month).span_phrase(), "this month");
+    }
+
+    #[test]
+    fn date_label_formats_or_none() {
+        let beat = PlannedBeat {
+            media: vec![photo("a.jpg", 1)],
+            date: Some(1_560_384_000), // 2019-06-13 UTC
+            insight_title: None,
+            insight_summary: None,
+            gps: None,
+        };
+        assert!(beat.date_label().unwrap().contains("2019"));
+
+        let undated = PlannedBeat {
+            media: vec![photo("a.jpg", 1)],
+            date: None,
+            insight_title: None,
+            insight_summary: None,
+            gps: None,
+        };
+        assert_eq!(undated.date_label(), None);
+    }
+
+    #[test]
+    fn normalize_library_key_returns_id_when_found_numeric() {
+        let libs = vec![
+            Library {
+                id: 1,
+                name: "main".to_string(),
+                root_path: "/tmp/main".to_string(),
+                enabled: true,
+                excluded_dirs: Vec::new(),
+            },
+            Library {
+                id: 7,
+                name: "archive".to_string(),
+                root_path: "/tmp/archive".to_string(),
+                enabled: true,
+                excluded_dirs: Vec::new(),
+            },
+        ];
+        assert_eq!(normalize_library_key(&libs, Some("1")), "1");
+    }
+
+    #[test]
+    fn normalize_library_key_returns_id_when_found_by_name() {
+        let libs = vec![Library {
+            id: 1,
+            name: "main".to_string(),
+            root_path: "/tmp/main".to_string(),
+            enabled: true,
+            excluded_dirs: Vec::new(),
+        }];
+        assert_eq!(normalize_library_key(&libs, Some("main")), "1");
+    }
+
+    #[test]
+    fn normalize_library_key_returns_all_when_absent() {
+        let libs = vec![Library {
+            id: 1,
+            name: "main".to_string(),
+            root_path: "/tmp/main".to_string(),
+            enabled: true,
+            excluded_dirs: Vec::new(),
+        }];
+        assert_eq!(normalize_library_key(&libs, None), "all");
+    }
+
+    #[test]
+    fn normalize_library_key_returns_all_when_empty() {
+        let libs = vec![Library {
+            id: 1,
+            name: "main".to_string(),
+            root_path: "/tmp/main".to_string(),
+            enabled: true,
+            excluded_dirs: Vec::new(),
+        }];
+        assert_eq!(normalize_library_key(&libs, Some("")), "all");
+    }
+
+    #[test]
+    fn normalize_library_key_returns_all_when_unknown() {
+        let libs = vec![Library {
+            id: 1,
+            name: "main".to_string(),
+            root_path: "/tmp/main".to_string(),
+            enabled: true,
+            excluded_dirs: Vec::new(),
+        }];
+        assert_eq!(normalize_library_key(&libs, Some("missing")), "all");
+    }
+
+    // The scheduler tests below build wall-clock times in the machine's local
+    // zone; `.single()` makes them fail loudly if that local time happens to
+    // be ambiguous or nonexistent there.
+    #[test]
+    fn secs_until_next_run_hour_within_run_hour_wraps_to_tomorrow() {
+        // 03:30, run 3 → already past today's 03:00, so wait until tomorrow
+        // 03:00 (23h30m). Crucially NOT 0 — that would busy-loop the scheduler
+        // for the rest of the hour.
+        let dt = chrono::Local
+            .with_ymd_and_hms(2026, 6, 13, 3, 30, 0)
+            .single()
+            .expect("valid datetime");
+        assert_eq!(secs_until_next_run_hour(dt, 3), 23 * 3600 + 30 * 60);
+    }
+
+    #[test]
+    fn secs_until_next_run_hour_future_today_counts_minutes() {
+        // 10:15 → 14:00 is 3h45m, not a whole-hour 4h (minutes count).
+        let dt = chrono::Local
+            .with_ymd_and_hms(2026, 6, 13, 10, 15, 0)
+            .single()
+            .expect("valid datetime");
+        assert_eq!(secs_until_next_run_hour(dt, 14), 3 * 3600 + 45 * 60);
+    }
+
+    #[test]
+    fn secs_until_next_run_hour_past_today_wraps() {
+        // 20:00, run 3 → 4h to midnight plus 3h into tomorrow.
+        let dt = chrono::Local
+            .with_ymd_and_hms(2026, 6, 13, 20, 0, 0)
+            .single()
+            .expect("valid datetime");
+        assert_eq!(secs_until_next_run_hour(dt, 3), (24 - 20 + 3) * 3600);
+    }
+
+    #[test]
+    fn secs_until_next_run_hour_midnight() {
+        let dt = chrono::Local
+            .with_ymd_and_hms(2026, 6, 13, 0, 0, 0)
+            .single()
+            .expect("valid datetime");
+        // 0:00, run at 3 → 3 hours
+        assert_eq!(secs_until_next_run_hour(dt, 3), 3 * 3600);
+        // 0:00 exactly, run at 0 → wraps to next midnight (not 0, so no busy loop)
+        assert_eq!(secs_until_next_run_hour(dt, 0), 86_400);
+    }
+
+    #[test]
+    fn secs_until_next_run_hour_just_before_target() {
+        // 23:30, run 0 → 30 minutes to midnight (minute-accurate, not 1h).
+        let dt = chrono::Local
+            .with_ymd_and_hms(2026, 6, 13, 23, 30, 0)
+            .single()
+            .expect("valid datetime");
+        assert_eq!(secs_until_next_run_hour(dt, 0), 30 * 60);
+        // 23:30, run 23 → already past today's 23:00, wait until tomorrow.
+        assert_eq!(secs_until_next_run_hour(dt, 23), 86_400 - 30 * 60);
+    }
+}
diff --git a/src/reels/render.rs b/src/reels/render.rs
new file mode 100644
index 0000000..221df5f
--- /dev/null
+++ b/src/reels/render.rs
@@ -0,0 +1,742 @@
+//! ffmpeg assembly for memory reels.
+//!
+//! Two-stage, per-segment design: each segment is rendered to its own
+//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments
+//! are joined with the concat demuxer (stream copy, no re-encode). Rendering
+//! per segment — rather than one monster filtergraph — keeps each ffmpeg
+//! invocation simple to reason about, parallelizes naturally, and means a
+//! video-clip segment type (phase 2) slots in as just a different per-segment
+//! builder without touching the concat stage.
+//!
+//! The arg builders are pure (`Vec<String>` out) so the exact ffmpeg command
+//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure.
+
+use anyhow::{Context, Result, bail};
+use std::path::Path;
+use tokio::process::Command;
+
+/// Re-exported so the reel pipeline reaches NVENC detection through this module
+/// rather than depending on `video::ffmpeg` directly.
+pub use crate::video::ffmpeg::is_nvenc_available;
+
+/// Default reel canvas width. Portrait, because reels are watched on a phone
+/// held upright (a landscape canvas letterboxes to a thin ~25%-height band
+/// there). See [`photo_filter_chain`] for how any photo fills it uncropped.
+pub const REEL_WIDTH: u32 = 1080;
+/// Default reel canvas height — the long edge of the portrait frame.
+pub const REEL_HEIGHT: u32 = 1920;
+/// Default frame rate segments are normalized to (see [`SegmentOpts`]).
+pub const REEL_FPS: u32 = 30;
+
+/// Floor on a beat's screen time, so a terse line still lingers. There is no
+/// ceiling: the beat always covers the full narration so speech is never
+/// truncated — the scripter is asked to keep lines short instead.
+pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
+/// Short breath added after the narration when sizing a beat.
+const NARRATION_TAIL_SECONDS: f64 = 0.6;
+
+/// Fade in/out baked into a held (single-photo) beat: a gentle dip.
+const SINGLE_FADE_SECONDS: f64 = 0.35;
+/// Fade for burst photos — much snappier, so the difference between a held
+/// shot and a quick burst is obvious.
+const BURST_FADE_SECONDS: f64 = 0.12;
+
+/// Fallback cap on how much of a video clip is read when the source length
+/// can't be probed; with a known length, a clip instead plays for as much of
+/// its beat as its footage allows (see [`clip_beat_plan`]).
+pub const CLIP_SECONDS: f64 = 5.0;
+/// Volume factor the clip's live audio is ducked to under the narration.
+const CLIP_DUCK_VOLUME: f64 = 0.35;
+
+/// Floor on how long each burst photo stays up, so a long line over many photos
+/// doesn't flash them subliminally. If the narration is too short to give every
+/// photo this much, the beat is stretched to fit.
+const MIN_BURST_PHOTO_SECONDS: f64 = 0.6;
+
+/// Base screen time for a beat: narration length plus the trailing breath,
+/// floored at `MIN_SEGMENT_SECONDS`. This is the lower bound on a beat's total.
+pub fn segment_duration(narration_secs: f64) -> f64 {
+    let padded = narration_secs + NARRATION_TAIL_SECONDS;
+    // NaN and ±∞ fail the guard and fall through to the floor.
+    match padded {
+        secs if secs.is_finite() && secs > MIN_SEGMENT_SECONDS => secs,
+        _ => MIN_SEGMENT_SECONDS,
+    }
+}
+
+/// Split a beat into per-photo durations. The total is the larger of the
+/// narration-driven length (so speech isn't cut) and `n × MIN_BURST_PHOTO_SECONDS`
+/// (so a fast burst stays legible); the photos share it evenly. `n_photos == 0`
+/// is treated as one photo. Returns `(total_seconds, per_photo_seconds)`.
+pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec<f64>) {
+    let count = n_photos.max(1);
+    let slots = count as f64;
+    // Whichever is longer wins: the narration or the legibility floor.
+    let total = segment_duration(narration_secs).max(slots * MIN_BURST_PHOTO_SECONDS);
+    let share = total / slots;
+    (total, vec![share; count])
+}
+
+/// Fade length for a beat showing `n_photos`: the gentle single-photo fade for
+/// a held shot (0 or 1 photo), the snappy burst fade for anything more.
+fn fade_for(n_photos: usize) -> f64 {
+    match n_photos {
+        0 | 1 => SINGLE_FADE_SECONDS,
+        // Two or more photos share the beat, so each cut must be quick.
+        _ => BURST_FADE_SECONDS,
+    }
+}
+
+/// Per-segment render options: output canvas size, frame rate, encoder choice.
+#[derive(Debug, Clone, Copy)]
+pub struct SegmentOpts {
+    pub width: u32, // canvas width given to the `scale`/`crop` filters
+    pub height: u32, // canvas height given to the `scale`/`crop` filters
+    pub fps: u32, // frame rate for the `fps` filter, `-framerate`, and `-r`
+    pub nvenc: bool, // true → `h264_nvenc` + `-hwaccel cuda`; false → libx264
+}
+
+impl Default for SegmentOpts {
+    fn default() -> Self {
+        Self {
+            width: REEL_WIDTH,
+            height: REEL_HEIGHT,
+            fps: REEL_FPS,
+            nvenc: false, // hardware encoding is opt-in
+        }
+    }
+}
+
+/// Filter chain for one photo (input `idx`), ending in the labelled output
+/// `[v{idx}]`. The still is split in two: a background scaled to *cover* the
+/// canvas, cropped and box-blurred, and a foreground scaled to *fit* and
+/// overlaid centered — so any orientation fills the frame with no black bars
+/// and no cropping of the subject. A fade in/out then softens the cut. Every
+/// intermediate label carries `idx` so several chains can share one
+/// `filter_complex`. `fps` runs BEFORE the fades so the brightness ramp is
+/// computed on a true {fps}-frame timeline rather than the looped still's
+/// coarse cadence duplicated up, which reads as a steppy dip.
+fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String {
+    let w = opts.width;
+    let h = opts.height;
+    let fps = opts.fps;
+    let fade_out_start = f64::max(duration - fade, 0.0);
+    format!(
+        "[{idx}:v]split=2[bg{idx}][fg{idx}];\
+         [bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\
+         crop={w}:{h},boxblur=20:2[bgb{idx}];\
+         [fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\
+         [bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\
+         fps={fps},\
+         fade=t=in:st=0:d={fade},\
+         fade=t=out:st={fade_out_start:.3}:d={fade},\
+         setsar=1,format=yuv420p[v{idx}]"
+    )
+}
+
+/// Full `filter_complex` for a beat: one [`photo_filter_chain`] per entry of
+/// `per_photo`, concatenated in order into `[v]`, plus the narration — the
+/// input right after the photos, index `per_photo.len()` — padded with trailing
+/// silence into `[a]`. One photo degenerates to `concat=n=1`, a passthrough.
+pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String {
+    let count = per_photo.len().max(1);
+    let fade = fade_for(count);
+    let mut chains = Vec::with_capacity(per_photo.len());
+    let mut labels = String::new();
+    for (idx, &secs) in per_photo.iter().enumerate() {
+        chains.push(photo_filter_chain(idx, opts, secs, fade));
+    }
+    for idx in 0..count {
+        labels.push_str(&format!("[v{idx}]"));
+    }
+    let chains = chains.join(";");
+    format!("{chains};{labels}concat=n={count}:v=1:a=0[v];[{count}:a]apad[a]")
+}
+
+/// Encoder flags for the video stream: `h264_nvenc` when `nvenc` is set,
+/// `libx264` otherwise. Both variants pin `-pix_fmt yuv420p`, and both are
+/// returned as owned strings ready to append to an ffmpeg argument list.
+fn video_encoder_args(nvenc: bool) -> Vec<String> {
+    // p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path.
+    const NVENC: [&str; 8] = [
+        "-c:v",
+        "h264_nvenc",
+        "-preset",
+        "p4",
+        "-cq",
+        "23",
+        "-pix_fmt",
+        "yuv420p",
+    ];
+    // Software encoder used whenever NVENC was not requested.
+    const X264: [&str; 8] = [
+        "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p",
+    ];
+    let flags = if nvenc { NVENC } else { X264 };
+    flags.iter().map(|flag| flag.to_string()).collect()
+}
+
+/// Build the ffmpeg args that render one beat: each photo looped for its slice,
+/// the slices concatenated, and the single narration muxed over the whole
+/// thing; `total` bounds the output. `image_paths` and `per_photo` must be the
+/// same length — inputs are zipped, but the graph is sized from `per_photo`.
+pub fn build_beat_args(
+    image_paths: &[String],
+    audio_path: &str,
+    out_path: &str,
+    per_photo: &[f64],
+    total: f64,
+    opts: &SegmentOpts,
+) -> Vec<String> {
+    let fps = opts.fps.to_string();
+    let mut args: Vec<String> = vec!["-y".into()];
+    if opts.nvenc {
+        args.extend(["-hwaccel".into(), "cuda".into()]);
+    }
+    // One looped-still input per photo, each bounded to its slice by an input
+    // `-t`; reading at the target `-framerate` gives the fades real frames to
+    // ramp across.
+    for (path, &dur) in image_paths.iter().zip(per_photo.iter()) {
+        args.extend([
+            "-framerate".into(),
+            fps.clone(),
+            "-loop".into(),
+            "1".into(),
+            "-t".into(),
+            format!("{dur:.3}"),
+            "-i".into(),
+            path.clone(),
+        ]);
+    }
+    args.extend([
+        "-i".into(),
+        audio_path.into(),
+        "-filter_complex".into(),
+        beat_filtergraph(opts, per_photo),
+        "-map".into(),
+        "[v]".into(),
+        "-map".into(),
+        "[a]".into(),
+        "-t".into(),
+        format!("{total:.3}"),
+        // Force constant frame rate so the beat (and the concatenated reel)
+        // plays at a steady {fps} rather than a variable cadence.
+        "-r".into(),
+        fps,
+    ]);
+    args.extend(video_encoder_args(opts.nvenc));
+    args.extend(
+        ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"]
+            .iter()
+            .map(|s| s.to_string()),
+    );
+    args.push(out_path.into());
+    args
+}
+
+/// Build the concat-demuxer args that join rendered segments with a stream
+/// copy (no re-encode). `+faststart` moves the moov atom up front so the reel
+/// streams immediately on the mobile client. The output muxer is forced with
+/// `-f mp4` because we write to a `.tmp` path (atomic publish) whose extension
+/// ffmpeg can't map to a format on its own.
+pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec<String> {
+    let flags: [&str; 14] = [
+        "-y",
+        // Input: the list file, read through the concat demuxer.
+        "-f",
+        "concat",
+        "-safe",
+        "0",
+        "-i",
+        list_path,
+        // Output: stream copy, moov atom first, muxer forced to mp4.
+        "-c",
+        "copy",
+        "-movflags",
+        "+faststart",
+        "-f",
+        "mp4",
+        out_path,
+    ];
+    flags.into_iter().map(String::from).collect()
+}
+
+/// Render the concat list file body: one `file '<path>'` line per segment, in
+/// order. A single quote inside a path is written as `'\''` (close the quote,
+/// escaped quote, reopen) per ffmpeg's concat syntax.
+pub fn build_concat_list(segment_paths: &[String]) -> String {
+    segment_paths
+        .iter()
+        .map(|path| path.replace('\'', r"'\''"))
+        .map(|escaped| format!("file '{escaped}'\n"))
+        .collect()
+}
+
+async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
+    let output = Command::new("ffmpeg")
+        .args(args)
+        .output()
+        .await
+        .with_context(|| format!("spawning ffmpeg for {what}"))?;
+    if !output.status.success() {
+        bail!(
+            "ffmpeg {what} failed: {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+    Ok(())
+}
+
+/// Render one beat to `out_path`: its photos shown in sequence (a held shot for
+/// one photo, a quick burst for several) under the single narration in
+/// `audio_path`. `narration_secs` — the narration's measured length — sets the
+/// beat's pacing via [`beat_durations`].
+///
+/// # Errors
+/// Fails if `image_paths` is empty or if the ffmpeg run fails.
+pub async fn render_beat(
+    image_paths: &[std::path::PathBuf],
+    audio_path: &Path,
+    out_path: &Path,
+    narration_secs: f64,
+    opts: &SegmentOpts,
+) -> Result<()> {
+    let n_images = image_paths.len();
+    if n_images == 0 {
+        bail!("render_beat called with no images");
+    }
+    // ffmpeg takes string arguments, so paths are converted lossily up front.
+    let lossy = |p: &Path| p.to_string_lossy().into_owned();
+    let stills: Vec<String> = image_paths.iter().map(|p| lossy(p.as_path())).collect();
+    let audio = lossy(audio_path);
+    let out = lossy(out_path);
+    // Pacing: the beat's total length and each photo's equal slice of it.
+    let (beat_secs, slices) = beat_durations(narration_secs, n_images);
+    let args = build_beat_args(&stills, &audio, &out, &slices, beat_secs, opts);
+    run_ffmpeg(&args, "beat render").await
+}
+
+// --- Video-clip beats --------------------------------------------------------
+
+/// Plan a clip beat from the source video's length (if probed) and the
+/// narration length. Returns `(clip_dur, beat_total)`.
+///
+/// `beat_total` is always long enough for the full narration. `clip_dur` is how
+/// much footage plays: with a known, positive source length, as much of the
+/// beat as the source covers — so motion fills the screen time rather than
+/// stopping early; otherwise at most [`CLIP_SECONDS`]. Any remaining
+/// `beat_total - clip_dur` gap is a freeze of the last frame (the `tpad` in
+/// [`clip_video_filter`]). Capping playback at a fixed length while the
+/// narration ran longer was what produced the second-or-two freeze that read
+/// as a glitchy pause before the transition.
+pub fn clip_beat_plan(source_dur: Option<f64>, narration_secs: f64) -> (f64, f64) {
+    let beat = segment_duration(narration_secs);
+    // A probed, positive length caps playback at the source; anything else
+    // (no probe, zero, negative, NaN) falls back to the fixed read cap.
+    let cap = source_dur.filter(|&d| d > 0.0).unwrap_or(CLIP_SECONDS);
+    let clip_dur = cap.min(beat);
+    // `clip_dur` never exceeds `beat`, so this max only restates the invariant.
+    let beat_total = beat.max(clip_dur);
+    (clip_dur, beat_total)
+}
+
+/// Video chain for a clip beat, producing `[v]`: fill the clip to the portrait
+/// canvas (blurred backdrop, same look as photos), normalize fps, hold the last
+/// frame via `tpad` if the beat outlasts the clip, then fade in and out.
+fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String {
+    let (w, h) = (opts.width, opts.height);
+    let fps = opts.fps;
+    let fade = SINGLE_FADE_SECONDS;
+    let fade_out_start = f64::max(beat_total - fade, 0.0);
+    // Freeze the final frame to cover narration that runs past the clip; a gap
+    // of 0.05 s or less gets no `tpad` stage at all.
+    let hold = f64::max(beat_total - clip_dur, 0.0);
+    let tpad = (hold > 0.05)
+        .then(|| format!(",tpad=stop_mode=clone:stop_duration={hold:.3}"))
+        .unwrap_or_default();
+    format!(
+        "[0:v]split=2[bg][fg];\
+         [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\
+         crop={w}:{h},boxblur=20:2[bgb];\
+         [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\
+         [bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\
+         fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\
+         setsar=1,format=yuv420p[v]"
+    )
+}
+
+/// Audio chain for a clip beat, producing `[a]`. Without a clip audio track it
+/// is just the padded narration; with one, the clip audio is ducked and mixed in.
+fn clip_audio_filter(has_audio: bool) -> String {
+    if !has_audio {
+        return "[1:a]apad[a]".to_string();
+    }
+    // Duck the clip (input 0), pad the narration (input 1), then mix the two.
+    format!(
+        "[0:a]volume={CLIP_DUCK_VOLUME}[duck];[1:a]apad[narr];\
+         [duck][narr]amix=inputs=2:duration=longest:normalize=0[a]"
+    )
+}
+
+/// Full `filter_complex` for a clip beat (input 0 = clip, input 1 = narration):
+/// the video chain and the audio chain joined with `;`.
+pub fn clip_beat_filtergraph(
+    opts: &SegmentOpts,
+    clip_dur: f64,
+    beat_total: f64,
+    has_audio: bool,
+) -> String {
+    let video = clip_video_filter(opts, clip_dur, beat_total);
+    let audio = clip_audio_filter(has_audio);
+    // Video chain first, then audio, separated by `;`.
+    [video, audio].join(";")
+}
+
+/// Build the ffmpeg args for a clip beat: the first `clip_dur` seconds of the
+/// source video (input 0), filled to the portrait canvas, under the narration
+/// (input 1); live audio is mixed in if `has_audio`. Output runs `beat_total`.
+pub fn build_clip_beat_args(
+    clip_path: &str,
+    audio_path: &str,
+    out_path: &str,
+    clip_dur: f64,
+    beat_total: f64,
+    has_audio: bool,
+    opts: &SegmentOpts,
+) -> Vec<String> {
+    let fps = opts.fps.to_string();
+    let mut args: Vec<String> = vec!["-y".into()];
+    if opts.nvenc {
+        args.extend(["-hwaccel".into(), "cuda".into()]);
+    }
+    args.extend([
+        // Input `-t` bounds only the clip read; the narration has none (apad fills).
+        "-t".into(),
+        format!("{clip_dur:.3}"),
+        "-i".into(),
+        clip_path.into(),
+        "-i".into(),
+        audio_path.into(),
+        "-filter_complex".into(),
+        clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio),
+        "-map".into(),
+        "[v]".into(),
+        "-map".into(),
+        "[a]".into(),
+        "-t".into(),
+        format!("{beat_total:.3}"),
+        "-r".into(),
+        fps,
+    ]);
+    args.extend(video_encoder_args(opts.nvenc));
+    args.extend(
+        ["-c:a", "aac", "-b:a", "160k", "-ar", "48000"]
+            .iter()
+            .map(|s| s.to_string()),
+    );
+    args.push(out_path.into());
+    args
+}
+
+/// Whether a media file has at least one audio stream (so a clip beat knows
+/// whether to mix in live audio). `false` if ffprobe can't run or exits non-zero.
+pub async fn has_audio_stream(path: &str) -> bool {
+    Command::new("ffprobe")
+        .args([
+            "-v",
+            "error",
+            "-select_streams",
+            "a",
+            "-show_entries",
+            "stream=index",
+            "-of",
+            "csv=p=0",
+            path,
+        ])
+        .output()
+        .await
+        .map(|out| out.status.success() && !out.stdout.trim_ascii().is_empty())
+        .unwrap_or(false)
+}
+
+/// Render one clip beat: the opening of `clip_path` under the narration in
+/// `audio_path`. The clip plays for as much of the beat as its footage covers
+/// ([`CLIP_SECONDS`] caps it only when the length can't be probed); the beat
+/// lasts at least the narration, freezing the clip's last frame if needed.
+pub async fn render_clip_beat(
+    clip_path: &Path,
+    audio_path: &Path,
+    out_path: &Path,
+    narration_secs: f64,
+    opts: &SegmentOpts,
+) -> Result<()> {
+    let clip_str = clip_path.to_string_lossy().to_string();
+    // A failed duration probe is not fatal: `clip_beat_plan` treats it as unknown.
+    let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str)
+        .await
+        .ok()
+        .flatten();
+    let (clip_dur, beat_total) = clip_beat_plan(source_dur, narration_secs);
+    let has_audio = has_audio_stream(&clip_str).await;
+
+    let args = build_clip_beat_args(
+        &clip_str,
+        &audio_path.to_string_lossy(),
+        &out_path.to_string_lossy(),
+        clip_dur,
+        beat_total,
+        has_audio,
+        opts,
+    );
+    run_ffmpeg(&args, "clip beat render").await
+}
+
+/// Join rendered segments into the final reel. The concat list is written next
+/// to the output and removed afterwards, whether or not ffmpeg succeeded.
+pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> {
+    let list_file = out_path.with_extension("concat.txt");
+    tokio::fs::write(&list_file, build_concat_list(segment_paths))
+        .await
+        .context("writing concat list")?;
+    let (list_arg, out_arg) = (list_file.to_string_lossy(), out_path.to_string_lossy());
+    let outcome = run_ffmpeg(&build_concat_args(&list_arg, &out_arg), "concat").await;
+    // Best-effort cleanup: a failed delete must not mask the ffmpeg result.
+    let _ = tokio::fs::remove_file(&list_file).await;
+    outcome
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn segment_duration_floors_short_lines() {
+        // Narration + tail below the floor still lingers for the floor.
+        assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS);
+        assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS);
+    }
+
+    #[test]
+    fn segment_duration_covers_full_narration_plus_tail() {
+        // No ceiling: a long line gets its full length so speech isn't cut.
+        assert!((segment_duration(5.0) - 5.6).abs() < 1e-9);
+        assert!((segment_duration(20.0) - 20.6).abs() < 1e-9);
+    }
+
+    #[test]
+    fn segment_duration_rejects_nonfinite() {
+        assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS);
+        assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS);
+    }
+
+    #[test]
+    fn beat_durations_single_photo_matches_base() {
+        let (total, per) = beat_durations(4.0, 1);
+        assert!((total - 4.6).abs() < 1e-9); // narration + tail
+        assert_eq!(per.len(), 1);
+        assert!((per[0] - 4.6).abs() < 1e-9);
+    }
+
+    #[test]
+    fn beat_durations_burst_splits_evenly() {
+        // 5 photos over a 4.6s beat (4.0s + tail) → 0.92s each, above the 0.6 floor.
+        let (total, per) = beat_durations(4.0, 5);
+        assert!((total - 4.6).abs() < 1e-9);
+        assert_eq!(per.len(), 5);
+        assert!((per.iter().sum::<f64>() - total).abs() < 1e-9);
+        assert!(per.iter().all(|&d| d >= MIN_BURST_PHOTO_SECONDS));
+    }
+
+    #[test]
+    fn beat_durations_stretches_when_narration_too_short_for_burst() {
+        // The floor beat (2.5s) over 10 photos would be 0.25s each — below the
+        // legibility floor, so the beat stretches to 10 × 0.6 = 6s.
+        let (total, per) = beat_durations(0.0, 10);
+        assert!((total - 6.0).abs() < 1e-9);
+        assert!(per.iter().all(|&d| (d - 0.6).abs() < 1e-9));
+    }
+
+    #[test]
+    fn beat_filtergraph_single_photo_fills_portrait_and_holds() {
+        let (_t, per) = beat_durations(4.0, 1);
+        let g = beat_filtergraph(&SegmentOpts::default(), &per);
+        assert!(g.contains("[0:v]split=2[bg0][fg0]"));
+        assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase"));
+        assert!(g.contains("crop=1080:1920"));
+        assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease"));
+        assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
+        // Single photo → concat of one, gentle fade, audio is input 1.
+        assert!(g.contains("concat=n=1:v=1:a=0[v]"));
+        assert!(g.contains("d=0.35")); // SINGLE_FADE
+        assert!(g.contains("[1:a]apad[a]"));
+    }
+
+    #[test]
+    fn beat_filtergraph_burst_chains_concats_and_snappy_fade() {
+        let (_t, per) = beat_durations(4.0, 3);
+        let g = beat_filtergraph(&SegmentOpts::default(), &per);
+        // One chain per photo with index-suffixed labels.
+        assert!(g.contains("[0:v]split") && g.contains("[1:v]split") && g.contains("[2:v]split"));
+        // Concatenated in order, audio is the 4th input (index 3).
+        assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]"));
+        assert!(g.contains("[3:a]apad[a]"));
+        // Burst uses the much snappier fade (vs 0.35 for a held shot).
+        assert!(g.contains("d=0.12"));
+        assert!(!g.contains("d=0.35"));
+    }
+
+    #[test]
+    fn beat_filtergraph_normalizes_fps_before_fading() {
+        // fps must precede the fades on every chain (else the dip looks steppy).
+        let (_t, per) = beat_durations(4.0, 1);
+        let g = beat_filtergraph(&SegmentOpts::default(), &per);
+        let fps_at = g.find("fps=30").expect("fps in graph");
+        let fade_at = g.find("fade=t=in").expect("fade in graph");
+        assert!(fps_at < fade_at);
+    }
+
+    #[test]
+    fn beat_args_one_input_per_photo_plus_audio_bound_by_total() {
+        let (total, per) = beat_durations(4.0, 2);
+        let args = build_beat_args(
+            &["/a.jpg".into(), "/b.jpg".into()],
+            "/n.wav",
+            "/out.mp4",
+            &per,
+            total,
+            &SegmentOpts::default(),
+        );
+        let joined = args.join(" ");
+        // A looped-still input per photo, each with its slice -t, then the audio.
+        assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg"));
+        assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg"));
+        assert!(joined.contains("-i /n.wav"));
+        // Output bounded to the beat total and forced CFR.
+        assert!(joined.contains("-t 4.600"));
+        assert!(joined.contains("-r 30"));
+        assert!(joined.ends_with("/out.mp4"));
+    }
+
+    #[test]
+    fn beat_args_use_nvenc_and_cuda_when_enabled() {
+        let opts = SegmentOpts {
+            nvenc: true,
+            ..SegmentOpts::default()
+        };
+        let (total, per) = beat_durations(3.0, 1);
+        let args = build_beat_args(
+            &["/img.jpg".into()],
+            "/a.wav",
+            "/out.mp4",
+            &per,
+            total,
+            &opts,
+        );
+        let joined = args.join(" ");
+        assert!(joined.contains("-hwaccel cuda"));
+        assert!(joined.contains("h264_nvenc"));
+        assert!(!joined.contains("libx264"));
+    }
+
+    #[test]
+    fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() {
+        // 5s clip, 7s beat → 2s freeze of the last frame, ducked-audio mix.
+        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true);
+        assert!(g.contains("tpad=stop_mode=clone:stop_duration=2.000"));
+        assert!(g.contains("volume=0.35"));
+        assert!(g.contains("amix=inputs=2"));
+        assert!(g.contains("[1:a]apad[narr]"));
+        // Fill applied to the clip too.
+        assert!(g.contains("boxblur"));
+        assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
+    }
+
+    #[test]
+    fn clip_beat_plan_plays_clip_through_the_whole_beat_when_source_is_long() {
+        // 30s source, 4s narration → beat is narration+tail (4.6), and the clip
+        // plays that whole 4.6s of motion: no freeze (clip_dur == beat_total).
+        let (clip_dur, beat_total) = clip_beat_plan(Some(30.0), 4.0);
+        assert!((beat_total - 4.6).abs() < 1e-9);
+        assert!((clip_dur - 4.6).abs() < 1e-9);
+        assert!((beat_total - clip_dur).abs() < 1e-9); // no hold
+    }
+
+    #[test]
+    fn clip_beat_plan_freezes_only_when_source_shorter_than_narration() {
+        // 2s source under a 4s narration → play all 2s, freeze the remainder.
+        let (clip_dur, beat_total) = clip_beat_plan(Some(2.0), 4.0);
+        assert!((clip_dur - 2.0).abs() < 1e-9);
+        assert!((beat_total - 4.6).abs() < 1e-9);
+        assert!(beat_total - clip_dur > 2.0); // unavoidable freeze gap
+    }
+
+    #[test]
+    fn clip_beat_plan_caps_read_when_source_length_unknown() {
+        // Probe failed: read up to the fallback cap, beat still covers narration.
+        let (clip_dur, beat_total) = clip_beat_plan(None, 8.0);
+        assert!((clip_dur - CLIP_SECONDS).abs() < 1e-9);
+        assert!((beat_total - 8.6).abs() < 1e-9);
+    }
+
+    #[test]
+    fn clip_filter_no_tpad_when_clip_covers_the_beat() {
+        // Clip at least as long as the beat → no freeze.
+        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true);
+        assert!(!g.contains("tpad"));
+    }
+
+    #[test]
+    fn clip_filter_narration_only_without_clip_audio() {
+        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false);
+        assert!(!g.contains("amix"));
+        assert!(!g.contains("volume="));
+        assert!(g.contains("[1:a]apad[a]"));
+    }
+
+    #[test]
+    fn clip_beat_args_bound_clip_and_output() {
+        let args = build_clip_beat_args(
+            "/v.mp4",
+            "/n.wav",
+            "/out.mp4",
+            5.0,
+            6.6,
+            true,
+            &SegmentOpts::default(),
+        );
+        let joined = args.join(" ");
+        // Input -t bounds the clip read; output -t bounds the beat.
+        assert!(joined.contains("-t 5.000 -i /v.mp4"));
+        assert!(joined.contains("-i /n.wav"));
+        assert!(joined.contains("-t 6.600"));
+        assert!(joined.contains("-r 30"));
+        assert!(joined.ends_with("/out.mp4"));
+    }
+
+    #[test]
+    fn concat_args_stream_copy_with_faststart_and_forced_muxer() {
+        // Output goes to a .tmp path, so the muxer must be forced — ffmpeg
+        // can't infer mp4 from the extension (the bug this guards against).
+        let args = build_concat_args("/tmp/list.txt", "/out.mp4.tmp");
+        let joined = args.join(" ");
+        assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt"));
+        assert!(joined.contains("-c copy"));
+        assert!(joined.contains("+faststart"));
+        assert!(joined.contains("-f mp4"));
+        // The forced muxer must come before the output path.
+        let f_mp4 = args.windows(2).position(|w| w == ["-f", "mp4"]).unwrap();
+        let out = args.iter().position(|a| a == "/out.mp4.tmp").unwrap();
+        assert!(f_mp4 < out);
+    }
+
+    #[test]
+    fn concat_list_escapes_single_quotes() {
+        let body = build_concat_list(&[
+            "/tmp/seg_000.mp4".into(),
+            "/tmp/own's dir/seg_001.mp4".into(),
+        ]);
+        assert!(body.contains("file '/tmp/seg_000.mp4'\n"));
+        // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax.
+        assert!(body.contains(r"own'\''s"));
+    }
+}
diff --git a/src/reels/script.rs b/src/reels/script.rs
new file mode 100644
index 0000000..38ef9cc
--- /dev/null
+++ b/src/reels/script.rs
@@ -0,0 +1,491 @@
+//! Narration scripting for memory reels.
+//!
+//! One LLM call turns the planned beats (each carrying its date and, where
+//! available, its cached insight) into a short first-person narration line per
+//! beat plus a title for the reel. A beat may show several photos in a quick
+//! burst, so a line narrates the *moment*, not a single frame. We reuse the
+//! cached insight summary as the richest signal rather than re-running vision
+//! at reel time — that keeps reel generation off the GPU's vision slot.
+//!
+//! The prompt builder and response parser are pure so the contract is
+//! unit-testable; `generate_script` wires them to the LLM client.
+//!
+//! The agentic scripter (pre-generation) resolves the backend through the
+//! InsightGenerator, builds a read-only tool set, and runs a tool loop to
+//! ground the narration in retrieved context before asking for the final JSON.
+
+use anyhow::{Context, Result};
+use std::sync::Arc;
+
+use super::{PlannedBeat, ReelMeta};
+use crate::ai::backend::{BackendKind, SamplingOverrides};
+use crate::ai::insight_generator::InsightGenerator;
+use crate::ai::llamacpp::LlamaCppClient;
+use crate::ai::llm_client::{LlmClient, Tool};
+use crate::ai::ollama::ChatMessage;
+
+/// The narration for a whole reel: a title and one spoken line per beat, in beat order.
+#[derive(Debug, Clone, PartialEq)]
+pub struct ReelScript {
+    pub title: String, // reel title; `parse_script_response` substitutes "Memories" if absent
+    pub lines: Vec<String>, // one narration line per beat, index-aligned with the beats
+}
+
+const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
+slideshow of someone's own photos set to a spoken voiceover. Write warm, \
+specific, first-person narration as if the person is gently looking back on \
+their own memories. Each line plays over one moment, which may be a quick burst \
+of several photos, so narrate the moment as a whole rather than a single frame. \
+Be concrete and grounded in the details given; never invent names, places, or \
+events that aren't supported. Keep each line to one or two short sentences that \
+can be read aloud in a few seconds. Avoid generic filler like \"what a \
+wonderful day\" — if you have little to go on, simply describe the moment \
+plainly."; // system prompt of the single-shot scripter; returned by `build_script_messages`
+
+/// System prompt for the agentic scripter: the same narration rules as `SYSTEM_PROMPT`,
+/// followed by the list of read-only tools the model may call and the JSON-only reply rule.
+const AGENTIC_SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
+slideshow of someone's own photos set to a spoken voiceover. Write warm, \
+specific, first-person narration as if the person is gently looking back on \
+their own memories. Each line plays over one moment, which may be a quick burst \
+of several photos, so narrate the moment as a whole rather than a single frame. \
+Be concrete and grounded in the details given; never invent names, places, or \
+events that aren't supported. Keep each line to one or two short sentences that \
+can be read aloud in a few seconds. Avoid generic filler like \"what a \
+wonderful day\" — if you have little to go on, simply describe the moment \
+plainly.\n\nYou may call read-only tools (search_rag, search_messages, \
+get_sms_messages, get_calendar_events, get_location_history, reverse_geocode, \
+get_personal_place_at, recall_entities, get_current_datetime) to ground each \
+line in real context — e.g. reverse_geocode a moment's GPS to name the place, \
+or check the calendar/messages around its date. Never invent details. Return \
+ONLY the JSON object, no prose or code fences.";
+
+/// Upper bound on tool-loop iterations for the agentic scripter. Read from
+/// `REEL_PREGEN_MAX_TOOL_ITERS`; unset, unparsable, or zero values yield 8.
+fn reel_pregen_max_tool_iters() -> usize {
+    let configured = std::env::var("REEL_PREGEN_MAX_TOOL_ITERS").ok();
+    match configured.as_deref().map(str::trim).map(str::parse::<usize>) {
+        Some(Ok(iters)) if iters > 0 => iters,
+        _ => 8,
+    }
+}
+
+/// Assemble the `(system, user)` prompt pair for the single-shot scripter. The
+/// user message lists every beat in playback order (date, media kind, cached
+/// insight) and closes by demanding strict JSON with one segment per beat.
+pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) {
+    let mut user = format!(
+        "This reel has {} moments surfaced as memories {}.\n\n",
+        beats.len(),
+        meta.span_phrase()
+    );
+    if !meta.years.is_empty() {
+        let listed = meta.years.iter().map(|y| y.to_string()).collect::<Vec<_>>();
+        user += &format!("They span the years: {}.\n\n", listed.join(", "));
+    }
+    user += "Moments, in the order they will appear:\n";
+    for (idx, beat) in beats.iter().enumerate() {
+        user += &format!("\n[{}]", idx + 1);
+        if let Some(date) = beat.date_label() {
+            user += &format!(" {date}");
+        }
+        let photo_count = beat.media.len();
+        if beat.is_clip() {
+            user += " (a video clip)";
+        } else if photo_count > 1 {
+            user += &format!(" (a burst of {} photos)", photo_count);
+        }
+        user.push('\n');
+        // Context line: cached insight title and/or a non-blank summary.
+        let title = beat.insight_title.as_ref();
+        let summary = beat.insight_summary.as_ref().filter(|s| !s.trim().is_empty());
+        user += &match (title, summary) {
+            (Some(t), Some(s)) => format!("  Known context: {t} — {s}\n"),
+            (Some(t), None) => format!("  Known context: {t}\n"),
+            (None, Some(s)) => format!("  Known context: {s}\n"),
+            (None, None) => "  (no extra context — narrate plainly from the date)\n".to_string(),
+        };
+    }
+    user += &format!(
+        "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
+         {{\"title\": \"<short reel title>\", \"segments\": [\"<line for moment 1>\", \
+         \"<line for moment 2>\", ... ]}}\n\
+         The \"segments\" array MUST have exactly {} items, one per moment in order.",
+        beats.len()
+    );
+    (SYSTEM_PROMPT.to_string(), user)
+}
+
+/// Build the two-message conversation (system, then user) for the agentic
+/// scripter. The system message is `AGENTIC_SYSTEM_PROMPT`, which permits
+/// read-only tool calls; the user message enumerates the beats the same way as
+/// `build_script_messages` and adds a `GPS:` line for beats with coordinates.
+pub fn build_agentic_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> Vec<ChatMessage> {
+    let mut user = format!(
+        "This reel has {} moments surfaced as memories {}.\n\n",
+        beats.len(),
+        meta.span_phrase()
+    );
+    if !meta.years.is_empty() {
+        let listed = meta.years.iter().map(|y| y.to_string()).collect::<Vec<_>>();
+        user += &format!("They span the years: {}.\n\n", listed.join(", "));
+    }
+    user += "Moments, in the order they will appear:\n";
+    for (idx, beat) in beats.iter().enumerate() {
+        // Header line: 1-based index, then the optional date and media-kind note.
+        user += &format!("\n[{}]", idx + 1);
+        if let Some(date) = beat.date_label() {
+            user += &format!(" {date}");
+        }
+        let photo_count = beat.media.len();
+        if beat.is_clip() {
+            user += " (a video clip)";
+        } else if photo_count > 1 {
+            user += &format!(" (a burst of {} photos)", photo_count);
+        }
+        // Coordinates get their own indented line, printed to 4 decimals.
+        if let Some((lat, lon)) = beat.gps {
+            user += &format!("\n  GPS: {:.4}, {:.4}", lat, lon);
+        }
+        user.push('\n');
+        // Context line: cached insight title and/or a non-blank summary.
+        let title = beat.insight_title.as_ref();
+        let summary = beat.insight_summary.as_ref().filter(|s| !s.trim().is_empty());
+        user += &match (title, summary) {
+            (Some(t), Some(s)) => format!("  Known context: {t} — {s}\n"),
+            (Some(t), None) => format!("  Known context: {t}\n"),
+            (None, Some(s)) => format!("  Known context: {s}\n"),
+            (None, None) => "  (no extra context — narrate plainly from the date)\n".to_string(),
+        };
+    }
+    // Closing instruction: pins the reply shape and the exact segment count.
+    user += &format!(
+        "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
+         {{\"title\": \"<short reel title>\", \"segments\": [\"<line for moment 1>\", \
+         \"<line for moment 2>\", ... ]}}\n\
+         The \"segments\" array MUST have exactly {} items, one per moment in order.",
+        beats.len()
+    );
+
+    let system = ChatMessage::system(AGENTIC_SYSTEM_PROMPT.to_string());
+    vec![system, ChatMessage::user(user)]
+}
+
+/// Turn the model's raw reply into a [`ReelScript`] with exactly `n` lines.
+///
+/// Code fences and surrounding prose are tolerated, and each segment may be a
+/// plain string or an object carrying a `narration` string. Extra lines are
+/// dropped; missing, blank, or unreadable ones become a neutral fallback line.
+/// A missing or empty title becomes "Memories".
+pub fn parse_script_response(raw: &str, n: usize) -> ReelScript {
+    const FALLBACK_LINE: &str = "A moment worth remembering.";
+    const FALLBACK_TITLE: &str = "Memories";
+
+    let parsed = extract_json_object(raw);
+    let root = parsed.as_ref();
+
+    // Title: a non-empty string after cleaning, otherwise the generic default.
+    let title = match root.and_then(|v| v.get("title")).and_then(|t| t.as_str()) {
+        Some(text) => clean_text(text),
+        None => String::new(),
+    };
+    let title = if title.is_empty() {
+        FALLBACK_TITLE.to_string()
+    } else {
+        title
+    };
+
+    // Segments: each item is either "…" or {"narration": "…"}; any other
+    // shape cleans to an empty string and is replaced further down.
+    let segment_text = |item: &serde_json::Value| -> String {
+        let text = item
+            .as_str()
+            .or_else(|| item.get("narration").and_then(|v| v.as_str()))
+            .unwrap_or_default();
+        clean_text(text)
+    };
+    let segments = root
+        .and_then(|v| v.get("segments"))
+        .and_then(|s| s.as_array());
+    let mut lines: Vec<String> = match segments {
+        Some(items) => items.iter().map(segment_text).collect(),
+        None => Vec::new(),
+    };
+
+    // Align to exactly n: cut the surplus, pad the shortfall with blanks, then
+    // swap every blank for the fallback so each beat still gets spoken audio.
+    lines.resize(n, String::new());
+    for line in &mut lines {
+        if line.is_empty() {
+            *line = FALLBACK_LINE.to_string();
+        }
+    }
+
+    ReelScript { title, lines }
+}
+
+/// Pull a JSON value out of a possibly-noisy model response (code fences,
+/// surrounding prose). The whole trimmed input is tried first; after that each
+/// `{` in turn opens a brace-balanced candidate span and the first candidate
+/// that parses wins, so braces in prose ahead of the real object (or a span
+/// that never closes) do not end the search. Returns `None` if nothing parses.
+fn extract_json_object(raw: &str) -> Option<serde_json::Value> {
+    if let Ok(whole) = serde_json::from_str::<serde_json::Value>(raw.trim()) {
+        return Some(whole);
+    }
+    let mut search_from = 0;
+    while let Some(offset) = raw[search_from..].find('{') {
+        let start = search_from + offset;
+        let parsed = balanced_object_end(raw.as_bytes(), start)
+            .and_then(|end| serde_json::from_str::<serde_json::Value>(&raw[start..=end]).ok());
+        if parsed.is_some() {
+            return parsed;
+        }
+        search_from = start + 1; // '{' is one byte, so this stays on a char boundary
+    }
+    None
+}
+
+/// Index of the `}` that closes the object opened at `bytes[start]`, skipping
+/// braces inside string literals; `None` if it never closes. Byte scanning is
+/// sound here: the ASCII delimiters never occur inside a multi-byte UTF-8 sequence.
+fn balanced_object_end(bytes: &[u8], start: usize) -> Option<usize> {
+    let (mut depth, mut in_str, mut escaped) = (0usize, false, false);
+    for (i, &byte) in bytes.iter().enumerate().skip(start) {
+        match byte {
+            _ if escaped => escaped = false,
+            b'\\' if in_str => escaped = true,
+            b'"' => in_str = !in_str,
+            b'{' if !in_str => depth += 1,
+            b'}' if !in_str && depth == 1 => return Some(i),
+            b'}' if !in_str => depth -= 1,
+            _ => {}
+        }
+    }
+    None
+}
+
+/// Normalise a line of model output: trim it, strip any `"` characters wrapping
+/// it, and collapse every inner run of whitespace to a single space.
+fn clean_text(s: &str) -> String {
+    let words = s.trim().trim_matches('"').split_whitespace();
+    words.collect::<Vec<&str>>().join(" ")
+}
+
+/// Single-shot scripter: one text-only LLM call (no images) over the prompt
+/// pair from `build_script_messages`, parsed into one line per beat. Per-beat
+/// context comes from cached insights. The call takes the GPU read lease
+/// internally (see `LlamaCppClient::generate`). Errors only if that call fails.
+pub async fn generate_script(
+    client: &Arc<LlamaCppClient>,
+    meta: &ReelMeta,
+    beats: &[PlannedBeat],
+) -> Result<ReelScript> {
+    let (system_prompt, user_prompt) = build_script_messages(meta, beats);
+    let completion = client.generate(&user_prompt, Some(&system_prompt), None).await;
+    let reply = completion.context("LLM script generation failed")?;
+    // The parser always yields exactly `beats.len()` lines, padding if needed.
+    Ok(parse_script_response(&reply, beats.len()))
+}
+
+/// Agentic scripter: lets the model call read-only tools to ground the
+/// narration before it writes the script. Steps:
+///   1. resolve the local backend through the `InsightGenerator`,
+///   2. narrow the generator's tool definitions to a fixed read-only allow-list,
+///   3. build the agentic prompt messages,
+///   4. run the tool loop for at most `reel_pregen_max_tool_iters()` iterations,
+///   5. strip think-blocks from the reply and parse it into a [`ReelScript`].
+/// Any error is returned, so the caller may fall back to `generate_script`.
+pub async fn generate_script_agentic(
+    generator: &InsightGenerator,
+    meta: &ReelMeta,
+    beats: &[PlannedBeat],
+) -> Result<ReelScript> {
+    // Allow-list of tools that work with no photo and no authenticated user.
+    // Whole-reel calls run execute_tool with empty file/image context and
+    // user_id=0, so photo/user-bound tools (get_file_tags, get_faces_in_photo,
+    // recall_facts_for_photo, recall_facts_for_entity) would just no-op or
+    // error, burning iterations. Write tools are left out as well.
+    const READ_ONLY_TOOLS: [&str; 9] = [
+        "search_rag",
+        "search_messages",
+        "get_sms_messages",
+        "get_calendar_events",
+        "get_location_history",
+        "reverse_geocode",
+        "get_personal_place_at",
+        "recall_entities",
+        "get_current_datetime",
+    ];
+
+    // 1. Resolve the local backend, leaving every sampling override unset so
+    //    the generator's own configuration decides.
+    let no_overrides = SamplingOverrides {
+        model: None,
+        num_ctx: None,
+        temperature: None,
+        top_p: None,
+        top_k: None,
+        min_p: None,
+        enable_thinking: None,
+    };
+    let backend = generator
+        .resolve_backend(BackendKind::Local, &no_overrides)
+        .await
+        .context("resolving backend for agentic script")?;
+
+    // 2. Build the tool set: start from the persona gate (no persona context,
+    //    so corrections are closed; has_vision forced to false), then keep
+    //    only the allow-listed names.
+    let gate = generator.current_gate_opts_for_persona(false, None);
+    let tools: Vec<Tool> = InsightGenerator::build_tool_definitions(gate)
+        .into_iter()
+        .filter(|tool| READ_ONLY_TOOLS.contains(&tool.function.name.as_str()))
+        .collect();
+
+    // 3. Build the agentic prompt messages.
+    let messages = build_agentic_script_messages(meta, beats);
+
+    // 4. Run the tool loop under the configured iteration cap.
+    let iteration_cap = reel_pregen_max_tool_iters();
+    let reply = generator
+        .run_readonly_tool_loop(&backend, messages, tools, iteration_cap)
+        .await
+        .context("agentic tool loop failed")?;
+
+    // 5. Strip any think-blocks the model may have emitted, then parse. The
+    //    parser always returns exactly `beats.len()` lines.
+    let reply = crate::ai::llm_client::strip_think_blocks(&reply);
+    Ok(parse_script_response(&reply, beats.len()))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::memories::MemoriesSpan;
+
+    /// Photo media entry in library 1.
+    fn photo(rel_path: &str) -> super::super::SegmentMedia {
+        super::super::SegmentMedia::Photo {
+            rel_path: rel_path.to_string(),
+            library_id: 1,
+        }
+    }
+
+    /// Day-span reel metadata covering two years.
+    fn meta() -> ReelMeta {
+        ReelMeta {
+            span: MemoriesSpan::Day,
+            years: vec![2019, 2021],
+        }
+    }
+
+    /// `n` single-photo beats one day apart, with no insight or GPS attached.
+    fn planned(n: usize) -> Vec<PlannedBeat> {
+        let mut beats = Vec::with_capacity(n);
+        for i in 0..n {
+            beats.push(PlannedBeat {
+                media: vec![photo(&format!("p{i}.jpg"))],
+                date: Some(1_560_000_000 + i as i64 * 86_400),
+                insight_title: None,
+                insight_summary: None,
+                gps: None,
+            });
+        }
+        beats
+    }
+
+    #[test]
+    fn prompt_states_exact_moment_count_and_span() {
+        let (system, user) = build_script_messages(&meta(), &planned(3));
+        assert!(system.contains("memory reel"));
+        for needle in ["3 moments", "on this day", "exactly 3 items"] {
+            assert!(user.contains(needle), "user prompt lacks `{needle}`");
+        }
+        // Each moment gets an indexed entry.
+        for tag in ["[1]", "[2]", "[3]"] {
+            assert!(user.contains(tag));
+        }
+    }
+
+    #[test]
+    fn prompt_notes_burst_photo_count() {
+        let mut beats = planned(1);
+        beats[0].media = vec![photo("a.jpg"), photo("b.jpg"), photo("c.jpg")];
+        let (_, user) = build_script_messages(&meta(), &beats);
+        assert!(user.contains("a burst of 3 photos"));
+    }
+
+    #[test]
+    fn prompt_marks_clip_beats() {
+        // A lone clip replaces the default single photo.
+        let mut beats = planned(1);
+        beats[0].media = vec![super::super::SegmentMedia::Clip {
+            rel_path: "v.mp4".to_string(),
+            library_id: 1,
+        }];
+        let (_, user) = build_script_messages(&meta(), &beats);
+        assert!(user.contains("a video clip"));
+    }
+
+    #[test]
+    fn prompt_includes_insight_context_when_present() {
+        let mut beats = planned(1);
+        beats[0].insight_title = Some("Lake house weekend".into());
+        beats[0].insight_summary = Some("Swimming with the dogs.".into());
+        let (_, user) = build_script_messages(&meta(), &beats);
+        assert!(user.contains("Lake house weekend — Swimming with the dogs."));
+    }
+
+    #[test]
+    fn parse_plain_json_object() {
+        let reply = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#;
+        let ReelScript { title, lines } = parse_script_response(reply, 2);
+        assert_eq!(title, "Summer Days");
+        assert_eq!(lines, vec!["First line.", "Second line."]);
+    }
+
+    #[test]
+    fn parse_tolerates_code_fences_and_prose() {
+        let reply = "Sure! Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!";
+        let ReelScript { title, lines } = parse_script_response(reply, 2);
+        assert_eq!(title, "Trip");
+        assert_eq!(lines, vec!["A.", "B."]);
+    }
+
+    #[test]
+    fn parse_accepts_object_segment_shape() {
+        let reply = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#;
+        let parsed = parse_script_response(reply, 2);
+        assert_eq!(parsed.lines, vec!["One.", "Two."]);
+    }
+
+    #[test]
+    fn parse_pads_short_and_truncates_long_to_n() {
+        // One line returned for three beats → padded with neutral lines.
+        let padded = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3);
+        assert_eq!(padded.lines.len(), 3);
+        assert_eq!(padded.lines[0], "Only one.");
+        assert!(!padded.lines[1].is_empty());
+
+        // Three lines returned for two beats → the surplus is cut.
+        let cut = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2);
+        assert_eq!(cut.lines, vec!["a", "b"]);
+    }
+
+    #[test]
+    fn parse_falls_back_on_garbage() {
+        // No JSON at all: default title and `n` non-empty lines.
+        let parsed = parse_script_response("the model said no", 2);
+        assert_eq!(parsed.title, "Memories");
+        assert_eq!(parsed.lines.len(), 2);
+        assert!(!parsed.lines.iter().any(|line| line.is_empty()));
+    }
+
+    #[test]
+    fn parse_blank_line_replaced_with_fallback() {
+        let parsed = parse_script_response(r#"{"title":"T","segments":["  ","Real."]}"#, 2);
+        assert!(!parsed.lines[0].is_empty());
+        assert_eq!(parsed.lines[1], "Real.");
+    }
+}
diff --git a/src/reels/selector.rs b/src/reels/selector.rs
new file mode 100644
index 0000000..a02cbb8
--- /dev/null
+++ b/src/reels/selector.rs
@@ -0,0 +1,560 @@
+//! Reel selectors: resolve "what goes in the reel" into an ordered media set
+//! plus the metadata the scripter needs. The renderer and scripter are
+//! selector-agnostic, so adding tag- or date-range-based reels later means
+//! adding a variant here, not touching the pipeline.
+//!
+//! Resolution is split in two so the handler can compute a cache key (and
+//! short-circuit on a cache hit) without the per-photo insight lookups:
+//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and
+//! runs in the background job.
+
+use std::path::Path;
+use std::sync::Mutex;
+
+use chrono::{DateTime, Datelike, FixedOffset};
+
+use super::{PlannedBeat, ReelMeta, SegmentMedia};
+use crate::database::{ExifDao, InsightDao};
+use crate::file_types::{is_image_file, is_video_file};
+use crate::memories::{self, MemoriesSpan};
+use crate::state::AppState;
+
+/// Default cap on how many segments a reel request asks for. It is only an
+/// upper bound on the request: the effective count is usually smaller, set by
+/// the duration budget (see [`budget_segments`]). It currently equals the hard
+/// cap declared on the next line.
+pub const DEFAULT_MAX_SEGMENTS: usize = 40;
+pub const HARD_MAX_SEGMENTS: usize = 40; // hard cap: bounds work per reel whatever was requested
+
+/// Target reel length in seconds. Week and especially month spans can surface
+/// hundreds of photos; at a few seconds of narration each, a naive reel runs
+/// minutes, so the segment count is capped to keep the reel near this length.
+/// Overridable at runtime via `REEL_TARGET_SECONDS` (see `target_reel_seconds`).
+const DEFAULT_TARGET_REEL_SECONDS: f64 = 90.0;
+
+/// Rough average wall-time in seconds per segment (a short narration line + the
+/// silent tail). Only used to turn the duration target into a segment count
+/// (90 / 5 = 18 with the defaults); the real time is the measured narration.
+const EST_SECONDS_PER_SEGMENT: f64 = 5.0;
+
+/// Largest gap, in seconds, between photos of the same "event/moment" when
+/// clustering a span's photos; a larger jump starts a new event. Across
+/// years/days the gaps are far larger, so each instance clusters on its own.
+/// 4 hours splits e.g. a morning hike from an evening dinner.
+const EVENT_GAP_SECONDS: i64 = 4 * 3600;
+
+/// Target reel length in seconds: a finite, positive `REEL_TARGET_SECONDS`, else the default.
+fn target_reel_seconds() -> f64 {
+    let configured = std::env::var("REEL_TARGET_SECONDS").ok();
+    let parsed = configured.and_then(|raw| raw.trim().parse::<f64>().ok());
+    let valid = parsed.filter(|secs| secs.is_finite() && *secs > 0.0);
+    valid.unwrap_or(DEFAULT_TARGET_REEL_SECONDS)
+}
+
+/// Number of segments a reel gets: the duration target divided by the estimated
+/// seconds per segment, capped by `requested_max`, then kept within 1..=HARD_MAX_SEGMENTS.
+pub fn budget_segments(requested_max: usize) -> usize {
+    let affordable = (target_reel_seconds() / EST_SECONDS_PER_SEGMENT) as usize;
+    requested_max.min(affordable).max(1).min(HARD_MAX_SEGMENTS)
+}
+
+/// What a reel is built from. v1 ships only the memories selector (on this
+/// day/week/month); tag and date-range variants are meant to slot in here later.
+#[derive(Debug, Clone)]
+pub enum ReelSelector {
+    Memories {
+        span: MemoriesSpan, // which memories window to gather
+        tz_offset_minutes: i32, // client offset east of UTC, in minutes
+        library: Option<String>, // library filter; `None` is rendered as "all" in the descriptor
+        max_segments: usize, // requested cap, passed to `budget_segments`
+    },
+}
+
+impl ReelSelector {
+    /// Stable string identity used in the cache key. It encodes every input
+    /// that changes *which* media is selected; the narration is
+    /// non-deterministic and so cannot be part of a pre-render key.
+    pub fn descriptor(&self) -> String {
+        let ReelSelector::Memories {
+            span,
+            tz_offset_minutes,
+            library,
+            max_segments,
+        } = self;
+        let lib = match library {
+            Some(name) => name.as_str(),
+            None => "all",
+        };
+        format!(
+            "memories:span={:?}:tz={}:lib={}:max={}",
+            span, tz_offset_minutes, lib, max_segments
+        )
+    }
+}
+
+/// Pick at most `max` items spread evenly across `items`, keeping the first
+/// and last whenever `max >= 2`. Input that already fits is returned as-is;
+/// `max == 0` yields nothing and `max == 1` yields just the first item.
+pub fn sample_evenly<T: Clone>(items: &[T], max: usize) -> Vec<T> {
+    let len = items.len();
+    match max {
+        0 => return Vec::new(),
+        _ if len <= max => return items.to_vec(),
+        1 => return vec![items[0].clone()],
+        _ => {}
+    }
+    // Here 2 <= max < len. Pick i lands on round(i * last / steps), done in
+    // integers by adding half the divisor first: pick 0 -> 0, pick steps -> last.
+    let (last, steps) = (len - 1, max - 1);
+    let mut picked = Vec::with_capacity(max);
+    for i in 0..max {
+        let idx = (i * last + steps / 2) / steps;
+        picked.push(items[idx.min(last)].clone());
+    }
+    picked
+}
+
+/// Split time-sorted items into events: a new event begins whenever a dated
+/// item lies more than `gap_seconds` after the most recent dated item before
+/// it. Order is preserved, and items without a timestamp join the current event.
+fn cluster_by_gap(
+    items: &[memories::MemoryItem],
+    gap_seconds: i64,
+) -> Vec<Vec<memories::MemoryItem>> {
+    let mut events: Vec<Vec<memories::MemoryItem>> = Vec::new();
+    let mut last_seen: Option<i64> = None;
+    for item in items {
+        // Only a dated item following an earlier dated item can open a gap.
+        let gap_exceeded = item
+            .created
+            .zip(last_seen)
+            .map_or(false, |(now, before)| now - before > gap_seconds);
+        match events.last_mut() {
+            Some(current) if !gap_exceeded => current.push(item.clone()),
+            _ => events.push(vec![item.clone()]),
+        }
+        // Undated items leave the reference timestamp untouched.
+        last_seen = item.created.or(last_seen);
+    }
+    events
+}
+
+/// Upper bound on the photos a single beat flashes through. Caps the burst so
+/// one huge event doesn't dominate, and keeps each photo on screen long enough
+/// to register at the per-beat narration length (see render's beat timing).
+pub const MAX_BURST_PHOTOS: usize = 10;
+
+/// Fold time-ordered event clusters into `n` contiguous groups so a span with
+/// more events than beats still covers its whole timeline: neighbouring
+/// events merge into one beat instead of being dropped. Expects
+/// `1 <= n <= clusters.len()`.
+fn partition_into_groups(
+    clusters: Vec<Vec<memories::MemoryItem>>,
+    n: usize,
+) -> Vec<Vec<memories::MemoryItem>> {
+    let total = clusters.len();
+    let mut remaining = clusters.into_iter();
+    let mut groups = Vec::with_capacity(n);
+    for j in 0..n {
+        // Even contiguous split: group j owns clusters [j*total/n, (j+1)*total/n).
+        let width = ((j + 1) * total / n).saturating_sub(j * total / n).max(1);
+        let mut merged = Vec::new();
+        for cluster in remaining.by_ref().take(width) {
+            merged.extend(cluster);
+        }
+        groups.push(merged);
+    }
+    groups
+}
+
+/// Turn photo items into at most `n_beats` photo beats. Photos are clustered
+/// into events by time gap; when there are more events than beats, adjacent
+/// events are merged so the whole span stays covered. Each beat then shows up
+/// to `max_burst` photos (an even sample of its group) under one narration
+/// line, so a week/month reel *shows* all its moments without a narrated,
+/// timed segment per photo. A beat's date is that of its first shown photo.
+fn form_photo_beats(
+    items: &[memories::MemoryItem],
+    n_beats: usize,
+    max_burst: usize,
+) -> Vec<PlannedBeat> {
+    if items.is_empty() || n_beats == 0 {
+        return Vec::new();
+    }
+    // One beat per event when they fit the budget; otherwise fold adjacent
+    // events together into exactly n_beats groups.
+    let mut groups = cluster_by_gap(items, EVENT_GAP_SECONDS);
+    if groups.len() > n_beats {
+        groups = partition_into_groups(groups, n_beats);
+    }
+
+    let mut beats = Vec::with_capacity(groups.len());
+    for group in groups {
+        if group.is_empty() {
+            continue;
+        }
+        let shown = sample_evenly(&group, max_burst);
+        let date = shown.first().and_then(|photo| photo.created);
+        let media = shown
+            .into_iter()
+            .map(|photo| SegmentMedia::Photo {
+                rel_path: photo.path,
+                library_id: photo.library_id,
+            })
+            .collect();
+        beats.push(PlannedBeat {
+            media,
+            date,
+            insight_title: None,
+            insight_summary: None,
+            gps: None,
+        });
+    }
+    beats
+}
+
+/// Divide `n_beats` into `(photo_beats, clip_beats)`. With no videos, photos
+/// take everything; with no photos, clips take up to one beat per video.
+/// Otherwise clips get up to half the budget (but at least one beat), never
+/// more than there are videos, and photos take what is left.
+fn split_beat_budget(n_photos: usize, n_videos: usize, n_beats: usize) -> (usize, usize) {
+    match (n_photos, n_videos) {
+        (_, 0) => (n_beats, 0),
+        (0, videos) => (0, videos.min(n_beats)),
+        (_, videos) => {
+            let clip_share = std::cmp::max(n_beats / 2, 1);
+            let clip_beats = std::cmp::min(videos, clip_share);
+            (n_beats.saturating_sub(clip_beats), clip_beats)
+        }
+    }
+}
+
+/// Build a reel's beats from a span's photos and videos under a beat budget.
+/// Each chosen video becomes a one-clip beat (videos are sampled evenly when
+/// they outnumber the clip budget) and photos cluster into burst beats. The
+/// two kinds are merged back into chronological order, undated beats last, so
+/// the reel reads as the span unfolded.
+pub fn form_beats(
+    photos: &[memories::MemoryItem],
+    videos: &[memories::MemoryItem],
+    n_beats: usize,
+    max_burst: usize,
+) -> Vec<PlannedBeat> {
+    if n_beats == 0 {
+        return Vec::new();
+    }
+
+    // Decide how many beats each kind of media may use.
+    let (photo_budget, clip_budget) = split_beat_budget(photos.len(), videos.len(), n_beats);
+
+    // Photo bursts first, then one clip beat per sampled video.
+    let mut beats = form_photo_beats(photos, photo_budget, max_burst);
+    let clip_beats = sample_evenly(videos, clip_budget)
+        .into_iter()
+        .map(|video| PlannedBeat {
+            media: vec![SegmentMedia::Clip {
+                rel_path: video.path,
+                library_id: video.library_id,
+            }],
+            date: video.created,
+            insight_title: None,
+            insight_summary: None,
+            gps: None,
+        });
+    beats.extend(clip_beats);
+
+    // Interleave chronologically. `false < true`, so dated beats (key
+    // `(false, Some(ts))`) precede undated ones (key `(true, None)`); the sort
+    // is stable, so ties keep their photo-then-clip insertion order.
+    beats.sort_by_key(|beat| (beat.date.is_none(), beat.date));
+    beats
+}
+
+/// Cheap pass: resolve the selector into ordered beats (no insight lookups
+/// yet) plus reel metadata. `Err` only on an invalid library param. A tz offset
+/// that overflows or is out of chrono's range resolves to no client timezone.
+pub fn resolve(
+    app_state: &AppState,
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    span_context: &opentelemetry::Context,
+    selector: &ReelSelector,
+) -> Result<(Vec<PlannedBeat>, ReelMeta), String> {
+    match selector {
+        ReelSelector::Memories {
+            span,
+            tz_offset_minutes,
+            library,
+            max_segments,
+        } => {
+            // Minutes -> seconds with an overflow check; `east_opt` range-checks.
+            let client_tz = tz_offset_minutes.checked_mul(60).and_then(FixedOffset::east_opt);
+            let items = memories::gather_memory_items(
+                app_state,
+                exif_dao,
+                span_context,
+                *span,
+                *tz_offset_minutes,
+                client_tz,
+                library.as_deref(),
+            )?;
+
+            // Split into photos and video clips; anything that's neither is
+            // dropped. Years span both, computed before the budget narrows it.
+            let years = distinct_years(&items, client_tz);
+            let meta = ReelMeta { span: *span, years };
+
+            let (photos, videos): (Vec<_>, Vec<_>) = items
+                .into_iter()
+                .filter(|it| {
+                    is_image_file(Path::new(&it.path)) || is_video_file(Path::new(&it.path))
+                })
+                .partition(|it| is_image_file(Path::new(&it.path)));
+
+            // The budget caps the number of narrated beats (≈ reel length);
+            // photo bursts and short clips let the reel still cover the span.
+            let n_beats = budget_segments(*max_segments);
+            let beats = form_beats(&photos, &videos, n_beats, MAX_BURST_PHOTOS);
+            Ok((beats, meta))
+        }
+    }
+}
+
+/// Calendar years covered by the items' `created` timestamps, ascending and
+/// de-duplicated. Years are read in `tz` when one is given, otherwise in UTC;
+/// items with a missing or unrepresentable timestamp are skipped.
+fn distinct_years(items: &[memories::MemoryItem], tz: Option<FixedOffset>) -> Vec<i32> {
+    // An ordered set sorts and de-duplicates in one step.
+    let mut seen = std::collections::BTreeSet::new();
+    for utc in items.iter().filter_map(|it| DateTime::from_timestamp(it.created?, 0)) {
+        let year = if let Some(off) = tz {
+            utc.with_timezone(&off).year()
+        } else {
+            utc.year()
+        };
+        seen.insert(year);
+    }
+    seen.into_iter().collect()
+}
+
+/// Best-effort background pass over `beats`: copies the cached insight
+/// (title + summary) for each beat's lead media and, when that lead is a
+/// photo, its EXIF GPS pair. A lookup that misses or errors leaves the
+/// fields as they were; a poisoned DAO lock skips enrichment entirely.
+pub fn enrich(
+    insight_dao: &Mutex<Box<dyn InsightDao>>,
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    span_context: &opentelemetry::Context,
+    beats: &mut [PlannedBeat],
+) {
+    let Ok(mut insights) = insight_dao.lock() else {
+        return;
+    };
+    let Ok(mut exifs) = exif_dao.lock() else {
+        return;
+    };
+    for beat in beats {
+        let (path, lead_is_photo) = match beat.media.first() {
+            Some(SegmentMedia::Photo { rel_path, .. }) => (rel_path.clone(), true),
+            Some(SegmentMedia::Clip { rel_path, .. }) => (rel_path.clone(), false),
+            None => continue,
+        };
+        if let Ok(Some(found)) = insights.get_insight(span_context, &path) {
+            beat.insight_title = Some(found.title);
+            beat.insight_summary = Some(found.summary);
+        }
+        if !lead_is_photo {
+            continue;
+        }
+        if let Ok(Some(exif)) = exifs.get_exif(span_context, &path)
+            && let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude)
+        {
+            beat.gps = Some((lat as f64, lon as f64));
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sample_evenly_returns_all_when_under_cap() {
+        let v = vec![1, 2, 3];
+        assert_eq!(sample_evenly(&v, 5), vec![1, 2, 3]);
+        assert_eq!(sample_evenly(&v, 3), vec![1, 2, 3]);
+    }
+
+    #[test]
+    fn sample_evenly_keeps_endpoints_and_spreads() {
+        let v: Vec<i32> = (0..100).collect();
+        let picked = sample_evenly(&v, 5);
+        assert_eq!(picked.len(), 5);
+        assert_eq!(picked[0], 0); // first kept
+        assert_eq!(*picked.last().unwrap(), 99); // last kept
+        // Strictly increasing ⇒ source order is kept and nothing repeats.
+        assert!(picked.windows(2).all(|w| w[0] < w[1]));
+    }
+
+    #[test]
+    fn sample_evenly_handles_one_and_zero() {
+        let v: Vec<i32> = (0..10).collect();
+        assert_eq!(sample_evenly(&v, 1), vec![0]);
+        assert!(sample_evenly(&v, 0).is_empty());
+    }
+
+    #[test]
+    fn descriptor_is_stable_and_distinguishes_inputs() {
+        let a = ReelSelector::Memories {
+            span: MemoriesSpan::Day,
+            tz_offset_minutes: -480,
+            library: None,
+            max_segments: 24,
+        };
+        let b = ReelSelector::Memories {
+            span: MemoriesSpan::Week,
+            tz_offset_minutes: -480,
+            library: None,
+            max_segments: 24,
+        };
+        assert_eq!(a.descriptor(), a.clone().descriptor());
+        assert_ne!(a.descriptor(), b.descriptor());
+        assert!(a.descriptor().contains("lib=all"));
+    }
+
+    #[test]
+    fn distinct_years_dedupes_and_sorts() {
+        let items = vec![
+            memories::MemoryItem {
+                path: "a".into(),
+                created: Some(1_560_000_000), // 2019-06-08 UTC
+                modified: None,
+                library_id: 1,
+            },
+            memories::MemoryItem {
+                path: "b".into(),
+                created: Some(1_560_086_400), // 2019-06-09 UTC (one day later)
+                modified: None,
+                library_id: 1,
+            },
+            memories::MemoryItem {
+                path: "c".into(),
+                created: Some(1_623_000_000), // 2021-06-06 UTC
+                modified: None,
+                library_id: 1,
+            },
+        ];
+        assert_eq!(distinct_years(&items, None), vec![2019, 2021]);
+    }
+
+    /// Fixture: an item created at unix second `ts` whose path is `{name}.{ext}`.
+    fn item_ext(ts: i64, name: &str, ext: &str) -> memories::MemoryItem {
+        memories::MemoryItem {
+            path: format!("{name}.{ext}"),
+            created: Some(ts),
+            modified: None,
+            library_id: 1,
+        }
+    }
+    fn item_at(ts: i64, name: &str) -> memories::MemoryItem {
+        item_ext(ts, name, "jpg")
+    }
+
+    #[test]
+    fn budget_segments_caps_to_duration_target() {
+        // 90s / 5s ≈ 18, bounded by the request max and hard cap.
+        assert_eq!(budget_segments(40), 18);
+        assert_eq!(budget_segments(5), 5); // request asked for fewer
+        assert_eq!(budget_segments(1000), 18); // hard cap / budget wins
+    }
+
+    #[test]
+    fn cluster_by_gap_splits_on_large_jumps() {
+        // Two photos minutes apart, then one a day later → two events.
+        let items = vec![
+            item_at(1_000_000, "a"),
+            item_at(1_000_300, "b"), // +5 min → same event
+            item_at(1_100_000, "c"), // +~27h → new event
+        ];
+        let clusters = cluster_by_gap(&items, EVENT_GAP_SECONDS);
+        assert_eq!(clusters.len(), 2);
+        assert_eq!(clusters[0].len(), 2);
+        assert_eq!(clusters[1].len(), 1);
+    }
+
+    #[test]
+    fn photo_beats_one_per_event_when_they_fit() {
+        // Three well-separated events, budget of 10 → three beats, each holding
+        // all of its (few) photos.
+        let items = vec![
+            item_at(0, "a"),
+            item_at(50, "b"), // same event as a
+            item_at(1_000_000, "c"),
+            item_at(2_000_000, "d"),
+        ];
+        let beats = form_photo_beats(&items, 10, MAX_BURST_PHOTOS);
+        assert_eq!(beats.len(), 3);
+        assert_eq!(beats[0].media.len(), 2); // burst of the first event
+        assert_eq!(beats[1].media.len(), 1);
+        assert_eq!(beats[2].media.len(), 1);
+    }
+
+    #[test]
+    fn photo_beats_merge_events_when_over_budget() {
+        // Six distinct events but only two beats → adjacent events fold in, and
+        // every event's photos still appear (capped by the burst max).
+        let items: Vec<memories::MemoryItem> = (0..6)
+            .map(|i| item_at(i as i64 * 1_000_000, &format!("e{i}")))
+            .collect();
+        let beats = form_photo_beats(&items, 2, MAX_BURST_PHOTOS);
+        assert_eq!(beats.len(), 2);
+        let shown: usize = beats.iter().map(|b| b.media.len()).sum();
+        assert_eq!(shown, 6); // all six moments still shown across two beats
+    }
+
+    #[test]
+    fn photo_beats_cap_burst_to_max() {
+        // One dense event of 30 photos, generous budget → a single beat that
+        // bursts at most MAX_BURST_PHOTOS, not all 30.
+        let items: Vec<memories::MemoryItem> = (0..30)
+            .map(|i| item_at(i as i64, &format!("p{i}")))
+            .collect();
+        let beats = form_photo_beats(&items, 18, MAX_BURST_PHOTOS);
+        assert_eq!(beats.len(), 1);
+        assert_eq!(beats[0].media.len(), MAX_BURST_PHOTOS);
+    }
+
+    #[test]
+    fn split_beat_budget_handles_each_mix() {
+        // Only photos / only videos → that kind gets the whole budget.
+        assert_eq!(split_beat_budget(10, 0, 18), (18, 0));
+        assert_eq!(split_beat_budget(0, 10, 18), (0, 10)); // capped at n_videos
+        assert_eq!(split_beat_budget(0, 30, 18), (0, 18)); // capped at budget
+        // Mixed → clips up to half (≥1), photos the rest.
+        assert_eq!(split_beat_budget(100, 100, 18), (9, 9));
+        assert_eq!(split_beat_budget(100, 1, 18), (17, 1)); // few videos
+    }
+
+    #[test]
+    fn form_beats_mixes_clip_and_photo_beats_in_time_order() {
+        let photos = vec![item_at(0, "p0"), item_at(2_000_000, "p1")];
+        // One video whose timestamp falls between the two photo events.
+        let videos = vec![item_ext(1_000_000, "v0", "mp4")];
+        let beats = form_beats(&photos, &videos, 10, MAX_BURST_PHOTOS);
+        // Two photo events + one clip = three beats, chronological.
+        assert_eq!(beats.len(), 3);
+        assert!(!beats[0].is_clip()); // p0 @ t=0
+        assert!(beats[1].is_clip()); // v0 @ t=1e6
+        assert!(!beats[2].is_clip()); // p1 @ t=2e6
+        assert!(matches!(beats[1].media[0], SegmentMedia::Clip { .. }));
+    }
+
+    #[test]
+    fn form_beats_videos_only_become_clip_beats() {
+        let videos: Vec<memories::MemoryItem> = (0..3)
+            .map(|i| item_ext(i as i64 * 1_000_000, &format!("v{i}"), "mov"))
+            .collect();
+        let beats = form_beats(&[], &videos, 10, MAX_BURST_PHOTOS);
+        assert_eq!(beats.len(), 3);
+        assert!(beats.iter().all(|b| b.is_clip()));
+    }
+}
diff --git a/src/state.rs b/src/state.rs
index e678ad1..33e8e3f 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -8,9 +8,10 @@ use crate::ai::turn_registry::TurnRegistry;
 use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
 use crate::database::{
     CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, InsightGenerationJobDao, KnowledgeDao,
-    LocationHistoryDao, SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao,
-    SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, SqliteKnowledgeDao,
-    SqliteLocationHistoryDao, SqliteSearchHistoryDao, connect,
+    LocationHistoryDao, PrecomputedReelDao, SearchHistoryDao, SqliteCalendarEventDao,
+    SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao,
+    SqliteKnowledgeDao, SqliteLocationHistoryDao, SqlitePrecomputedReelDao, SqliteSearchHistoryDao,
+    SqliteUserAiPrefsDao, UserAiPrefsDao, connect,
 };
 use crate::database::{PreviewDao, SqlitePreviewDao};
 use crate::faces;
@@ -53,6 +54,10 @@ pub struct AppState {
     pub video_path: String,
     pub gif_path: String,
     pub preview_clips_path: String,
+    /// Directory for cached memory-reel MP4s (+ title sidecars). Derived from
+    /// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips.
+    /// Created lazily by the reel pipeline on first render.
+    pub reels_path: String,
     pub excluded_dirs: Vec<String>,
     pub ollama: OllamaClient,
     /// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
@@ -84,6 +89,14 @@ pub struct AppState {
     pub clip_client: ClipClient,
     pub insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
     pub insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
+    /// Ledger for precomputed memory reels. Written by the nightly agentic
+    /// job (Section D); read by `GET /reels/precomputed` (Section C).
+    #[allow(dead_code)]
+    pub precomputed_reel_dao: Arc<Mutex<Box<dyn PrecomputedReelDao>>>,
+    /// User AI preferences (voice, timezone, library). Mirrored by the
+    /// client; read by the nightly pre-generation scheduler.
+    #[allow(dead_code)]
+    pub user_ai_prefs_dao: Arc<Mutex<Box<dyn UserAiPrefsDao>>>,
 }
 
 impl AppState {
@@ -97,6 +110,7 @@ impl AppState {
         self.libraries.iter().find(|l| l.id == id)
     }
 
+    #[allow(dead_code)]
     pub fn library_by_name(&self, name: &str) -> Option<&Library> {
         self.libraries.iter().find(|l| l.name == name)
     }
@@ -125,6 +139,8 @@ impl AppState {
         clip_client: ClipClient,
         insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
         insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
+        precomputed_reel_dao: Arc<Mutex<Box<dyn PrecomputedReelDao>>>,
+        user_ai_prefs_dao: Arc<Mutex<Box<dyn UserAiPrefsDao>>>,
     ) -> Self {
         assert!(
             !libraries_vec.is_empty(),
@@ -141,6 +157,19 @@ impl AppState {
             preview_dao,
         );
 
+        // Reels cache dir: explicit env, else a `reels` sibling of the preview
+        // clips dir (a known-writable, test-safe location). Not created here —
+        // the reel pipeline does `create_dir_all` before its first write, so
+        // construction (incl. tests) never touches the filesystem.
+        let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| {
+            std::path::Path::new(&preview_clips_path)
+                .parent()
+                .map(|p| p.join("reels"))
+                .unwrap_or_else(|| std::path::PathBuf::from("reels"))
+                .to_string_lossy()
+                .to_string()
+        });
+
         let library_health = libraries::new_health_map(&libraries_vec);
         let live_libraries = Arc::new(RwLock::new(libraries_vec.clone()));
         Self {
@@ -155,6 +184,7 @@ impl AppState {
             video_path,
             gif_path,
             preview_clips_path,
+            reels_path,
             excluded_dirs,
             ollama,
             openrouter,
@@ -169,6 +199,8 @@ impl AppState {
             clip_client,
             insight_job_dao,
             insight_job_handles,
+            precomputed_reel_dao,
+            user_ai_prefs_dao,
         }
     }
 
@@ -249,6 +281,14 @@ impl Default for AppState {
         let insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>> =
             Arc::new(Mutex::new(HashMap::new()));
 
+        // Initialize precomputed reel DAO (nightly pre-generation ledger)
+        let precomputed_reel_dao: Arc<Mutex<Box<dyn PrecomputedReelDao>>> =
+            Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new())));
+
+        // Initialize user AI preferences DAO (Section E)
+        let user_ai_prefs_dao: Arc<Mutex<Box<dyn UserAiPrefsDao>>> =
+            Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new())));
+
         // Load base path and ensure the primary library row reflects it.
         let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
         let mut seed_conn = connect();
@@ -326,6 +366,8 @@ impl Default for AppState {
             clip_client,
             insight_job_dao,
             insight_job_handles,
+            precomputed_reel_dao,
+            user_ai_prefs_dao,
         )
     }
 }
@@ -535,6 +577,8 @@ impl AppState {
             ClipClient::new(None), // disabled in test
             Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test
             Arc::new(Mutex::new(HashMap::new())), // placeholder for test
+            Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))), // placeholder for test
+            Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))), // placeholder for test
         )
     }
 }
diff --git a/src/tags.rs b/src/tags.rs
index f3e0135..3dc0859 100644
--- a/src/tags.rs
+++ b/src/tags.rs
@@ -168,7 +168,7 @@ async fn get_tags<D: TagDao>(
     // this file, so tags added under one library show up under the
     // others when they hold the same file. Falls back to direct rel_path
     // match when the file hasn't been hashed yet.
-    let library = libraries::resolve_library_param(&app_state, request.library.as_deref())
+    let library = libraries::resolve_library_param_state(&app_state, request.library.as_deref())
         .ok()
         .flatten()
         .unwrap_or_else(|| app_state.primary_library());
diff --git a/src/unified_search.rs b/src/unified_search.rs
new file mode 100644
index 0000000..0940a92
--- /dev/null
+++ b/src/unified_search.rs
@@ -0,0 +1,521 @@
+//! `/photos/search/unified?q=<natural language>` — unified NL photo search.
+//!
+//! One free-text box that composes the two existing engines instead of making
+//! the user pick between them:
+//!  1. A grounded local-LLM call ([`crate::ai::nl_query`]) translates the
+//!     query into a structured filter + a semantic term.
+//!  2. Structured filters (tags / EXIF / geo / date / media-type) define the
+//!     candidate set; the semantic term ranks within it via CLIP.
+//!
+//! Path A (orchestration): we reuse `clip_search`'s scoring core and the
+//! existing `ExifDao` / `TagDao` queries, joining on `content_hash`. EXIF rows
+//! are the universal candidate carrier — each has `(library_id, file_path,
+//! content_hash, date_taken)` — so the structured filter is just a predicate
+//! over them, and the CLIP hits (which key on `content_hash`) intersect by
+//! hash. No new schema, no surgery on `list_photos`.
+//!
+//! Degenerate cases collapse to the existing behavior: semantic-only → plain
+//! CLIP search; filters-only → a date-sorted filtered listing.
+//!
+//! Person filtering is intentionally deferred (no person→photos resolver yet).
+
+use crate::AppState;
+use crate::ai::backend::{BackendKind, SamplingOverrides};
+use crate::ai::nl_query::{StructuredQuery, translate_nl_query};
+use crate::clip_search::{
+    SearchHit, parse_library_scope, resolve_hits, score_error_response, score_photos,
+};
+use crate::data::Claims;
+use crate::database::ExifDao;
+use crate::file_types::{is_image_file, is_video_file};
+use crate::geo::{forward_geocode, gps_bounding_box, haversine_distance};
+use crate::tags::TagDao;
+use actix_web::HttpResponse;
+use actix_web::web::{Data, Query};
+use serde::{Deserialize, Serialize};
+use std::collections::HashSet;
+use std::path::Path;
+use std::sync::Mutex;
+
+#[derive(Debug, Deserialize)]
+pub struct UnifiedQuery {
+    /// Natural-language query. Required; empty or blank triggers 400.
+    pub q: String,
+    #[serde(default = "default_limit")]
+    pub limit: usize,
+    #[serde(default)]
+    pub offset: usize,
+    /// CLIP cosine floor for the semantic ranking stage. Same default as the
+    /// plain search endpoint.
+    #[serde(default = "default_threshold")]
+    pub threshold: f32,
+    /// Legacy single-library scope (see clip_search).
+    pub library: Option<i32>,
+    /// Multi-library scope, comma-separated ids.
+    pub library_ids: Option<String>,
+    /// Optional translation-model override: the client's currently-selected local
+    /// model, so translation reuses what's loaded (no llama-swap eviction / cold
+    /// start). `UNIFIED_SEARCH_MODEL`, when set, takes precedence; when both are
+    /// absent the configured default local model is used. Local only — no hybrid.
+    pub model: Option<String>,
+}
+
+fn default_limit() -> usize {
+    20 // serde default for `UnifiedQuery::limit` when the param is omitted
+}
+fn default_threshold() -> f32 {
+    0.20 // serde default for `UnifiedQuery::threshold` when the param is omitted
+}
+
+/// The forward-geocoded place — a centre point plus radius, used as the GPS
+/// circle filter — echoed back so the client can show / edit what was searched.
+#[derive(Debug, Serialize)]
+struct ResolvedPlace {
+    display_name: String,
+    lat: f64,
+    lon: f64,
+    radius_km: f64,
+}
+
+/// How the server interpreted the NL query — echoed to the client to render
+/// editable filter chips. Tag ids map to the client's existing tag list.
+#[derive(Debug, Serialize)]
+struct Interpreted {
+    semantic: Option<String>,
+    tag_ids: Vec<i32>,
+    exclude_tag_ids: Vec<i32>,
+    /// Words the model treated as tags that don't exist in the vocab; folded
+    /// into the semantic term and surfaced here so the UI can explain it.
+    unmatched_tags: Vec<String>,
+    camera_make: Option<String>,
+    camera_model: Option<String>,
+    lens_model: Option<String>,
+    date_from: Option<i64>,
+    date_to: Option<i64>,
+    media_type: Option<String>, // "video" selects videos; any other value selects images
+    place: Option<ResolvedPlace>,
+}
+
+#[derive(Debug, Serialize)]
+struct UnifiedResponse {
+    query: String, // the trimmed `q` that was searched
+    interpreted: Interpreted,
+    /// CLIP model version used for ranking; `None` when the query had no
+    /// semantic term (filters-only).
+    model_version: Option<String>,
+    /// Embeddings scored by CLIP (0 when filters-only).
+    considered: usize,
+    /// Matches before pagination.
+    total_matching: usize,
+    offset: usize, // echo of the requested offset
+    results: Vec<SearchHit>, // the requested page of matches
+}
+
+#[derive(Debug, Serialize)]
+struct ErrorBody {
+    error: String, // message serialized as `{"error": "..."}` in error responses
+}
+
+fn bad_request(msg: impl Into<String>) -> HttpResponse {
+    HttpResponse::BadRequest().json(ErrorBody { error: msg.into() }) // 400 + JSON error body
+}
+
+/// Builds the text handed to CLIP: the model's semantic term followed by any
+/// tag words that missed the vocab, space-separated, so a hallucinated or
+/// non-vocab tag still acts as a soft semantic signal instead of vanishing.
+/// `None` when neither source contributes anything.
+fn effective_semantic(sq: &StructuredQuery) -> Option<String> {
+    // Semantic term first (an `Option` iterates as zero or one item), then
+    // the unmatched tag words in their original order.
+    let words: Vec<&str> = sq
+        .semantic
+        .iter()
+        .chain(sq.unmatched_tags.iter())
+        .map(String::as_str)
+        .collect();
+    (!words.is_empty()).then(|| words.join(" "))
+}
+
+/// Handler for `/photos/search/unified`: translates `q` with the local LLM,
+/// builds the structured candidate set from tag / EXIF / geo / media filters,
+/// then CLIP-ranks it, or lists it newest-first when there is no semantic term.
+/// Responds 400 for a bad library scope or an empty / unsearchable query, 503
+/// when the LLM backend cannot be resolved and 502 when translation fails.
+pub async fn unified_search<TagD: TagDao>(
+    _: Claims,
+    state: Data<AppState>,
+    exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
+    tag_dao: Data<Mutex<TagD>>,
+    query: Query<UnifiedQuery>,
+) -> HttpResponse {
+    let nl = query.q.trim().to_string();
+    if nl.is_empty() {
+        return bad_request("query parameter `q` is required");
+    }
+
+    let limit = query.limit.clamp(1, 200);
+    let offset = query.offset;
+    let threshold = query.threshold.clamp(-1.0, 1.0);
+
+    let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
+        Ok(ids) => ids,
+        Err(msg) => return bad_request(msg),
+    };
+
+    let ctx = opentelemetry::Context::current();
+
+    // ── 1. Translate the NL query, grounded on the real tag vocabulary ──
+    let tag_vocab: Vec<(i32, String)> = {
+        let mut dao = tag_dao.lock().expect("tag dao");
+        match dao.get_all_tags(&ctx, None) {
+            Ok(tags) => tags.into_iter().map(|(_, t)| (t.id, t.name)).collect(),
+            Err(e) => {
+                log::warn!("unified_search: get_all_tags failed: {e:?}");
+                Vec::new()
+            }
+        }
+    };
+
+    // Respect env/config for the LLM backend (LLM_BACKEND → ollama or
+    // llama-swap); local only, no hybrid, per the feature's design.
+    //
+    // Translation-model precedence:
+    //   1. UNIFIED_SEARCH_MODEL env — pin a small, fast model that can stay
+    //      co-resident with CLIP (and the chat model) so translation never
+    //      evicts them. This is the recommended setup on a tight VRAM budget.
+    //   2. the client-selected model — routes translation to whatever the user
+    //      already has loaded (no swap) when no dedicated model is pinned.
+    //   3. None → resolve_backend uses the configured default local model.
+    let translation_model = std::env::var("UNIFIED_SEARCH_MODEL")
+        .ok()
+        .filter(|m| !m.trim().is_empty())
+        .or_else(|| query.model.clone())
+        .filter(|m| !m.trim().is_empty());
+    let overrides = SamplingOverrides {
+        model: translation_model,
+        num_ctx: None,
+        temperature: None,
+        top_p: None,
+        top_k: None,
+        min_p: None,
+        enable_thinking: None,
+    };
+    let backend = match state
+        .insight_generator
+        .resolve_backend(BackendKind::Local, &overrides)
+        .await
+    {
+        Ok(b) => b,
+        Err(e) => {
+            log::warn!("unified_search: resolve_backend failed: {e:?}");
+            return HttpResponse::ServiceUnavailable().json(ErrorBody {
+                error: "LLM backend unavailable".into(),
+            });
+        }
+    };
+    log::info!("unified_search: translating with model={}", backend.model());
+
+    let today = chrono::Utc::now().date_naive();
+    let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await {
+        Ok(sq) => sq,
+        Err(e) => {
+            log::warn!("unified_search: translate_nl_query failed: {e:?}");
+            return HttpResponse::BadGateway().json(ErrorBody {
+                error: "could not interpret the query".into(),
+            });
+        }
+    };
+
+    // ── 2. Forward-geocode the place name into a gps circle ──
+    let resolved_place = match sq.place.as_deref() {
+        Some(p) => forward_geocode(p).await.map(|g| ResolvedPlace {
+            display_name: g.display_name,
+            lat: g.lat,
+            lon: g.lon,
+            radius_km: g.radius_km,
+        }),
+        None => None,
+    };
+    let gps = resolved_place.as_ref().map(|p| (p.lat, p.lon, p.radius_km));
+
+    let semantic = effective_semantic(&sq);
+
+    let has_exif_filter = sq.camera_make.is_some()
+        || sq.camera_model.is_some()
+        || sq.lens_model.is_some()
+        || sq.date_from.is_some()
+        || sq.date_to.is_some();
+    let has_struct =
+        has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some();
+
+    // Stage trace: what the model extracted + whether a structured filter is
+    // active. The chips show this to the user too, but logging it makes the
+    // "why no results" path debuggable from the server side.
+    log::info!(
+        "unified_search: q={nl:?} semantic={:?} tag_ids={:?} exclude={:?} place={:?} gps={:?} date=({:?},{:?}) media={:?} unmatched={:?} has_struct={has_struct}",
+        sq.semantic,
+        sq.tag_ids,
+        sq.exclude_tag_ids,
+        resolved_place.as_ref().map(|p| p.display_name.as_str()),
+        gps,
+        sq.date_from,
+        sq.date_to,
+        sq.media_type,
+        sq.unmatched_tags,
+    );
+
+    // ── 3. Build the structured candidate set (EXIF rows passing every
+    // filter). Skipped entirely for a pure-semantic query. ──
+    let mut candidate: Vec<crate::database::models::ImageExif> = Vec::new();
+    let mut allowed_hashes: HashSet<String> = HashSet::new();
+    if has_struct {
+        // Tag membership set (rel_path only — same cross-library imprecision
+        // as the existing /photos tag listing). ANY-mode: a photo matches if
+        // it carries any of the named tags; ALL-mode over-constrains NL
+        // queries, so the semantic term does the precision work instead.
+        // NOTE(review): exclude_tag_ids only applies when tag_ids is non-empty.
+        let tag_set: Option<HashSet<String>> = if sq.tag_ids.is_empty() {
+            None
+        } else {
+            let mut dao = tag_dao.lock().expect("tag dao");
+            match dao.get_files_with_any_tag_ids(
+                sq.tag_ids.clone(),
+                sq.exclude_tag_ids.clone(),
+                &ctx,
+            ) {
+                Ok(files) => Some(files.into_iter().map(|f| f.file_name).collect()),
+                Err(e) => {
+                    log::warn!("unified_search: tag filter failed: {e:?}");
+                    Some(HashSet::new())
+                }
+            }
+        };
+        log::info!(
+            "unified_search: tag_ids={:?} -> tag_set_files={:?}",
+            sq.tag_ids,
+            tag_set.as_ref().map(|s| s.len())
+        );
+
+        // EXIF query handles camera/lens/gps-box/date. With no EXIF filters
+        // it returns the whole table, which we then narrow by the predicates
+        // below (tags / media / scope). Fine at personal-library scale.
+        let gps_bounds = gps.map(|(lat, lon, r)| gps_bounding_box(lat, lon, r));
+        let rows = {
+            let mut dao = exif_dao.lock().expect("exif dao");
+            dao.query_by_exif(
+                &ctx,
+                None, // scope filtered in-Rust to support multi-library
+                sq.camera_make.as_deref(),
+                sq.camera_model.as_deref(),
+                sq.lens_model.as_deref(),
+                gps_bounds,
+                sq.date_from,
+                sq.date_to,
+            )
+            .unwrap_or_else(|e| {
+                log::warn!("unified_search: query_by_exif failed: {e:?}");
+                Vec::new()
+            })
+        };
+
+        candidate = rows
+            .into_iter()
+            .filter(|row| {
+                // Library scope.
+                if !library_ids.is_empty() && !library_ids.contains(&row.library_id) {
+                    return false;
+                }
+                // Precise GPS distance (the EXIF query only did a coarse box).
+                if let Some((lat, lon, radius_km)) = gps {
+                    match (row.gps_latitude, row.gps_longitude) {
+                        (Some(plat), Some(plon)) => {
+                            if haversine_distance(lat, lon, plat as f64, plon as f64) > radius_km {
+                                return false;
+                            }
+                        }
+                        _ => return false,
+                    }
+                }
+                // Media type.
+                if let Some(mt) = sq.media_type.as_deref() {
+                    let p = Path::new(&row.file_path);
+                    let ok = if mt == "video" {
+                        is_video_file(p)
+                    } else {
+                        is_image_file(p)
+                    };
+                    if !ok {
+                        return false;
+                    }
+                }
+                // Tag membership.
+                if let Some(ts) = &tag_set
+                    && !ts.contains(&row.file_path)
+                {
+                    return false;
+                }
+                true
+            })
+            .collect();
+
+        allowed_hashes = candidate
+            .iter()
+            .filter_map(|r| r.content_hash.clone())
+            .collect();
+        log::info!(
+            "unified_search: candidate_rows={} allowed_hashes={}",
+            candidate.len(),
+            allowed_hashes.len()
+        );
+    }
+
+    // ── 4. Rank ──
+    match semantic {
+        Some(ref sem) => {
+            // When structured filters are present they ARE the constraint —
+            // CLIP only ranks within the candidate set. So drop the global
+            // similarity threshold (it's tuned for whole-library search and
+            // would pre-discard filter-matching photos that scored just under
+            // it — e.g. a 2022 beach photo at 0.18 — before the intersection
+            // ever runs). With no filters, keep the user's threshold for the
+            // plain semantic case.
+            let clip_threshold = if has_struct { -1.0 } else { threshold };
+            let scored = match score_photos(
+                &state,
+                &exif_dao,
+                sem,
+                &library_ids,
+                clip_threshold,
+                None,
+            )
+            .await
+            {
+                Ok(s) => s,
+                Err(e) => return score_error_response(e),
+            };
+            let considered = scored.considered;
+            let clip_hits = scored.hits.len();
+            // Intersect in place: keep only hits whose hash passed the filters.
+            let mut hits = scored.hits;
+            if has_struct {
+                hits.retain(|(_, h)| allowed_hashes.contains(h));
+            }
+            log::info!(
+                "unified_search: clip considered={considered} hits={clip_hits} after_struct_filter={}",
+                hits.len()
+            );
+            let total_matching = hits.len();
+            let page = paginate(&hits, offset, limit);
+            let results = resolve_hits(&exif_dao, &page);
+            HttpResponse::Ok().json(UnifiedResponse {
+                query: nl,
+                interpreted: interpreted(&sq, resolved_place),
+                model_version: Some(scored.model_version),
+                considered: scored.considered,
+                total_matching,
+                offset,
+                results,
+            })
+        }
+        None => {
+            // Filters-only: no semantic term. Require at least one filter,
+            // then return the candidate set newest-first.
+            if !has_struct {
+                return bad_request("query had no searchable terms");
+            }
+            candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken));
+            let total_matching = candidate.len();
+            log::info!("unified_search: filters-only matches={total_matching}");
+            // skip/take instead of slicing at `offset + limit`: a huge
+            // client-supplied offset yields an empty page, never an overflow.
+            let results: Vec<SearchHit> = candidate
+                .into_iter()
+                .skip(offset)
+                .take(limit)
+                .map(|r| SearchHit {
+                    library_id: r.library_id,
+                    rel_path: r.file_path,
+                    content_hash: r.content_hash.unwrap_or_default(),
+                    score: 0.0,
+                })
+                .collect();
+            HttpResponse::Ok().json(UnifiedResponse {
+                query: nl,
+                interpreted: interpreted(&sq, resolved_place),
+                model_version: None,
+                considered: 0,
+                total_matching,
+                offset,
+                results,
+            })
+        }
+    }
+}
+
+/// Return an owned copy of the page `[offset, offset+limit)` of an ordered hit
+/// list; an offset past the end gives an empty page, `limit` clamps to the end.
+fn paginate(hits: &[(f32, String)], offset: usize, limit: usize) -> Vec<(f32, String)> {
+    // Saturating add: with a very large `limit`, `offset + limit` would overflow
+    // `usize` (debug panic, or release wrap-around to an `end` below `offset`).
+    let end = offset.saturating_add(limit).min(hits.len());
+    // `get` yields `None` exactly when `offset` lies beyond `end`.
+    hits.get(offset..end).map(|page| page.to_vec()).unwrap_or_default()
+}
+
+fn interpreted(sq: &StructuredQuery, place: Option<ResolvedPlace>) -> Interpreted {
+    Interpreted {
+        semantic: sq.semantic.clone(), // `sq` is only borrowed, so owned fields are cloned
+        tag_ids: sq.tag_ids.clone(),
+        exclude_tag_ids: sq.exclude_tag_ids.clone(),
+        unmatched_tags: sq.unmatched_tags.clone(),
+        camera_make: sq.camera_make.clone(),
+        camera_model: sq.camera_model.clone(),
+        lens_model: sq.lens_model.clone(),
+        date_from: sq.date_from, // the two date bounds are copied without `.clone()`
+        date_to: sq.date_to,
+        media_type: sq.media_type.clone(),
+        place, // the only field taken from the caller rather than from `sq`
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ai::nl_query::StructuredQuery;
+
+    #[test]
+    fn effective_semantic_combines_semantic_and_unmatched() {
+        // Semantic term and unmatched tag both set.
+        let query = StructuredQuery {
+            unmatched_tags: vec!["golden hour".into()],
+            semantic: Some("sunset".into()),
+            ..Default::default()
+        };
+        let merged = effective_semantic(&query);
+        assert_eq!(merged.as_deref(), Some("sunset golden hour"));
+    }
+
+    #[test]
+    fn effective_semantic_none_when_empty() {
+        // Default query: no semantic text is expected back.
+        assert_eq!(effective_semantic(&StructuredQuery::default()), None);
+    }
+
+    #[test]
+    fn effective_semantic_unmatched_only() {
+        // Only an unmatched tag is set.
+        let query = StructuredQuery {
+            unmatched_tags: vec!["disco".into()],
+            ..Default::default()
+        };
+        assert_eq!(effective_semantic(&query).as_deref(), Some("disco"));
+    }
+
+    #[test]
+    fn paginate_handles_out_of_range_offset() {
+        let ranked = vec![(0.9, "a".to_string()), (0.8, "b".to_string())];
+        assert!(paginate(&ranked, 5, 10).is_empty()); // offset past the end
+        assert_eq!(paginate(&ranked, 0, 1).len(), 1);
+        assert_eq!(paginate(&ranked, 1, 10).len(), 1);
+    }
+}
diff --git a/src/video/ffmpeg.rs b/src/video/ffmpeg.rs
index d385cac..019bd86 100644
--- a/src/video/ffmpeg.rs
+++ b/src/video/ffmpeg.rs
@@ -231,7 +231,7 @@ impl Ffmpeg {
 /// a hard failure — previously the `parse::<f64>` on empty stdout produced
 /// "cannot parse float from empty string" and poisoned the preview-clip row
 /// with status=failed, which the watcher would re-queue every full scan.
-async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
+pub async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
     if let Some(d) = probe_duration(input_file, "format=duration").await? {
         return Ok(Some(d));
     }