feat: nightly agentic pre-generation of memory reels

Implement end-to-end nightly pre-generation of memory reels with agentic scripting that grounds narration in calendar, location, messages, and RAG. Sections A-E from the plan: A. Extract produce_reel pipeline core from run_reel_job with ScripterMode::Fast/Agentic and progress callbacks. B. Agentic scripter: factor run_readonly_tool_loop from the insight generator, build read-only tool gate, prompt builder with GPS, and generate_script_agentic with fallback to fast path. C. Precomputed reels ledger (SQLite table + DAO), GET /reels/precomputed handler with validity gate, GET /reels/by-key/{key}/video streaming, and normalize_library_key helper. D. Nightly scheduler: spawn_pregen_scheduler with configurable hour, run_pregen_batch (day/week/month spans), pregen_one with dedup and disk-check, secs_until_next_run_hour time math. E. user_ai_prefs passive mirror table + DAO for param capture in create_reel_handler and replay in the scheduler. Also fixes resolve_library_param signature to take &[Library] and adds resolve_library_param_state wrapper for AppState callers. New files: migrations/2026-06-13-000000_add_precomputed_reels/, migrations/2026-06-13-000010_add_user_ai_prefs/, src/database/precomputed_reel_dao.rs, src/database/user_ai_prefs_dao.rs
2026-06-13 14:29:34 -04:00
parent b30c8c16d0
commit f707353807
26 changed files with 1825 additions and 153 deletions
@@ -9,13 +9,20 @@
 //!
 //! The prompt builder and response parser are pure so the contract is
 //! unit-testable; `generate_script` wires them to the LLM client.
+//!
+//! The agentic scripter (pre-generation) resolves the backend through the
+//! InsightGenerator, builds a read-only tool set, and runs a tool loop to
+//! ground the narration in retrieved context before asking for the final JSON.

 use anyhow::{Context, Result};
 use std::sync::Arc;

 use super::{PlannedBeat, ReelMeta};
+use crate::ai::backend::{BackendKind, SamplingOverrides};
+use crate::ai::insight_generator::InsightGenerator;
 use crate::ai::llamacpp::LlamaCppClient;
-use crate::ai::llm_client::LlmClient;
+use crate::ai::llm_client::{LlmClient, Tool};
+use crate::ai::ollama::ChatMessage;

 /// The narration for a whole reel: a title and one line per beat, in order.
 #[derive(Debug, Clone, PartialEq)]
@@ -35,6 +42,32 @@ can be read aloud in a few seconds. Avoid generic filler like \"what a \
 wonderful day\" — if you have little to go on, simply describe the moment \
 plainly.";

+/// System prompt for the agentic scripter: narration rules followed by permission to call
+/// read-only tools. NOTE(review): names 7 tools; `generate_script_agentic` allowlists 13 — confirm.
+const AGENTIC_SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
+slideshow of someone's own photos set to a spoken voiceover. Write warm, \
+specific, first-person narration as if the person is gently looking back on \
+their own memories. Each line plays over one moment, which may be a quick burst \
+of several photos, so narrate the moment as a whole rather than a single frame. \
+Be concrete and grounded in the details given; never invent names, places, or \
+events that aren't supported. Keep each line to one or two short sentences that \
+can be read aloud in a few seconds. Avoid generic filler like \"what a \
+wonderful day\" — if you have little to go on, simply describe the moment \
+plainly.\n\nYou may call read-only tools (search_messages, get_file_tags, \
+reverse_geocode, get_current_datetime, recall_entities, recall_facts_for_photo, \
+recall_facts_for_entity) to ground each line in real context. Never invent \
+details. Return ONLY the JSON object, no prose or code fences.";
+
+/// Maximum agentic tool iterations for pre-generation. Read from the env var
+/// `REEL_PREGEN_MAX_TOOL_ITERS`; unset, unparsable, or zero values fall back to 8.
+fn reel_pregen_max_tool_iters() -> usize {
+    std::env::var("REEL_PREGEN_MAX_TOOL_ITERS")
+        .ok()
+        .and_then(|s| s.trim().parse::<usize>().ok())
+        .filter(|x| *x > 0)
+        .unwrap_or(8)
+}
+
 /// Build the (system, user) prompt pair for the scripter. The user message
 /// describes each beat in order and asks for strict JSON back.
 pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) {
@@ -81,6 +114,61 @@ pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String,
    (SYSTEM_PROMPT.to_string(), user)
 }

+/// Build the `[system, user]` chat messages for the agentic scripter. The system
+/// message is `AGENTIC_SYSTEM_PROMPT`, which allows read-only tool calls. The user
+/// message lists each beat in order (date, clip/burst marker, known context), adds a
+/// GPS line when `beat.gps` is set, and asks for strict JSON with one segment per beat.
+pub fn build_agentic_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> Vec<ChatMessage> {
+    let mut user = String::new();
+    user.push_str(&format!(
+        "This reel has {} moments surfaced as memories {}.\n\n",
+        beats.len(),
+        meta.span_phrase()
+    ));
+    if !meta.years.is_empty() {
+        let years: Vec<String> = meta.years.iter().map(|y| y.to_string()).collect();
+        user.push_str(&format!("They span the years: {}.\n\n", years.join(", ")));
+    }
+    user.push_str("Moments, in the order they will appear:\n");
+    for (i, beat) in beats.iter().enumerate() {
+        user.push_str(&format!("\n[{}]", i + 1));
+        if let Some(date) = beat.date_label() {
+            user.push_str(&format!(" {date}"));
+        }
+        if beat.is_clip() {
+            user.push_str(" (a video clip)");
+        } else if beat.media.len() > 1 {
+            user.push_str(&format!(" (a burst of {} photos)", beat.media.len()));
+        }
+        if let Some((lat, lon)) = beat.gps {
+            user.push_str(&format!("\n  GPS: {:.4}, {:.4}", lat, lon));
+        }
+        user.push('\n');
+        match (&beat.insight_title, &beat.insight_summary) {
+            (Some(t), Some(s)) if !s.trim().is_empty() => {
+                user.push_str(&format!("  Known context: {t} — {s}\n"));
+            }
+            (Some(t), _) => user.push_str(&format!("  Known context: {t}\n")),
+            (_, Some(s)) if !s.trim().is_empty() => {
+                user.push_str(&format!("  Known context: {s}\n"));
+            }
+            _ => user.push_str("  (no extra context — narrate plainly from the date)\n"),
+        }
+    }
+    user.push_str(&format!(
+        "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
+         {{\"title\": \"<short reel title>\", \"segments\": [\"<line for moment 1>\", \
+         \"<line for moment 2>\", ... ]}}\n\
+         The \"segments\" array MUST have exactly {} items, one per moment in order.",
+        beats.len()
+    ));
+
+    vec![
+        ChatMessage::system(AGENTIC_SYSTEM_PROMPT.to_string()),
+        ChatMessage::user(user),
+    ]
+}
+
 /// Parse the model's response into a script with exactly `n` lines. Tolerant of
 /// code fences and surrounding prose, and of both `segments: [".."]` and
 /// `segments: [{"narration": ".."}]` shapes. Missing/extra lines are padded or
@@ -198,6 +286,74 @@ pub async fn generate_script(
    Ok(parse_script_response(&raw, beats.len()))
 }

+/// Agentic version of script generation: resolves a `BackendKind::Local` backend
+/// via the InsightGenerator with no sampling overrides, builds a read-only tool
+/// set, runs the tool loop, strips think-blocks, then parses the JSON response.
+/// Returns the same ReelScript shape. Backend or tool-loop errors are returned
+/// with context so the caller may fall back to `generate_script`.
+pub async fn generate_script_agentic(
+    generator: &InsightGenerator,
+    meta: &ReelMeta,
+    beats: &[PlannedBeat],
+) -> Result<ReelScript> {
+    // 1. Resolve the local backend (no overrides). NOTE(review): no tool-calling check is visible here — confirm resolve_backend enforces it.
+    let backend = generator
+        .resolve_backend(
+            BackendKind::Local,
+            &SamplingOverrides {
+                model: None,
+                num_ctx: None,
+                temperature: None,
+                top_p: None,
+                top_k: None,
+                min_p: None,
+            },
+        )
+        .await
+        .context("resolving backend for agentic script")?;
+
+    // 2. Build the read-only tool set. Start from the persona gate opts, called
+    //    with (false, None) — presumably has_vision=false and no persona; confirm —
+    //    then keep only the tools whose name appears in the allowlist below.
+    let gate = generator.current_gate_opts_for_persona(false, None);
+    let all_tools = InsightGenerator::build_tool_definitions(gate);
+    let read_only_names: std::collections::HashSet<&str> = [
+        "search_rag",
+        "search_messages",
+        "get_sms_messages",
+        "get_calendar_events",
+        "get_location_history",
+        "get_file_tags",
+        "get_faces_in_photo",
+        "reverse_geocode",
+        "get_personal_place_at",
+        "recall_entities",
+        "recall_facts_for_photo",
+        "recall_facts_for_entity",
+        "get_current_datetime",
+    ]
+    .into_iter()
+    .collect();
+    let tools: Vec<Tool> = all_tools
+        .into_iter()
+        .filter(|t| read_only_names.contains(t.function.name.as_str()))
+        .collect();
+
+    // 3. Build the agentic prompt messages.
+    let messages = build_agentic_script_messages(meta, beats);
+
+    // 4. Run the tool loop, capped at the env-tunable iteration limit.
+    let max_iter = reel_pregen_max_tool_iters();
+    let raw = generator
+        .run_readonly_tool_loop(&backend, messages, tools, max_iter)
+        .await
+        .context("agentic tool loop failed")?;
+
+    // 5. Strip any think-blocks the model may have emitted, then parse.
+    let raw = crate::ai::llm_client::strip_think_blocks(&raw);
+    Ok(parse_script_response(&raw, beats.len()))
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -220,6 +376,7 @@ mod tests {
                date: Some(1_560_000_000 + i as i64 * 86_400),
                insight_title: None,
                insight_summary: None,
+                gps: None,
            })
            .collect()
    }