feat: nightly agentic pre-generation of memory reels
Implement end-to-end nightly pre-generation of memory reels with agentic
scripting that grounds narration in calendar, location, messages, and RAG.
Sections A-E from the plan:
A. Extract produce_reel pipeline core from run_reel_job with
ScripterMode::Fast/Agentic and progress callbacks.
B. Agentic scripter: factor run_readonly_tool_loop from the insight
generator, build read-only tool gate, prompt builder with GPS, and
generate_script_agentic with fallback to fast path.
C. Precomputed reels ledger (SQLite table + DAO), GET /reels/precomputed
handler with validity gate, GET /reels/by-key/{key}/video streaming,
and normalize_library_key helper.
D. Nightly scheduler: spawn_pregen_scheduler with configurable hour,
run_pregen_batch (day/week/month spans), pregen_one with dedup and
disk-check, secs_until_next_run_hour time math.
E. user_ai_prefs passive mirror table + DAO for param capture in
create_reel_handler and replay in the scheduler.
Also fixes resolve_library_param signature to take &[Library] and adds
resolve_library_param_state wrapper for AppState callers.
New files: migrations/2026-06-13-000000_add_precomputed_reels/,
migrations/2026-06-13-000010_add_user_ai_prefs/,
src/database/precomputed_reel_dao.rs,
src/database/user_ai_prefs_dao.rs
This commit is contained in:
+158
-1
@@ -9,13 +9,20 @@
|
||||
//!
|
||||
//! The prompt builder and response parser are pure so the contract is
|
||||
//! unit-testable; `generate_script` wires them to the LLM client.
|
||||
//!
|
||||
//! The agentic scripter (pre-generation) resolves the backend through the
|
||||
//! InsightGenerator, builds a read-only tool set, and runs a tool loop to
|
||||
//! ground the narration in retrieved context before asking for the final JSON.
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::{PlannedBeat, ReelMeta};
|
||||
use crate::ai::backend::{BackendKind, SamplingOverrides};
|
||||
use crate::ai::insight_generator::InsightGenerator;
|
||||
use crate::ai::llamacpp::LlamaCppClient;
|
||||
use crate::ai::llm_client::LlmClient;
|
||||
use crate::ai::llm_client::{LlmClient, Tool};
|
||||
use crate::ai::ollama::ChatMessage;
|
||||
|
||||
/// The narration for a whole reel: a title and one line per beat, in order.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
@@ -35,6 +42,32 @@ can be read aloud in a few seconds. Avoid generic filler like \"what a \
|
||||
wonderful day\" — if you have little to go on, simply describe the moment \
|
||||
plainly.";
|
||||
|
||||
/// System prompt for the agentic (pre-generation) scripter.
///
/// The first paragraph sets the narrator persona and the style rules for each
/// line. The second paragraph, after a blank line, tells the model it may call
/// read-only tools to ground each line and must reply with the JSON object
/// only.
const AGENTIC_SYSTEM_PROMPT: &str = concat!(
    // Persona and voice.
    "You are narrating a personal memory reel — a short slideshow of someone's own photos ",
    "set to a spoken voiceover. ",
    "Write warm, specific, first-person narration as if the person is gently looking back ",
    "on their own memories. ",
    // One line per moment; a moment may be a burst of photos.
    "Each line plays over one moment, which may be a quick burst of several photos, ",
    "so narrate the moment as a whole rather than a single frame. ",
    // Grounding and length rules.
    "Be concrete and grounded in the details given; never invent names, places, or events ",
    "that aren't supported. ",
    "Keep each line to one or two short sentences that can be read aloud in a few seconds. ",
    "Avoid generic filler like \"what a wonderful day\" — if you have little to go on, ",
    "simply describe the moment plainly.",
    "\n\n",
    // Tool permission and output contract.
    "You may call read-only tools (search_messages, get_file_tags, reverse_geocode, ",
    "get_current_datetime, recall_entities, recall_facts_for_photo, ",
    "recall_facts_for_entity) to ground each line in real context. ",
    "Never invent details. ",
    "Return ONLY the JSON object, no prose or code fences.",
);
|
||||
|
||||
/// Upper bound on agentic tool-loop iterations during pre-generation.
///
/// Reads `REEL_PREGEN_MAX_TOOL_ITERS` from the environment. The value is
/// trimmed and parsed as a `usize`; if the variable is unset, unparsable, or
/// zero, the default of 8 is used.
fn reel_pregen_max_tool_iters() -> usize {
    const DEFAULT_MAX_ITERS: usize = 8;

    match std::env::var("REEL_PREGEN_MAX_TOOL_ITERS") {
        Ok(raw) => match raw.trim().parse::<usize>() {
            // Zero would disable the loop entirely, so treat it as "unset".
            Ok(iters) if iters > 0 => iters,
            _ => DEFAULT_MAX_ITERS,
        },
        Err(_) => DEFAULT_MAX_ITERS,
    }
}
|
||||
|
||||
/// Build the (system, user) prompt pair for the scripter. The user message
|
||||
/// describes each beat in order and asks for strict JSON back.
|
||||
pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) {
|
||||
@@ -81,6 +114,61 @@ pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String,
|
||||
(SYSTEM_PROMPT.to_string(), user)
|
||||
}
|
||||
|
||||
/// Build a richer (system, user) prompt pair for the agentic scripter. The
|
||||
/// system prompt tells the model it may call read-only tools to ground each
|
||||
/// line. The user message uses the same per-beat enumeration as
|
||||
/// `build_script_messages` plus a GPS line per beat when available.
|
||||
pub fn build_agentic_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> Vec<ChatMessage> {
|
||||
let mut user = String::new();
|
||||
user.push_str(&format!(
|
||||
"This reel has {} moments surfaced as memories {}.\n\n",
|
||||
beats.len(),
|
||||
meta.span_phrase()
|
||||
));
|
||||
if !meta.years.is_empty() {
|
||||
let years: Vec<String> = meta.years.iter().map(|y| y.to_string()).collect();
|
||||
user.push_str(&format!("They span the years: {}.\n\n", years.join(", ")));
|
||||
}
|
||||
user.push_str("Moments, in the order they will appear:\n");
|
||||
for (i, beat) in beats.iter().enumerate() {
|
||||
user.push_str(&format!("\n[{}]", i + 1));
|
||||
if let Some(date) = beat.date_label() {
|
||||
user.push_str(&format!(" {date}"));
|
||||
}
|
||||
if beat.is_clip() {
|
||||
user.push_str(" (a video clip)");
|
||||
} else if beat.media.len() > 1 {
|
||||
user.push_str(&format!(" (a burst of {} photos)", beat.media.len()));
|
||||
}
|
||||
if let Some((lat, lon)) = beat.gps {
|
||||
user.push_str(&format!("\n GPS: {:.4}, {:.4}", lat, lon));
|
||||
}
|
||||
user.push('\n');
|
||||
match (&beat.insight_title, &beat.insight_summary) {
|
||||
(Some(t), Some(s)) if !s.trim().is_empty() => {
|
||||
user.push_str(&format!(" Known context: {t} — {s}\n"));
|
||||
}
|
||||
(Some(t), _) => user.push_str(&format!(" Known context: {t}\n")),
|
||||
(_, Some(s)) if !s.trim().is_empty() => {
|
||||
user.push_str(&format!(" Known context: {s}\n"));
|
||||
}
|
||||
_ => user.push_str(" (no extra context — narrate plainly from the date)\n"),
|
||||
}
|
||||
}
|
||||
user.push_str(&format!(
|
||||
"\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
|
||||
{{\"title\": \"<short reel title>\", \"segments\": [\"<line for moment 1>\", \
|
||||
\"<line for moment 2>\", ... ]}}\n\
|
||||
The \"segments\" array MUST have exactly {} items, one per moment in order.",
|
||||
beats.len()
|
||||
));
|
||||
|
||||
vec![
|
||||
ChatMessage::system(AGENTIC_SYSTEM_PROMPT.to_string()),
|
||||
ChatMessage::user(user),
|
||||
]
|
||||
}
|
||||
|
||||
/// Parse the model's response into a script with exactly `n` lines. Tolerant of
|
||||
/// code fences and surrounding prose, and of both `segments: [".."]` and
|
||||
/// `segments: [{"narration": ".."}]` shapes. Missing/extra lines are padded or
|
||||
@@ -198,6 +286,74 @@ pub async fn generate_script(
|
||||
Ok(parse_script_response(&raw, beats.len()))
|
||||
}
|
||||
|
||||
/// Agentic version of script generation: resolves the backend via the
|
||||
/// InsightGenerator (honouring LLM_BACKEND, model overrides, etc.), builds
|
||||
/// a read-only tool set, runs the tool loop, then parses the JSON response.
|
||||
/// Returns the same ReelScript shape. On failure the caller may fall back to
|
||||
/// `generate_script`.
|
||||
pub async fn generate_script_agentic(
|
||||
generator: &InsightGenerator,
|
||||
meta: &ReelMeta,
|
||||
beats: &[PlannedBeat],
|
||||
) -> Result<ReelScript> {
|
||||
// 1. Resolve the backend. Bail if the local model lacks tool-calling.
|
||||
let backend = generator
|
||||
.resolve_backend(
|
||||
BackendKind::Local,
|
||||
&SamplingOverrides {
|
||||
model: None,
|
||||
num_ctx: None,
|
||||
temperature: None,
|
||||
top_p: None,
|
||||
top_k: None,
|
||||
min_p: None,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.context("resolving backend for agentic script")?;
|
||||
|
||||
// 2. Build the read-only tool set. Start from the persona gate (no
|
||||
// persona context, so corrections are closed), force has_vision=false,
|
||||
// then filter out write tools.
|
||||
let gate = generator.current_gate_opts_for_persona(false, None);
|
||||
let all_tools = InsightGenerator::build_tool_definitions(gate);
|
||||
let read_only_names: std::collections::HashSet<&str> = [
|
||||
"search_rag",
|
||||
"search_messages",
|
||||
"get_sms_messages",
|
||||
"get_calendar_events",
|
||||
"get_location_history",
|
||||
"get_file_tags",
|
||||
"get_faces_in_photo",
|
||||
"reverse_geocode",
|
||||
"get_personal_place_at",
|
||||
"recall_entities",
|
||||
"recall_facts_for_photo",
|
||||
"recall_facts_for_entity",
|
||||
"get_current_datetime",
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let tools: Vec<Tool> = all_tools
|
||||
.into_iter()
|
||||
.filter(|t| read_only_names.contains(t.function.name.as_str()))
|
||||
.collect();
|
||||
|
||||
// 3. Build the agentic prompt messages.
|
||||
let messages = build_agentic_script_messages(meta, beats);
|
||||
|
||||
// 4. Run the tool loop.
|
||||
let max_iter = reel_pregen_max_tool_iters();
|
||||
let raw = generator
|
||||
.run_readonly_tool_loop(&backend, messages, tools, max_iter)
|
||||
.await
|
||||
.context("agentic tool loop failed")?;
|
||||
|
||||
// 5. Strip any think-blocks the model may have emitted, then parse.
|
||||
let raw = crate::ai::llm_client::strip_think_blocks(&raw);
|
||||
Ok(parse_script_response(&raw, beats.len()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -220,6 +376,7 @@ mod tests {
|
||||
date: Some(1_560_000_000 + i as i64 * 86_400),
|
||||
insight_title: None,
|
||||
insight_summary: None,
|
||||
gps: None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user