From e3f731b3b26eb7f136868921a01ba2370366de4d Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 22:31:08 -0400 Subject: [PATCH 01/17] Add memory-reel backend: on-demand narrated photo slideshow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New POST /reels + GET /reels/{id} (+ /video) build an MP4 slideshow of a memory span (day/week/month), narrated by the LLM in a cloned voice. Pipeline (src/reels/): a selector resolves which photos + reel metadata, the scripter writes one narration line per photo via a single LLM call (reusing each photo's cached insight as context — no fresh vision calls, so reel generation stays off the GPU's vision slot), each line is synthesized to speech, and the renderer assembles stills + narration via ffmpeg. Jobs run in the background (mirroring the TTS speech-job registry) since a reel takes minutes; the finished MP4 is cached on disk keyed by the selection so a repeat request is instant. The segment model is media-typed (Photo today) so a video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in without reworking the pipeline. Ken Burns motion is implemented but defaulted off pending a visual check on the GPU box. Supporting changes: - memories: extract gather_memory_items() so the reel selector reuses the exact window/exclusion/tz/sort logic behind /memories. - ai::tts: add synthesize_serialized() so reel narration honors the same single-GPU permit + write lease as user TTS requests. - video::ffmpeg: make get_duration_seconds() pub for narration timing. - AppState: reels_path (REELS_DIRECTORY, defaults beside preview clips). Pure logic (cache key, script parsing, ffmpeg arg/filter construction, even sampling, segment timing) is unit-tested (26 tests). The runtime path (ffmpeg render, TTS, LLM) needs a real run on the GPU host to verify end-to-end — not exercisable in CI. 
Co-Authored-By: Claude Fable 5 --- src/ai/tts.rs | 31 +++ src/main.rs | 4 + src/memories.rs | 86 ++++-- src/reels/mod.rs | 625 ++++++++++++++++++++++++++++++++++++++++++ src/reels/render.rs | 338 +++++++++++++++++++++++ src/reels/script.rs | 289 +++++++++++++++++++ src/reels/selector.rs | 252 +++++++++++++++++ src/state.rs | 18 ++ src/video/ffmpeg.rs | 2 +- 9 files changed, 1615 insertions(+), 30 deletions(-) create mode 100644 src/reels/mod.rs create mode 100644 src/reels/render.rs create mode 100644 src/reels/script.rs create mode 100644 src/reels/selector.rs diff --git a/src/ai/tts.rs b/src/ai/tts.rs index 08d9dcd..4e7544c 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -23,6 +23,7 @@ use std::time::{Duration, Instant}; use tokio::sync::Semaphore; use uuid::Uuid; +use crate::ai::llamacpp::LlamaCppClient; use crate::data::Claims; use crate::file_types::{is_audio_file, is_video_file}; use crate::files::is_valid_full_path; @@ -473,6 +474,36 @@ pub struct TtsJobStatusResponse { pub error: Option, } +/// Synthesize speech honoring the global single-GPU serialization +/// (`TTS_PERMIT`) and the GPU write lease, exactly as the speech-job path does. +/// Queues on the permit rather than fast-failing, so callers wait their turn +/// instead of contending. Text is run through the same markdown/emoji cleanup + +/// pronunciation pipeline as the HTTP handlers. Reused by the memory-reel +/// pipeline to narrate each segment without racing a user's TTS request on the +/// Chatterbox GPU. +pub async fn synthesize_serialized( + client: &LlamaCppClient, + text: &str, + voice: Option<&str>, + format: &str, +) -> anyhow::Result> { + let prepared = prepare_for_tts(text); + if prepared.is_empty() { + anyhow::bail!("nothing to synthesize after cleanup"); + } + // Queue rather than fast-fail (mirrors create_speech_job_handler). 
+ let _permit = TTS_PERMIT + .acquire() + .await + .map_err(|_| anyhow::anyhow!("TTS permit closed"))?; + // Wait for the LLM side to release the GPU before the request timeout + // starts (see ai::gpu). + let _gpu = crate::ai::gpu::tts_lease().await; + client + .text_to_speech(&prepared, voice, format, None, None, None) + .await +} + /// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses. /// Returns 202 + a job id immediately; the synth queues on the single GPU /// permit (instead of fast-failing 429) and the client polls the job until diff --git a/src/main.rs b/src/main.rs index 8b56efd..b059e9b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -62,6 +62,7 @@ mod knowledge; mod memories; mod otel; mod personas; +mod reels; mod service; #[cfg(test)] mod testhelpers; @@ -344,6 +345,9 @@ fn main() -> std::io::Result<()> { .service(handlers::image::clear_image_date) .service(handlers::image::get_full_exif) .service(memories::list_memories) + .service(reels::create_reel_handler) + .service(reels::reel_status_handler) + .service(reels::reel_video_handler) .service(ai::generate_insight_handler) .service(ai::generate_agentic_insight_handler) .service(ai::generation_status_handler) diff --git a/src/memories.rs b/src/memories.rs index 4b1682b..c877981 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -349,12 +349,6 @@ pub async fn list_memories( opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); let span_mode = q.span.unwrap_or(MemoriesSpan::Day); - let span_token = match span_mode { - MemoriesSpan::Day => "day", - MemoriesSpan::Week => "week", - MemoriesSpan::Month => "month", - }; - let years_back: i32 = DEFAULT_YEARS_BACK; // The SQL filter expects a signed offset in minutes from UTC; default // 0 (UTC) when the client didn't send a hint. 
We also keep a chrono @@ -366,18 +360,66 @@ pub async fn list_memories( .timezone_offset_minutes .and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60)); - debug!( - "list_memories: span={:?} tz_offset_min={} years_back={}", - span_mode, tz_offset_minutes, years_back - ); - - let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { - Ok(lib) => lib, + let items = match gather_memory_items( + &app_state, + &exif_dao, + &span_context, + span_mode, + tz_offset_minutes, + client_timezone, + q.library.as_deref(), + ) { + Ok(items) => items, Err(msg) => { warn!("Rejecting /memories request: {}", msg); return HttpResponse::BadRequest().body(msg); } }; + + span.add_event( + "memories_scanned", + vec![ + KeyValue::new("span", format!("{:?}", span_mode)), + KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()), + KeyValue::new("result_count", items.len().to_string()), + KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), + KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), + ], + ); + span.set_status(Status::Ok); + + HttpResponse::Ok().json(MemoriesResponse { items }) +} + +/// Resolve an "on this day/week/month across past years" window into an +/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the +/// memory-reel selector so both honour the same library resolution, per-library +/// exclusions, timezone handling, and sort order. Returns `Err(message)` only +/// when the `library` param is invalid (callers map that to 400); per-library +/// query/lock failures are logged and skipped, matching the handler's +/// best-effort behaviour. 
+pub fn gather_memory_items( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + span_mode: MemoriesSpan, + tz_offset_minutes: i32, + client_timezone: Option, + library_param: Option<&str>, +) -> Result, String> { + let span_token = match span_mode { + MemoriesSpan::Day => "day", + MemoriesSpan::Week => "week", + MemoriesSpan::Month => "month", + }; + let years_back: i32 = DEFAULT_YEARS_BACK; + + debug!( + "gather_memory_items: span={:?} tz_offset_min={} years_back={}", + span_mode, tz_offset_minutes, years_back + ); + + let library = crate::libraries::resolve_library_param(app_state, library_param)?; let libraries_to_scan: Vec<&crate::libraries::Library> = match library { Some(lib) => vec![lib], None => app_state.libraries.iter().collect(), @@ -394,7 +436,7 @@ pub async fn list_memories( let rows = match exif_dao.lock() { Ok(mut dao) => match dao.get_memories_in_window( - &span_context, + span_context, lib.id, span_token, years_back, @@ -469,21 +511,7 @@ pub async fn list_memories( } } - let items: Vec = memories_with_dates.into_iter().map(|(m, _)| m).collect(); - - span.add_event( - "memories_scanned", - vec![ - KeyValue::new("span", format!("{:?}", span_mode)), - KeyValue::new("years_back", years_back.to_string()), - KeyValue::new("result_count", items.len().to_string()), - KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), - KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), - ], - ); - span.set_status(Status::Ok); - - HttpResponse::Ok().json(MemoriesResponse { items }) + Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect()) } #[cfg(test)] diff --git a/src/reels/mod.rs b/src/reels/mod.rs new file mode 100644 index 0000000..fe270f8 --- /dev/null +++ b/src/reels/mod.rs @@ -0,0 +1,625 @@ +//! Memory reels: render an MP4 slideshow of a selection of photos with an +//! LLM-written, voice-cloned narration over it. +//! +//! 
Pipeline: a [`selector`] resolves *which* photos (and the reel metadata), +//! the [`script`] module writes per-photo narration via the LLM, each line is +//! synthesized to speech, and [`render`] assembles the stills + narration into +//! one MP4. Jobs run in the background (mirroring the TTS speech-job registry) +//! because a reel takes minutes; the finished MP4 is cached on disk keyed by +//! the selection so a repeat request is instant. +//! +//! Phase 1 is on-demand and photos-only. The segment model is media-typed so a +//! video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in +//! without reworking the pipeline. + +pub mod render; +pub mod script; +pub mod selector; + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::{LazyLock, Mutex as StdMutex}; +use std::time::{Duration, Instant}; + +use actix_files::NamedFile; +use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web}; +use chrono::DateTime; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use std::sync::Mutex; +use uuid::Uuid; + +use crate::data::Claims; +use crate::database::{ExifDao, InsightDao}; +use crate::memories::MemoriesSpan; +use crate::otel::extract_context_from_request; +use crate::state::AppState; +use selector::ReelSelector; + +/// The media behind one reel segment. Photos-only for now; a `Clip` variant +/// (a section of a source video) is the phase-2 extension point. +#[derive(Debug, Clone)] +pub enum SegmentMedia { + Photo { rel_path: String, library_id: i32 }, +} + +/// A segment before narration: which photo, when it was taken, and any cached +/// insight to feed the scripter. +#[derive(Debug, Clone)] +pub struct PlannedSegment { + pub media: SegmentMedia, + pub date: Option, + pub insight_title: Option, + pub insight_summary: Option, +} + +impl PlannedSegment { + /// Human date for the prompt, e.g. "June 12, 2019". `None` when undated. 
+ pub fn date_label(&self) -> Option { + let ts = self.date?; + let dt = DateTime::from_timestamp(ts, 0)?; + Some(dt.format("%B %-d, %Y").to_string()) + } +} + +/// Reel-wide metadata the scripter uses for framing. +#[derive(Debug, Clone)] +pub struct ReelMeta { + pub span: MemoriesSpan, + pub years: Vec, +} + +impl ReelMeta { + /// Natural-language phrase for the span, e.g. "on this day". + pub fn span_phrase(&self) -> &'static str { + match self.span { + MemoriesSpan::Day => "on this day", + MemoriesSpan::Week => "this week", + MemoriesSpan::Month => "this month", + } + } +} + +// --- Job registry ------------------------------------------------------------ +// +// In-memory, same shape as the TTS speech-job registry: a reel takes minutes, +// too long to hold one HTTP request from a phone. POST /reels returns a job id; +// the client polls GET /reels/{id} until the video URL appears. The heavy +// artifact (the MP4) lives on disk, not in this map — jobs only carry status + +// the output path. State is intentionally not durable across restarts; the +// on-disk cache is what makes a repeat request cheap, not the registry. + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ReelJobStatus { + Queued, + Running, + Done, + Error, +} + +impl ReelJobStatus { + fn is_terminal(self) -> bool { + matches!(self, Self::Done | Self::Error) + } +} + +struct ReelJob { + status: ReelJobStatus, + /// Coarse progress label for the client ("scripting", "narrating", …). + stage: &'static str, + title: Option, + output_path: Option, + error: Option, + created_at: Instant, + finished_at: Option, + abort: Option, +} + +/// Finished jobs linger so a client that lost connectivity can still collect +/// the result; anything older than MAX_AGE is dropped (aborted first if somehow +/// still running). Swept lazily on each create. 
+const REEL_JOB_RESULT_TTL: Duration = Duration::from_secs(30 * 60); +const REEL_JOB_MAX_AGE: Duration = Duration::from_secs(60 * 60); + +static REEL_JOBS: LazyLock>> = + LazyLock::new(|| StdMutex::new(HashMap::new())); + +fn sweep_stale_jobs(jobs: &mut HashMap, now: Instant) { + jobs.retain(|_, job| { + let result_expired = job + .finished_at + .is_some_and(|t| now.duration_since(t) >= REEL_JOB_RESULT_TTL); + let too_old = now.duration_since(job.created_at) >= REEL_JOB_MAX_AGE; + if too_old && let Some(h) = job.abort.take() { + h.abort(); + } + !(result_expired || too_old) + }); +} + +fn with_job(id: Uuid, f: impl FnOnce(&mut ReelJob) -> R) -> Option { + REEL_JOBS.lock().unwrap().get_mut(&id).map(f) +} + +fn set_stage(id: Uuid, stage: &'static str) { + with_job(id, |job| { + if !job.status.is_terminal() { + job.status = ReelJobStatus::Running; + job.stage = stage; + } + }); +} + +/// Move a job to a terminal state (first terminal write wins). +fn finish_job( + id: Uuid, + status: ReelJobStatus, + title: Option, + output_path: Option, + error: Option, +) { + with_job(id, |job| { + if job.status.is_terminal() { + return; + } + job.status = status; + job.stage = match status { + ReelJobStatus::Done => "done", + _ => "error", + }; + job.title = title; + job.output_path = output_path; + job.error = error; + job.finished_at = Some(Instant::now()); + job.abort = None; + }); +} + +// --- On-disk cache ----------------------------------------------------------- + +/// Render version: bump to invalidate every cached reel after a rendering / +/// scripting change that should produce a fresh result. +const RENDER_VERSION: u32 = 1; + +/// Cache key over everything that determines *which* media and *how* it's +/// voiced — but not the (non-deterministic) narration text. Same inputs → same +/// MP4 served instantly. blake3 keeps it filesystem-safe and collision-free. 
+fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str>) -> String { + let mut buf = format!( + "v{}|{}|voice={}|", + RENDER_VERSION, + selector.descriptor(), + voice.unwrap_or("default") + ); + for m in media { + match m { + SegmentMedia::Photo { + rel_path, + library_id, + } => buf.push_str(&format!("{library_id}:{rel_path}|")), + } + } + blake3::hash(buf.as_bytes()).to_hex().to_string() +} + +fn reel_mp4_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.mp4")) +} + +fn reel_sidecar_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.json")) +} + +#[derive(Serialize, Deserialize)] +struct ReelSidecar { + title: String, +} + +// --- HTTP types -------------------------------------------------------------- + +#[derive(Debug, Deserialize)] +pub struct CreateReelRequest { + #[serde(default)] + pub span: Option, + #[serde(default)] + pub timezone_offset_minutes: Option, + #[serde(default)] + pub library: Option, + /// Cloned TTS voice for the narration; server default when omitted. + #[serde(default)] + pub voice: Option, + /// Cap on photos in the reel (clamped server-side). + #[serde(default)] + pub max_segments: Option, +} + +#[derive(Debug, Serialize)] +pub struct ReelJobCreatedResponse { + pub job_id: String, + pub status: ReelJobStatus, +} + +#[derive(Debug, Serialize)] +pub struct ReelStatusResponse { + pub job_id: String, + pub status: ReelJobStatus, + pub stage: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub video_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +// --- Handlers ---------------------------------------------------------------- + +/// POST /reels — start (or instantly serve from cache) a memory reel for the +/// requested span. 
Returns 202 + a job id; the client polls GET /reels/{id}. +#[post("/reels")] +pub async fn create_reel_handler( + http_request: HttpRequest, + _claims: Claims, + req: web::Json, + app_state: web::Data, + exif_dao: web::Data>>, + insight_dao: web::Data>>, +) -> impl Responder { + let span_context = extract_context_from_request(&http_request); + + if app_state.llamacpp.is_none() { + return HttpResponse::ServiceUnavailable().json(json!({ + "error": "Reel narration needs the LLM/TTS backend (set LLAMA_SWAP_URL)" + })); + } + + let span = req.span.unwrap_or(MemoriesSpan::Day); + let max_segments = req.max_segments.unwrap_or(selector::DEFAULT_MAX_SEGMENTS); + let selector = ReelSelector::Memories { + span, + tz_offset_minutes: req.timezone_offset_minutes.unwrap_or(0), + library: req.library.clone(), + max_segments, + }; + + // Cheap pass: resolve the media set for the cache key and the emptiness + // check. Insight enrichment + scripting happen in the background job. + let (planned, meta) = match selector::resolve(&app_state, &exif_dao, &span_context, &selector) { + Ok(r) => r, + Err(msg) => return HttpResponse::BadRequest().body(msg), + }; + if planned.is_empty() { + return HttpResponse::UnprocessableEntity().json(json!({ + "error": "No photo memories found for this span" + })); + } + + let media: Vec = planned.iter().map(|p| p.media.clone()).collect(); + let voice = req.voice.clone().filter(|s| !s.is_empty()); + let key = cache_key(&selector, &media, voice.as_deref()); + + let job_id = Uuid::new_v4(); + + // Cache hit: register an already-Done job pointing at the existing MP4 so + // the client's first poll returns the video URL immediately. 
+ let mp4 = reel_mp4_path(&app_state, &key); + if mp4.exists() { + let title = std::fs::read(reel_sidecar_path(&app_state, &key)) + .ok() + .and_then(|b| serde_json::from_slice::(&b).ok()) + .map(|s| s.title); + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Done, + stage: "done", + title, + output_path: Some(mp4), + error: None, + created_at: Instant::now(), + finished_at: Some(Instant::now()), + abort: None, + }, + ); + return HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Done, + }); + } + + { + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Queued, + stage: "queued", + title: None, + output_path: None, + error: None, + created_at: Instant::now(), + finished_at: None, + abort: None, + }, + ); + } + + let state = app_state.clone(); + let insight_dao = insight_dao.clone(); + let handle = tokio::spawn(async move { + match run_reel_job(&state, &insight_dao, job_id, planned, meta, voice, &key).await { + Ok((title, path)) => { + finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None) + } + Err(e) => { + log::error!("reel job {job_id} failed: {e:?}"); + finish_job( + job_id, + ReelJobStatus::Error, + None, + None, + Some(format!("{e}")), + ) + } + } + }); + with_job(job_id, |job| job.abort = Some(handle.abort_handle())); + + HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Queued, + }) +} + +/// GET /reels/{id} — poll a reel job. Done jobs carry a `video_url`. 
+#[get("/reels/{id}")] +pub async fn reel_status_handler(_claims: Claims, path: web::Path) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let resp = with_job(id, |job| ReelStatusResponse { + job_id: id_str.clone(), + status: job.status, + stage: job.stage.to_string(), + title: job.title.clone(), + video_url: matches!(job.status, ReelJobStatus::Done) + .then(|| format!("/reels/{id_str}/video")), + error: job.error.clone(), + }); + match resp { + Some(r) => HttpResponse::Ok().json(r), + None => HttpResponse::NotFound().json(json!({ "error": "job not found or expired" })), + } +} + +/// GET /reels/{id}/video — stream the finished MP4 (supports range requests via +/// NamedFile, so the mobile player can seek). +#[get("/reels/{id}/video")] +pub async fn reel_video_handler( + _claims: Claims, + request: HttpRequest, + path: web::Path, +) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let output = with_job(id, |job| job.output_path.clone()).flatten(); + let Some(path) = output else { + return HttpResponse::NotFound().json(json!({ "error": "reel not ready" })); + }; + match NamedFile::open(&path) { + Ok(file) => file.into_response(&request), + Err(e) => { + log::error!("opening reel mp4 {path:?} failed: {e:?}"); + HttpResponse::NotFound().json(json!({ "error": "reel file missing" })) + } + } +} + +// --- Pipeline ---------------------------------------------------------------- + +/// Run the full reel pipeline: enrich → script → narrate → render → concat, +/// then publish the MP4 into the cache. Returns (title, mp4_path). 
+async fn run_reel_job( + app_state: &AppState, + insight_dao: &Mutex>, + job_id: Uuid, + mut planned: Vec, + meta: ReelMeta, + voice: Option, + key: &str, +) -> anyhow::Result<(String, PathBuf)> { + use anyhow::{Context, anyhow}; + + let client = app_state + .llamacpp + .as_ref() + .ok_or_else(|| anyhow!("TTS/LLM backend not configured"))? + .clone(); + + // 1. Enrich with cached insights, then script (one LLM call). + set_stage(job_id, "scripting"); + let span_context = opentelemetry::Context::new(); + selector::enrich(insight_dao, &span_context, &mut planned); + let script = script::generate_script(&client, &meta, &planned).await?; + + // 2. Narrate each line to speech and 3. render each photo segment. A + // segment whose audio or render fails is skipped (logged) rather than + // sinking the whole reel — handles an odd HEIC/corrupt file gracefully. + set_stage(job_id, "narrating"); + let work = tempfile::tempdir().context("creating reel work dir")?; + let nvenc = render::is_nvenc_available().await; + let opts = render::SegmentOpts { + nvenc, + ..Default::default() + }; + + let mut segment_files: Vec = Vec::new(); + for (i, (seg, line)) in planned.iter().zip(script.lines.iter()).enumerate() { + let image_path = match resolve_image_path(app_state, &seg.media) { + Some(p) => p, + None => { + log::warn!("reel {job_id}: skipping segment {i}, image path unresolved"); + continue; + } + }; + + let audio_bytes = + match crate::ai::tts::synthesize_serialized(&client, line, voice.as_deref(), "wav") + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}"); + continue; + } + }; + let audio_path = work.path().join(format!("narration_{i:03}.wav")); + if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { + log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}"); + continue; + } + + let narration_secs = + crate::video::ffmpeg::get_duration_seconds(&audio_path.to_string_lossy()) + .await + 
.ok() + .flatten() + .unwrap_or(render::MIN_SEGMENT_SECONDS); + let duration = render::segment_duration(narration_secs); + + set_stage(job_id, "rendering"); + let seg_out = work.path().join(format!("seg_{i:03}.mp4")); + if let Err(e) = + render::render_segment(&image_path, &audio_path, &seg_out, duration, &opts).await + { + log::warn!("reel {job_id}: skipping segment {i}, render failed: {e}"); + continue; + } + segment_files.push(seg_out.to_string_lossy().to_string()); + } + + if segment_files.is_empty() { + return Err(anyhow!("no segments rendered successfully")); + } + + // 4. Concat into the cache. Write to a temp name in the reels dir, then + // rename atomically (same filesystem) so a reader never sees a partial. + std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?; + let final_path = reel_mp4_path(app_state, key); + let tmp_path = final_path.with_extension("mp4.tmp"); + render::concat_segments(&segment_files, &tmp_path).await?; + std::fs::rename(&tmp_path, &final_path).context("publishing reel mp4")?; + + // Sidecar carries the title so a future cache hit can return it without + // re-running the pipeline. + let sidecar = serde_json::to_vec(&ReelSidecar { + title: script.title.clone(), + }) + .context("serializing reel sidecar")?; + let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar); + + Ok((script.title, final_path)) +} + +/// Resolve a photo segment's library-relative path to a validated absolute +/// path under its library root. 
+fn resolve_image_path(app_state: &AppState, media: &SegmentMedia) -> Option { + let SegmentMedia::Photo { + rel_path, + library_id, + } = media; + let lib = app_state.library_by_id(*library_id)?; + crate::files::is_valid_full_path(&lib.root_path, rel_path, false) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn photo(p: &str, lib: i32) -> SegmentMedia { + SegmentMedia::Photo { + rel_path: p.to_string(), + library_id: lib, + } + } + + fn day_selector() -> ReelSelector { + ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + } + } + + #[test] + fn cache_key_is_stable_for_same_inputs() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let k1 = cache_key(&day_selector(), &media, Some("grandma")); + let k2 = cache_key(&day_selector(), &media, Some("grandma")); + assert_eq!(k1, k2); + // 64-hex blake3. + assert_eq!(k1.len(), 64); + assert!(k1.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn cache_key_changes_with_media_order_voice_and_selector() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let reordered = vec![photo("b.jpg", 1), photo("a.jpg", 1)]; + let base = cache_key(&day_selector(), &media, Some("grandma")); + // Order matters (the reel sequence differs). + assert_ne!( + base, + cache_key(&day_selector(), &reordered, Some("grandma")) + ); + // Voice matters. + assert_ne!(base, cache_key(&day_selector(), &media, Some("dad"))); + assert_ne!(base, cache_key(&day_selector(), &media, None)); + // Span matters. 
+ let week = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + }; + assert_ne!(base, cache_key(&week, &media, Some("grandma"))); + } + + #[test] + fn span_phrase_maps_each_span() { + let mk = |span| ReelMeta { + span, + years: vec![], + }; + assert_eq!(mk(MemoriesSpan::Day).span_phrase(), "on this day"); + assert_eq!(mk(MemoriesSpan::Week).span_phrase(), "this week"); + assert_eq!(mk(MemoriesSpan::Month).span_phrase(), "this month"); + } + + #[test] + fn date_label_formats_or_none() { + let seg = PlannedSegment { + media: photo("a.jpg", 1), + date: Some(1_560_384_000), // 2019-06-13 UTC + insight_title: None, + insight_summary: None, + }; + assert!(seg.date_label().unwrap().contains("2019")); + + let undated = PlannedSegment { + media: photo("a.jpg", 1), + date: None, + insight_title: None, + insight_summary: None, + }; + assert_eq!(undated.date_label(), None); + } +} diff --git a/src/reels/render.rs b/src/reels/render.rs new file mode 100644 index 0000000..ca39515 --- /dev/null +++ b/src/reels/render.rs @@ -0,0 +1,338 @@ +//! ffmpeg assembly for memory reels. +//! +//! Two-stage, per-segment design: each segment is rendered to its own +//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments +//! are joined with the concat demuxer (stream copy, no re-encode). Rendering +//! per segment — rather than one monster filtergraph — keeps each ffmpeg +//! invocation simple to reason about, parallelizes naturally, and means a +//! video-clip segment type (phase 2) slots in as just a different per-segment +//! builder without touching the concat stage. +//! +//! The arg builders are pure (`Vec` out) so the exact ffmpeg command +//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure. 
+ +use anyhow::{Context, Result, bail}; +use std::path::Path; +use tokio::process::Command; + +/// Re-exported so the reel pipeline reaches NVENC detection through this module +/// rather than depending on `video::ffmpeg` directly. +pub use crate::video::ffmpeg::is_nvenc_available; + +/// Reel canvas. Landscape matches the majority of camera photos; portrait +/// shots are letterboxed by the `pad` in [`segment_filter`] rather than +/// cropped, so faces never get cut off. +pub const REEL_WIDTH: u32 = 1920; +pub const REEL_HEIGHT: u32 = 1080; +pub const REEL_FPS: u32 = 30; + +/// A still's screen time is its narration length plus a short breath, with a +/// floor so a terse line still lingers. No ceiling: the segment always covers +/// the full narration so speech is never truncated — the scripter is asked to +/// keep lines short instead. +pub const MIN_SEGMENT_SECONDS: f64 = 2.5; +const NARRATION_TAIL_SECONDS: f64 = 0.6; + +/// Screen time for a photo segment given its narration audio length. +pub fn segment_duration(narration_secs: f64) -> f64 { + let d = narration_secs + NARRATION_TAIL_SECONDS; + if d.is_finite() && d > MIN_SEGMENT_SECONDS { + d + } else { + MIN_SEGMENT_SECONDS + } +} + +/// Options controlling per-segment rendering. `ken_burns` adds a slow zoom for +/// motion; it's defaulted off until the effect is eyeballed on the GPU box, +/// since a wrong zoompan expression reads as jitter and can't be verified here. +#[derive(Debug, Clone, Copy)] +pub struct SegmentOpts { + pub width: u32, + pub height: u32, + pub fps: u32, + pub nvenc: bool, + pub ken_burns: bool, +} + +impl Default for SegmentOpts { + fn default() -> Self { + Self { + width: REEL_WIDTH, + height: REEL_HEIGHT, + fps: REEL_FPS, + nvenc: false, + ken_burns: false, + } + } +} + +/// Video filter for a photo segment: fit the image inside the canvas +/// (preserving aspect, padding the rest), normalize SAR/fps/pixel format, and +/// optionally apply a gentle Ken Burns zoom. 
+pub fn segment_filter(opts: &SegmentOpts, duration: f64) -> String { + let (w, h, fps) = (opts.width, opts.height, opts.fps); + if opts.ken_burns { + // Upscale first so zoompan samples from a larger frame (avoids + // shimmer), drift the zoom from 1.0→~1.12 across the segment, hold the + // crop centered, then settle to the canvas. + let frames = (duration * fps as f64).round().max(1.0) as u64; + format!( + "scale={w}*2:{h}*2:force_original_aspect_ratio=increase,\ + crop={w}*2:{h}*2,\ + zoompan=z='min(zoom+0.0009,1.12)':d={frames}:\ + x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={w}x{h}:fps={fps},\ + setsar=1,format=yuv420p" + ) + } else { + format!( + "scale={w}:{h}:force_original_aspect_ratio=decrease,\ + pad={w}:{h}:(ow-iw)/2:(oh-ih)/2,\ + setsar=1,fps={fps},format=yuv420p" + ) + } +} + +fn video_encoder_args(nvenc: bool) -> Vec { + if nvenc { + // p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path. + [ + "-c:v", + "h264_nvenc", + "-preset", + "p4", + "-cq", + "23", + "-pix_fmt", + "yuv420p", + ] + } else { + [ + "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p", + ] + } + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Build the ffmpeg args that render one photo segment: a still looped for +/// `duration` seconds with its narration muxed in. The narration is padded +/// with trailing silence (`apad`) so short lines don't end the segment early; +/// `-t` bounds both streams to the segment length. 
+pub fn build_segment_args( + image_path: &str, + audio_path: &str, + out_path: &str, + duration: f64, + opts: &SegmentOpts, +) -> Vec { + let mut args: Vec = vec!["-y".into()]; + if opts.nvenc { + args.extend(["-hwaccel".into(), "cuda".into()]); + } + args.extend([ + "-loop".into(), + "1".into(), + "-i".into(), + image_path.into(), + "-i".into(), + audio_path.into(), + "-filter_complex".into(), + format!("[0:v]{}[v];[1:a]apad[a]", segment_filter(opts, duration)), + "-map".into(), + "[v]".into(), + "-map".into(), + "[a]".into(), + "-t".into(), + format!("{duration:.3}"), + ]); + args.extend(video_encoder_args(opts.nvenc)); + args.extend( + ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"] + .iter() + .map(|s| s.to_string()), + ); + args.push(out_path.into()); + args +} + +/// Build the concat-demuxer args that join rendered segments losslessly. +/// `+faststart` moves the moov atom up front so the reel streams immediately +/// on the mobile client. +pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec { + [ + "-y", + "-f", + "concat", + "-safe", + "0", + "-i", + list_path, + "-c", + "copy", + "-movflags", + "+faststart", + out_path, + ] + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Render the concat list file body. Each line points the demuxer at one +/// segment; single quotes in paths are escaped per ffmpeg's concat syntax. +pub fn build_concat_list(segment_paths: &[String]) -> String { + let mut out = String::new(); + for p in segment_paths { + let escaped = p.replace('\'', r"'\''"); + out.push_str(&format!("file '{escaped}'\n")); + } + out +} + +async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> { + let output = Command::new("ffmpeg") + .args(args) + .output() + .await + .with_context(|| format!("spawning ffmpeg for {what}"))?; + if !output.status.success() { + bail!( + "ffmpeg {what} failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + Ok(()) +} + +/// Render one photo segment to `out_path`. 
+pub async fn render_segment( + image_path: &Path, + audio_path: &Path, + out_path: &Path, + duration: f64, + opts: &SegmentOpts, +) -> Result<()> { + let args = build_segment_args( + &image_path.to_string_lossy(), + &audio_path.to_string_lossy(), + &out_path.to_string_lossy(), + duration, + opts, + ); + run_ffmpeg(&args, "segment render").await +} + +/// Join rendered segments into the final reel. Writes the concat list into the +/// same directory as the output so relative paths and cleanup stay local. +pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> { + let list_path = out_path.with_extension("concat.txt"); + let body = build_concat_list(segment_paths); + tokio::fs::write(&list_path, body) + .await + .context("writing concat list")?; + let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy()); + let result = run_ffmpeg(&args, "concat").await; + let _ = tokio::fs::remove_file(&list_path).await; + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn segment_duration_floors_short_lines() { + // A one-word narration still lingers at the floor. + assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS); + assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS); + } + + #[test] + fn segment_duration_covers_full_narration_plus_tail() { + // No ceiling: a long line gets its full length so speech isn't cut. 
+ assert!((segment_duration(5.0) - 5.6).abs() < 1e-9); + assert!((segment_duration(20.0) - 20.6).abs() < 1e-9); + } + + #[test] + fn segment_duration_rejects_nonfinite() { + assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS); + assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS); + } + + #[test] + fn static_filter_fits_and_pads_without_cropping() { + let f = segment_filter(&SegmentOpts::default(), 4.0); + assert!(f.contains("force_original_aspect_ratio=decrease")); + assert!(f.contains("pad=1920:1080")); + assert!(f.contains("format=yuv420p")); + // No zoompan when ken_burns is off. + assert!(!f.contains("zoompan")); + } + + #[test] + fn ken_burns_filter_uses_duration_scaled_frame_count() { + let opts = SegmentOpts { + ken_burns: true, + ..SegmentOpts::default() + }; + // 4s * 30fps = 120 frames in the zoompan d= term. + let f = segment_filter(&opts, 4.0); + assert!(f.contains("zoompan")); + assert!(f.contains("d=120:")); + assert!(f.contains("s=1920x1080")); + } + + #[test] + fn segment_args_loop_still_and_bound_with_t() { + let args = build_segment_args( + "/img.jpg", + "/a.wav", + "/out.mp4", + 4.0, + &SegmentOpts::default(), + ); + let joined = args.join(" "); + assert!(joined.contains("-loop 1 -i /img.jpg")); + assert!(joined.contains("-i /a.wav")); + assert!(joined.contains("apad")); + assert!(joined.contains("-t 4.000")); + assert!(joined.contains("libx264")); + assert!(joined.ends_with("/out.mp4")); + } + + #[test] + fn segment_args_use_nvenc_and_cuda_when_enabled() { + let opts = SegmentOpts { + nvenc: true, + ..SegmentOpts::default() + }; + let args = build_segment_args("/img.jpg", "/a.wav", "/out.mp4", 3.0, &opts); + let joined = args.join(" "); + assert!(joined.contains("-hwaccel cuda")); + assert!(joined.contains("h264_nvenc")); + assert!(!joined.contains("libx264")); + } + + #[test] + fn concat_args_stream_copy_with_faststart() { + let args = build_concat_args("/tmp/list.txt", "/out.mp4"); + let joined = args.join(" "); + 
assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt")); + assert!(joined.contains("-c copy")); + assert!(joined.contains("+faststart")); + } + + #[test] + fn concat_list_escapes_single_quotes() { + let body = build_concat_list(&[ + "/tmp/seg_000.mp4".into(), + "/tmp/own's dir/seg_001.mp4".into(), + ]); + assert!(body.contains("file '/tmp/seg_000.mp4'\n")); + // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax. + assert!(body.contains(r"own'\''s")); + } +} diff --git a/src/reels/script.rs b/src/reels/script.rs new file mode 100644 index 0000000..1cf3189 --- /dev/null +++ b/src/reels/script.rs @@ -0,0 +1,289 @@ +//! Narration scripting for memory reels. +//! +//! One LLM call turns the planned segments (each carrying its date and, where +//! available, its cached insight) into a short first-person narration line per +//! photo plus a title for the reel. We reuse the cached insight summary as the +//! richest per-photo signal rather than re-running vision at reel time — that +//! keeps reel generation off the GPU's vision slot entirely. +//! +//! The prompt builder and response parser are pure so the contract is +//! unit-testable; `generate_script` wires them to the LLM client. + +use anyhow::{Context, Result}; +use std::sync::Arc; + +use super::{PlannedSegment, ReelMeta}; +use crate::ai::llamacpp::LlamaCppClient; +use crate::ai::llm_client::LlmClient; + +/// The narration for a whole reel: a title and one line per segment, in order. +#[derive(Debug, Clone, PartialEq)] +pub struct ReelScript { + pub title: String, + pub lines: Vec, +} + +const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \ +slideshow of someone's own photos set to a spoken voiceover. Write warm, \ +specific, first-person narration as if the person is gently looking back on \ +their own memories. Be concrete and grounded in the details given; never \ +invent names, places, or events that aren't supported. 
Keep each line to one \ +or two short sentences that can be read aloud in a few seconds. Avoid generic \ +filler like \"what a wonderful day\" — if you have little to go on, simply \ +describe the moment plainly."; + +/// Build the (system, user) prompt pair for the scripter. The user message +/// describes each segment in order and asks for strict JSON back. +pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (String, String) { + let mut user = String::new(); + user.push_str(&format!( + "These are {} photos surfaced as memories {}.\n\n", + planned.len(), + meta.span_phrase() + )); + if !meta.years.is_empty() { + let years: Vec = meta.years.iter().map(|y| y.to_string()).collect(); + user.push_str(&format!("They span the years: {}.\n\n", years.join(", "))); + } + user.push_str("Photos, in the order they will appear:\n"); + for (i, seg) in planned.iter().enumerate() { + user.push_str(&format!("\n[{}]", i + 1)); + if let Some(date) = seg.date_label() { + user.push_str(&format!(" {date}")); + } + user.push('\n'); + match (&seg.insight_title, &seg.insight_summary) { + (Some(t), Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {t} — {s}\n")); + } + (Some(t), _) => user.push_str(&format!(" Known context: {t}\n")), + (_, Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {s}\n")); + } + _ => user.push_str(" (no extra context — narrate plainly from the date)\n"), + } + } + user.push_str(&format!( + "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\ + {{\"title\": \"\", \"segments\": [\"\", \ + \"\", ... ]}}\n\ + The \"segments\" array MUST have exactly {} items, one per photo in order.", + planned.len() + )); + (SYSTEM_PROMPT.to_string(), user) +} + +/// Parse the model's response into a script with exactly `n` lines. Tolerant of +/// code fences and surrounding prose, and of both `segments: [".."]` and +/// `segments: [{"narration": ".."}]` shapes. 
Missing/extra lines are padded or +/// truncated so the caller always gets `n` aligned to the segments. +pub fn parse_script_response(raw: &str, n: usize) -> ReelScript { + let fallback_line = "A moment worth remembering."; + let value = extract_json_object(raw); + + let title = value + .as_ref() + .and_then(|v| v.get("title")) + .and_then(|t| t.as_str()) + .map(clean_text) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "Memories".to_string()); + + let mut lines: Vec = value + .as_ref() + .and_then(|v| v.get("segments")) + .and_then(|s| s.as_array()) + .map(|arr| { + arr.iter() + .map(|item| { + let text = item + .as_str() + .map(|s| s.to_string()) + .or_else(|| { + item.get("narration") + .and_then(|n| n.as_str()) + .map(|s| s.to_string()) + }) + .unwrap_or_default(); + clean_text(&text) + }) + .collect() + }) + .unwrap_or_default(); + + // Align to exactly n: drop extras, pad shortfalls with a neutral line so + // every photo still gets spoken audio. + lines.truncate(n); + while lines.len() < n { + lines.push(fallback_line.to_string()); + } + for line in lines.iter_mut() { + if line.is_empty() { + *line = fallback_line.to_string(); + } + } + + ReelScript { title, lines } +} + +/// Pull the first balanced top-level JSON object out of a possibly-noisy model +/// response (code fences, leading prose). Returns None if nothing parses. +fn extract_json_object(raw: &str) -> Option { + // Fast path: the whole thing is valid JSON. + if let Ok(v) = serde_json::from_str::(raw.trim()) { + return Some(v); + } + // Otherwise scan for the first '{' ... matching '}' span, ignoring braces + // inside strings. 
+ let bytes = raw.as_bytes(); + let start = raw.find('{')?; + let mut depth = 0i32; + let mut in_str = false; + let mut escaped = false; + for i in start..bytes.len() { + let c = bytes[i] as char; + if in_str { + if escaped { + escaped = false; + } else if c == '\\' { + escaped = true; + } else if c == '"' { + in_str = false; + } + continue; + } + match c { + '"' => in_str = true, + '{' => depth += 1, + '}' => { + depth -= 1; + if depth == 0 { + return serde_json::from_str(&raw[start..=i]).ok(); + } + } + _ => {} + } + } + None +} + +/// Collapse whitespace and strip stray markdown/quote decorations a model +/// sometimes leaves around a line. +fn clean_text(s: &str) -> String { + let trimmed = s.trim().trim_matches('"').trim(); + trimmed.split_whitespace().collect::>().join(" ") +} + +/// Generate the reel script via the LLM. Text-only (no images) — the per-photo +/// context comes from cached insights. The call takes the GPU read lease +/// internally (see `LlamaCppClient::generate`). 
+pub async fn generate_script( + client: &Arc, + meta: &ReelMeta, + planned: &[PlannedSegment], +) -> Result { + let (system, user) = build_script_messages(meta, planned); + let raw = client + .generate(&user, Some(&system), None) + .await + .context("LLM script generation failed")?; + Ok(parse_script_response(&raw, planned.len())) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::memories::MemoriesSpan; + + fn meta() -> ReelMeta { + ReelMeta { + span: MemoriesSpan::Day, + years: vec![2019, 2021], + } + } + + fn planned(n: usize) -> Vec { + (0..n) + .map(|i| PlannedSegment { + media: super::super::SegmentMedia::Photo { + rel_path: format!("p{i}.jpg"), + library_id: 1, + }, + date: Some(1_560_000_000 + i as i64 * 86_400), + insight_title: None, + insight_summary: None, + }) + .collect() + } + + #[test] + fn prompt_states_exact_segment_count_and_span() { + let (sys, user) = build_script_messages(&meta(), &planned(3)); + assert!(sys.contains("memory reel")); + assert!(user.contains("3 photos")); + assert!(user.contains("on this day")); + assert!(user.contains("exactly 3 items")); + // Each photo gets an indexed entry. + assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]")); + } + + #[test] + fn prompt_includes_insight_context_when_present() { + let mut p = planned(1); + p[0].insight_title = Some("Lake house weekend".into()); + p[0].insight_summary = Some("Swimming with the dogs.".into()); + let (_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("Lake house weekend — Swimming with the dogs.")); + } + + #[test] + fn parse_plain_json_object() { + let raw = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#; + let script = parse_script_response(raw, 2); + assert_eq!(script.title, "Summer Days"); + assert_eq!(script.lines, vec!["First line.", "Second line."]); + } + + #[test] + fn parse_tolerates_code_fences_and_prose() { + let raw = "Sure! 
Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!"; + let script = parse_script_response(raw, 2); + assert_eq!(script.title, "Trip"); + assert_eq!(script.lines, vec!["A.", "B."]); + } + + #[test] + fn parse_accepts_object_segment_shape() { + let raw = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#; + let script = parse_script_response(raw, 2); + assert_eq!(script.lines, vec!["One.", "Two."]); + } + + #[test] + fn parse_pads_short_and_truncates_long_to_n() { + // Model returned 1 line but we have 3 segments → pad with neutral lines. + let short = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3); + assert_eq!(short.lines.len(), 3); + assert_eq!(short.lines[0], "Only one."); + assert!(!short.lines[1].is_empty()); + + // Model returned 3 but we have 2 → truncate. + let long = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2); + assert_eq!(long.lines, vec!["a", "b"]); + } + + #[test] + fn parse_falls_back_on_garbage() { + let script = parse_script_response("the model said no", 2); + assert_eq!(script.title, "Memories"); + assert_eq!(script.lines.len(), 2); + assert!(script.lines.iter().all(|l| !l.is_empty())); + } + + #[test] + fn parse_blank_line_replaced_with_fallback() { + let script = parse_script_response(r#"{"title":"T","segments":[" ","Real."]}"#, 2); + assert!(!script.lines[0].is_empty()); + assert_eq!(script.lines[1], "Real."); + } +} diff --git a/src/reels/selector.rs b/src/reels/selector.rs new file mode 100644 index 0000000..0a53ee5 --- /dev/null +++ b/src/reels/selector.rs @@ -0,0 +1,252 @@ +//! Reel selectors: resolve "what goes in the reel" into an ordered media set +//! plus the metadata the scripter needs. The renderer and scripter are +//! selector-agnostic, so adding tag- or date-range-based reels later means +//! adding a variant here, not touching the pipeline. +//! +//! 
Resolution is split in two so the handler can compute a cache key (and +//! short-circuit on a cache hit) without the per-photo insight lookups: +//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and +//! runs in the background job. + +use std::path::Path; +use std::sync::Mutex; + +use chrono::{DateTime, Datelike, FixedOffset}; + +use super::{PlannedSegment, ReelMeta, SegmentMedia}; +use crate::database::{ExifDao, InsightDao}; +use crate::file_types::is_image_file; +use crate::memories::{self, MemoriesSpan}; +use crate::state::AppState; + +/// Default and hard caps on how many photos a reel covers. The cap bounds the +/// LLM/TTS/ffmpeg work per reel; when a span has more, [`sample_evenly`] keeps +/// a representative spread across the years rather than just the oldest. +pub const DEFAULT_MAX_SEGMENTS: usize = 24; +pub const HARD_MAX_SEGMENTS: usize = 40; + +/// What a reel is built from. v1 ships the memories (on this day/week/month) +/// selector; tag and date-range variants slot in here later. +#[derive(Debug, Clone)] +pub enum ReelSelector { + Memories { + span: MemoriesSpan, + tz_offset_minutes: i32, + library: Option, + max_segments: usize, + }, +} + +impl ReelSelector { + /// Stable string identity for the cache key. Captures everything that + /// changes *which* media is selected (but not the non-deterministic + /// narration, which can't be part of a pre-render key). + pub fn descriptor(&self) -> String { + match self { + ReelSelector::Memories { + span, + tz_offset_minutes, + library, + max_segments, + } => format!( + "memories:span={:?}:tz={}:lib={}:max={}", + span, + tz_offset_minutes, + library.as_deref().unwrap_or("all"), + max_segments + ), + } + } +} + +/// Pick at most `max` items spread evenly across the input, always keeping the +/// first and last. Returns the input unchanged when it already fits. 
+pub fn sample_evenly(items: &[T], max: usize) -> Vec { + if max == 0 { + return Vec::new(); + } + if items.len() <= max { + return items.to_vec(); + } + if max == 1 { + return vec![items[0].clone()]; + } + let last = items.len() - 1; + (0..max) + .map(|i| { + // Spread indices 0..=last across max picks, endpoints included. + let idx = (i * last + (max - 1) / 2) / (max - 1); + items[idx.min(last)].clone() + }) + .collect() +} + +/// Cheap pass: resolve the selector into an ordered list of media (no insight +/// lookups yet) plus reel metadata. `Err` only on an invalid library param. +pub fn resolve( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + selector: &ReelSelector, +) -> Result<(Vec, ReelMeta), String> { + match selector { + ReelSelector::Memories { + span, + tz_offset_minutes, + library, + max_segments, + } => { + let client_tz = FixedOffset::east_opt(tz_offset_minutes * 60); + let items = memories::gather_memory_items( + app_state, + exif_dao, + span_context, + *span, + *tz_offset_minutes, + client_tz, + library.as_deref(), + )?; + + // Phase 1 is photos-only: drop videos (a clip segment type lands + // in phase 2). Filter before sampling so the spread is over the + // photos that will actually appear. + let items: Vec = items + .into_iter() + .filter(|it| is_image_file(Path::new(&it.path))) + .collect(); + + let cap = (*max_segments).clamp(1, HARD_MAX_SEGMENTS); + let items = sample_evenly(&items, cap); + + let years = distinct_years(&items, client_tz); + let meta = ReelMeta { span: *span, years }; + + let planned = items + .into_iter() + .map(|it| PlannedSegment { + media: SegmentMedia::Photo { + rel_path: it.path, + library_id: it.library_id, + }, + date: it.created, + insight_title: None, + insight_summary: None, + }) + .collect(); + Ok((planned, meta)) + } + } +} + +/// Distinct calendar years represented by the selected media, in the client's +/// timezone, ascending. 
Used to tell the scripter how far back the reel reaches. +fn distinct_years(items: &[memories::MemoryItem], tz: Option) -> Vec { + let mut years: Vec = items + .iter() + .filter_map(|it| it.created) + .filter_map(|ts| DateTime::from_timestamp(ts, 0)) + .map(|dt| match tz { + Some(off) => dt.with_timezone(&off).year(), + None => dt.year(), + }) + .collect(); + years.sort_unstable(); + years.dedup(); + years +} + +/// Background pass: fill each segment's cached insight (title + summary) where +/// one exists. Best-effort — a missing or errored lookup leaves the fields +/// `None` and the scripter narrates from the date alone. +pub fn enrich( + insight_dao: &Mutex>, + span_context: &opentelemetry::Context, + planned: &mut [PlannedSegment], +) { + let Ok(mut dao) = insight_dao.lock() else { + return; + }; + for seg in planned.iter_mut() { + let rel_path = match &seg.media { + SegmentMedia::Photo { rel_path, .. } => rel_path, + }; + if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) { + seg.insight_title = Some(insight.title); + seg.insight_summary = Some(insight.summary); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sample_evenly_returns_all_when_under_cap() { + let v = vec![1, 2, 3]; + assert_eq!(sample_evenly(&v, 5), vec![1, 2, 3]); + assert_eq!(sample_evenly(&v, 3), vec![1, 2, 3]); + } + + #[test] + fn sample_evenly_keeps_endpoints_and_spreads() { + let v: Vec = (0..100).collect(); + let picked = sample_evenly(&v, 5); + assert_eq!(picked.len(), 5); + assert_eq!(picked[0], 0); // first kept + assert_eq!(*picked.last().unwrap(), 99); // last kept + // Strictly increasing, no dupes. 
+ assert!(picked.windows(2).all(|w| w[0] < w[1])); + } + + #[test] + fn sample_evenly_handles_one_and_zero() { + let v: Vec = (0..10).collect(); + assert_eq!(sample_evenly(&v, 1), vec![0]); + assert!(sample_evenly(&v, 0).is_empty()); + } + + #[test] + fn descriptor_is_stable_and_distinguishes_inputs() { + let a = ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + let b = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + assert_eq!(a.descriptor(), a.clone().descriptor()); + assert_ne!(a.descriptor(), b.descriptor()); + assert!(a.descriptor().contains("lib=all")); + } + + #[test] + fn distinct_years_dedupes_and_sorts() { + let items = vec![ + memories::MemoryItem { + path: "a".into(), + created: Some(1_560_000_000), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "b".into(), + created: Some(1_560_086_400), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "c".into(), + created: Some(1_623_000_000), // 2021 + modified: None, + library_id: 1, + }, + ]; + assert_eq!(distinct_years(&items, None), vec![2019, 2021]); + } +} diff --git a/src/state.rs b/src/state.rs index e678ad1..bf894f3 100644 --- a/src/state.rs +++ b/src/state.rs @@ -53,6 +53,10 @@ pub struct AppState { pub video_path: String, pub gif_path: String, pub preview_clips_path: String, + /// Directory for cached memory-reel MP4s (+ title sidecars). Derived from + /// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips. + /// Created lazily by the reel pipeline on first render. + pub reels_path: String, pub excluded_dirs: Vec, pub ollama: OllamaClient, /// `None` when `OPENROUTER_API_KEY` is not configured. 
Consulted only @@ -141,6 +145,19 @@ impl AppState { preview_dao, ); + // Reels cache dir: explicit env, else a `reels` sibling of the preview + // clips dir (a known-writable, test-safe location). Not created here — + // the reel pipeline does `create_dir_all` before its first write, so + // construction (incl. tests) never touches the filesystem. + let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| { + std::path::Path::new(&preview_clips_path) + .parent() + .map(|p| p.join("reels")) + .unwrap_or_else(|| std::path::PathBuf::from("reels")) + .to_string_lossy() + .to_string() + }); + let library_health = libraries::new_health_map(&libraries_vec); let live_libraries = Arc::new(RwLock::new(libraries_vec.clone())); Self { @@ -155,6 +172,7 @@ impl AppState { video_path, gif_path, preview_clips_path, + reels_path, excluded_dirs, ollama, openrouter, diff --git a/src/video/ffmpeg.rs b/src/video/ffmpeg.rs index d385cac..019bd86 100644 --- a/src/video/ffmpeg.rs +++ b/src/video/ffmpeg.rs @@ -231,7 +231,7 @@ impl Ffmpeg { /// a hard failure — previously the `parse::` on empty stdout produced /// "cannot parse float from empty string" and poisoned the preview-clip row /// with status=failed, which the watcher would re-queue every full scan. -async fn get_duration_seconds(input_file: &str) -> Result> { +pub async fn get_duration_seconds(input_file: &str) -> Result> { if let Some(d) = probe_duration(input_file, "format=duration").await? { return Ok(Some(d)); } -- 2.52.0 From 42453d5786a42613e6b57fc75983abbe42fd241a Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 22:56:48 -0400 Subject: [PATCH 02/17] Fix reel concat: force -f mp4 for the .tmp output path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The concat stage wrote to .mp4.tmp (for an atomic publish-rename), but ffmpeg infers the muxer from the output extension and can't map .tmp to a format — "Unable to choose an output format". 
Force the mp4 muxer explicitly so the temp extension is irrelevant. Segment render, NVENC, TTS, and scripting were already working end-to-end; this was the only failure, at the final join. Co-Authored-By: Claude Fable 5 --- src/reels/render.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/reels/render.rs b/src/reels/render.rs index ca39515..9643309 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -159,7 +159,9 @@ pub fn build_segment_args( /// Build the concat-demuxer args that join rendered segments losslessly. /// `+faststart` moves the moov atom up front so the reel streams immediately -/// on the mobile client. +/// on the mobile client. The output muxer is forced with `-f mp4` because we +/// write to a `.tmp` path (atomic publish) whose extension ffmpeg can't map to +/// a format on its own. pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec { [ "-y", @@ -173,6 +175,8 @@ pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec { "copy", "-movflags", "+faststart", + "-f", + "mp4", out_path, ] .iter() @@ -317,12 +321,19 @@ mod tests { } #[test] - fn concat_args_stream_copy_with_faststart() { - let args = build_concat_args("/tmp/list.txt", "/out.mp4"); + fn concat_args_stream_copy_with_faststart_and_forced_muxer() { + // Output goes to a .tmp path, so the muxer must be forced — ffmpeg + // can't infer mp4 from the extension (the bug this guards against). + let args = build_concat_args("/tmp/list.txt", "/out.mp4.tmp"); let joined = args.join(" "); assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt")); assert!(joined.contains("-c copy")); assert!(joined.contains("+faststart")); + assert!(joined.contains("-f mp4")); + // The forced muxer must come before the output path. 
+ let f_mp4 = args.windows(2).position(|w| w == ["-f", "mp4"]).unwrap(); + let out = args.iter().position(|a| a == "/out.mp4.tmp").unwrap(); + assert!(f_mp4 < out); } #[test] -- 2.52.0 From 7715a7a905015faa977eb0a45c0670f022120f20 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 23:10:26 -0400 Subject: [PATCH 03/17] Reels: portrait canvas with blurred fill, fade transitions, warmer TTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the "image is tiny" problem: a 1920x1080 landscape reel letterboxes to a ~25%-height band on a portrait phone. Switch to a portrait 1080x1920 canvas and fill it per photo with a blurred, zoomed copy of the image behind the sharp fitted photo — so the frame is always full regardless of the photo's orientation, with no black bars and no cropping of the subject. Add a quick 0.35s fade in/out baked into each segment so concatenated photos dip smoothly instead of hard-cutting (fade-out lands in the narration's silent tail, so speech isn't clipped). Drop the unused Ken Burns branch — motion can return deliberately later. Warm up the narration a touch: thread Chatterbox's `exaggeration` through synthesize_serialized and default reels to 0.7 (tunable via REEL_TTS_EXAGGERATION). Bump RENDER_VERSION so existing landscape reels re-render. 
Co-Authored-By: Claude Fable 5 --- src/ai/tts.rs | 6 ++- src/reels/mod.rs | 38 ++++++++++---- src/reels/render.rs | 123 +++++++++++++++++++++++++------------------- 3 files changed, 101 insertions(+), 66 deletions(-) diff --git a/src/ai/tts.rs b/src/ai/tts.rs index 4e7544c..a9a610a 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -486,11 +486,15 @@ pub async fn synthesize_serialized( text: &str, voice: Option<&str>, format: &str, + exaggeration: Option, ) -> anyhow::Result> { let prepared = prepare_for_tts(text); if prepared.is_empty() { anyhow::bail!("nothing to synthesize after cleanup"); } + // Clamp to Chatterbox's documented range, matching the HTTP handlers + // (which clamp before forwarding; this path bypasses them). + let exaggeration = exaggeration.map(|x| x.clamp(0.25, 2.0)); // Queue rather than fast-fail (mirrors create_speech_job_handler). let _permit = TTS_PERMIT .acquire() @@ -500,7 +504,7 @@ pub async fn synthesize_serialized( // starts (see ai::gpu). let _gpu = crate::ai::gpu::tts_lease().await; client - .text_to_speech(&prepared, voice, format, None, None, None) + .text_to_speech(&prepared, voice, format, exaggeration, None, None) .await } diff --git a/src/reels/mod.rs b/src/reels/mod.rs index fe270f8..9956984 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -180,7 +180,18 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 1; +const RENDER_VERSION: u32 = 2; + +/// Narration expressiveness — Chatterbox's `exaggeration` knob. A modest bump +/// over the ~0.5 default warms up otherwise-flat narration; tune via +/// `REEL_TTS_EXAGGERATION` (0.25–2.0). 
+fn reel_tts_exaggeration() -> f32 { + std::env::var("REEL_TTS_EXAGGERATION") + .ok() + .and_then(|s| s.trim().parse::().ok()) + .filter(|x| x.is_finite()) + .unwrap_or(0.7) +} /// Cache key over everything that determines *which* media and *how* it's /// voiced — but not the (non-deterministic) narration text. Same inputs → same @@ -470,16 +481,21 @@ async fn run_reel_job( } }; - let audio_bytes = - match crate::ai::tts::synthesize_serialized(&client, line, voice.as_deref(), "wav") - .await - { - Ok(b) => b, - Err(e) => { - log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}"); - continue; - } - }; + let audio_bytes = match crate::ai::tts::synthesize_serialized( + &client, + line, + voice.as_deref(), + "wav", + Some(reel_tts_exaggeration()), + ) + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}"); + continue; + } + }; let audio_path = work.path().join(format!("narration_{i:03}.wav")); if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}"); diff --git a/src/reels/render.rs b/src/reels/render.rs index 9643309..e40fc3d 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -19,11 +19,13 @@ use tokio::process::Command; /// rather than depending on `video::ffmpeg` directly. pub use crate::video::ffmpeg::is_nvenc_available; -/// Reel canvas. Landscape matches the majority of camera photos; portrait -/// shots are letterboxed by the `pad` in [`segment_filter`] rather than -/// cropped, so faces never get cut off. -pub const REEL_WIDTH: u32 = 1920; -pub const REEL_HEIGHT: u32 = 1080; +/// Reel canvas. Portrait, because reels are watched on a phone held upright — +/// a landscape canvas letterboxes to a thin ~25%-height band there. 
Each photo +/// is fitted sharp and centered over a blurred, zoomed copy of itself (see +/// [`segment_filtergraph`]) so the frame is always filled regardless of the +/// photo's orientation, without cropping the subject. +pub const REEL_WIDTH: u32 = 1080; +pub const REEL_HEIGHT: u32 = 1920; pub const REEL_FPS: u32 = 30; /// A still's screen time is its narration length plus a short breath, with a @@ -33,6 +35,11 @@ pub const REEL_FPS: u32 = 30; pub const MIN_SEGMENT_SECONDS: f64 = 2.5; const NARRATION_TAIL_SECONDS: f64 = 0.6; +/// Quick fade in/out baked into each segment so concatenated photos dip +/// smoothly instead of hard-cutting. The fade-out lands inside the narration's +/// silent tail, so speech is never clipped. +const FADE_SECONDS: f64 = 0.35; + /// Screen time for a photo segment given its narration audio length. pub fn segment_duration(narration_secs: f64) -> f64 { let d = narration_secs + NARRATION_TAIL_SECONDS; @@ -43,16 +50,13 @@ pub fn segment_duration(narration_secs: f64) -> f64 { } } -/// Options controlling per-segment rendering. `ken_burns` adds a slow zoom for -/// motion; it's defaulted off until the effect is eyeballed on the GPU box, -/// since a wrong zoompan expression reads as jitter and can't be verified here. +/// Options controlling per-segment rendering. #[derive(Debug, Clone, Copy)] pub struct SegmentOpts { pub width: u32, pub height: u32, pub fps: u32, pub nvenc: bool, - pub ken_burns: bool, } impl Default for SegmentOpts { @@ -62,35 +66,38 @@ impl Default for SegmentOpts { height: REEL_HEIGHT, fps: REEL_FPS, nvenc: false, - ken_burns: false, } } } -/// Video filter for a photo segment: fit the image inside the canvas -/// (preserving aspect, padding the rest), normalize SAR/fps/pixel format, and -/// optionally apply a gentle Ken Burns zoom. -pub fn segment_filter(opts: &SegmentOpts, duration: f64) -> String { +/// Full `filter_complex` for one photo segment, producing labelled `[v]` (video) +/// and `[a]` (audio) outputs. 
Input 0 is the looped still, input 1 the +/// narration. +/// +/// Video: split the still into a background and foreground. The background is +/// scaled to *cover* the canvas and heavily blurred; the foreground is scaled to +/// *fit* inside it and overlaid centered. This fills the portrait frame for any +/// photo orientation — no black bars, no cropping of the subject — then a quick +/// fade in/out softens the cut to the next segment. +/// +/// Audio: pad the narration with trailing silence so a short line doesn't end +/// the segment early; `-t` bounds it to the segment duration. +pub fn segment_filtergraph(opts: &SegmentOpts, duration: f64) -> String { let (w, h, fps) = (opts.width, opts.height, opts.fps); - if opts.ken_burns { - // Upscale first so zoompan samples from a larger frame (avoids - // shimmer), drift the zoom from 1.0→~1.12 across the segment, hold the - // crop centered, then settle to the canvas. - let frames = (duration * fps as f64).round().max(1.0) as u64; - format!( - "scale={w}*2:{h}*2:force_original_aspect_ratio=increase,\ - crop={w}*2:{h}*2,\ - zoompan=z='min(zoom+0.0009,1.12)':d={frames}:\ - x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={w}x{h}:fps={fps},\ - setsar=1,format=yuv420p" - ) - } else { - format!( - "scale={w}:{h}:force_original_aspect_ratio=decrease,\ - pad={w}:{h}:(ow-iw)/2:(oh-ih)/2,\ - setsar=1,fps={fps},format=yuv420p" - ) - } + // Fade-out begins one fade-length before the end; clamp so a floor-length + // segment still gets a valid (non-negative) start time. 
+ let fade_out_start = (duration - FADE_SECONDS).max(0.0); + format!( + "[0:v]split=2[bg][fg];\ + [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\ + crop={w}:{h},boxblur=20:2[bgb];\ + [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\ + [bgb][fgs]overlay=(W-w)/2:(H-h)/2,\ + fade=t=in:st=0:d={FADE_SECONDS},\ + fade=t=out:st={fade_out_start:.3}:d={FADE_SECONDS},\ + setsar=1,fps={fps},format=yuv420p[v];\ + [1:a]apad[a]" + ) } fn video_encoder_args(nvenc: bool) -> Vec { @@ -117,9 +124,9 @@ fn video_encoder_args(nvenc: bool) -> Vec { } /// Build the ffmpeg args that render one photo segment: a still looped for -/// `duration` seconds with its narration muxed in. The narration is padded -/// with trailing silence (`apad`) so short lines don't end the segment early; -/// `-t` bounds both streams to the segment length. +/// `duration` seconds, filled to the portrait canvas with a blurred backdrop +/// (see [`segment_filtergraph`]) and the narration muxed in. `-t` bounds both +/// streams to the segment length. pub fn build_segment_args( image_path: &str, audio_path: &str, @@ -139,7 +146,7 @@ pub fn build_segment_args( "-i".into(), audio_path.into(), "-filter_complex".into(), - format!("[0:v]{}[v];[1:a]apad[a]", segment_filter(opts, duration)), + segment_filtergraph(opts, duration), "-map".into(), "[v]".into(), "-map".into(), @@ -267,26 +274,34 @@ mod tests { } #[test] - fn static_filter_fits_and_pads_without_cropping() { - let f = segment_filter(&SegmentOpts::default(), 4.0); - assert!(f.contains("force_original_aspect_ratio=decrease")); - assert!(f.contains("pad=1920:1080")); - assert!(f.contains("format=yuv420p")); - // No zoompan when ken_burns is off. - assert!(!f.contains("zoompan")); + fn filtergraph_fills_portrait_with_blurred_bg_and_fitted_fg() { + let g = segment_filtergraph(&SegmentOpts::default(), 4.0); + // Background covers + blurs; foreground fits and is centered over it. 
+ assert!(g.contains("split=2[bg][fg]")); + assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase")); + assert!(g.contains("crop=1080:1920")); + assert!(g.contains("boxblur")); + assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease")); + assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); + // Produces the labelled outputs build_segment_args maps. + assert!(g.contains("[v]")); + assert!(g.contains("[1:a]apad[a]")); + assert!(g.contains("format=yuv420p")); } #[test] - fn ken_burns_filter_uses_duration_scaled_frame_count() { - let opts = SegmentOpts { - ken_burns: true, - ..SegmentOpts::default() - }; - // 4s * 30fps = 120 frames in the zoompan d= term. - let f = segment_filter(&opts, 4.0); - assert!(f.contains("zoompan")); - assert!(f.contains("d=120:")); - assert!(f.contains("s=1920x1080")); + fn filtergraph_fades_in_and_out_within_duration() { + // 4s segment, 0.35s fade → fade-out starts at 3.65s. + let g = segment_filtergraph(&SegmentOpts::default(), 4.0); + assert!(g.contains("fade=t=in:st=0:d=0.35")); + assert!(g.contains("fade=t=out:st=3.650:d=0.35")); + } + + #[test] + fn filtergraph_fade_out_start_never_negative_at_floor() { + // A floor-length segment shorter than a fade still yields st >= 0. + let g = segment_filtergraph(&SegmentOpts::default(), 0.2); + assert!(g.contains("fade=t=out:st=0.000:d=0.35")); } #[test] -- 2.52.0 From 740fc4d84151ff55b571f99338367521fad133a5 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 23:20:52 -0400 Subject: [PATCH 04/17] Reels: fix steppy fade (fps before fade) and ease the expression bump The fade looked steppy/low-frame-rate because the filtergraph normalized fps AFTER the fade filters: the brightness ramp was sampled at the looped still's coarse input cadence, then duplicated up to 30fps. Move fps ahead of the fades, pin the still's input framerate (-framerate), and force CFR output (-r) so the dip ramps across a full 30 frames and plays steadily. 
Ease narration expressiveness from 0.7 to 0.6 (still tunable via REEL_TTS_EXAGGERATION). Bump RENDER_VERSION so existing reels re-render. Co-Authored-By: Claude Fable 5 --- src/reels/mod.rs | 10 +++++----- src/reels/render.rs | 30 ++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 9956984..4cfe24b 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -180,17 +180,17 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 2; +const RENDER_VERSION: u32 = 3; -/// Narration expressiveness — Chatterbox's `exaggeration` knob. A modest bump -/// over the ~0.5 default warms up otherwise-flat narration; tune via -/// `REEL_TTS_EXAGGERATION` (0.25–2.0). +/// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump +/// over the ~0.5 default warms up otherwise-flat narration without over-acting; +/// tune via `REEL_TTS_EXAGGERATION` (0.25–2.0). fn reel_tts_exaggeration() -> f32 { std::env::var("REEL_TTS_EXAGGERATION") .ok() .and_then(|s| s.trim().parse::().ok()) .filter(|x| x.is_finite()) - .unwrap_or(0.7) + .unwrap_or(0.6) } /// Cache key over everything that determines *which* media and *how* it's diff --git a/src/reels/render.rs b/src/reels/render.rs index e40fc3d..3cca6ac 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -87,15 +87,20 @@ pub fn segment_filtergraph(opts: &SegmentOpts, duration: f64) -> String { // Fade-out begins one fade-length before the end; clamp so a floor-length // segment still gets a valid (non-negative) start time. let fade_out_start = (duration - FADE_SECONDS).max(0.0); + // `fps` is normalized BEFORE the fades so the brightness ramp is computed + // on a true {fps}-frame timeline. 
If fps came after, the fade would be + // sampled at the looped still's coarse input cadence and then duplicated up + // to {fps}, which reads as a steppy / low-frame-rate dip. format!( "[0:v]split=2[bg][fg];\ [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\ crop={w}:{h},boxblur=20:2[bgb];\ [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\ [bgb][fgs]overlay=(W-w)/2:(H-h)/2,\ + fps={fps},\ fade=t=in:st=0:d={FADE_SECONDS},\ fade=t=out:st={fade_out_start:.3}:d={FADE_SECONDS},\ - setsar=1,fps={fps},format=yuv420p[v];\ + setsar=1,format=yuv420p[v];\ [1:a]apad[a]" ) } @@ -134,11 +139,16 @@ pub fn build_segment_args( duration: f64, opts: &SegmentOpts, ) -> Vec { + let fps = opts.fps.to_string(); let mut args: Vec = vec!["-y".into()]; if opts.nvenc { args.extend(["-hwaccel".into(), "cuda".into()]); } args.extend([ + // Read the looped still at the target rate so frames exist for the + // fade to ramp across (paired with the in-graph `fps` and CFR output). + "-framerate".into(), + fps.clone(), "-loop".into(), "1".into(), "-i".into(), @@ -153,6 +163,10 @@ pub fn build_segment_args( "[a]".into(), "-t".into(), format!("{duration:.3}"), + // Force constant frame rate so the segment (and the concatenated reel) + // plays at a steady {fps} rather than a variable cadence. + "-r".into(), + fps, ]); args.extend(video_encoder_args(opts.nvenc)); args.extend( @@ -297,6 +311,16 @@ mod tests { assert!(g.contains("fade=t=out:st=3.650:d=0.35")); } + #[test] + fn filtergraph_normalizes_fps_before_fading() { + // The fps filter must precede the fades, else the brightness ramp is + // sampled at the still's coarse cadence and looks steppy. 
+ let g = segment_filtergraph(&SegmentOpts::default(), 4.0); + let fps_at = g.find("fps=30").expect("fps in graph"); + let fade_at = g.find("fade=t=in").expect("fade in graph"); + assert!(fps_at < fade_at); + } + #[test] fn filtergraph_fade_out_start_never_negative_at_floor() { // A floor-length segment shorter than a fade still yields st >= 0. @@ -314,10 +338,12 @@ mod tests { &SegmentOpts::default(), ); let joined = args.join(" "); - assert!(joined.contains("-loop 1 -i /img.jpg")); + assert!(joined.contains("-framerate 30 -loop 1 -i /img.jpg")); assert!(joined.contains("-i /a.wav")); assert!(joined.contains("apad")); assert!(joined.contains("-t 4.000")); + // Constant frame rate forced on the output. + assert!(joined.contains("-r 30")); assert!(joined.contains("libx264")); assert!(joined.ends_with("/out.mp4")); } -- 2.52.0 From 6e90f24307209561f626d30a39af2e09afcb97e0 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 23:43:18 -0400 Subject: [PATCH 05/17] Reels: burst beats + duration budget for week/month, plus step logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructures a reel around beats — one narration line over one or more photos — instead of one line per photo. A single-photo beat is a held shot; a multi-photo beat is a quick burst that flashes through several moments of an event while the line is read. So a week/month reel can show everything it spans without a narrated (and timed) segment per photo. Selection (selector.rs): - Duration budget: cap the number of narrated beats to ~REEL_TARGET_SECONDS (default 90, env-tunable) so week/month reels don't run minutes long. - Event clustering by time gap; when there are more events than the beat budget, adjacent events merge so the whole span stays covered. Each beat bursts up to MAX_BURST_PHOTOS (an even spread), so a 40-shot dinner contributes a handful of quick frames, not forty narrated seconds. 
Render (render.rs): a beat renders its photos as a concat of per-photo fills (blurred-bg portrait, fps-before-fade) under one muxed narration; burst photos get a snappier fade. beat_durations splits the narration across the photos, stretching only if a long burst would flash too fast. Adds high-level info logs across the steps (request → script → per-beat narrate/render → join → done with elapsed) for visibility. Bumps RENDER_VERSION to re-render cached reels. Co-Authored-By: Claude Fable 5 --- src/reels/mod.rs | 131 ++++++++++++++----- src/reels/render.rs | 296 ++++++++++++++++++++++++++++-------------- src/reels/script.rs | 95 +++++++++----- src/reels/selector.rs | 262 ++++++++++++++++++++++++++++++++----- 4 files changed, 580 insertions(+), 204 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 4cfe24b..be3f52d 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -36,24 +36,27 @@ use crate::otel::extract_context_from_request; use crate::state::AppState; use selector::ReelSelector; -/// The media behind one reel segment. Photos-only for now; a `Clip` variant -/// (a section of a source video) is the phase-2 extension point. +/// The media behind one shot. Photos-only for now; a `Clip` variant (a section +/// of a source video) is the phase-2 extension point. #[derive(Debug, Clone)] pub enum SegmentMedia { Photo { rel_path: String, library_id: i32 }, } -/// A segment before narration: which photo, when it was taken, and any cached -/// insight to feed the scripter. +/// A beat: one narration line over one or more photos. A single-photo beat is a +/// held shot; a multi-photo beat is a quick burst that flashes through several +/// moments of the same event while the line is read — so a week/month reel can +/// *show* everything it spans without a narration line (and the seconds that +/// come with it) per photo. 
#[derive(Debug, Clone)] -pub struct PlannedSegment { - pub media: SegmentMedia, +pub struct PlannedBeat { + pub photos: Vec, pub date: Option, pub insight_title: Option, pub insight_summary: Option, } -impl PlannedSegment { +impl PlannedBeat { /// Human date for the prompt, e.g. "June 12, 2019". `None` when undated. pub fn date_label(&self) -> Option { let ts = self.date?; @@ -180,7 +183,7 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 3; +const RENDER_VERSION: u32 = 4; /// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump /// over the ~0.5 default warms up otherwise-flat narration without over-acting; @@ -306,16 +309,25 @@ pub async fn create_reel_handler( })); } - let media: Vec = planned.iter().map(|p| p.media.clone()).collect(); + // Flatten every photo across beats (in order) into the cache key — the key + // tracks exactly which photos appear and in what sequence. + let media: Vec = planned.iter().flat_map(|b| b.photos.clone()).collect(); let voice = req.voice.clone().filter(|s| !s.is_empty()); let key = cache_key(&selector, &media, voice.as_deref()); let job_id = Uuid::new_v4(); + log::info!( + "reel {job_id}: request span={:?} → {} beats, {} photos", + span, + planned.len(), + media.len() + ); // Cache hit: register an already-Done job pointing at the existing MP4 so // the client's first poll returns the video URL immediately. 
let mp4 = reel_mp4_path(&app_state, &key); if mp4.exists() { + log::info!("reel {job_id}: cache hit, serving existing reel"); let title = std::fs::read(reel_sidecar_path(&app_state, &key)) .ok() .and_then(|b| serde_json::from_slice::(&b).ok()) @@ -358,6 +370,7 @@ pub async fn create_reel_handler( }, ); } + log::info!("reel {job_id}: queued for generation"); let state = app_state.clone(); let insight_dao = insight_dao.clone(); @@ -441,45 +454,73 @@ async fn run_reel_job( app_state: &AppState, insight_dao: &Mutex>, job_id: Uuid, - mut planned: Vec, + mut planned: Vec, meta: ReelMeta, voice: Option, key: &str, ) -> anyhow::Result<(String, PathBuf)> { use anyhow::{Context, anyhow}; + let started = Instant::now(); + let total_photos: usize = planned.iter().map(|b| b.photos.len()).sum(); + log::info!( + "reel {job_id}: starting — span {:?}, {} beats, {} photos, voice={}", + meta.span, + planned.len(), + total_photos, + voice.as_deref().unwrap_or("default") + ); + let client = app_state .llamacpp .as_ref() .ok_or_else(|| anyhow!("TTS/LLM backend not configured"))? .clone(); - // 1. Enrich with cached insights, then script (one LLM call). + // 1. Enrich each beat with its lead photo's cached insight, then script + // (one LLM call → one narration line per beat). set_stage(job_id, "scripting"); + log::info!("reel {job_id}: scripting narration via LLM…"); let span_context = opentelemetry::Context::new(); selector::enrich(insight_dao, &span_context, &mut planned); let script = script::generate_script(&client, &meta, &planned).await?; + log::info!( + "reel {job_id}: scripted \"{}\" ({} lines)", + script.title, + script.lines.len() + ); - // 2. Narrate each line to speech and 3. render each photo segment. A - // segment whose audio or render fails is skipped (logged) rather than - // sinking the whole reel — handles an odd HEIC/corrupt file gracefully. + // 2. Narrate each beat's line and 3. render the beat (its photos shown in + // sequence under that one narration). 
A beat whose audio or render fails + // is skipped (logged) rather than sinking the whole reel — handles an + // odd HEIC/corrupt file gracefully. set_stage(job_id, "narrating"); let work = tempfile::tempdir().context("creating reel work dir")?; let nvenc = render::is_nvenc_available().await; + log::info!( + "reel {job_id}: narrating + rendering {} beats (encoder: {})", + planned.len(), + if nvenc { "nvenc" } else { "cpu" } + ); let opts = render::SegmentOpts { nvenc, ..Default::default() }; - let mut segment_files: Vec = Vec::new(); - for (i, (seg, line)) in planned.iter().zip(script.lines.iter()).enumerate() { - let image_path = match resolve_image_path(app_state, &seg.media) { - Some(p) => p, - None => { - log::warn!("reel {job_id}: skipping segment {i}, image path unresolved"); - continue; - } - }; + let beat_total = planned.len(); + let mut beat_files: Vec = Vec::new(); + for (i, (beat, line)) in planned.iter().zip(script.lines.iter()).enumerate() { + // Resolve all of the beat's photos to absolute paths; drop any that + // don't resolve. An empty beat is skipped. 
+ let image_paths: Vec = beat + .photos + .iter() + .filter_map(|m| resolve_image_path(app_state, m)) + .collect(); + if image_paths.is_empty() { + log::warn!("reel {job_id}: skipping beat {i}, no image paths resolved"); + continue; + } let audio_bytes = match crate::ai::tts::synthesize_serialized( &client, @@ -492,13 +533,13 @@ async fn run_reel_job( { Ok(b) => b, Err(e) => { - log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}"); + log::warn!("reel {job_id}: skipping beat {i}, TTS failed: {e}"); continue; } }; let audio_path = work.path().join(format!("narration_{i:03}.wav")); if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { - log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}"); + log::warn!("reel {job_id}: skipping beat {i}, writing audio failed: {e}"); continue; } @@ -508,25 +549,37 @@ async fn run_reel_job( .ok() .flatten() .unwrap_or(render::MIN_SEGMENT_SECONDS); - let duration = render::segment_duration(narration_secs); set_stage(job_id, "rendering"); - let seg_out = work.path().join(format!("seg_{i:03}.mp4")); + log::info!( + "reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s", + i + 1, + beat_total, + image_paths.len(), + narration_secs + ); + let beat_out = work.path().join(format!("beat_{i:03}.mp4")); if let Err(e) = - render::render_segment(&image_path, &audio_path, &seg_out, duration, &opts).await + render::render_beat(&image_paths, &audio_path, &beat_out, narration_secs, &opts).await { - log::warn!("reel {job_id}: skipping segment {i}, render failed: {e}"); + log::warn!("reel {job_id}: skipping beat {i}, render failed: {e}"); continue; } - segment_files.push(seg_out.to_string_lossy().to_string()); + beat_files.push(beat_out.to_string_lossy().to_string()); } + let segment_files = beat_files; if segment_files.is_empty() { - return Err(anyhow!("no segments rendered successfully")); + return Err(anyhow!("no beats rendered successfully")); } // 4. Concat into the cache. 
Write to a temp name in the reels dir, then // rename atomically (same filesystem) so a reader never sees a partial. + set_stage(job_id, "rendering"); + log::info!( + "reel {job_id}: joining {} rendered beats into the final reel", + segment_files.len() + ); std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?; let final_path = reel_mp4_path(app_state, key); let tmp_path = final_path.with_extension("mp4.tmp"); @@ -541,6 +594,12 @@ async fn run_reel_job( .context("serializing reel sidecar")?; let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar); + log::info!( + "reel {job_id}: done in {:.1}s — {} beats → {}", + started.elapsed().as_secs_f64(), + segment_files.len(), + final_path.display() + ); Ok((script.title, final_path)) } @@ -622,16 +681,16 @@ mod tests { #[test] fn date_label_formats_or_none() { - let seg = PlannedSegment { - media: photo("a.jpg", 1), + let beat = PlannedBeat { + photos: vec![photo("a.jpg", 1)], date: Some(1_560_384_000), // 2019-06-13 UTC insight_title: None, insight_summary: None, }; - assert!(seg.date_label().unwrap().contains("2019")); + assert!(beat.date_label().unwrap().contains("2019")); - let undated = PlannedSegment { - media: photo("a.jpg", 1), + let undated = PlannedBeat { + photos: vec![photo("a.jpg", 1)], date: None, insight_title: None, insight_summary: None, diff --git a/src/reels/render.rs b/src/reels/render.rs index 3cca6ac..a36f6f1 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -22,25 +22,31 @@ pub use crate::video::ffmpeg::is_nvenc_available; /// Reel canvas. Portrait, because reels are watched on a phone held upright — /// a landscape canvas letterboxes to a thin ~25%-height band there. 
Each photo /// is fitted sharp and centered over a blurred, zoomed copy of itself (see -/// [`segment_filtergraph`]) so the frame is always filled regardless of the +/// [`photo_filter_chain`]) so the frame is always filled regardless of the /// photo's orientation, without cropping the subject. pub const REEL_WIDTH: u32 = 1080; pub const REEL_HEIGHT: u32 = 1920; pub const REEL_FPS: u32 = 30; -/// A still's screen time is its narration length plus a short breath, with a -/// floor so a terse line still lingers. No ceiling: the segment always covers -/// the full narration so speech is never truncated — the scripter is asked to -/// keep lines short instead. +/// A beat's screen time is its narration length plus a short breath, with a +/// floor so a terse line still lingers. No ceiling: the beat always covers the +/// full narration so speech is never truncated — the scripter is asked to keep +/// lines short instead. pub const MIN_SEGMENT_SECONDS: f64 = 2.5; const NARRATION_TAIL_SECONDS: f64 = 0.6; -/// Quick fade in/out baked into each segment so concatenated photos dip -/// smoothly instead of hard-cutting. The fade-out lands inside the narration's -/// silent tail, so speech is never clipped. -const FADE_SECONDS: f64 = 0.35; +/// Fade durations baked into each photo. A held (single-photo) beat gets a +/// gentle dip; burst photos get a snappier fade so the montage feels quick. +const SINGLE_FADE_SECONDS: f64 = 0.35; +const BURST_FADE_SECONDS: f64 = 0.15; -/// Screen time for a photo segment given its narration audio length. +/// Floor on how long each burst photo stays up, so a long line over many photos +/// doesn't flash them subliminally. If the narration is too short to give every +/// photo this much, the beat is stretched to fit. +const MIN_BURST_PHOTO_SECONDS: f64 = 0.6; + +/// Base screen time for a beat given its narration length: narration + breath, +/// floored. Used as the lower bound on a beat's total duration. 
pub fn segment_duration(narration_secs: f64) -> f64 { let d = narration_secs + NARRATION_TAIL_SECONDS; if d.is_finite() && d > MIN_SEGMENT_SECONDS { @@ -50,6 +56,29 @@ pub fn segment_duration(narration_secs: f64) -> f64 { } } +/// Split a beat into per-photo durations. The beat lasts at least its narration +/// (so speech isn't cut) and at least `n × MIN_BURST_PHOTO_SECONDS` (so a fast +/// burst stays legible); the photos share that total evenly. Returns +/// `(total_seconds, per_photo_seconds)`. +pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec) { + let n = n_photos.max(1); + let base = segment_duration(narration_secs); + let min_total = n as f64 * MIN_BURST_PHOTO_SECONDS; + let total = if base > min_total { base } else { min_total }; + let each = total / n as f64; + (total, vec![each; n]) +} + +/// Fade length to use for a beat of `n_photos` (gentle when held, snappy in a +/// burst). +fn fade_for(n_photos: usize) -> f64 { + if n_photos > 1 { + BURST_FADE_SECONDS + } else { + SINGLE_FADE_SECONDS + } +} + /// Options controlling per-segment rendering. #[derive(Debug, Clone, Copy)] pub struct SegmentOpts { @@ -70,38 +99,49 @@ impl Default for SegmentOpts { } } -/// Full `filter_complex` for one photo segment, producing labelled `[v]` (video) -/// and `[a]` (audio) outputs. Input 0 is the looped still, input 1 the -/// narration. +/// Filter chain for one photo (input `idx`) producing the labelled output +/// `[v{idx}]`. Splits the still into a background and foreground: the background +/// is scaled to *cover* the canvas and heavily blurred; the foreground is +/// scaled to *fit* and overlaid centered. This fills the portrait frame for any +/// photo orientation — no black bars, no cropping of the subject — then a fade +/// in/out softens the cut. Intermediate labels are suffixed with `idx` so +/// several chains coexist in one `filter_complex`. /// -/// Video: split the still into a background and foreground. 
The background is -/// scaled to *cover* the canvas and heavily blurred; the foreground is scaled to -/// *fit* inside it and overlaid centered. This fills the portrait frame for any -/// photo orientation — no black bars, no cropping of the subject — then a quick -/// fade in/out softens the cut to the next segment. -/// -/// Audio: pad the narration with trailing silence so a short line doesn't end -/// the segment early; `-t` bounds it to the segment duration. -pub fn segment_filtergraph(opts: &SegmentOpts, duration: f64) -> String { +/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a +/// true {fps}-frame timeline; otherwise the fade is sampled at the looped +/// still's coarse cadence and duplicated up, which reads as a steppy dip. +fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String { let (w, h, fps) = (opts.width, opts.height, opts.fps); - // Fade-out begins one fade-length before the end; clamp so a floor-length - // segment still gets a valid (non-negative) start time. - let fade_out_start = (duration - FADE_SECONDS).max(0.0); - // `fps` is normalized BEFORE the fades so the brightness ramp is computed - // on a true {fps}-frame timeline. If fps came after, the fade would be - // sampled at the looped still's coarse input cadence and then duplicated up - // to {fps}, which reads as a steppy / low-frame-rate dip. 
+ let fade_out_start = (duration - fade).max(0.0); format!( - "[0:v]split=2[bg][fg];\ - [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\ - crop={w}:{h},boxblur=20:2[bgb];\ - [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\ - [bgb][fgs]overlay=(W-w)/2:(H-h)/2,\ + "[{idx}:v]split=2[bg{idx}][fg{idx}];\ + [bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\ + crop={w}:{h},boxblur=20:2[bgb{idx}];\ + [fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\ + [bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\ fps={fps},\ - fade=t=in:st=0:d={FADE_SECONDS},\ - fade=t=out:st={fade_out_start:.3}:d={FADE_SECONDS},\ - setsar=1,format=yuv420p[v];\ - [1:a]apad[a]" + fade=t=in:st=0:d={fade},\ + fade=t=out:st={fade_out_start:.3}:d={fade},\ + setsar=1,format=yuv420p[v{idx}]" + ) +} + +/// Full `filter_complex` for a beat of `per_photo` durations: one chain per +/// photo, concatenated into `[v]`, with the narration (the last input, index +/// `per_photo.len()`) padded with trailing silence into `[a]`. A single-photo +/// beat degenerates to one chain + `concat=n=1` (a passthrough). +pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String { + let n = per_photo.len().max(1); + let fade = fade_for(n); + let chains: Vec = per_photo + .iter() + .enumerate() + .map(|(i, &d)| photo_filter_chain(i, opts, d, fade)) + .collect(); + let concat_inputs: String = (0..n).map(|i| format!("[v{i}]")).collect(); + format!( + "{chains};{concat_inputs}concat=n={n}:v=1:a=0[v];[{n}:a]apad[a]", + chains = chains.join(";") ) } @@ -128,15 +168,16 @@ fn video_encoder_args(nvenc: bool) -> Vec { .collect() } -/// Build the ffmpeg args that render one photo segment: a still looped for -/// `duration` seconds, filled to the portrait canvas with a blurred backdrop -/// (see [`segment_filtergraph`]) and the narration muxed in. `-t` bounds both -/// streams to the segment length. 
-pub fn build_segment_args( - image_path: &str, +/// Build the ffmpeg args that render one beat: each photo looped for its slice +/// of the beat (filled to the portrait canvas with a blurred backdrop), the +/// slices concatenated, and the single narration muxed over the whole thing. +/// `total` bounds the output (and the apad'd audio) to the beat length. +pub fn build_beat_args( + image_paths: &[String], audio_path: &str, out_path: &str, - duration: f64, + per_photo: &[f64], + total: f64, opts: &SegmentOpts, ) -> Vec { let fps = opts.fps.to_string(); @@ -144,26 +185,33 @@ pub fn build_segment_args( if opts.nvenc { args.extend(["-hwaccel".into(), "cuda".into()]); } + // One looped-still input per photo, each bounded to its slice by an input + // `-t`; reading at the target `-framerate` gives the fades real frames to + // ramp across. + for (path, &dur) in image_paths.iter().zip(per_photo.iter()) { + args.extend([ + "-framerate".into(), + fps.clone(), + "-loop".into(), + "1".into(), + "-t".into(), + format!("{dur:.3}"), + "-i".into(), + path.clone(), + ]); + } args.extend([ - // Read the looped still at the target rate so frames exist for the - // fade to ramp across (paired with the in-graph `fps` and CFR output). - "-framerate".into(), - fps.clone(), - "-loop".into(), - "1".into(), - "-i".into(), - image_path.into(), "-i".into(), audio_path.into(), "-filter_complex".into(), - segment_filtergraph(opts, duration), + beat_filtergraph(opts, per_photo), "-map".into(), "[v]".into(), "-map".into(), "[a]".into(), "-t".into(), - format!("{duration:.3}"), - // Force constant frame rate so the segment (and the concatenated reel) + format!("{total:.3}"), + // Force constant frame rate so the beat (and the concatenated reel) // plays at a steady {fps} rather than a variable cadence. "-r".into(), fps, @@ -231,22 +279,33 @@ async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> { Ok(()) } -/// Render one photo segment to `out_path`. 
-pub async fn render_segment( - image_path: &Path, +/// Render one beat to `out_path`: its photos shown in sequence (a held shot for +/// one photo, a quick burst for several) under the single narration in +/// `audio_path`, whose measured length sets the beat's pacing. +pub async fn render_beat( + image_paths: &[std::path::PathBuf], audio_path: &Path, out_path: &Path, - duration: f64, + narration_secs: f64, opts: &SegmentOpts, ) -> Result<()> { - let args = build_segment_args( - &image_path.to_string_lossy(), + if image_paths.is_empty() { + bail!("render_beat called with no images"); + } + let (total, per_photo) = beat_durations(narration_secs, image_paths.len()); + let paths: Vec = image_paths + .iter() + .map(|p| p.to_string_lossy().to_string()) + .collect(); + let args = build_beat_args( + &paths, &audio_path.to_string_lossy(), &out_path.to_string_lossy(), - duration, + &per_photo, + total, opts, ); - run_ffmpeg(&args, "segment render").await + run_ffmpeg(&args, "beat render").await } /// Join rendered segments into the final reel. Writes the concat list into the @@ -288,73 +347,108 @@ mod tests { } #[test] - fn filtergraph_fills_portrait_with_blurred_bg_and_fitted_fg() { - let g = segment_filtergraph(&SegmentOpts::default(), 4.0); - // Background covers + blurs; foreground fits and is centered over it. - assert!(g.contains("split=2[bg][fg]")); + fn beat_durations_single_photo_matches_base() { + let (total, per) = beat_durations(4.0, 1); + assert!((total - 4.6).abs() < 1e-9); // narration + tail + assert_eq!(per.len(), 1); + assert!((per[0] - 4.6).abs() < 1e-9); + } + + #[test] + fn beat_durations_burst_splits_evenly() { + // 5 photos, narration 4.6s base → ~0.92s each (above the 0.6 floor). 
+ let (total, per) = beat_durations(4.0, 5); + assert!((total - 4.6).abs() < 1e-9); + assert_eq!(per.len(), 5); + assert!((per.iter().sum::() - total).abs() < 1e-9); + assert!(per.iter().all(|&d| d >= MIN_BURST_PHOTO_SECONDS)); + } + + #[test] + fn beat_durations_stretches_when_narration_too_short_for_burst() { + // Floor narration (2.5s) over 10 photos would be 0.25s each — below the + // legibility floor, so the beat stretches to 10 × 0.6 = 6s. + let (total, per) = beat_durations(0.0, 10); + assert!((total - 6.0).abs() < 1e-9); + assert!(per.iter().all(|&d| (d - 0.6).abs() < 1e-9)); + } + + #[test] + fn beat_filtergraph_single_photo_fills_portrait_and_holds() { + let (_t, per) = beat_durations(4.0, 1); + let g = beat_filtergraph(&SegmentOpts::default(), &per); + assert!(g.contains("[0:v]split=2[bg0][fg0]")); assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase")); assert!(g.contains("crop=1080:1920")); - assert!(g.contains("boxblur")); assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease")); assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); - // Produces the labelled outputs build_segment_args maps. - assert!(g.contains("[v]")); + // Single photo → concat of one, gentle fade, audio is input 1. + assert!(g.contains("concat=n=1:v=1:a=0[v]")); + assert!(g.contains("d=0.35")); // SINGLE_FADE assert!(g.contains("[1:a]apad[a]")); - assert!(g.contains("format=yuv420p")); } #[test] - fn filtergraph_fades_in_and_out_within_duration() { - // 4s segment, 0.35s fade → fade-out starts at 3.65s. - let g = segment_filtergraph(&SegmentOpts::default(), 4.0); - assert!(g.contains("fade=t=in:st=0:d=0.35")); - assert!(g.contains("fade=t=out:st=3.650:d=0.35")); + fn beat_filtergraph_burst_chains_concats_and_snappy_fade() { + let (_t, per) = beat_durations(4.0, 3); + let g = beat_filtergraph(&SegmentOpts::default(), &per); + // One chain per photo with index-suffixed labels. 
+ assert!(g.contains("[0:v]split") && g.contains("[1:v]split") && g.contains("[2:v]split")); + // Concatenated in order, audio is the 4th input (index 3). + assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]")); + assert!(g.contains("[3:a]apad[a]")); + // Burst uses the snappier fade. + assert!(g.contains("d=0.15")); + assert!(!g.contains("d=0.35")); } #[test] - fn filtergraph_normalizes_fps_before_fading() { - // The fps filter must precede the fades, else the brightness ramp is - // sampled at the still's coarse cadence and looks steppy. - let g = segment_filtergraph(&SegmentOpts::default(), 4.0); + fn beat_filtergraph_normalizes_fps_before_fading() { + // fps must precede the fades on every chain (else the dip looks steppy). + let (_t, per) = beat_durations(4.0, 1); + let g = beat_filtergraph(&SegmentOpts::default(), &per); let fps_at = g.find("fps=30").expect("fps in graph"); let fade_at = g.find("fade=t=in").expect("fade in graph"); assert!(fps_at < fade_at); } #[test] - fn filtergraph_fade_out_start_never_negative_at_floor() { - // A floor-length segment shorter than a fade still yields st >= 0. - let g = segment_filtergraph(&SegmentOpts::default(), 0.2); - assert!(g.contains("fade=t=out:st=0.000:d=0.35")); - } - - #[test] - fn segment_args_loop_still_and_bound_with_t() { - let args = build_segment_args( - "/img.jpg", - "/a.wav", + fn beat_args_one_input_per_photo_plus_audio_bound_by_total() { + let (total, per) = beat_durations(4.0, 2); + let args = build_beat_args( + &["/a.jpg".into(), "/b.jpg".into()], + "/n.wav", "/out.mp4", - 4.0, + &per, + total, &SegmentOpts::default(), ); let joined = args.join(" "); - assert!(joined.contains("-framerate 30 -loop 1 -i /img.jpg")); - assert!(joined.contains("-i /a.wav")); - assert!(joined.contains("apad")); - assert!(joined.contains("-t 4.000")); - // Constant frame rate forced on the output. + // A looped-still input per photo, each with its slice -t, then the audio. 
+ assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg")); + assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg")); + assert!(joined.contains("-i /n.wav")); + // Output bounded to the beat total and forced CFR. + assert!(joined.contains("-t 4.600")); assert!(joined.contains("-r 30")); - assert!(joined.contains("libx264")); assert!(joined.ends_with("/out.mp4")); } #[test] - fn segment_args_use_nvenc_and_cuda_when_enabled() { + fn beat_args_use_nvenc_and_cuda_when_enabled() { let opts = SegmentOpts { nvenc: true, ..SegmentOpts::default() }; - let args = build_segment_args("/img.jpg", "/a.wav", "/out.mp4", 3.0, &opts); + let (total, per) = beat_durations(3.0, 1); + let args = build_beat_args( + &["/img.jpg".into()], + "/a.wav", + "/out.mp4", + &per, + total, + &opts, + ); let joined = args.join(" "); assert!(joined.contains("-hwaccel cuda")); assert!(joined.contains("h264_nvenc")); diff --git a/src/reels/script.rs b/src/reels/script.rs index 1cf3189..85fff7c 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -1,10 +1,11 @@ //! Narration scripting for memory reels. //! -//! One LLM call turns the planned segments (each carrying its date and, where +//! One LLM call turns the planned beats (each carrying its date and, where //! available, its cached insight) into a short first-person narration line per -//! photo plus a title for the reel. We reuse the cached insight summary as the -//! richest per-photo signal rather than re-running vision at reel time — that -//! keeps reel generation off the GPU's vision slot entirely. +//! beat plus a title for the reel. A beat may show several photos in a quick +//! burst, so a line narrates the *moment*, not a single frame. We reuse the +//! cached insight summary as the richest signal rather than re-running vision +//! at reel time — that keeps reel generation off the GPU's vision slot. //! //! The prompt builder and response parser are pure so the contract is //! 
unit-testable; `generate_script` wires them to the LLM client. @@ -12,11 +13,11 @@ use anyhow::{Context, Result}; use std::sync::Arc; -use super::{PlannedSegment, ReelMeta}; +use super::{PlannedBeat, ReelMeta}; use crate::ai::llamacpp::LlamaCppClient; use crate::ai::llm_client::LlmClient; -/// The narration for a whole reel: a title and one line per segment, in order. +/// The narration for a whole reel: a title and one line per beat, in order. #[derive(Debug, Clone, PartialEq)] pub struct ReelScript { pub title: String, @@ -26,33 +27,38 @@ pub struct ReelScript { const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \ slideshow of someone's own photos set to a spoken voiceover. Write warm, \ specific, first-person narration as if the person is gently looking back on \ -their own memories. Be concrete and grounded in the details given; never \ -invent names, places, or events that aren't supported. Keep each line to one \ -or two short sentences that can be read aloud in a few seconds. Avoid generic \ -filler like \"what a wonderful day\" — if you have little to go on, simply \ -describe the moment plainly."; +their own memories. Each line plays over one moment, which may be a quick burst \ +of several photos, so narrate the moment as a whole rather than a single frame. \ +Be concrete and grounded in the details given; never invent names, places, or \ +events that aren't supported. Keep each line to one or two short sentences that \ +can be read aloud in a few seconds. Avoid generic filler like \"what a \ +wonderful day\" — if you have little to go on, simply describe the moment \ +plainly."; /// Build the (system, user) prompt pair for the scripter. The user message -/// describes each segment in order and asks for strict JSON back. -pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (String, String) { +/// describes each beat in order and asks for strict JSON back. 
+pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) { let mut user = String::new(); user.push_str(&format!( - "These are {} photos surfaced as memories {}.\n\n", - planned.len(), + "This reel has {} moments surfaced as memories {}.\n\n", + beats.len(), meta.span_phrase() )); if !meta.years.is_empty() { let years: Vec = meta.years.iter().map(|y| y.to_string()).collect(); user.push_str(&format!("They span the years: {}.\n\n", years.join(", "))); } - user.push_str("Photos, in the order they will appear:\n"); - for (i, seg) in planned.iter().enumerate() { + user.push_str("Moments, in the order they will appear:\n"); + for (i, beat) in beats.iter().enumerate() { user.push_str(&format!("\n[{}]", i + 1)); - if let Some(date) = seg.date_label() { + if let Some(date) = beat.date_label() { user.push_str(&format!(" {date}")); } + if beat.photos.len() > 1 { + user.push_str(&format!(" (a burst of {} photos)", beat.photos.len())); + } user.push('\n'); - match (&seg.insight_title, &seg.insight_summary) { + match (&beat.insight_title, &beat.insight_summary) { (Some(t), Some(s)) if !s.trim().is_empty() => { user.push_str(&format!(" Known context: {t} — {s}\n")); } @@ -65,10 +71,10 @@ pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (St } user.push_str(&format!( "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\ - {{\"title\": \"\", \"segments\": [\"\", \ - \"\", ... ]}}\n\ - The \"segments\" array MUST have exactly {} items, one per photo in order.", - planned.len() + {{\"title\": \"\", \"segments\": [\"\", \ + \"\", ... ]}}\n\ + The \"segments\" array MUST have exactly {} items, one per moment in order.", + beats.len() )); (SYSTEM_PROMPT.to_string(), user) } @@ -174,20 +180,20 @@ fn clean_text(s: &str) -> String { trimmed.split_whitespace().collect::>().join(" ") } -/// Generate the reel script via the LLM. Text-only (no images) — the per-photo +/// Generate the reel script via the LLM. 
Text-only (no images) — the per-beat /// context comes from cached insights. The call takes the GPU read lease /// internally (see `LlamaCppClient::generate`). pub async fn generate_script( client: &Arc, meta: &ReelMeta, - planned: &[PlannedSegment], + beats: &[PlannedBeat], ) -> Result { - let (system, user) = build_script_messages(meta, planned); + let (system, user) = build_script_messages(meta, beats); let raw = client .generate(&user, Some(&system), None) .await .context("LLM script generation failed")?; - Ok(parse_script_response(&raw, planned.len())) + Ok(parse_script_response(&raw, beats.len())) } #[cfg(test)] @@ -202,13 +208,13 @@ mod tests { } } - fn planned(n: usize) -> Vec { + fn planned(n: usize) -> Vec { (0..n) - .map(|i| PlannedSegment { - media: super::super::SegmentMedia::Photo { + .map(|i| PlannedBeat { + photos: vec![super::super::SegmentMedia::Photo { rel_path: format!("p{i}.jpg"), library_id: 1, - }, + }], date: Some(1_560_000_000 + i as i64 * 86_400), insight_title: None, insight_summary: None, @@ -217,16 +223,37 @@ mod tests { } #[test] - fn prompt_states_exact_segment_count_and_span() { + fn prompt_states_exact_moment_count_and_span() { let (sys, user) = build_script_messages(&meta(), &planned(3)); assert!(sys.contains("memory reel")); - assert!(user.contains("3 photos")); + assert!(user.contains("3 moments")); assert!(user.contains("on this day")); assert!(user.contains("exactly 3 items")); - // Each photo gets an indexed entry. + // Each moment gets an indexed entry. 
assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]")); } + #[test] + fn prompt_notes_burst_photo_count() { + let mut p = planned(1); + p[0].photos = vec![ + super::super::SegmentMedia::Photo { + rel_path: "a.jpg".into(), + library_id: 1, + }, + super::super::SegmentMedia::Photo { + rel_path: "b.jpg".into(), + library_id: 1, + }, + super::super::SegmentMedia::Photo { + rel_path: "c.jpg".into(), + library_id: 1, + }, + ]; + let (_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("a burst of 3 photos")); + } + #[test] fn prompt_includes_insight_context_when_present() { let mut p = planned(1); diff --git a/src/reels/selector.rs b/src/reels/selector.rs index 0a53ee5..fb83e38 100644 --- a/src/reels/selector.rs +++ b/src/reels/selector.rs @@ -13,18 +13,51 @@ use std::sync::Mutex; use chrono::{DateTime, Datelike, FixedOffset}; -use super::{PlannedSegment, ReelMeta, SegmentMedia}; +use super::{PlannedBeat, ReelMeta, SegmentMedia}; use crate::database::{ExifDao, InsightDao}; use crate::file_types::is_image_file; use crate::memories::{self, MemoriesSpan}; use crate::state::AppState; -/// Default and hard caps on how many photos a reel covers. The cap bounds the -/// LLM/TTS/ffmpeg work per reel; when a span has more, [`sample_evenly`] keeps -/// a representative spread across the years rather than just the oldest. -pub const DEFAULT_MAX_SEGMENTS: usize = 24; +/// Default and hard caps on how many photos a reel covers. The default is an +/// upper bound on the request; the effective count is usually smaller, set by +/// the duration budget (see [`budget_segments`]). The hard cap bounds work per +/// reel regardless. +pub const DEFAULT_MAX_SEGMENTS: usize = 40; pub const HARD_MAX_SEGMENTS: usize = 40; +/// Target reel length. Week and especially month spans can surface hundreds of +/// photos; at a few seconds of narration each, a naive reel runs minutes. We +/// cap the segment count to keep the reel near this length. 
Tunable via +/// `REEL_TARGET_SECONDS`. +const DEFAULT_TARGET_REEL_SECONDS: f64 = 90.0; + +/// Rough average wall-time per photo segment (a short narration line + the +/// silent tail). Only used to turn the duration target into a segment count; +/// the real per-segment time is the measured narration length. +const EST_SECONDS_PER_SEGMENT: f64 = 5.0; + +/// Time gap that separates one "event/moment" from the next when clustering a +/// span's photos. Photos within a few hours are treated as the same occasion +/// (and across years/days the gaps are far larger, so each instance clusters +/// on its own). 4 hours splits e.g. a morning hike from an evening dinner. +const EVENT_GAP_SECONDS: i64 = 4 * 3600; + +fn target_reel_seconds() -> f64 { + std::env::var("REEL_TARGET_SECONDS") + .ok() + .and_then(|s| s.trim().parse::().ok()) + .filter(|x| x.is_finite() && *x > 0.0) + .unwrap_or(DEFAULT_TARGET_REEL_SECONDS) +} + +/// How many photo segments fit the duration budget, bounded by the request's +/// max and the hard cap. This is what keeps week/month reels from running long. +pub fn budget_segments(requested_max: usize) -> usize { + let by_budget = (target_reel_seconds() / EST_SECONDS_PER_SEGMENT).floor() as usize; + by_budget.min(requested_max).clamp(1, HARD_MAX_SEGMENTS) +} + /// What a reel is built from. v1 ships the memories (on this day/week/month) /// selector; tag and date-range variants slot in here later. #[derive(Debug, Clone)] @@ -81,6 +114,104 @@ pub fn sample_evenly(items: &[T], max: usize) -> Vec { .collect() } +/// Group time-sorted items into events by gap: a new event starts whenever the +/// jump from the previous photo exceeds `gap_seconds`. Preserves order; items +/// without a timestamp extend the current event. 
+fn cluster_by_gap( + items: &[memories::MemoryItem], + gap_seconds: i64, +) -> Vec> { + let mut clusters: Vec> = Vec::new(); + let mut prev_ts: Option = None; + for it in items { + let starts_new = match (prev_ts, it.created) { + (Some(p), Some(c)) => c - p > gap_seconds, + _ => false, + }; + if starts_new || clusters.is_empty() { + clusters.push(Vec::new()); + } + clusters.last_mut().unwrap().push(it.clone()); + if let Some(c) = it.created { + prev_ts = Some(c); + } + } + clusters +} + +/// Most photos a single beat will flash through. Bounds the burst so one huge +/// event doesn't dominate, and keeps each photo on screen long enough to +/// register at the per-beat narration length (see render's beat timing). +pub const MAX_BURST_PHOTOS: usize = 10; + +/// Merge a list of (time-ordered) event clusters into exactly `n` contiguous +/// groups, so a span with more events than the beat budget still covers the +/// whole timeline — adjacent events fold together into one beat rather than +/// getting dropped. `n` must be ≥ 1 and ≤ clusters.len(). +fn partition_into_groups( + clusters: Vec>, + n: usize, +) -> Vec> { + let c = clusters.len(); + let mut clusters = clusters.into_iter(); + (0..n) + .map(|j| { + // Even contiguous split of c clusters into n groups. + let start = j * c / n; + let end = (j + 1) * c / n; + let take = end.saturating_sub(start).max(1); + (0..take) + .flat_map(|_| clusters.next().into_iter().flatten()) + .collect() + }) + .collect() +} + +/// Turn a span's photos into `n_beats` beats. Clusters photos into events by +/// time gap; if there are more events than beats, adjacent events are merged so +/// the whole span is still covered. Each beat then flashes up to +/// `max_burst` photos (an even spread of its group) under one narration line — +/// so a week/month reel *shows* all its moments without a narrated (and timed) +/// segment per photo. 
+pub fn form_beats( + items: &[memories::MemoryItem], + n_beats: usize, + max_burst: usize, +) -> Vec { + if n_beats == 0 || items.is_empty() { + return Vec::new(); + } + let clusters = cluster_by_gap(items, EVENT_GAP_SECONDS); + // One beat per event when they fit; otherwise fold adjacent events together + // into exactly n_beats groups. + let groups = if clusters.len() <= n_beats { + clusters + } else { + partition_into_groups(clusters, n_beats) + }; + + groups + .into_iter() + .filter(|g| !g.is_empty()) + .map(|group| { + let shown = sample_evenly(&group, max_burst); + let date = shown.first().and_then(|it| it.created); + PlannedBeat { + photos: shown + .into_iter() + .map(|it| SegmentMedia::Photo { + rel_path: it.path, + library_id: it.library_id, + }) + .collect(), + date, + insight_title: None, + insight_summary: None, + } + }) + .collect() +} + /// Cheap pass: resolve the selector into an ordered list of media (no insight /// lookups yet) plus reel metadata. `Err` only on an invalid library param. pub fn resolve( @@ -88,7 +219,7 @@ pub fn resolve( exif_dao: &Mutex>, span_context: &opentelemetry::Context, selector: &ReelSelector, -) -> Result<(Vec, ReelMeta), String> { +) -> Result<(Vec, ReelMeta), String> { match selector { ReelSelector::Memories { span, @@ -108,32 +239,23 @@ pub fn resolve( )?; // Phase 1 is photos-only: drop videos (a clip segment type lands - // in phase 2). Filter before sampling so the spread is over the - // photos that will actually appear. + // in phase 2). let items: Vec = items .into_iter() .filter(|it| is_image_file(Path::new(&it.path))) .collect(); - let cap = (*max_segments).clamp(1, HARD_MAX_SEGMENTS); - let items = sample_evenly(&items, cap); - + // Years are derived from the whole span (what the reel represents), + // before the budget narrows it down to beats. 
let years = distinct_years(&items, client_tz); let meta = ReelMeta { span: *span, years }; - let planned = items - .into_iter() - .map(|it| PlannedSegment { - media: SegmentMedia::Photo { - rel_path: it.path, - library_id: it.library_id, - }, - date: it.created, - insight_title: None, - insight_summary: None, - }) - .collect(); - Ok((planned, meta)) + // The budget caps the number of narrated beats (≈ reel length); + // each beat then bursts through several photos, so the reel covers + // the span's moments without running minutes long. + let n_beats = budget_segments(*max_segments); + let beats = form_beats(&items, n_beats, MAX_BURST_PHOTOS); + Ok((beats, meta)) } } } @@ -155,24 +277,24 @@ fn distinct_years(items: &[memories::MemoryItem], tz: Option) -> Ve years } -/// Background pass: fill each segment's cached insight (title + summary) where -/// one exists. Best-effort — a missing or errored lookup leaves the fields -/// `None` and the scripter narrates from the date alone. +/// Background pass: fill each beat's cached insight (title + summary) from its +/// lead photo, where one exists. Best-effort — a missing or errored lookup +/// leaves the fields `None` and the scripter narrates from the date alone. pub fn enrich( insight_dao: &Mutex>, span_context: &opentelemetry::Context, - planned: &mut [PlannedSegment], + beats: &mut [PlannedBeat], ) { let Ok(mut dao) = insight_dao.lock() else { return; }; - for seg in planned.iter_mut() { - let rel_path = match &seg.media { - SegmentMedia::Photo { rel_path, .. } => rel_path, + for beat in beats.iter_mut() { + let Some(SegmentMedia::Photo { rel_path, .. 
}) = beat.photos.first() else { + continue; }; if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) { - seg.insight_title = Some(insight.title); - seg.insight_summary = Some(insight.summary); + beat.insight_title = Some(insight.title); + beat.insight_summary = Some(insight.summary); } } } @@ -249,4 +371,78 @@ mod tests { ]; assert_eq!(distinct_years(&items, None), vec![2019, 2021]); } + + // Build an item at a given unix timestamp (seconds). + fn item_at(ts: i64, name: &str) -> memories::MemoryItem { + memories::MemoryItem { + path: format!("{name}.jpg"), + created: Some(ts), + modified: None, + library_id: 1, + } + } + + #[test] + fn budget_segments_caps_to_duration_target() { + // 90s / 5s ≈ 18, bounded by the request max and hard cap. + assert_eq!(budget_segments(40), 18); + assert_eq!(budget_segments(5), 5); // request asked for fewer + assert_eq!(budget_segments(1000), 18); // hard cap / budget wins + } + + #[test] + fn cluster_by_gap_splits_on_large_jumps() { + // Two photos minutes apart, then one a day later → two events. + let items = vec![ + item_at(1_000_000, "a"), + item_at(1_000_300, "b"), // +5 min → same event + item_at(1_100_000, "c"), // +~27h → new event + ]; + let clusters = cluster_by_gap(&items, EVENT_GAP_SECONDS); + assert_eq!(clusters.len(), 2); + assert_eq!(clusters[0].len(), 2); + assert_eq!(clusters[1].len(), 1); + } + + #[test] + fn form_beats_one_beat_per_event_when_they_fit() { + // Three well-separated events, budget of 10 → three beats, each holding + // all of its (few) photos. 
+ let items = vec![ + item_at(0, "a"), + item_at(50, "b"), // same event as a + item_at(1_000_000, "c"), + item_at(2_000_000, "d"), + ]; + let beats = form_beats(&items, 10, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 3); + assert_eq!(beats[0].photos.len(), 2); // burst of the first event + assert_eq!(beats[1].photos.len(), 1); + assert_eq!(beats[2].photos.len(), 1); + } + + #[test] + fn form_beats_merges_events_when_over_budget() { + // Six distinct events but only two beats → adjacent events fold in, and + // every event's photos still appear (capped by the burst max). + let items: Vec = (0..6) + .map(|i| item_at(i as i64 * 1_000_000, &format!("e{i}"))) + .collect(); + let beats = form_beats(&items, 2, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 2); + let shown: usize = beats.iter().map(|b| b.photos.len()).sum(); + assert_eq!(shown, 6); // all six moments still shown across two beats + } + + #[test] + fn form_beats_caps_burst_to_max() { + // One dense event of 30 photos, generous budget → a single beat that + // bursts at most MAX_BURST_PHOTOS, not all 30. 
+ let items: Vec = (0..30) + .map(|i| item_at(i as i64, &format!("p{i}"))) + .collect(); + let beats = form_beats(&items, 18, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 1); + assert_eq!(beats[0].photos.len(), MAX_BURST_PHOTOS); + } } -- 2.52.0 From 299e32b014f230f83f622ee0e16b3deb8e7048d8 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 23:45:24 -0400 Subject: [PATCH 06/17] Bump version to 1.4.0 Co-Authored-By: Claude Fable 5 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a35a7d2..9455f5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2051,7 +2051,7 @@ dependencies = [ [[package]] name = "image-api" -version = "1.3.0" +version = "1.4.0" dependencies = [ "actix", "actix-cors", diff --git a/Cargo.toml b/Cargo.toml index 3b3a08a..860e6ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "image-api" -version = "1.3.0" +version = "1.4.0" authors = ["Cameron Cordes "] edition = "2024" -- 2.52.0 From 65793a2dda9747aaa03547c7c35fb4509d60c5ac Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 00:02:51 -0400 Subject: [PATCH 07/17] Reels: mixed-media (video clip beats) + faster burst fade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Videos in a span now appear as clip beats: the first few seconds of the video (capped at CLIP_SECONDS=5, and to the source length) filled to the portrait canvas like photos, with its live audio ducked under the narration (amix at 0.35). If the narration outlasts the clip, the last frame is held (tpad); clips with no audio track just play under narration. Selection splits the beat budget between photo beats and clip beats — clips get up to half (≥1 when present), photos the rest — then merges both back into chronological order. 
SegmentMedia gains a Clip variant; beats carry `media` (photos or one clip) and the cache key tags P/C so a path used as a still vs a clip differ. Also drops the burst fade from 0.15s to 0.08s so a quick burst reads clearly differently from a held shot. Bumps RENDER_VERSION. The clip filtergraph (fill + duck-mix + last-frame hold) is unit-tested but, like the rest of the ffmpeg path, wants a real render check on the GPU host. Co-Authored-By: Claude Fable 5 --- src/reels/mod.rs | 161 ++++++++++++++++++++++--------- src/reels/render.rs | 218 +++++++++++++++++++++++++++++++++++++++++- src/reels/script.rs | 21 +++- src/reels/selector.rs | 170 +++++++++++++++++++++++++------- 4 files changed, 479 insertions(+), 91 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index be3f52d..17def48 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -36,21 +36,40 @@ use crate::otel::extract_context_from_request; use crate::state::AppState; use selector::ReelSelector; -/// The media behind one shot. Photos-only for now; a `Clip` variant (a section -/// of a source video) is the phase-2 extension point. +/// The media behind one shot: a still photo, or a short section of a source +/// video (played with its live audio ducked under the narration). Both carry +/// just the library-relative path; the renderer applies fixed clip framing +/// (start/length) from constants. #[derive(Debug, Clone)] pub enum SegmentMedia { Photo { rel_path: String, library_id: i32 }, + Clip { rel_path: String, library_id: i32 }, } -/// A beat: one narration line over one or more photos. A single-photo beat is a -/// held shot; a multi-photo beat is a quick burst that flashes through several -/// moments of the same event while the line is read — so a week/month reel can -/// *show* everything it spans without a narration line (and the seconds that -/// come with it) per photo. +impl SegmentMedia { + fn rel_path(&self) -> &str { + match self { + SegmentMedia::Photo { rel_path, .. 
} | SegmentMedia::Clip { rel_path, .. } => rel_path, + } + } + fn library_id(&self) -> i32 { + match self { + SegmentMedia::Photo { library_id, .. } | SegmentMedia::Clip { library_id, .. } => { + *library_id + } + } + } +} + +/// A beat: one narration line over its media. A photo beat holds one still (a +/// held shot) or several (a quick burst that flashes through moments of an +/// event while the line is read). A clip beat holds a single video clip. Either +/// way one narration line covers the whole beat, so a week/month reel can +/// *show* everything it spans without a narration line — and the seconds that +/// come with it — per item. #[derive(Debug, Clone)] pub struct PlannedBeat { - pub photos: Vec, + pub media: Vec, pub date: Option, pub insight_title: Option, pub insight_summary: Option, @@ -63,6 +82,11 @@ impl PlannedBeat { let dt = DateTime::from_timestamp(ts, 0)?; Some(dt.format("%B %-d, %Y").to_string()) } + + /// True when this beat is a single video clip (vs one or more photos). + pub fn is_clip(&self) -> bool { + matches!(self.media.as_slice(), [SegmentMedia::Clip { .. }]) + } } /// Reel-wide metadata the scripter uses for framing. @@ -183,7 +207,7 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 4; +const RENDER_VERSION: u32 = 5; /// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump /// over the ~0.5 default warms up otherwise-flat narration without over-acting; @@ -207,12 +231,13 @@ fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str voice.unwrap_or("default") ); for m in media { - match m { - SegmentMedia::Photo { - rel_path, - library_id, - } => buf.push_str(&format!("{library_id}:{rel_path}|")), - } + // Tag photo vs clip so the same path used as a still and as a video + // clip produce different keys. + let tag = match m { + SegmentMedia::Photo { .. 
} => 'P', + SegmentMedia::Clip { .. } => 'C', + }; + buf.push_str(&format!("{tag}{}:{}|", m.library_id(), m.rel_path())); } blake3::hash(buf.as_bytes()).to_hex().to_string() } @@ -309,9 +334,9 @@ pub async fn create_reel_handler( })); } - // Flatten every photo across beats (in order) into the cache key — the key - // tracks exactly which photos appear and in what sequence. - let media: Vec = planned.iter().flat_map(|b| b.photos.clone()).collect(); + // Flatten every media item across beats (in order) into the cache key — the + // key tracks exactly which photos/clips appear and in what sequence. + let media: Vec = planned.iter().flat_map(|b| b.media.clone()).collect(); let voice = req.voice.clone().filter(|s| !s.is_empty()); let key = cache_key(&selector, &media, voice.as_deref()); @@ -462,7 +487,7 @@ async fn run_reel_job( use anyhow::{Context, anyhow}; let started = Instant::now(); - let total_photos: usize = planned.iter().map(|b| b.photos.len()).sum(); + let total_photos: usize = planned.iter().map(|b| b.media.len()).sum(); log::info!( "reel {job_id}: starting — span {:?}, {} beats, {} photos, voice={}", meta.span, @@ -510,15 +535,15 @@ async fn run_reel_job( let beat_total = planned.len(); let mut beat_files: Vec = Vec::new(); for (i, (beat, line)) in planned.iter().zip(script.lines.iter()).enumerate() { - // Resolve all of the beat's photos to absolute paths; drop any that - // don't resolve. An empty beat is skipped. - let image_paths: Vec = beat - .photos + // Resolve the beat's media to absolute paths; drop any that don't + // resolve. An empty beat is skipped. 
+ let paths: Vec = beat + .media .iter() - .filter_map(|m| resolve_image_path(app_state, m)) + .filter_map(|m| resolve_media_path(app_state, m)) .collect(); - if image_paths.is_empty() { - log::warn!("reel {job_id}: skipping beat {i}, no image paths resolved"); + if paths.is_empty() { + log::warn!("reel {job_id}: skipping beat {i}, no media paths resolved"); continue; } @@ -551,17 +576,26 @@ async fn run_reel_job( .unwrap_or(render::MIN_SEGMENT_SECONDS); set_stage(job_id, "rendering"); - log::info!( - "reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s", - i + 1, - beat_total, - image_paths.len(), - narration_secs - ); let beat_out = work.path().join(format!("beat_{i:03}.mp4")); - if let Err(e) = - render::render_beat(&image_paths, &audio_path, &beat_out, narration_secs, &opts).await - { + let render_result = if beat.is_clip() { + log::info!( + "reel {job_id}: beat {}/{} — video clip, narration {:.1}s", + i + 1, + beat_total, + narration_secs + ); + render::render_clip_beat(&paths[0], &audio_path, &beat_out, narration_secs, &opts).await + } else { + log::info!( + "reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s", + i + 1, + beat_total, + paths.len(), + narration_secs + ); + render::render_beat(&paths, &audio_path, &beat_out, narration_secs, &opts).await + }; + if let Err(e) = render_result { log::warn!("reel {job_id}: skipping beat {i}, render failed: {e}"); continue; } @@ -603,15 +637,12 @@ async fn run_reel_job( Ok((script.title, final_path)) } -/// Resolve a photo segment's library-relative path to a validated absolute -/// path under its library root. 
-fn resolve_image_path(app_state: &AppState, media: &SegmentMedia) -> Option { - let SegmentMedia::Photo { - rel_path, - library_id, - } = media; - let lib = app_state.library_by_id(*library_id)?; - crate::files::is_valid_full_path(&lib.root_path, rel_path, false) +/// Resolve a media item's library-relative path to a validated absolute path +/// under its library root (works for both photos and clips). +fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option { + let lib = app_state.library_by_id(media.library_id())?; + let rel = media.rel_path().to_string(); + crate::files::is_valid_full_path(&lib.root_path, &rel, false) } #[cfg(test)] @@ -625,6 +656,13 @@ mod tests { } } + fn clip(p: &str, lib: i32) -> SegmentMedia { + SegmentMedia::Clip { + rel_path: p.to_string(), + library_id: lib, + } + } + fn day_selector() -> ReelSelector { ReelSelector::Memories { span: MemoriesSpan::Day, @@ -668,6 +706,35 @@ mod tests { assert_ne!(base, cache_key(&week, &media, Some("grandma"))); } + #[test] + fn cache_key_distinguishes_photo_from_clip() { + // Same path/library used as a still vs a video clip must differ. 
+ let as_photo = vec![photo("v.mp4", 1)]; + let as_clip = vec![clip("v.mp4", 1)]; + assert_ne!( + cache_key(&day_selector(), &as_photo, None), + cache_key(&day_selector(), &as_clip, None) + ); + } + + #[test] + fn is_clip_only_for_single_clip_beat() { + let clip_beat = PlannedBeat { + media: vec![clip("v.mp4", 1)], + date: None, + insight_title: None, + insight_summary: None, + }; + let photo_beat = PlannedBeat { + media: vec![photo("a.jpg", 1), photo("b.jpg", 1)], + date: None, + insight_title: None, + insight_summary: None, + }; + assert!(clip_beat.is_clip()); + assert!(!photo_beat.is_clip()); + } + #[test] fn span_phrase_maps_each_span() { let mk = |span| ReelMeta { @@ -682,7 +749,7 @@ mod tests { #[test] fn date_label_formats_or_none() { let beat = PlannedBeat { - photos: vec![photo("a.jpg", 1)], + media: vec![photo("a.jpg", 1)], date: Some(1_560_384_000), // 2019-06-13 UTC insight_title: None, insight_summary: None, @@ -690,7 +757,7 @@ mod tests { assert!(beat.date_label().unwrap().contains("2019")); let undated = PlannedBeat { - photos: vec![photo("a.jpg", 1)], + media: vec![photo("a.jpg", 1)], date: None, insight_title: None, insight_summary: None, diff --git a/src/reels/render.rs b/src/reels/render.rs index a36f6f1..4e9431b 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -36,9 +36,15 @@ pub const MIN_SEGMENT_SECONDS: f64 = 2.5; const NARRATION_TAIL_SECONDS: f64 = 0.6; /// Fade durations baked into each photo. A held (single-photo) beat gets a -/// gentle dip; burst photos get a snappier fade so the montage feels quick. +/// gentle dip; burst photos get a much snappier fade so the difference between +/// a held shot and a quick burst is obvious. const SINGLE_FADE_SECONDS: f64 = 0.35; -const BURST_FADE_SECONDS: f64 = 0.15; +const BURST_FADE_SECONDS: f64 = 0.08; + +/// Video-clip framing. A clip plays at most this long, with its live audio +/// ducked to `CLIP_DUCK_VOLUME` under the narration. 
+pub const CLIP_SECONDS: f64 = 5.0; +const CLIP_DUCK_VOLUME: f64 = 0.35; /// Floor on how long each burst photo stays up, so a long line over many photos /// doesn't flash them subliminally. If the narration is too short to give every @@ -308,6 +314,162 @@ pub async fn render_beat( run_ffmpeg(&args, "beat render").await } +// --- Video-clip beats -------------------------------------------------------- + +/// Video chain for a clip beat: fill the clip to the portrait canvas (blurred +/// backdrop, same look as photos), normalize fps, hold the last frame if the +/// narration outlasts the clip (`tpad`), then fade. Produces `[v]`. +fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String { + let (w, h, fps) = (opts.width, opts.height, opts.fps); + let fade = SINGLE_FADE_SECONDS; + let hold = (beat_total - clip_dur).max(0.0); + let fade_out_start = (beat_total - fade).max(0.0); + // Freeze the final frame to cover narration that runs past the clip. + let tpad = if hold > 0.05 { + format!(",tpad=stop_mode=clone:stop_duration={hold:.3}") + } else { + String::new() + }; + format!( + "[0:v]split=2[bg][fg];\ + [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\ + crop={w}:{h},boxblur=20:2[bgb];\ + [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\ + [bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\ + fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\ + setsar=1,format=yuv420p[v]" + ) +} + +/// Audio chain for a clip beat. With a clip audio track, duck it under the +/// narration and mix; without one, just the narration. Produces `[a]`. +fn clip_audio_filter(has_audio: bool) -> String { + if has_audio { + format!( + "[0:a]volume={CLIP_DUCK_VOLUME}[duck];[1:a]apad[narr];\ + [duck][narr]amix=inputs=2:duration=longest:normalize=0[a]" + ) + } else { + "[1:a]apad[a]".to_string() + } +} + +/// Full `filter_complex` for a clip beat (input 0 = clip, input 1 = narration). 
+pub fn clip_beat_filtergraph( + opts: &SegmentOpts, + clip_dur: f64, + beat_total: f64, + has_audio: bool, +) -> String { + format!( + "{};{}", + clip_video_filter(opts, clip_dur, beat_total), + clip_audio_filter(has_audio) + ) +} + +/// Build the ffmpeg args for a clip beat: the first `clip_dur` seconds of the +/// source video, filled to the portrait canvas with its live audio ducked under +/// the narration, bounded to `beat_total`. +pub fn build_clip_beat_args( + clip_path: &str, + audio_path: &str, + out_path: &str, + clip_dur: f64, + beat_total: f64, + has_audio: bool, + opts: &SegmentOpts, +) -> Vec { + let fps = opts.fps.to_string(); + let mut args: Vec = vec!["-y".into()]; + if opts.nvenc { + args.extend(["-hwaccel".into(), "cuda".into()]); + } + args.extend([ + // Input `-t` limits the clip to its window; audio has none (apad fills). + "-t".into(), + format!("{clip_dur:.3}"), + "-i".into(), + clip_path.into(), + "-i".into(), + audio_path.into(), + "-filter_complex".into(), + clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio), + "-map".into(), + "[v]".into(), + "-map".into(), + "[a]".into(), + "-t".into(), + format!("{beat_total:.3}"), + "-r".into(), + fps, + ]); + args.extend(video_encoder_args(opts.nvenc)); + args.extend( + ["-c:a", "aac", "-b:a", "160k", "-ar", "48000"] + .iter() + .map(|s| s.to_string()), + ); + args.push(out_path.into()); + args +} + +/// Whether a media file has at least one audio stream (so a clip beat knows +/// whether to mix in live audio). Defaults to `false` on any probe failure. +pub async fn has_audio_stream(path: &str) -> bool { + Command::new("ffprobe") + .args([ + "-v", + "error", + "-select_streams", + "a", + "-show_entries", + "stream=index", + "-of", + "csv=p=0", + path, + ]) + .output() + .await + .map(|out| !out.stdout.is_empty()) + .unwrap_or(false) +} + +/// Render one clip beat: a section of `clip_path` (capped at [`CLIP_SECONDS`], +/// and to the source length) under the narration in `audio_path`. 
The beat +/// lasts at least the narration, freezing the clip's last frame if needed. +pub async fn render_clip_beat( + clip_path: &Path, + audio_path: &Path, + out_path: &Path, + narration_secs: f64, + opts: &SegmentOpts, +) -> Result<()> { + let clip_str = clip_path.to_string_lossy().to_string(); + // Clamp the clip to its own length so a short video isn't padded to the cap. + let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str) + .await + .ok() + .flatten(); + let clip_dur = match source_dur { + Some(d) if d > 0.0 && d < CLIP_SECONDS => d, + _ => CLIP_SECONDS, + }; + let beat_total = clip_dur.max(segment_duration(narration_secs)); + let has_audio = has_audio_stream(&clip_str).await; + + let args = build_clip_beat_args( + &clip_str, + &audio_path.to_string_lossy(), + &out_path.to_string_lossy(), + clip_dur, + beat_total, + has_audio, + opts, + ); + run_ffmpeg(&args, "clip beat render").await +} + /// Join rendered segments into the final reel. Writes the concat list into the /// same directory as the output so relative paths and cleanup stay local. pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> { @@ -397,8 +559,8 @@ mod tests { // Concatenated in order, audio is the 4th input (index 3). assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]")); assert!(g.contains("[3:a]apad[a]")); - // Burst uses the snappier fade. - assert!(g.contains("d=0.15")); + // Burst uses the much snappier fade (vs 0.35 for a held shot). + assert!(g.contains("d=0.08")); assert!(!g.contains("d=0.35")); } @@ -455,6 +617,54 @@ mod tests { assert!(!joined.contains("libx264")); } + #[test] + fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() { + // 5s clip, 7s beat → 2s freeze of the last frame, ducked-audio mix. 
+ let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true); + assert!(g.contains("tpad=stop_mode=clone:stop_duration=2.000")); + assert!(g.contains("volume=0.35")); + assert!(g.contains("amix=inputs=2")); + assert!(g.contains("[1:a]apad[narr]")); + // Fill applied to the clip too. + assert!(g.contains("boxblur")); + assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); + } + + #[test] + fn clip_filter_no_tpad_when_clip_covers_the_beat() { + // Clip at least as long as the beat → no freeze. + let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true); + assert!(!g.contains("tpad")); + } + + #[test] + fn clip_filter_narration_only_without_clip_audio() { + let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false); + assert!(!g.contains("amix")); + assert!(!g.contains("volume=")); + assert!(g.contains("[1:a]apad[a]")); + } + + #[test] + fn clip_beat_args_bound_clip_and_output() { + let args = build_clip_beat_args( + "/v.mp4", + "/n.wav", + "/out.mp4", + 5.0, + 6.6, + true, + &SegmentOpts::default(), + ); + let joined = args.join(" "); + // Input -t bounds the clip read; output -t bounds the beat. 
+ assert!(joined.contains("-t 5.000 -i /v.mp4")); + assert!(joined.contains("-i /n.wav")); + assert!(joined.contains("-t 6.600")); + assert!(joined.contains("-r 30")); + assert!(joined.ends_with("/out.mp4")); + } + #[test] fn concat_args_stream_copy_with_faststart_and_forced_muxer() { // Output goes to a .tmp path, so the muxer must be forced — ffmpeg diff --git a/src/reels/script.rs b/src/reels/script.rs index 85fff7c..5be3d64 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -54,8 +54,10 @@ pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, if let Some(date) = beat.date_label() { user.push_str(&format!(" {date}")); } - if beat.photos.len() > 1 { - user.push_str(&format!(" (a burst of {} photos)", beat.photos.len())); + if beat.is_clip() { + user.push_str(" (a video clip)"); + } else if beat.media.len() > 1 { + user.push_str(&format!(" (a burst of {} photos)", beat.media.len())); } user.push('\n'); match (&beat.insight_title, &beat.insight_summary) { @@ -211,7 +213,7 @@ mod tests { fn planned(n: usize) -> Vec { (0..n) .map(|i| PlannedBeat { - photos: vec![super::super::SegmentMedia::Photo { + media: vec![super::super::SegmentMedia::Photo { rel_path: format!("p{i}.jpg"), library_id: 1, }], @@ -236,7 +238,7 @@ mod tests { #[test] fn prompt_notes_burst_photo_count() { let mut p = planned(1); - p[0].photos = vec![ + p[0].media = vec![ super::super::SegmentMedia::Photo { rel_path: "a.jpg".into(), library_id: 1, @@ -254,6 +256,17 @@ mod tests { assert!(user.contains("a burst of 3 photos")); } + #[test] + fn prompt_marks_clip_beats() { + let mut p = planned(1); + p[0].media = vec![super::super::SegmentMedia::Clip { + rel_path: "v.mp4".into(), + library_id: 1, + }]; + let (_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("a video clip")); + } + #[test] fn prompt_includes_insight_context_when_present() { let mut p = planned(1); diff --git a/src/reels/selector.rs b/src/reels/selector.rs index 
fb83e38..d096f6d 100644 --- a/src/reels/selector.rs +++ b/src/reels/selector.rs @@ -15,7 +15,7 @@ use chrono::{DateTime, Datelike, FixedOffset}; use super::{PlannedBeat, ReelMeta, SegmentMedia}; use crate::database::{ExifDao, InsightDao}; -use crate::file_types::is_image_file; +use crate::file_types::{is_image_file, is_video_file}; use crate::memories::{self, MemoriesSpan}; use crate::state::AppState; @@ -167,13 +167,13 @@ fn partition_into_groups( .collect() } -/// Turn a span's photos into `n_beats` beats. Clusters photos into events by +/// Turn photo items into `n_beats` photo beats. Clusters photos into events by /// time gap; if there are more events than beats, adjacent events are merged so -/// the whole span is still covered. Each beat then flashes up to -/// `max_burst` photos (an even spread of its group) under one narration line — -/// so a week/month reel *shows* all its moments without a narrated (and timed) +/// the whole span is still covered. Each beat then flashes up to `max_burst` +/// photos (an even spread of its group) under one narration line — so a +/// week/month reel *shows* all its moments without a narrated (and timed) /// segment per photo. -pub fn form_beats( +fn form_photo_beats( items: &[memories::MemoryItem], n_beats: usize, max_burst: usize, @@ -197,7 +197,7 @@ pub fn form_beats( let shown = sample_evenly(&group, max_burst); let date = shown.first().and_then(|it| it.created); PlannedBeat { - photos: shown + media: shown .into_iter() .map(|it| SegmentMedia::Photo { rel_path: it.path, @@ -212,6 +212,62 @@ pub fn form_beats( .collect() } +/// Split the beat budget between photo beats and video-clip beats. Clips are +/// individually valuable (motion + live audio) so they get up to half the +/// budget (at least one if any exist); photos take the rest. With only one +/// kind present, it gets the whole budget. 
+fn split_beat_budget(n_photos: usize, n_videos: usize, n_beats: usize) -> (usize, usize) { + if n_videos == 0 { + return (n_beats, 0); + } + if n_photos == 0 { + return (0, n_beats.min(n_videos)); + } + let clip_beats = n_videos.min((n_beats / 2).max(1)); + let photo_beats = n_beats.saturating_sub(clip_beats); + (photo_beats, clip_beats) +} + +/// Build the reel's beats from a span's photos and videos under a beat budget. +/// Videos become one-clip beats (sampled across time if there are more than the +/// clip budget); photos cluster into burst beats. The two are merged back into +/// chronological order so the reel reads as the span unfolded. +pub fn form_beats( + photos: &[memories::MemoryItem], + videos: &[memories::MemoryItem], + n_beats: usize, + max_burst: usize, +) -> Vec { + if n_beats == 0 { + return Vec::new(); + } + let (photo_budget, clip_budget) = split_beat_budget(photos.len(), videos.len(), n_beats); + + let mut beats = form_photo_beats(photos, photo_budget, max_burst); + + // One clip beat per chosen video, spread across the span's videos. + for v in sample_evenly(videos, clip_budget) { + beats.push(PlannedBeat { + media: vec![SegmentMedia::Clip { + rel_path: v.path, + library_id: v.library_id, + }], + date: v.created, + insight_title: None, + insight_summary: None, + }); + } + + // Merge photo and clip beats back into chronological order (undated last). + beats.sort_by(|a, b| match (a.date, b.date) { + (Some(x), Some(y)) => x.cmp(&y), + (Some(_), None) => std::cmp::Ordering::Less, + (None, Some(_)) => std::cmp::Ordering::Greater, + (None, None) => std::cmp::Ordering::Equal, + }); + beats +} + /// Cheap pass: resolve the selector into an ordered list of media (no insight /// lookups yet) plus reel metadata. `Err` only on an invalid library param. pub fn resolve( @@ -238,23 +294,24 @@ pub fn resolve( library.as_deref(), )?; - // Phase 1 is photos-only: drop videos (a clip segment type lands - // in phase 2). 
- let items: Vec = items - .into_iter() - .filter(|it| is_image_file(Path::new(&it.path))) - .collect(); - - // Years are derived from the whole span (what the reel represents), - // before the budget narrows it down to beats. + // Split into photos and video clips; anything that's neither is + // dropped. Years span both, computed before the budget narrows it. let years = distinct_years(&items, client_tz); let meta = ReelMeta { span: *span, years }; + let (photos, videos): (Vec<_>, Vec<_>) = items + .into_iter() + .filter(|it| { + is_image_file(Path::new(&it.path)) || is_video_file(Path::new(&it.path)) + }) + .partition(|it| is_image_file(Path::new(&it.path))); + // The budget caps the number of narrated beats (≈ reel length); - // each beat then bursts through several photos, so the reel covers - // the span's moments without running minutes long. + // photo beats then burst through several photos and video beats + // play a short clip, so the reel covers the span without running + // minutes long. let n_beats = budget_segments(*max_segments); - let beats = form_beats(&items, n_beats, MAX_BURST_PHOTOS); + let beats = form_beats(&photos, &videos, n_beats, MAX_BURST_PHOTOS); Ok((beats, meta)) } } @@ -289,10 +346,13 @@ pub fn enrich( return; }; for beat in beats.iter_mut() { - let Some(SegmentMedia::Photo { rel_path, .. }) = beat.photos.first() else { - continue; + let rel_path = match beat.media.first() { + Some(SegmentMedia::Photo { rel_path, .. } | SegmentMedia::Clip { rel_path, .. }) => { + rel_path.clone() + } + None => continue, }; - if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) { + if let Ok(Some(insight)) = dao.get_insight(span_context, &rel_path) { beat.insight_title = Some(insight.title); beat.insight_summary = Some(insight.summary); } @@ -372,15 +432,18 @@ mod tests { assert_eq!(distinct_years(&items, None), vec![2019, 2021]); } - // Build an item at a given unix timestamp (seconds). 
- fn item_at(ts: i64, name: &str) -> memories::MemoryItem { + // Build an item at a given unix timestamp (seconds) with a chosen extension. + fn item_ext(ts: i64, name: &str, ext: &str) -> memories::MemoryItem { memories::MemoryItem { - path: format!("{name}.jpg"), + path: format!("{name}.{ext}"), created: Some(ts), modified: None, library_id: 1, } } + fn item_at(ts: i64, name: &str) -> memories::MemoryItem { + item_ext(ts, name, "jpg") + } #[test] fn budget_segments_caps_to_duration_target() { @@ -405,7 +468,7 @@ mod tests { } #[test] - fn form_beats_one_beat_per_event_when_they_fit() { + fn photo_beats_one_per_event_when_they_fit() { // Three well-separated events, budget of 10 → three beats, each holding // all of its (few) photos. let items = vec![ @@ -414,35 +477,70 @@ mod tests { item_at(1_000_000, "c"), item_at(2_000_000, "d"), ]; - let beats = form_beats(&items, 10, MAX_BURST_PHOTOS); + let beats = form_photo_beats(&items, 10, MAX_BURST_PHOTOS); assert_eq!(beats.len(), 3); - assert_eq!(beats[0].photos.len(), 2); // burst of the first event - assert_eq!(beats[1].photos.len(), 1); - assert_eq!(beats[2].photos.len(), 1); + assert_eq!(beats[0].media.len(), 2); // burst of the first event + assert_eq!(beats[1].media.len(), 1); + assert_eq!(beats[2].media.len(), 1); } #[test] - fn form_beats_merges_events_when_over_budget() { + fn photo_beats_merge_events_when_over_budget() { // Six distinct events but only two beats → adjacent events fold in, and // every event's photos still appear (capped by the burst max). 
let items: Vec = (0..6) .map(|i| item_at(i as i64 * 1_000_000, &format!("e{i}"))) .collect(); - let beats = form_beats(&items, 2, MAX_BURST_PHOTOS); + let beats = form_photo_beats(&items, 2, MAX_BURST_PHOTOS); assert_eq!(beats.len(), 2); - let shown: usize = beats.iter().map(|b| b.photos.len()).sum(); + let shown: usize = beats.iter().map(|b| b.media.len()).sum(); assert_eq!(shown, 6); // all six moments still shown across two beats } #[test] - fn form_beats_caps_burst_to_max() { + fn photo_beats_cap_burst_to_max() { // One dense event of 30 photos, generous budget → a single beat that // bursts at most MAX_BURST_PHOTOS, not all 30. let items: Vec = (0..30) .map(|i| item_at(i as i64, &format!("p{i}"))) .collect(); - let beats = form_beats(&items, 18, MAX_BURST_PHOTOS); + let beats = form_photo_beats(&items, 18, MAX_BURST_PHOTOS); assert_eq!(beats.len(), 1); - assert_eq!(beats[0].photos.len(), MAX_BURST_PHOTOS); + assert_eq!(beats[0].media.len(), MAX_BURST_PHOTOS); + } + + #[test] + fn split_beat_budget_handles_each_mix() { + // Only photos / only videos → that kind gets the whole budget. + assert_eq!(split_beat_budget(10, 0, 18), (18, 0)); + assert_eq!(split_beat_budget(0, 10, 18), (0, 10)); // capped at n_videos + assert_eq!(split_beat_budget(0, 30, 18), (0, 18)); // capped at budget + // Mixed → clips up to half (≥1), photos the rest. + assert_eq!(split_beat_budget(100, 100, 18), (9, 9)); + assert_eq!(split_beat_budget(100, 1, 18), (17, 1)); // few videos + } + + #[test] + fn form_beats_mixes_clip_and_photo_beats_in_time_order() { + let photos = vec![item_at(0, "p0"), item_at(2_000_000, "p1")]; + // A video between the two photo events (in time). + let videos = vec![item_ext(1_000_000, "v0", "mp4")]; + let beats = form_beats(&photos, &videos, 10, MAX_BURST_PHOTOS); + // Two photo events + one clip = three beats, chronological. 
+ assert_eq!(beats.len(), 3); + assert!(!beats[0].is_clip()); // p0 @ t=0 + assert!(beats[1].is_clip()); // v0 @ t=1e6 + assert!(!beats[2].is_clip()); // p1 @ t=2e6 + assert!(matches!(beats[1].media[0], SegmentMedia::Clip { .. })); + } + + #[test] + fn form_beats_videos_only_become_clip_beats() { + let videos: Vec = (0..3) + .map(|i| item_ext(i as i64 * 1_000_000, &format!("v{i}"), "mov")) + .collect(); + let beats = form_beats(&[], &videos, 10, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 3); + assert!(beats.iter().all(|b| b.is_clip())); } } -- 2.52.0 From f5581edf5ef95e131536cd3ac556bd1f7086bbc3 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 00:07:41 -0400 Subject: [PATCH 08/17] =?UTF-8?q?Reels:=20ease=20burst=20fade=200.08s=20?= =?UTF-8?q?=E2=86=92=200.12s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0.08s read as too abrupt; 0.12s keeps the burst clearly snappier than the 0.35s held-shot fade without jarring. Bumps RENDER_VERSION. Co-Authored-By: Claude Fable 5 --- src/reels/mod.rs | 2 +- src/reels/render.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 17def48..c6bfd68 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -207,7 +207,7 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 5; +const RENDER_VERSION: u32 = 6; /// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump /// over the ~0.5 default warms up otherwise-flat narration without over-acting; diff --git a/src/reels/render.rs b/src/reels/render.rs index 4e9431b..c8ddc04 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -39,7 +39,7 @@ const NARRATION_TAIL_SECONDS: f64 = 0.6; /// gentle dip; burst photos get a much snappier fade so the difference between /// a held shot and a quick burst is obvious. 
const SINGLE_FADE_SECONDS: f64 = 0.35; -const BURST_FADE_SECONDS: f64 = 0.08; +const BURST_FADE_SECONDS: f64 = 0.12; /// Video-clip framing. A clip plays at most this long, with its live audio /// ducked to `CLIP_DUCK_VOLUME` under the narration. @@ -560,7 +560,7 @@ mod tests { assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]")); assert!(g.contains("[3:a]apad[a]")); // Burst uses the much snappier fade (vs 0.35 for a held shot). - assert!(g.contains("d=0.08")); + assert!(g.contains("d=0.12")); assert!(!g.contains("d=0.35")); } -- 2.52.0 From b30c8c16d08d6081c67d8a7bf6c19ffb588ce2f7 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 11:00:01 -0400 Subject: [PATCH 09/17] Reels: clips play through the beat instead of freezing early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A clip beat capped playback at CLIP_SECONDS and filled the rest of the narration with a tpad freeze-frame, so a clip stopped dead on its last frame for a second or two before the transition — a glitchy pause that stills don't have. Extract clip_beat_plan: the clip now plays for as much of its beat as the source footage covers, and we freeze only when the source is genuinely shorter than the narration. Bump RENDER_VERSION. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 2 +- src/reels/render.rs | 64 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 9 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index c6bfd68..32635a9 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -207,7 +207,7 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 6; +const RENDER_VERSION: u32 = 7; /// Narration expressiveness — Chatterbox's `exaggeration` knob. 
A slight bump /// over the ~0.5 default warms up otherwise-flat narration without over-acting; diff --git a/src/reels/render.rs b/src/reels/render.rs index c8ddc04..221df5f 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -41,8 +41,10 @@ const NARRATION_TAIL_SECONDS: f64 = 0.6; const SINGLE_FADE_SECONDS: f64 = 0.35; const BURST_FADE_SECONDS: f64 = 0.12; -/// Video-clip framing. A clip plays at most this long, with its live audio -/// ducked to `CLIP_DUCK_VOLUME` under the narration. +/// Video-clip framing. Fallback cap on how much of a clip we read when the +/// source length can't be probed; with a known length, a clip instead plays for +/// as much of its beat as its footage allows (see [`clip_beat_plan`]). Its live +/// audio is ducked to `CLIP_DUCK_VOLUME` under the narration. pub const CLIP_SECONDS: f64 = 5.0; const CLIP_DUCK_VOLUME: f64 = 0.35; @@ -316,6 +318,28 @@ pub async fn render_beat( // --- Video-clip beats -------------------------------------------------------- +/// Decide how long the clip plays and how long the whole beat lasts, from the +/// source video's length (if known) and the narration length. Returns +/// `(clip_dur, beat_total)`. +/// +/// The beat always lasts long enough for the full narration. The clip plays for +/// as much of that beat as its footage covers — so the motion fills the screen +/// time rather than stopping early. We only freeze the last frame (the +/// `beat_total - clip_dur` gap, handled by `tpad` in [`clip_video_filter`]) when +/// the source video is genuinely shorter than the narration. Capping clip +/// playback at a fixed length while the narration ran longer was what produced +/// the second-or-two freeze that read as a glitchy pause before the transition. +pub fn clip_beat_plan(source_dur: Option, narration_secs: f64) -> (f64, f64) { + let want = segment_duration(narration_secs); + let clip_dur = match source_dur { + // Known length: play up to the whole beat, but never past the source. 
+ Some(d) if d > 0.0 => d.min(want), + // Unknown length: read up to the fallback cap; tpad covers any shortfall. + _ => want.min(CLIP_SECONDS), + }; + (clip_dur, want.max(clip_dur)) +} + /// Video chain for a clip beat: fill the clip to the portrait canvas (blurred /// backdrop, same look as photos), normalize fps, hold the last frame if the /// narration outlasts the clip (`tpad`), then fade. Produces `[v]`. @@ -446,16 +470,13 @@ pub async fn render_clip_beat( opts: &SegmentOpts, ) -> Result<()> { let clip_str = clip_path.to_string_lossy().to_string(); - // Clamp the clip to its own length so a short video isn't padded to the cap. + // Play the clip for as much of the beat as its footage covers; freeze only + // when the source is genuinely shorter than the narration (see clip_beat_plan). let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str) .await .ok() .flatten(); - let clip_dur = match source_dur { - Some(d) if d > 0.0 && d < CLIP_SECONDS => d, - _ => CLIP_SECONDS, - }; - let beat_total = clip_dur.max(segment_duration(narration_secs)); + let (clip_dur, beat_total) = clip_beat_plan(source_dur, narration_secs); let has_audio = has_audio_stream(&clip_str).await; let args = build_clip_beat_args( @@ -630,6 +651,33 @@ mod tests { assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); } + #[test] + fn clip_beat_plan_plays_clip_through_the_whole_beat_when_source_is_long() { + // 30s source, 4s narration → beat is narration+tail (4.6), and the clip + // plays that whole 4.6s of motion: no freeze (clip_dur == beat_total). + let (clip_dur, beat_total) = clip_beat_plan(Some(30.0), 4.0); + assert!((beat_total - 4.6).abs() < 1e-9); + assert!((clip_dur - 4.6).abs() < 1e-9); + assert!((beat_total - clip_dur).abs() < 1e-9); // no hold + } + + #[test] + fn clip_beat_plan_freezes_only_when_source_shorter_than_narration() { + // 2s source under a 4s narration → play all 2s, freeze the remainder. 
+ let (clip_dur, beat_total) = clip_beat_plan(Some(2.0), 4.0); + assert!((clip_dur - 2.0).abs() < 1e-9); + assert!((beat_total - 4.6).abs() < 1e-9); + assert!(beat_total - clip_dur > 2.0); // unavoidable freeze gap + } + + #[test] + fn clip_beat_plan_caps_read_when_source_length_unknown() { + // Probe failed: read up to the fallback cap, beat still covers narration. + let (clip_dur, beat_total) = clip_beat_plan(None, 8.0); + assert!((clip_dur - CLIP_SECONDS).abs() < 1e-9); + assert!((beat_total - 8.6).abs() < 1e-9); + } + #[test] fn clip_filter_no_tpad_when_clip_covers_the_beat() { // Clip at least as long as the beat → no freeze. -- 2.52.0 From f707353807327d3ab3a88c039308ba953be0159a Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 14:29:34 -0400 Subject: [PATCH 10/17] feat: nightly agentic pre-generation of memory reels Implement end-to-end nightly pre-generation of memory reels with agentic scripting that grounds narration in calendar, location, messages, and RAG. Sections A-E from the plan: A. Extract produce_reel pipeline core from run_reel_job with ScripterMode::Fast/Agentic and progress callbacks. B. Agentic scripter: factor run_readonly_tool_loop from the insight generator, build read-only tool gate, prompt builder with GPS, and generate_script_agentic with fallback to fast path. C. Precomputed reels ledger (SQLite table + DAO), GET /reels/precomputed handler with validity gate, GET /reels/by-key/{key}/video streaming, and normalize_library_key helper. D. Nightly scheduler: spawn_pregen_scheduler with configurable hour, run_pregen_batch (day/week/month spans), pregen_one with dedup and disk-check, secs_until_next_run_hour time math. E. user_ai_prefs passive mirror table + DAO for param capture in create_reel_handler and replay in the scheduler. Also fixes resolve_library_param signature to take &[Library] and adds resolve_library_param_state wrapper for AppState callers. 
New files: migrations/2026-06-13-000000_add_precomputed_reels/, migrations/2026-06-13-000010_add_user_ai_prefs/, src/database/precomputed_reel_dao.rs, src/database/user_ai_prefs_dao.rs --- .env.example | 16 + .../down.sql | 2 + .../up.sql | 14 + .../down.sql | 1 + .../up.sql | 7 + src/ai/handlers.rs | 91 ++- src/ai/insight_generator.rs | 102 +++ src/ai/tts.rs | 2 +- src/database/mod.rs | 4 + src/database/models.rs | 56 +- src/database/precomputed_reel_dao.rs | 321 ++++++++ src/database/schema.rs | 28 + src/database/user_ai_prefs_dao.rs | 212 +++++ src/duplicates.rs | 6 +- src/faces.rs | 19 +- src/files.rs | 18 +- src/handlers/image.rs | 16 +- src/handlers/video.rs | 9 +- src/libraries.rs | 79 +- src/main.rs | 21 + src/memories.rs | 2 +- src/reels/mod.rs | 735 +++++++++++++++++- src/reels/script.rs | 159 +++- src/reels/selector.rs | 24 +- src/state.rs | 32 +- src/tags.rs | 2 +- 26 files changed, 1825 insertions(+), 153 deletions(-) create mode 100644 migrations/2026-06-13-000000_add_precomputed_reels/down.sql create mode 100644 migrations/2026-06-13-000000_add_precomputed_reels/up.sql create mode 100644 migrations/2026-06-13-000010_add_user_ai_prefs/down.sql create mode 100644 migrations/2026-06-13-000010_add_user_ai_prefs/up.sql create mode 100644 src/database/precomputed_reel_dao.rs create mode 100644 src/database/user_ai_prefs_dao.rs diff --git a/.env.example b/.env.example index a45fdd5..bafc0c8 100644 --- a/.env.example +++ b/.env.example @@ -139,3 +139,19 @@ CLIP_REQUEST_TIMEOUT_SEC=60 # ── RAG / search ──────────────────────────────────────────────────────── # Set to `1` to enable cross-encoder reranking on /search results. SEARCH_RAG_RERANK=0 + +# ── Nightly reel pre-generation (Phase 3+) ────────────────────────────── +# Set to `1` to enable the scheduler. Disabled by default. +# REEL_PREGEN_ENABLED=1 +# Hour (0-23) when the nightly batch fires. Default 3 AM. +# REEL_PREGEN_HOUR=3 +# Day of week for weekly reels (0=Sun, 1=Mon, …). Default Monday. 
+# REEL_PREGEN_WEEK_DOW=1 +# Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to +# the server's local timezone. +# REEL_PREGEN_TZ_OFFSET_MINUTES= +# Voice ID for narration (e.g., "grandma"). When unset, falls back to the +# voice stored in the user_ai_prefs DB row, if that row has one. +# REEL_PREGEN_VOICE= +# Library filter: a library id (e.g. "1") or "all" for every library. +# REEL_PREGEN_LIBRARY=all diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/down.sql b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql new file mode 100644 index 0000000..91863c2 --- /dev/null +++ b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql @@ -0,0 +1,2 @@ +DROP INDEX IF EXISTS idx_precomputed_reels_span_library; +DROP TABLE IF EXISTS precomputed_reels; diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/up.sql b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql new file mode 100644 index 0000000..ba49b72 --- /dev/null +++ b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql @@ -0,0 +1,14 @@ +CREATE TABLE precomputed_reels ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + span TEXT NOT NULL, + library_key TEXT NOT NULL, + cache_key TEXT NOT NULL, + output_path TEXT NOT NULL, + title TEXT NOT NULL, + media_count INT NOT NULL, + render_version INT NOT NULL DEFAULT 1, + tz_offset_minutes INT NOT NULL, + voice TEXT, + generated_at BIGINT NOT NULL +); +CREATE INDEX idx_precomputed_reels_span_library ON precomputed_reels(span, library_key, generated_at DESC); diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql new file mode 100644 index 0000000..83b82a3 --- /dev/null +++ b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS user_ai_prefs; diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql new file mode 100644 index 0000000..fd8f6f2 --- /dev/null +++
b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql @@ -0,0 +1,7 @@ +CREATE TABLE user_ai_prefs ( + id INTEGER PRIMARY KEY CHECK(id=1), + voice TEXT, + tz_offset_minutes INTEGER, + library TEXT, + updated_at BIGINT NOT NULL +); diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index cb21b14..c6bc212 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -120,7 +120,7 @@ pub async fn generation_status_handler( } if let Some(ref fp) = query.path { - let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -218,10 +218,11 @@ pub async fn cancel_generation_handler( } if let Some(ref fp) = request.file_path { - let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) - .ok() - .flatten() - .unwrap_or_else(|| app_state.primary_library()); + let library = + libraries::resolve_library_param_state(&app_state, request.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); let normalized = normalize_path(fp); // Get active job ids first, then cancel in DB, then abort tasks @@ -580,7 +581,7 @@ pub async fn get_insight_handler( // Expand to rel_paths sharing content so an insight generated under // library 1 still shows when the same photo is viewed from library 2. 
- let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -1218,15 +1219,16 @@ pub async fn chat_turn_handler( let mut span = tracer.start_with_context("http.insights.chat", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; // Service-token claims (sub: "service:apollo") fall through to // user_id=1 — the operator convention. 
Mobile/web clients have a @@ -1344,15 +1346,16 @@ pub async fn chat_rewind_handler( request: web::Json, app_state: web::Data, ) -> impl Responder { - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; match app_state .insight_chat @@ -1393,7 +1396,7 @@ pub async fn chat_history_handler( // cross-library lookup when the scoped one misses, so a photo // with no insight in this library but one in another still // surfaces (the "show this photo's primary insight" merge case). 
- let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -1444,15 +1447,16 @@ pub async fn chat_stream_handler( request: web::Json, app_state: web::Data, ) -> HttpResponse { - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; // Service-token sub falls through to user_id=1 (see chat_turn_handler). 
let user_id = claims.sub.parse::().unwrap_or(1); @@ -1589,15 +1593,16 @@ pub async fn turn_async_handler( let mut span = tracer.start_with_context("http.insights.chat_turn_async", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; let user_id = claims.sub.parse::().unwrap_or(1); diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 3673c43..4871c2e 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -4497,6 +4497,108 @@ Return ONLY the summary, nothing else."#, )) } + /// A read-only agentic tool loop: chat with tools until the model stops + /// calling them, then return the final content. + /// + /// This is the loop body extracted from + /// `generate_agentic_insight_for_photo` (lines 4316-4377) so it can be + /// reused by the reel-scripter without the photo-specific context + /// (image_base64, file_path, persona_id). The photo insight loop still + /// has its own copy because it threads image/file context through + /// `execute_tool`. + /// + /// Calls `execute_tool` with empty file/image context; enabled tools + /// never read those fields. 
+ #[allow(dead_code)] + pub(crate) async fn run_readonly_tool_loop( + &self, + backend: &ResolvedBackend, + mut messages: Vec, + tools: Vec, + max_iter: usize, + ) -> Result { + let mut final_content = String::new(); + + for iteration in 0..max_iter { + log::info!("Agentic iteration {}/{}", iteration + 1, max_iter); + + let (response, _prompt_tokens, _eval_tokens) = backend + .chat() + .chat_with_tools(messages.clone(), tools.clone()) + .await?; + + // Sanitize tool call arguments before pushing back into history. + // Some models occasionally return non-object arguments (bool, + // string, null) which Ollama rejects when they are re-sent in + // a subsequent request. + let mut response = response; + if let Some(ref mut tool_calls) = response.tool_calls { + for tc in tool_calls.iter_mut() { + if !tc.function.arguments.is_object() { + log::warn!( + "Tool '{}' returned non-object arguments ({:?}), normalising to {{}}", + tc.function.name, + tc.function.arguments + ); + tc.function.arguments = serde_json::Value::Object(Default::default()); + } + } + } + + messages.push(response.clone()); + + if let Some(ref tool_calls) = response.tool_calls + && !tool_calls.is_empty() + { + for tool_call in tool_calls { + log::info!( + "Agentic tool call [{}]: {} {}", + iteration, + tool_call.function.name, + tool_call.function.arguments + ); + let result = self + .execute_tool( + &tool_call.function.name, + &tool_call.function.arguments, + backend, + &None, + "", + 0, + "", + &opentelemetry::Context::new(), + ) + .await; + messages.push(ChatMessage::tool_result(result)); + } + continue; + } + + // No tool calls — this is the final answer + final_content = response.content; + break; + } + + // If loop exhausted without final answer, ask for one + if final_content.is_empty() { + log::info!( + "Agentic loop exhausted after {} iterations, requesting final answer", + max_iter + ); + messages.push(ChatMessage::user( + "Based on the context gathered, please write the final answer. 
Return ONLY the JSON object, no prose or code fences.", + )); + let (final_response, _, _) = backend + .chat() + .chat_with_tools(messages.clone(), vec![]) + .await?; + final_content = final_response.content.clone(); + messages.push(final_response); + } + + Ok(final_content) + } + /// Reverse geocode GPS coordinates to human-readable place names async fn reverse_geocode(&self, lat: f64, lon: f64) -> Option { let url = format!( diff --git a/src/ai/tts.rs b/src/ai/tts.rs index a9a610a..d6ef89d 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -1020,7 +1020,7 @@ pub async fn create_voice_from_library_handler( let voice_name = append_ref_window(&voice_name, ref_start, ref_duration.round().max(1.0) as u32); - let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(Some(l)) => l, Ok(None) => app_state.primary_library(), Err(msg) => { diff --git a/src/database/mod.rs b/src/database/mod.rs index d063bd0..981f6a4 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -51,10 +51,12 @@ pub mod knowledge_dao; pub mod location_dao; pub mod models; pub mod persona_dao; +pub mod precomputed_reel_dao; pub mod preview_dao; pub mod reconcile; pub mod schema; pub mod search_dao; +pub mod user_ai_prefs_dao; pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao}; pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao}; @@ -66,8 +68,10 @@ pub use knowledge_dao::{ }; pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao}; pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao}; +pub use precomputed_reel_dao::{PrecomputedReelDao, SqlitePrecomputedReelDao}; pub use preview_dao::{PreviewDao, SqlitePreviewDao}; pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao}; +pub use user_ai_prefs_dao::{SqliteUserAiPrefsDao, UserAiPrefsDao}; pub trait UserDao { fn 
create_user(&mut self, user: &str, password: &str) -> Option; diff --git a/src/database/models.rs b/src/database/models.rs index 62274e2..d3d5440 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -1,6 +1,7 @@ use crate::database::schema::{ entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs, - libraries, personas, photo_insights, users, video_preview_clips, + libraries, personas, photo_insights, precomputed_reels, user_ai_prefs, users, + video_preview_clips, }; use serde::Serialize; @@ -505,3 +506,56 @@ pub struct InsightGenerationJob { pub result_insight_id: Option, pub error_message: Option, } + +// --- Precomputed reels ------------------------------------------------------- + +#[derive(Insertable)] +#[diesel(table_name = precomputed_reels)] +pub struct InsertablePrecomputedReel { + pub span: String, + pub library_key: String, + pub cache_key: String, + pub output_path: String, + pub title: String, + pub media_count: i32, + pub render_version: i32, + pub tz_offset_minutes: i32, + pub voice: Option, + pub generated_at: i64, +} + +#[derive(Serialize, Queryable, Clone, Debug)] +pub struct PrecomputedReel { + pub id: i32, + pub span: String, + pub library_key: String, + pub cache_key: String, + pub output_path: String, + pub title: String, + pub media_count: i32, + pub render_version: i32, + pub tz_offset_minutes: i32, + pub voice: Option, + pub generated_at: i64, +} + +// --- User AI preferences (Section E) ---------------------------------------- + +#[derive(Queryable, Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)] +#[diesel(table_name = user_ai_prefs)] +pub struct UserAiPrefs { + pub id: i32, + pub voice: Option, + pub tz_offset_minutes: Option, + pub library: Option, + pub updated_at: i64, +} + +#[derive(Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)] +#[diesel(table_name = user_ai_prefs)] +pub struct UpsertUserAiPrefs { + pub voice: Option, + pub tz_offset_minutes: 
Option, + pub library: Option, + pub updated_at: i64, +} diff --git a/src/database/precomputed_reel_dao.rs b/src/database/precomputed_reel_dao.rs new file mode 100644 index 0000000..7acc098 --- /dev/null +++ b/src/database/precomputed_reel_dao.rs @@ -0,0 +1,321 @@ +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +use crate::database::models::{InsertablePrecomputedReel, PrecomputedReel}; +use crate::database::schema; +use crate::database::{DbError, DbErrorKind, connect}; +use crate::otel::trace_db_call; + +/// Ledger for precomputed memory reels. The nightly agentic job writes a +/// row after each successful render; the `GET /reels/precomputed` handler +/// reads it to gate on freshness and serve the cached MP4. +pub trait PrecomputedReelDao: Sync + Send { + /// Insert a precomputed reel row. Returns the new row's id. + /// Written by the nightly agentic job (Section D). + #[allow(dead_code)] + fn record_reel( + &mut self, + context: &opentelemetry::Context, + row: &InsertablePrecomputedReel, + ) -> Result; + + /// Find the latest precomputed reel for the given (span, library_key). + fn latest_for( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + ) -> Result, DbError>; + + /// Return true when a fresh precomputed reel exists for the given + /// (span, library_key, render_version) that was generated at or after + /// `min_generated_at`. Used as a fast existence gate before falling + /// back to `latest_for` (avoids a second query path). 
+ fn exists_fresh( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + render_version: i32, + min_generated_at: i64, + ) -> Result; +} + +pub struct SqlitePrecomputedReelDao { + connection: Arc>, +} + +impl Default for SqlitePrecomputedReelDao { + fn default() -> Self { + Self::new() + } +} + +impl SqlitePrecomputedReelDao { + pub fn new() -> Self { + Self { + connection: Arc::new(Mutex::new(connect())), + } + } + + #[cfg(test)] + pub fn from_connection(conn: Arc>) -> Self { + Self { connection: conn } + } +} + +impl PrecomputedReelDao for SqlitePrecomputedReelDao { + fn record_reel( + &mut self, + context: &opentelemetry::Context, + row: &InsertablePrecomputedReel, + ) -> Result { + trace_db_call(context, "insert", "record_reel", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + diesel::insert_into(dsl::precomputed_reels) + .values(row) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to insert reel: {}", e))?; + + dsl::precomputed_reels + .order(dsl::id.desc()) + .select(dsl::id) + .first::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to get reel id: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) + } + + fn latest_for( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "latest_for", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .order(dsl::generated_at.desc()) + .first::(connection.deref_mut()) + .optional() + .map_err(|e| anyhow::anyhow!("Failed to get latest reel: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn exists_fresh( + &mut 
self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + render_version: i32, + min_generated_at: i64, + ) -> Result { + trace_db_call(context, "query", "exists_fresh", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + let count: i64 = dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .filter(dsl::render_version.eq(render_version)) + .filter(dsl::generated_at.ge(min_generated_at)) + .count() + .get_result(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to check fresh reel: {}", e))?; + + Ok(count > 0) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use diesel::Connection; + use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations}; + + const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!(); + + fn setup_dao() -> SqlitePrecomputedReelDao { + let mut conn = SqliteConnection::establish(":memory:") + .expect("Unable to create in-memory db connection"); + conn.run_pending_migrations(DB_MIGRATIONS) + .expect("Failure running DB migrations"); + SqlitePrecomputedReelDao::from_connection(Arc::new(Mutex::new(conn))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + fn sample_row() -> InsertablePrecomputedReel { + InsertablePrecomputedReel { + span: "day".to_string(), + library_key: "1".to_string(), + cache_key: "abc123".to_string(), + output_path: "/tmp/reel.mp4".to_string(), + title: "Test Reel".to_string(), + media_count: 10, + render_version: 1, + tz_offset_minutes: 0, + voice: Some("default".to_string()), + generated_at: 1_000_000, + } + } + + #[test] + fn record_reel_inserts_and_returns_id() { + let mut dao = setup_dao(); + let ctx = ctx(); + let row = sample_row(); + + let id = dao.record_reel(&ctx, &row).unwrap(); + assert!(id > 0, "should return a 
positive id"); + } + + #[test] + fn record_reel_returns_increasing_ids() { + let mut dao = setup_dao(); + let ctx = ctx(); + let row = sample_row(); + + let id1 = dao.record_reel(&ctx, &row).unwrap(); + let id2 = dao.record_reel(&ctx, &row).unwrap(); + assert!(id2 > id1, "each insert should get a higher id"); + } + + #[test] + fn latest_for_returns_latest() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let row1 = InsertablePrecomputedReel { + generated_at: 1_000_000, + ..sample_row() + }; + let row2 = InsertablePrecomputedReel { + generated_at: 2_000_000, + ..sample_row() + }; + + dao.record_reel(&ctx, &row1).unwrap(); + dao.record_reel(&ctx, &row2).unwrap(); + + let latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap(); + assert_eq!(latest.generated_at, 2_000_000); + } + + #[test] + fn latest_for_scoped_by_span_and_library() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let day_row = InsertablePrecomputedReel { + span: "day".to_string(), + library_key: "1".to_string(), + generated_at: 1_000_000, + ..sample_row() + }; + let week_row = InsertablePrecomputedReel { + span: "week".to_string(), + library_key: "1".to_string(), + generated_at: 2_000_000, + ..sample_row() + }; + + dao.record_reel(&ctx, &day_row).unwrap(); + dao.record_reel(&ctx, &week_row).unwrap(); + + let day_latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap(); + assert_eq!(day_latest.span, "day"); + + let week_latest = dao.latest_for(&ctx, "week", "1").unwrap().unwrap(); + assert_eq!(week_latest.span, "week"); + + // Different library returns None + let missing = dao.latest_for(&ctx, "day", "99").unwrap(); + assert!(missing.is_none()); + } + + #[test] + fn latest_for_returns_none_when_no_rows() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let result = dao.latest_for(&ctx, "day", "1").unwrap(); + assert!(result.is_none()); + } + + #[test] + fn exists_fresh_returns_true_when_present() { + let mut dao = setup_dao(); + let ctx = ctx(); + + dao.record_reel(&ctx, 
&sample_row()).unwrap(); + + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap(); + assert!(exists, "should find the row we just inserted"); + } + + #[test] + fn exists_fresh_returns_false_when_missing() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap(); + assert!(!exists, "should not find anything in empty table"); + } + + #[test] + fn exists_fresh_respects_min_generated_at() { + let mut dao = setup_dao(); + let ctx = ctx(); + + dao.record_reel(&ctx, &sample_row()).unwrap(); + + // Below the threshold — should exist + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 500_000).unwrap(); + assert!(exists); + + // Above the threshold — should not exist + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 2_000_000).unwrap(); + assert!(!exists); + } + + #[test] + fn exists_fresh_respects_render_version() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let row_v1 = InsertablePrecomputedReel { + render_version: 1, + ..sample_row() + }; + dao.record_reel(&ctx, &row_v1).unwrap(); + + assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap()); + assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap()); + } +} diff --git a/src/database/schema.rs b/src/database/schema.rs index bf5791b..846542d 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -266,6 +266,16 @@ diesel::table! { } } +diesel::table! { + user_ai_prefs (id) { + id -> Integer, + voice -> Nullable, + tz_offset_minutes -> Nullable, + library -> Nullable, + updated_at -> BigInt, + } +} + diesel::table! { video_preview_clips (id) { id -> Integer, @@ -294,6 +304,22 @@ diesel::table! { } } +diesel::table! 
{ + precomputed_reels (id) { + id -> Integer, + span -> Text, + library_key -> Text, + cache_key -> Text, + output_path -> Text, + title -> Text, + media_count -> Integer, + render_version -> Integer, + tz_offset_minutes -> Integer, + voice -> Nullable, + generated_at -> BigInt, + } +} + diesel::joinable!(entity_facts -> photo_insights (source_insight_id)); diesel::joinable!(entity_photo_links -> entities (entity_id)); diesel::joinable!(entity_photo_links -> libraries (library_id)); @@ -322,9 +348,11 @@ diesel::allow_tables_to_appear_in_same_query!( personas, persons, photo_insights, + precomputed_reels, search_history, tagged_photo, tags, + user_ai_prefs, users, video_preview_clips, ); diff --git a/src/database/user_ai_prefs_dao.rs b/src/database/user_ai_prefs_dao.rs new file mode 100644 index 0000000..d58a56c --- /dev/null +++ b/src/database/user_ai_prefs_dao.rs @@ -0,0 +1,212 @@ +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +use crate::database::models::{UpsertUserAiPrefs, UserAiPrefs}; +use crate::database::schema; +use crate::database::{DbError, DbErrorKind, connect}; +use crate::otel::trace_db_call; + +/// Generic single-row table that passively mirrors the latest client AI +/// request parameters (voice, timezone, library). Read by the nightly +/// pre-generation scheduler (Section D) to pick up user preferences. +pub trait UserAiPrefsDao: Sync + Send { + /// Read the single row; `None` when it hasn't been populated yet. + fn get_prefs( + &mut self, + context: &opentelemetry::Context, + ) -> Result, DbError>; + + /// Upsert the single row (id is always 1). 
+ #[allow(dead_code)] + fn upsert_prefs( + &mut self, + context: &opentelemetry::Context, + prefs: &UpsertUserAiPrefs, + ) -> Result<(), DbError>; +} + +pub struct SqliteUserAiPrefsDao { + connection: Arc>, +} + +impl Default for SqliteUserAiPrefsDao { + fn default() -> Self { + Self::new() + } +} + +impl SqliteUserAiPrefsDao { + pub fn new() -> Self { + Self { + connection: Arc::new(Mutex::new(connect())), + } + } + + #[cfg(test)] + pub fn from_connection(conn: Arc>) -> Self { + Self { connection: conn } + } +} + +impl UserAiPrefsDao for SqliteUserAiPrefsDao { + fn get_prefs( + &mut self, + context: &opentelemetry::Context, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_prefs", |_span| { + use schema::user_ai_prefs::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock UserAiPrefsDao"); + + dsl::user_ai_prefs + .first::(connection.deref_mut()) + .optional() + .map_err(|e| anyhow::anyhow!("Failed to get prefs: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn upsert_prefs( + &mut self, + context: &opentelemetry::Context, + prefs: &UpsertUserAiPrefs, + ) -> Result<(), DbError> { + trace_db_call(context, "upsert", "upsert_prefs", |_span| { + use schema::user_ai_prefs::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock UserAiPrefsDao"); + + // SQLite: INSERT on first call, UPDATE on subsequent calls. + // The first INSERT creates the row with id=1 (auto-increment). + // Subsequent calls UPDATE the existing row. + let result = diesel::insert_into(dsl::user_ai_prefs) + .values(prefs) + .execute(connection.deref_mut()); + + match result { + Ok(_) => { + // First insert succeeded. + Ok(()) + } + Err(_e) => { + // Insert failed (likely due to duplicate key). Update instead. 
+ diesel::update(dsl::user_ai_prefs.filter(dsl::id.eq(1))) + .set(( + dsl::voice.eq(&prefs.voice), + dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes), + dsl::library.eq(&prefs.library), + dsl::updated_at.eq(&prefs.updated_at), + )) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?; + Ok(()) + } + } + }) + .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use diesel::Connection; + use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations}; + + const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!(); + + fn setup_dao() -> SqliteUserAiPrefsDao { + let mut conn = SqliteConnection::establish(":memory:") + .expect("Unable to create in-memory db connection"); + conn.run_pending_migrations(DB_MIGRATIONS) + .expect("Failure running DB migrations"); + SqliteUserAiPrefsDao::from_connection(Arc::new(Mutex::new(conn))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + #[test] + fn get_prefs_returns_none_when_empty() { + let mut dao = setup_dao(); + let result = dao.get_prefs(&ctx()).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn upsert_prefs_inserts_row() { + let mut dao = setup_dao(); + let now = 1_700_000_000i64; + let prefs = UpsertUserAiPrefs { + voice: Some("grandma".to_string()), + tz_offset_minutes: Some(-480), + library: Some("1".to_string()), + updated_at: now, + }; + dao.upsert_prefs(&ctx(), &prefs).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.id, 1); + assert_eq!(row.voice, Some("grandma".to_string())); + assert_eq!(row.tz_offset_minutes, Some(-480)); + assert_eq!(row.library, Some("1".to_string())); + assert_eq!(row.updated_at, now); + } + + #[test] + fn upsert_prefs_replaces_existing() { + let mut dao = setup_dao(); + let now1 = 1_700_000_000i64; + let now2 = 1_800_000_000i64; + + let prefs1 = UpsertUserAiPrefs { + voice: 
Some("grandma".to_string()), + tz_offset_minutes: Some(-480), + library: Some("1".to_string()), + updated_at: now1, + }; + dao.upsert_prefs(&ctx(), &prefs1).unwrap(); + + let prefs2 = UpsertUserAiPrefs { + voice: Some("dad".to_string()), + tz_offset_minutes: Some(-300), + library: None, + updated_at: now2, + }; + dao.upsert_prefs(&ctx(), &prefs2).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.voice, Some("dad".to_string())); + assert_eq!(row.tz_offset_minutes, Some(-300)); + assert!(row.library.is_none()); + assert_eq!(row.updated_at, now2); + } + + #[test] + fn upsert_partial_fields() { + let mut dao = setup_dao(); + let now = 1_700_000_000i64; + + let prefs = UpsertUserAiPrefs { + voice: None, + tz_offset_minutes: Some(-480), + library: None, + updated_at: now, + }; + dao.upsert_prefs(&ctx(), &prefs).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.tz_offset_minutes, Some(-480)); + assert!(row.voice.is_none()); + assert!(row.library.is_none()); + } +} diff --git a/src/duplicates.rs b/src/duplicates.rs index 372415b..32ed92b 100644 --- a/src/duplicates.rs +++ b/src/duplicates.rs @@ -234,7 +234,7 @@ async fn list_exact_handler( let span = global_tracer().start_with_context("duplicates.list_exact", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -265,7 +265,7 @@ async fn list_perceptual_handler( let span = global_tracer().start_with_context("duplicates.list_perceptual", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() 
.map(|l| l.id); @@ -449,7 +449,7 @@ async fn list_folder_pairs_handler( let span = global_tracer().start_with_context("duplicates.list_folder_pairs", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); diff --git a/src/faces.rs b/src/faces.rs index 3288aa3..f619966 100644 --- a/src/faces.rs +++ b/src/faces.rs @@ -1755,7 +1755,7 @@ async fn stats_handler( let span = global_tracer().start_with_context("faces.stats", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -1782,11 +1782,12 @@ async fn list_faces_handler( let normalized_path = normalize_path(&query.path); // resolve_library_param returns Option<&Library>; clone so the result // is owned (matching the primary_library fallback's type). 
- let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref()) - .ok() - .flatten() - .cloned() - .unwrap_or_else(|| app_state.primary_library().clone()); + let library: Library = + libraries::resolve_library_param_state(&app_state, query.library.as_deref()) + .ok() + .flatten() + .cloned() + .unwrap_or_else(|| app_state.primary_library().clone()); let mut dao = face_dao.lock().expect("face dao lock"); let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) { @@ -1870,7 +1871,7 @@ async fn create_face_handler( } let normalized_path = normalize_path(&body.path); - let library: Library = match libraries::resolve_library_param( + let library: Library = match libraries::resolve_library_param_state( &app_state, body.library.as_ref().map(|i| i.to_string()).as_deref(), ) { @@ -2192,7 +2193,7 @@ async fn list_persons_handler( let span = global_tracer().start_with_context("persons.list", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -2345,7 +2346,7 @@ async fn person_faces_handler( let context = extract_context_from_request(&request); let span = global_tracer().start_with_context("persons.faces", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); diff --git a/src/files.rs b/src/files.rs index 59cd49e..920540e 100644 --- a/src/files.rs +++ b/src/files.rs @@ -275,14 +275,14 @@ pub async fn list_photos( // Resolve the optional library filter. Unknown values return 400. 
A // `None` result means "union across all libraries" and downstream // walks iterate every configured library root. - let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) - { - Ok(lib) => lib, - Err(msg) => { - log::warn!("Rejecting /photos request: {}", msg); - return HttpResponse::BadRequest().body(msg); - } - }; + let library = + match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { + Ok(lib) => lib, + Err(msg) => { + log::warn!("Rejecting /photos request: {}", msg); + return HttpResponse::BadRequest().body(msg); + } + }; let span_context = opentelemetry::Context::current_with_span(span); @@ -1238,7 +1238,7 @@ pub async fn list_exif_summary( // Resolve the library filter up front so a bad id/name 400s before we // ever take the DAO mutex. None == union across all libraries. let library_filter = - match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) { + match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(lib) => lib.map(|l| l.id), Err(msg) => { span.set_status(Status::error(msg.clone())); diff --git a/src/handlers/image.rs b/src/handlers/image.rs index f0d2310..923fff3 100644 --- a/src/handlers/image.rs +++ b/src/handlers/image.rs @@ -53,7 +53,7 @@ pub async fn get_image( // Resolve library from query param; default to primary so clients that // don't yet send `library=` continue to work. 
- let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -492,7 +492,7 @@ pub async fn get_file_metadata( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -580,7 +580,7 @@ pub async fn set_image_gps( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = libraries::resolve_library_param(&app_state, body.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, body.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -746,7 +746,7 @@ pub async fn get_full_exif( let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("get_full_exif", &context); - let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -888,7 +888,8 @@ pub async fn set_image_date( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -941,7 +942,8 @@ pub async fn clear_image_date( let span_context = 
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -1001,7 +1003,7 @@ pub async fn upload_image( // Resolve the optional library selector. Absent → primary library // (backwards-compatible with clients that don't yet send `library=`). let target_library = - match libraries::resolve_library_param(&app_state, query.library.as_deref()) { + match libraries::resolve_library_param_state(&app_state, query.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { diff --git a/src/handlers/video.rs b/src/handlers/video.rs index f9f4e64..b56a67e 100644 --- a/src/handlers/video.rs +++ b/src/handlers/video.rs @@ -67,10 +67,11 @@ pub async fn generate_video( let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("generate_video", &context); - let preferred_library = libraries::resolve_library_param(&app_state, body.library.as_deref()) - .ok() - .flatten() - .unwrap_or_else(|| app_state.primary_library()); + let preferred_library = + libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); // Try the resolved library first, then fall back to any other library // that actually contains the file — handles union-mode requests where diff --git a/src/libraries.rs b/src/libraries.rs index 55bf5c1..377b442 100644 --- a/src/libraries.rs +++ b/src/libraries.rs @@ -291,11 +291,11 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { } /// Resolve a library request parameter (accepts numeric id as string or name) -/// against the configured libraries. 
Returns `Ok(None)` when the param is +/// against a list of libraries. Returns `Ok(None)` when the param is /// absent, meaning "span all libraries". Returns `Err` when a value is /// provided but does not match any library. pub fn resolve_library_param<'a>( - state: &'a AppState, + libs: &'a [Library], param: Option<&str>, ) -> Result, String> { let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else { @@ -303,18 +303,29 @@ pub fn resolve_library_param<'a>( }; if let Ok(id) = raw.parse::() { - return state - .library_by_id(id) + return libs + .iter() + .find(|l| l.id == id) .map(Some) .ok_or_else(|| format!("unknown library id: {}", id)); } - state - .library_by_name(raw) + libs.iter() + .find(|l| l.name == raw) .map(Some) .ok_or_else(|| format!("unknown library name: {}", raw)) } +/// Resolve a library request parameter against the AppState's libraries. +/// Returns `Ok(None)` when the param is absent, meaning "span all libraries". +/// Returns `Err` when a value is provided but does not match any library. +pub fn resolve_library_param_state<'a>( + state: &'a AppState, + param: Option<&str>, +) -> Result, String> { + resolve_library_param(&state.libraries, param) +} + /// Health of a library at a point in time. Probed at the top of each /// file-watcher tick. 
The `Stale` state is the "be conservative" signal: /// destructive paths (ingest writes, future move-handoff and orphan GC in @@ -662,12 +673,6 @@ mod tests { assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg")); } - fn state_with_libraries(libs: Vec) -> AppState { - let mut state = AppState::test_state(); - state.libraries = libs; - state - } - fn sample_libraries() -> Vec { vec![ Library { @@ -687,52 +692,52 @@ mod tests { ] } - #[actix_rt::test] - async fn resolve_library_param_absent_is_union() { - let state = state_with_libraries(sample_libraries()); - assert!(matches!(resolve_library_param(&state, None), Ok(None))); + #[test] + fn resolve_library_param_absent_is_union() { + let libs = sample_libraries(); + assert!(matches!(resolve_library_param(&libs, None), Ok(None))); } - #[actix_rt::test] - async fn resolve_library_param_empty_or_whitespace_is_union() { - let state = state_with_libraries(sample_libraries()); - assert!(matches!(resolve_library_param(&state, Some("")), Ok(None))); + #[test] + fn resolve_library_param_empty_or_whitespace_is_union() { + let libs = sample_libraries(); + assert!(matches!(resolve_library_param(&libs, Some("")), Ok(None))); assert!(matches!( - resolve_library_param(&state, Some(" ")), + resolve_library_param(&libs, Some(" ")), Ok(None) )); } - #[actix_rt::test] - async fn resolve_library_param_numeric_id_matches() { - let state = state_with_libraries(sample_libraries()); - let lib = resolve_library_param(&state, Some("7")) + #[test] + fn resolve_library_param_numeric_id_matches() { + let libs = sample_libraries(); + let lib = resolve_library_param(&libs, Some("7")) .expect("valid id") .expect("some library"); assert_eq!(lib.id, 7); assert_eq!(lib.name, "archive"); } - #[actix_rt::test] - async fn resolve_library_param_name_matches() { - let state = state_with_libraries(sample_libraries()); - let lib = resolve_library_param(&state, Some("main")) + #[test] + fn resolve_library_param_name_matches() { + let libs = 
sample_libraries(); + let lib = resolve_library_param(&libs, Some("main")) .expect("valid name") .expect("some library"); assert_eq!(lib.id, 1); } - #[actix_rt::test] - async fn resolve_library_param_unknown_id_errs() { - let state = state_with_libraries(sample_libraries()); - let err = resolve_library_param(&state, Some("999")).unwrap_err(); + #[test] + fn resolve_library_param_unknown_id_errs() { + let libs = sample_libraries(); + let err = resolve_library_param(&libs, Some("999")).unwrap_err(); assert!(err.contains("unknown library id")); } - #[actix_rt::test] - async fn resolve_library_param_unknown_name_errs() { - let state = state_with_libraries(sample_libraries()); - let err = resolve_library_param(&state, Some("missing")).unwrap_err(); + #[test] + fn resolve_library_param_unknown_name_errs() { + let libs = sample_libraries(); + let err = resolve_library_param(&libs, Some("missing")).unwrap_err(); assert!(err.contains("unknown library name")); } diff --git a/src/main.rs b/src/main.rs index b059e9b..e3ded45 100644 --- a/src/main.rs +++ b/src/main.rs @@ -267,6 +267,25 @@ fn main() -> std::io::Result<()> { } } + // Spawn the nightly pre-generation scheduler (Section D). 
+ { + use crate::database::{ + InsightDao, SqliteInsightDao, SqliteUserAiPrefsDao, UserAiPrefsDao, + }; + + let insight_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); + let prefs_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))); + + reels::spawn_pregen_scheduler( + app_state.clone(), + web::Data::new(insight_dao), + web::Data::new(prefs_dao), + ) + .await; + } + HttpServer::new(move || { let user_dao = SqliteUserDao::new(); let favorites_dao = SqliteFavoriteDao::new(); @@ -348,6 +367,8 @@ fn main() -> std::io::Result<()> { .service(reels::create_reel_handler) .service(reels::reel_status_handler) .service(reels::reel_video_handler) + .service(reels::precomputed_reel_handler) + .service(reels::precomputed_video_handler) .service(ai::generate_insight_handler) .service(ai::generate_agentic_insight_handler) .service(ai::generation_status_handler) diff --git a/src/memories.rs b/src/memories.rs index c877981..2b1f473 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -419,7 +419,7 @@ pub fn gather_memory_items( span_mode, tz_offset_minutes, years_back ); - let library = crate::libraries::resolve_library_param(app_state, library_param)?; + let library = crate::libraries::resolve_library_param_state(app_state, library_param)?; let libraries_to_scan: Vec<&crate::libraries::Library> = match library { Some(lib) => vec![lib], None => app_state.libraries.iter().collect(), diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 32635a9..c51822c 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -18,24 +18,59 @@ pub mod selector; use std::collections::HashMap; use std::path::{Path, PathBuf}; -use std::sync::{LazyLock, Mutex as StdMutex}; +use std::sync::{Arc, LazyLock, Mutex, Mutex as StdMutex}; use std::time::{Duration, Instant}; use actix_files::NamedFile; use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web}; -use chrono::DateTime; +use anyhow::{Context, anyhow}; +use chrono::{DateTime, Datelike, 
Timelike}; use serde::{Deserialize, Serialize}; use serde_json::json; -use std::sync::Mutex; use uuid::Uuid; use crate::data::Claims; -use crate::database::{ExifDao, InsightDao}; +use crate::database::{ExifDao, InsightDao, PrecomputedReelDao, UserAiPrefsDao}; +use crate::libraries::{Library, resolve_library_param}; use crate::memories::MemoriesSpan; use crate::otel::extract_context_from_request; use crate::state::AppState; use selector::ReelSelector; +// --- Precomputed reel age limits (hours) ------------------------------------- + +/// Maximum age for a precomputed day reel before it's considered stale. +const REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS: u64 = 26; +/// Maximum age for a precomputed week reel. +const REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS: u64 = 192; +/// Maximum age for a precomputed month reel. +const REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS: u64 = 768; + +/// Resolve a library request parameter to a stable key string. +/// Returns the library's id as a string when found, or `"all"` when +/// the param is absent or the lookup fails. +pub fn normalize_library_key(libs: &[Library], param: Option<&str>) -> String { + match resolve_library_param(libs, param) { + Ok(Some(lib)) => lib.id.to_string(), + _ => "all".to_string(), + } +} + +/// Which scripting strategy to use for the reel narration. +#[derive(Clone, Copy)] +#[allow(dead_code)] +pub enum ScripterMode { + /// Fast path: single LLM call via the direct client. + Fast, + /// Agentic path: resolves the backend through the InsightGenerator + /// (honouring LLM_BACKEND, model overrides, etc.). Falls back to + /// Fast on error so a scripting failure never sinks a reel. + Agentic, +} + +/// Progress callback type — receives a static-stage label. +pub type ProgressFn<'a> = dyn Fn(&'static str) + Send + Sync + 'a; + /// The media behind one shot: a still photo, or a short section of a source /// video (played with its live audio ducked under the narration). 
Both carry /// just the library-relative path; the renderer applies fixed clip framing @@ -73,6 +108,8 @@ pub struct PlannedBeat { pub date: Option, pub insight_title: Option, pub insight_summary: Option, + /// GPS coordinates of the lead media item, when available. + pub gps: Option<(f64, f64)>, } impl PlannedBeat { @@ -292,6 +329,13 @@ pub struct ReelStatusResponse { pub error: Option, } +/// Response shape for `GET /reels/precomputed`. +#[derive(Debug, Serialize)] +pub struct PrecomputedReelResponse { + pub video_url: String, + pub title: String, +} + // --- Handlers ---------------------------------------------------------------- /// POST /reels — start (or instantly serve from cache) a memory reel for the @@ -399,8 +443,20 @@ pub async fn create_reel_handler( let state = app_state.clone(); let insight_dao = insight_dao.clone(); + let exif_dao = exif_dao.clone(); let handle = tokio::spawn(async move { - match run_reel_job(&state, &insight_dao, job_id, planned, meta, voice, &key).await { + match run_reel_job( + &state, + &insight_dao, + &exif_dao, + job_id, + planned, + meta, + voice, + &key, + ) + .await + { Ok((title, path)) => { finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None) } @@ -471,25 +527,131 @@ pub async fn reel_video_handler( } } +/// GET /reels/precomputed?span=&library= +/// +/// Look up the latest precomputed reel for the given span and library key. +/// Validity gate (all must hold, else 404): +/// 1. `render_version == RENDER_VERSION` +/// 2. `output_path` exists on disk +/// 3. age <= max_age(span) (Day 26h, Week 8d, Month 32d) +/// +/// Returns `{ video_url: "/reels/by-key/{cache_key}/video", title }`. 
+#[get("/reels/precomputed")] +pub async fn precomputed_reel_handler( + _claims: Claims, + query: web::Query>, + app_state: web::Data, + reel_dao: web::Data>>, +) -> impl Responder { + let span = query.get("span").map(|s| s.as_str()).unwrap_or("day"); + let library_key = normalize_library_key( + &app_state.libraries, + query.get("library").map(|s| s.as_str()), + ); + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs() as i64; + + let max_age_hours = match span { + "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS as i64, + "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS as i64, + _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS as i64, + }; + let min_generated_at = now - (max_age_hours * 3600); + + let ctx = opentelemetry::Context::new(); + let mut dao = reel_dao.lock().expect("Unable to lock PrecomputedReelDao"); + + // Fast existence gate: is there a fresh row at all? + if !dao + .exists_fresh( + &ctx, + span, + &library_key, + RENDER_VERSION as i32, + min_generated_at, + ) + .unwrap_or(false) + { + return HttpResponse::NotFound().json(json!({ "error": "no precomputed reel found" })); + } + + // Fetch the latest row for full validity checks. + let reel = match dao.latest_for(&ctx, span, &library_key) { + Ok(Some(r)) => r, + _ => { + return HttpResponse::NotFound().json(json!({ "error": "no precomputed reel found" })); + } + }; + + // Validity gate 1: render version must match. + if reel.render_version != RENDER_VERSION as i32 { + return HttpResponse::NotFound() + .json(json!({ "error": "precomputed reel is stale (render version mismatch)" })); + } + + // Validity gate 2: output_path must exist. + let output = std::path::Path::new(&reel.output_path); + if !output.exists() { + return HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" })); + } + + // Validity gate 3: age <= max_age (re-checked via min_generated_at). 
+ if reel.generated_at < min_generated_at { + return HttpResponse::NotFound().json(json!({ "error": "precomputed reel has expired" })); + } + + HttpResponse::Ok().json(PrecomputedReelResponse { + video_url: format!("/reels/by-key/{}/video", reel.cache_key), + title: reel.title, + }) +} + +/// GET /reels/by-key/{key}/video — stream a precomputed reel MP4 by cache key. +#[get("/reels/by-key/{key}/video")] +pub async fn precomputed_video_handler( + _claims: Claims, + request: HttpRequest, + path: web::Path, + app_state: web::Data, +) -> impl Responder { + let key = path.into_inner(); + let mp4 = reel_mp4_path(&app_state, &key); + match NamedFile::open(&mp4) { + Ok(file) => file.into_response(&request), + Err(e) => { + log::error!("opening precomputed reel {key} failed: {e:?}"); + HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" })) + } + } +} + // --- Pipeline ---------------------------------------------------------------- /// Run the full reel pipeline: enrich → script → narrate → render → concat, /// then publish the MP4 into the cache. Returns (title, mp4_path). -async fn run_reel_job( +/// +/// The `scripter` parameter controls which narration-generation strategy is +/// used (fast single-call vs. agentic backend resolution). On scripting +/// failure in Agentic mode the pipeline falls back to the fast path so a +/// single LLM failure never sinks a reel. 
+pub(crate) async fn produce_reel( app_state: &AppState, insight_dao: &Mutex>, - job_id: Uuid, + exif_dao: &Mutex>, mut planned: Vec, meta: ReelMeta, voice: Option, key: &str, + scripter: ScripterMode, + progress: Option<&ProgressFn<'_>>, ) -> anyhow::Result<(String, PathBuf)> { - use anyhow::{Context, anyhow}; - let started = Instant::now(); let total_photos: usize = planned.iter().map(|b| b.media.len()).sum(); log::info!( - "reel {job_id}: starting — span {:?}, {} beats, {} photos, voice={}", + "reel produce_reel: starting — span {:?}, {} beats, {} photos, voice={}", meta.span, planned.len(), total_photos, @@ -499,18 +661,33 @@ async fn run_reel_job( let client = app_state .llamacpp .as_ref() - .ok_or_else(|| anyhow!("TTS/LLM backend not configured"))? + .ok_or_else(|| anyhow::anyhow!("TTS/LLM backend not configured"))? .clone(); // 1. Enrich each beat with its lead photo's cached insight, then script // (one LLM call → one narration line per beat). - set_stage(job_id, "scripting"); - log::info!("reel {job_id}: scripting narration via LLM…"); + emit_progress(progress, "scripting"); + log::info!("reel produce_reel: scripting narration via LLM…"); let span_context = opentelemetry::Context::new(); - selector::enrich(insight_dao, &span_context, &mut planned); - let script = script::generate_script(&client, &meta, &planned).await?; + selector::enrich(insight_dao, exif_dao, &span_context, &mut planned); + let script = match scripter { + ScripterMode::Fast => script::generate_script(&client, &meta, &planned).await?, + ScripterMode::Agentic => { + match script::generate_script_agentic(&app_state.insight_generator, &meta, &planned) + .await + { + Ok(s) => s, + Err(e) => { + log::warn!( + "reel produce_reel: agentic script failed, falling back to fast: {e}" + ); + script::generate_script(&client, &meta, &planned).await? 
+ } + } + } + }; log::info!( - "reel {job_id}: scripted \"{}\" ({} lines)", + "reel produce_reel: scripted \"{}\" ({} lines)", script.title, script.lines.len() ); @@ -519,11 +696,11 @@ async fn run_reel_job( // sequence under that one narration). A beat whose audio or render fails // is skipped (logged) rather than sinking the whole reel — handles an // odd HEIC/corrupt file gracefully. - set_stage(job_id, "narrating"); + emit_progress(progress, "narrating"); let work = tempfile::tempdir().context("creating reel work dir")?; let nvenc = render::is_nvenc_available().await; log::info!( - "reel {job_id}: narrating + rendering {} beats (encoder: {})", + "reel produce_reel: narrating + rendering {} beats (encoder: {})", planned.len(), if nvenc { "nvenc" } else { "cpu" } ); @@ -543,7 +720,7 @@ async fn run_reel_job( .filter_map(|m| resolve_media_path(app_state, m)) .collect(); if paths.is_empty() { - log::warn!("reel {job_id}: skipping beat {i}, no media paths resolved"); + log::warn!("reel produce_reel: skipping beat {i}, no media paths resolved"); continue; } @@ -558,13 +735,13 @@ async fn run_reel_job( { Ok(b) => b, Err(e) => { - log::warn!("reel {job_id}: skipping beat {i}, TTS failed: {e}"); + log::warn!("reel produce_reel: skipping beat {i}, TTS failed: {e}"); continue; } }; let audio_path = work.path().join(format!("narration_{i:03}.wav")); if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { - log::warn!("reel {job_id}: skipping beat {i}, writing audio failed: {e}"); + log::warn!("reel produce_reel: skipping beat {i}, writing audio failed: {e}"); continue; } @@ -575,11 +752,11 @@ async fn run_reel_job( .flatten() .unwrap_or(render::MIN_SEGMENT_SECONDS); - set_stage(job_id, "rendering"); + emit_progress(progress, "rendering"); let beat_out = work.path().join(format!("beat_{i:03}.mp4")); let render_result = if beat.is_clip() { log::info!( - "reel {job_id}: beat {}/{} — video clip, narration {:.1}s", + "reel produce_reel: beat {}/{} — video clip, 
narration {:.1}s", i + 1, beat_total, narration_secs @@ -587,7 +764,7 @@ async fn run_reel_job( render::render_clip_beat(&paths[0], &audio_path, &beat_out, narration_secs, &opts).await } else { log::info!( - "reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s", + "reel produce_reel: beat {}/{} — {} photo(s), narration {:.1}s", i + 1, beat_total, paths.len(), @@ -596,7 +773,7 @@ async fn run_reel_job( render::render_beat(&paths, &audio_path, &beat_out, narration_secs, &opts).await }; if let Err(e) = render_result { - log::warn!("reel {job_id}: skipping beat {i}, render failed: {e}"); + log::warn!("reel produce_reel: skipping beat {i}, render failed: {e}"); continue; } beat_files.push(beat_out.to_string_lossy().to_string()); @@ -609,9 +786,9 @@ async fn run_reel_job( // 4. Concat into the cache. Write to a temp name in the reels dir, then // rename atomically (same filesystem) so a reader never sees a partial. - set_stage(job_id, "rendering"); + emit_progress(progress, "rendering"); log::info!( - "reel {job_id}: joining {} rendered beats into the final reel", + "reel produce_reel: joining {} rendered beats into the final reel", segment_files.len() ); std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?; @@ -629,7 +806,7 @@ async fn run_reel_job( let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar); log::info!( - "reel {job_id}: done in {:.1}s — {} beats → {}", + "reel produce_reel: done in {:.1}s — {} beats → {}", started.elapsed().as_secs_f64(), segment_files.len(), final_path.display() @@ -637,6 +814,42 @@ async fn run_reel_job( Ok((script.title, final_path)) } +/// Emit a progress stage label via the optional callback. +fn emit_progress(progress: Option<&ProgressFn<'_>>, stage: &'static str) { + if let Some(p) = progress { + p(stage); + } +} + +/// Run the full reel pipeline and publish the MP4 into the cache. +/// Thin wrapper around [`produce_reel`] that wires up job-stage tracking. 
+async fn run_reel_job( + app_state: &AppState, + insight_dao: &Mutex>, + exif_dao: &Mutex>, + job_id: Uuid, + planned: Vec, + meta: ReelMeta, + voice: Option, + key: &str, +) -> anyhow::Result<(String, PathBuf)> { + let progress = move |stage: &'static str| { + set_stage(job_id, stage); + }; + produce_reel( + app_state, + insight_dao, + exif_dao, + planned, + meta, + voice, + key, + ScripterMode::Fast, + Some(&progress), + ) + .await +} + /// Resolve a media item's library-relative path to a validated absolute path /// under its library root (works for both photos and clips). fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option { @@ -645,9 +858,280 @@ fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option u32 { + std::env::var("REEL_PREGEN_HOUR") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(3) +} + +/// Env: "1" (default, Monday). Day of week for weekly pre-gen (0=Sun, 1=Mon, ...). +fn pregen_week_dow() -> u32 { + std::env::var("REEL_PREGEN_WEEK_DOW") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(1) +} + +/// Pure: seconds until the next run of `run_hour` given the current local time. +/// Handles same-day vs wrap-around. Recomputed each loop iteration to absorb +/// DST shifts. +pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime, run_hour: u32) -> u64 { + let now_hour = now.hour(); + let diff = if now_hour >= run_hour { + 24 - now_hour + run_hour + } else { + run_hour - now_hour + }; + (diff * 3600) as u64 +} + +/// Load pre-gen parameters: tries the user_ai_prefs DB row first, falls back +/// to env vars, then to server-local defaults. 
+fn load_pregen_params( + prefs_dao: &web::Data>>>, +) -> (i32, Option, String) { + // Try DB row first + if let Ok(mut dao) = prefs_dao.lock() { + let ctx = opentelemetry::Context::new(); + if let Ok(Some(prefs)) = dao.get_prefs(&ctx) { + let tz = prefs + .tz_offset_minutes + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()); + let voice = prefs.voice; + let library = prefs.library.unwrap_or_else(|| "all".to_string()); + return (tz, voice, library); + } + } + // Fall back to env + let tz = std::env::var("REEL_PREGEN_TZ_OFFSET_MINUTES") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()); + let voice = std::env::var("REEL_PREGEN_VOICE").ok(); + let library = std::env::var("REEL_PREGEN_LIBRARY") + .ok() + .unwrap_or_else(|| "all".to_string()); + (tz, voice, library) +} + +/// Spawn the nightly pre-generation scheduler. Runs behind `REEL_PREGEN_ENABLED`. +pub(crate) async fn spawn_pregen_scheduler( + app_state: web::Data, + insight_dao: web::Data>>>, + prefs_dao: web::Data>>>, +) { + if std::env::var("REEL_PREGEN_ENABLED").ok() != Some("1".to_string()) { + log::info!("Reel pre-generation scheduler disabled (REEL_PREGEN_ENABLED != 1)"); + return; + } + + let run_hour = pregen_run_hour(); + log::info!( + "Reel pre-generation scheduler enabled, running at hour {} local", + run_hour + ); + + tokio::spawn(async move { + loop { + let now = chrono::Local::now(); + let sleep_secs = secs_until_next_run_hour(now, run_hour); + log::debug!("Next pre-gen run in {}s", sleep_secs); + tokio::time::sleep(std::time::Duration::from_secs(sleep_secs)).await; + + if let Err(e) = run_pregen_batch(&app_state, &insight_dao, &prefs_dao).await { + log::error!("Reel pre-generation batch failed: {}", e); + } + } + }); +} + +/// Run the pre-generation batch for all applicable spans. 
+async fn run_pregen_batch( + app_state: &AppState, + insight_dao: &web::Data>>>, + prefs_dao: &web::Data>>>, +) -> anyhow::Result<()> { + let now = chrono::Local::now(); + let weekday = now.weekday().num_days_from_sunday(); // 0=Sun, 1=Mon, ... + let day_of_month = now.day(); + + let mut spans = vec!["day"]; + if weekday == pregen_week_dow() { + spans.push("week"); + } + if day_of_month == 1 { + spans.push("month"); + } + + let (tz, voice, library) = load_pregen_params(prefs_dao); + + for span in spans { + if let Err(e) = pregen_one(app_state, insight_dao, span, tz, voice.clone(), &library).await + { + log::error!("Pre-gen failed for span={}: {}", span, e); + } + } + + Ok(()) +} + +/// Pre-generate a single reel for the given span. +async fn pregen_one( + app_state: &AppState, + insight_dao: &web::Data>>>, + span: &str, + tz: i32, + voice: Option, + library: &str, +) -> anyhow::Result<()> { + let memories_span = match span { + "day" => MemoriesSpan::Day, + "week" => MemoriesSpan::Week, + "month" => MemoriesSpan::Month, + _ => MemoriesSpan::Day, + }; + + let selector = ReelSelector::Memories { + span: memories_span, + tz_offset_minutes: tz, + library: if library == "all" { + None + } else { + Some(library.to_string()) + }, + max_segments: 24, + }; + + let exif_dao: Arc>> = Arc::new(StdMutex::new(Box::new( + crate::database::SqliteExifDao::new(), + ))); + let ctx = opentelemetry::Context::new(); + let (planned, reel_meta) = match selector::resolve(app_state, &exif_dao, &ctx, &selector) { + Ok((p, m)) => (p, m), + Err(e) => { + log::warn!("Pre-gen resolve failed for span={}: {}", span, e); + return Ok(()); + } + }; + + if planned.is_empty() { + log::info!("No beats for span={}, skipping", span); + return Ok(()); + } + + // Flatten every media item across beats (in order) into the cache key. 
+ let media: Vec = planned.iter().flat_map(|b| b.media.clone()).collect(); + let key = cache_key(&selector, &media, voice.as_deref()); + + // Dedup: check if fresh ledger row exists + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs() as i64; + + let max_age_hours = match span { + "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS, + "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS, + _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS, + }; + let min_generated_at = now - (max_age_hours as i64 * 3600); + + let is_fresh = { + let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); + dao.exists_fresh(&ctx, span, "all", RENDER_VERSION as i32, min_generated_at) + .unwrap_or(false) + }; + + if is_fresh { + log::info!("Fresh precomputed reel exists for span={}, skipping", span); + return Ok(()); + } + + // Check if MP4 already on disk (from a previous run that crashed after render) + let mp4_path = reel_mp4_path(app_state, &key); + if mp4_path.exists() { + log::info!( + "Precomputed reel MP4 already exists for key={}, recording ledger and skipping render", + key + ); + // Read title from sidecar if available + let sidecar_path = mp4_path.with_extension("json"); + let title = if sidecar_path.exists() { + let sidecar = tokio::fs::read_to_string(&sidecar_path).await.ok(); + sidecar + .and_then(|s| serde_json::from_str::(&s).ok()) + .map(|s| s.title) + .unwrap_or_else(|| format!("{} reel", span)) + } else { + format!("{} reel", span) + }; + let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); + reel_dao.record_reel( + &ctx, + &crate::database::models::InsertablePrecomputedReel { + span: span.to_string(), + library_key: "all".to_string(), + cache_key: key.clone(), + output_path: mp4_path.to_string_lossy().to_string(), + title, + media_count: planned.len() as i32, + render_version: RENDER_VERSION as i32, + tz_offset_minutes: tz, + voice: voice.clone(), + generated_at: now, + }, + )?; + 
return Ok(()); + } + + // Generate the reel + log::info!("Generating precomputed reel for span={}, key={}", span, key); + let photo_count = planned.len() as i32; + let (title, mp4) = produce_reel( + app_state, + insight_dao, + &exif_dao, + planned, + reel_meta, + voice.clone(), + &key, + ScripterMode::Agentic, + None, + ) + .await?; + + // Record to ledger + let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); + reel_dao.record_reel( + &ctx, + &crate::database::models::InsertablePrecomputedReel { + span: span.to_string(), + library_key: "all".to_string(), + cache_key: key.clone(), + output_path: mp4.to_string_lossy().to_string(), + title, + media_count: photo_count, + render_version: RENDER_VERSION as i32, + tz_offset_minutes: tz, + voice: voice.clone(), + generated_at: now, + }, + )?; + + log::info!("Precomputed reel generated for span={}, key={}", span, key); + Ok(()) +} + #[cfg(test)] mod tests { use super::*; + use crate::ai::face_client::FaceClient; + use crate::libraries::Library; + use crate::video::actors::StreamActor; fn photo(p: &str, lib: i32) -> SegmentMedia { SegmentMedia::Photo { @@ -672,6 +1156,128 @@ mod tests { } } + /// Minimal AppState for tests that only need library lookup. 
+ #[allow(dead_code)] + fn test_app_state() -> AppState { + use crate::ai::InsightGenerator; + use crate::ai::insight_chat::{ChatLockMap, InsightChatService}; + use crate::ai::turn_registry::TurnRegistry; + use crate::ai::{OllamaClient, SmsApiClient}; + use crate::database::{ + ExifDao, InsightDao, InsightGenerationJobDao, PreviewDao, SqliteExifDao, + SqliteInsightDao, SqliteInsightGenerationJobDao, SqlitePreviewDao, + }; + use crate::faces; + use crate::state::AppState; + use crate::tags::SqliteTagDao; + use actix::Actor; + use std::sync::Mutex; + + let temp_dir = tempfile::tempdir().expect("Failed to create temp directory"); + let base_path = temp_dir.path().to_path_buf(); + let base_path_str = base_path.to_string_lossy().to_string(); + + let test_lib = Library { + id: crate::libraries::PRIMARY_LIBRARY_ID, + name: "main".to_string(), + root_path: base_path_str.clone(), + enabled: true, + excluded_dirs: Vec::new(), + }; + + let ollama = OllamaClient::new( + "http://localhost:11434".to_string(), + None, + "llama3.2".to_string(), + None, + ); + let sms_client = SmsApiClient::new("http://localhost:8000".to_string(), None); + let apollo_client = crate::ai::apollo_client::ApolloClient::new(None); + + let insight_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); + let exif_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new(SqliteExifDao::new()))); + let daily_summary_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteDailySummaryDao::new(), + ))); + let insight_generator = InsightGenerator::new( + ollama.clone(), + None, + None, + sms_client.clone(), + apollo_client.clone(), + insight_dao.clone(), + exif_dao.clone(), + daily_summary_dao, + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteCalendarEventDao::new(), + ))), + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteLocationHistoryDao::new(), + ))), + 
std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteSearchHistoryDao::new(), + ))), + std::sync::Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))), + std::sync::Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new()))), + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteKnowledgeDao::new(), + ))), + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqlitePersonaDao::new(), + ))), + vec![test_lib.clone()], + ); + + let chat_locks: ChatLockMap = + std::sync::Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new())); + let insight_chat = std::sync::Arc::new(InsightChatService::new( + std::sync::Arc::new(insight_generator.clone()), + insight_dao.clone(), + chat_locks, + )); + let turn_registry = std::sync::Arc::new(TurnRegistry::new(300)); + let preview_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new()))); + let insight_job_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))); + let insight_job_handles: std::sync::Arc< + Mutex>, + > = std::sync::Arc::new(Mutex::new(std::collections::HashMap::new())); + + AppState::new( + std::sync::Arc::new(StreamActor {}.start()), + vec![test_lib], + base_path_str.clone(), + base_path_str.clone(), + base_path_str.clone(), + base_path_str.clone(), + Vec::new(), + ollama, + None, + Vec::new(), + None, + Vec::new(), + sms_client, + insight_generator, + insight_chat, + turn_registry, + preview_dao, + FaceClient::new(None), + crate::ai::clip_client::ClipClient::new(None), + insight_job_dao, + insight_job_handles, + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqlitePrecomputedReelDao::new(), + ))), + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteUserAiPrefsDao::new(), + ))), + ) + } + #[test] fn cache_key_is_stable_for_same_inputs() { let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; @@ -724,12 +1330,14 @@ mod tests { date: None, insight_title: None, 
insight_summary: None, + gps: None, }; let photo_beat = PlannedBeat { media: vec![photo("a.jpg", 1), photo("b.jpg", 1)], date: None, insight_title: None, insight_summary: None, + gps: None, }; assert!(clip_beat.is_clip()); assert!(!photo_beat.is_clip()); @@ -753,6 +1361,7 @@ mod tests { date: Some(1_560_384_000), // 2019-06-13 UTC insight_title: None, insight_summary: None, + gps: None, }; assert!(beat.date_label().unwrap().contains("2019")); @@ -761,7 +1370,77 @@ mod tests { date: None, insight_title: None, insight_summary: None, + gps: None, }; assert_eq!(undated.date_label(), None); } + + #[test] + fn normalize_library_key_returns_id_when_found_numeric() { + let libs = vec![ + Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }, + Library { + id: 7, + name: "archive".to_string(), + root_path: "/tmp/archive".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }, + ]; + assert_eq!(normalize_library_key(&libs, Some("1")), "1"); + } + + #[test] + fn normalize_library_key_returns_id_when_found_by_name() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("main")), "1"); + } + + #[test] + fn normalize_library_key_returns_all_when_absent() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, None), "all"); + } + + #[test] + fn normalize_library_key_returns_all_when_empty() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("")), "all"); + } + + #[test] + fn normalize_library_key_returns_all_when_unknown() { + let libs = vec![Library { + 
id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("missing")), "all"); + } } diff --git a/src/reels/script.rs b/src/reels/script.rs index 5be3d64..202a22c 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -9,13 +9,20 @@ //! //! The prompt builder and response parser are pure so the contract is //! unit-testable; `generate_script` wires them to the LLM client. +//! +//! The agentic scripter (pre-generation) resolves the backend through the +//! InsightGenerator, builds a read-only tool set, and runs a tool loop to +//! ground the narration in retrieved context before asking for the final JSON. use anyhow::{Context, Result}; use std::sync::Arc; use super::{PlannedBeat, ReelMeta}; +use crate::ai::backend::{BackendKind, SamplingOverrides}; +use crate::ai::insight_generator::InsightGenerator; use crate::ai::llamacpp::LlamaCppClient; -use crate::ai::llm_client::LlmClient; +use crate::ai::llm_client::{LlmClient, Tool}; +use crate::ai::ollama::ChatMessage; /// The narration for a whole reel: a title and one line per beat, in order. #[derive(Debug, Clone, PartialEq)] @@ -35,6 +42,32 @@ can be read aloud in a few seconds. Avoid generic filler like \"what a \ wonderful day\" — if you have little to go on, simply describe the moment \ plainly."; +/// Agentic scripter system prompt: richer version that tells the model it may +/// call read-only tools to ground each line. +const AGENTIC_SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \ +slideshow of someone's own photos set to a spoken voiceover. Write warm, \ +specific, first-person narration as if the person is gently looking back on \ +their own memories. Each line plays over one moment, which may be a quick burst \ +of several photos, so narrate the moment as a whole rather than a single frame. 
\ +Be concrete and grounded in the details given; never invent names, places, or \ +events that aren't supported. Keep each line to one or two short sentences that \ +can be read aloud in a few seconds. Avoid generic filler like \"what a \ +wonderful day\" — if you have little to go on, simply describe the moment \ +plainly.\n\nYou may call read-only tools (search_messages, get_file_tags, \ +reverse_geocode, get_current_datetime, recall_entities, recall_facts_for_photo, \ +recall_facts_for_entity) to ground each line in real context. Never invent \ +details. Return ONLY the JSON object, no prose or code fences."; + +/// Maximum agentic tool iterations for pre-generation. Tunable via +/// `REEL_PREGEN_MAX_TOOL_ITERS` (default 8). +fn reel_pregen_max_tool_iters() -> usize { + std::env::var("REEL_PREGEN_MAX_TOOL_ITERS") + .ok() + .and_then(|s| s.trim().parse::().ok()) + .filter(|x| *x > 0) + .unwrap_or(8) +} + /// Build the (system, user) prompt pair for the scripter. The user message /// describes each beat in order and asks for strict JSON back. pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) { @@ -81,6 +114,61 @@ pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, (SYSTEM_PROMPT.to_string(), user) } +/// Build a richer (system, user) prompt pair for the agentic scripter. The +/// system prompt tells the model it may call read-only tools to ground each +/// line. The user message uses the same per-beat enumeration as +/// `build_script_messages` plus a GPS line per beat when available. 
+pub fn build_agentic_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> Vec { + let mut user = String::new(); + user.push_str(&format!( + "This reel has {} moments surfaced as memories {}.\n\n", + beats.len(), + meta.span_phrase() + )); + if !meta.years.is_empty() { + let years: Vec = meta.years.iter().map(|y| y.to_string()).collect(); + user.push_str(&format!("They span the years: {}.\n\n", years.join(", "))); + } + user.push_str("Moments, in the order they will appear:\n"); + for (i, beat) in beats.iter().enumerate() { + user.push_str(&format!("\n[{}]", i + 1)); + if let Some(date) = beat.date_label() { + user.push_str(&format!(" {date}")); + } + if beat.is_clip() { + user.push_str(" (a video clip)"); + } else if beat.media.len() > 1 { + user.push_str(&format!(" (a burst of {} photos)", beat.media.len())); + } + if let Some((lat, lon)) = beat.gps { + user.push_str(&format!("\n GPS: {:.4}, {:.4}", lat, lon)); + } + user.push('\n'); + match (&beat.insight_title, &beat.insight_summary) { + (Some(t), Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {t} — {s}\n")); + } + (Some(t), _) => user.push_str(&format!(" Known context: {t}\n")), + (_, Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {s}\n")); + } + _ => user.push_str(" (no extra context — narrate plainly from the date)\n"), + } + } + user.push_str(&format!( + "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\ + {{\"title\": \"\", \"segments\": [\"\", \ + \"\", ... ]}}\n\ + The \"segments\" array MUST have exactly {} items, one per moment in order.", + beats.len() + )); + + vec![ + ChatMessage::system(AGENTIC_SYSTEM_PROMPT.to_string()), + ChatMessage::user(user), + ] +} + /// Parse the model's response into a script with exactly `n` lines. Tolerant of /// code fences and surrounding prose, and of both `segments: [".."]` and /// `segments: [{"narration": ".."}]` shapes. 
Missing/extra lines are padded or @@ -198,6 +286,74 @@ pub async fn generate_script( Ok(parse_script_response(&raw, beats.len())) } +/// Agentic version of script generation: resolves the backend via the +/// InsightGenerator (honouring LLM_BACKEND, model overrides, etc.), builds +/// a read-only tool set, runs the tool loop, then parses the JSON response. +/// Returns the same ReelScript shape. On failure the caller may fall back to +/// `generate_script`. +pub async fn generate_script_agentic( + generator: &InsightGenerator, + meta: &ReelMeta, + beats: &[PlannedBeat], +) -> Result { + // 1. Resolve the backend. Bail if the local model lacks tool-calling. + let backend = generator + .resolve_backend( + BackendKind::Local, + &SamplingOverrides { + model: None, + num_ctx: None, + temperature: None, + top_p: None, + top_k: None, + min_p: None, + }, + ) + .await + .context("resolving backend for agentic script")?; + + // 2. Build the read-only tool set. Start from the persona gate (no + // persona context, so corrections are closed), force has_vision=false, + // then filter out write tools. + let gate = generator.current_gate_opts_for_persona(false, None); + let all_tools = InsightGenerator::build_tool_definitions(gate); + let read_only_names: std::collections::HashSet<&str> = [ + "search_rag", + "search_messages", + "get_sms_messages", + "get_calendar_events", + "get_location_history", + "get_file_tags", + "get_faces_in_photo", + "reverse_geocode", + "get_personal_place_at", + "recall_entities", + "recall_facts_for_photo", + "recall_facts_for_entity", + "get_current_datetime", + ] + .into_iter() + .collect(); + let tools: Vec = all_tools + .into_iter() + .filter(|t| read_only_names.contains(t.function.name.as_str())) + .collect(); + + // 3. Build the agentic prompt messages. + let messages = build_agentic_script_messages(meta, beats); + + // 4. Run the tool loop. 
+ let max_iter = reel_pregen_max_tool_iters(); + let raw = generator + .run_readonly_tool_loop(&backend, messages, tools, max_iter) + .await + .context("agentic tool loop failed")?; + + // 5. Strip any think-blocks the model may have emitted, then parse. + let raw = crate::ai::llm_client::strip_think_blocks(&raw); + Ok(parse_script_response(&raw, beats.len())) +} + #[cfg(test)] mod tests { use super::*; @@ -220,6 +376,7 @@ mod tests { date: Some(1_560_000_000 + i as i64 * 86_400), insight_title: None, insight_summary: None, + gps: None, }) .collect() } diff --git a/src/reels/selector.rs b/src/reels/selector.rs index d096f6d..a02cbb8 100644 --- a/src/reels/selector.rs +++ b/src/reels/selector.rs @@ -207,6 +207,7 @@ fn form_photo_beats( date, insight_title: None, insight_summary: None, + gps: None, } }) .collect() @@ -255,6 +256,7 @@ pub fn form_beats( date: v.created, insight_title: None, insight_summary: None, + gps: None, }); } @@ -334,15 +336,20 @@ fn distinct_years(items: &[memories::MemoryItem], tz: Option) -> Ve years } -/// Background pass: fill each beat's cached insight (title + summary) from its -/// lead photo, where one exists. Best-effort — a missing or errored lookup -/// leaves the fields `None` and the scripter narrates from the date alone. +/// Background pass: fill each beat's cached insight (title + summary) and +/// GPS coordinates from its lead photo, where one exists. Best-effort — a +/// missing or errored lookup leaves the fields `None` and the scripter +/// narrates from the date alone. 
pub fn enrich( insight_dao: &Mutex>, + exif_dao: &Mutex>, span_context: &opentelemetry::Context, beats: &mut [PlannedBeat], ) { - let Ok(mut dao) = insight_dao.lock() else { + let Ok(mut insight_dao) = insight_dao.lock() else { + return; + }; + let Ok(mut exif_dao) = exif_dao.lock() else { return; }; for beat in beats.iter_mut() { @@ -352,10 +359,17 @@ pub fn enrich( } None => continue, }; - if let Ok(Some(insight)) = dao.get_insight(span_context, &rel_path) { + if let Ok(Some(insight)) = insight_dao.get_insight(span_context, &rel_path) { beat.insight_title = Some(insight.title); beat.insight_summary = Some(insight.summary); } + // Enrich GPS from EXIF when the lead media is a photo. + if let Some(SegmentMedia::Photo { .. }) = beat.media.first() + && let Ok(Some(exif)) = exif_dao.get_exif(span_context, &rel_path) + && let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude) + { + beat.gps = Some((lat as f64, lon as f64)); + } } } diff --git a/src/state.rs b/src/state.rs index bf894f3..33e8e3f 100644 --- a/src/state.rs +++ b/src/state.rs @@ -8,9 +8,10 @@ use crate::ai::turn_registry::TurnRegistry; use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient}; use crate::database::{ CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, InsightGenerationJobDao, KnowledgeDao, - LocationHistoryDao, SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, - SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, SqliteKnowledgeDao, - SqliteLocationHistoryDao, SqliteSearchHistoryDao, connect, + LocationHistoryDao, PrecomputedReelDao, SearchHistoryDao, SqliteCalendarEventDao, + SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, + SqliteKnowledgeDao, SqliteLocationHistoryDao, SqlitePrecomputedReelDao, SqliteSearchHistoryDao, + SqliteUserAiPrefsDao, UserAiPrefsDao, connect, }; use crate::database::{PreviewDao, SqlitePreviewDao}; use crate::faces; @@ -88,6 +89,14 @@ pub struct AppState { pub clip_client: 
ClipClient, pub insight_job_dao: Arc>>, pub insight_job_handles: Arc>>, + /// Ledger for precomputed memory reels. Written by the nightly agentic + /// job (Section D); read by `GET /reels/precomputed` (Section C). + #[allow(dead_code)] + pub precomputed_reel_dao: Arc>>, + /// User AI preferences (voice, timezone, library). Mirrored by the + /// client; read by the nightly pre-generation scheduler. + #[allow(dead_code)] + pub user_ai_prefs_dao: Arc>>, } impl AppState { @@ -101,6 +110,7 @@ impl AppState { self.libraries.iter().find(|l| l.id == id) } + #[allow(dead_code)] pub fn library_by_name(&self, name: &str) -> Option<&Library> { self.libraries.iter().find(|l| l.name == name) } @@ -129,6 +139,8 @@ impl AppState { clip_client: ClipClient, insight_job_dao: Arc>>, insight_job_handles: Arc>>, + precomputed_reel_dao: Arc>>, + user_ai_prefs_dao: Arc>>, ) -> Self { assert!( !libraries_vec.is_empty(), @@ -187,6 +199,8 @@ impl AppState { clip_client, insight_job_dao, insight_job_handles, + precomputed_reel_dao, + user_ai_prefs_dao, } } @@ -267,6 +281,14 @@ impl Default for AppState { let insight_job_handles: Arc>> = Arc::new(Mutex::new(HashMap::new())); + // Initialize precomputed reel DAO (nightly pre-generation ledger) + let precomputed_reel_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))); + + // Initialize user AI preferences DAO (Section E) + let user_ai_prefs_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))); + // Load base path and ensure the primary library row reflects it. 
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env"); let mut seed_conn = connect(); @@ -344,6 +366,8 @@ impl Default for AppState { clip_client, insight_job_dao, insight_job_handles, + precomputed_reel_dao, + user_ai_prefs_dao, ) } } @@ -553,6 +577,8 @@ impl AppState { ClipClient::new(None), // disabled in test Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test Arc::new(Mutex::new(HashMap::new())), // placeholder for test + Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))), // placeholder for test + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))), // placeholder for test ) } } diff --git a/src/tags.rs b/src/tags.rs index f3e0135..3dc0859 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -168,7 +168,7 @@ async fn get_tags( // this file, so tags added under one library show up under the // others when they hold the same file. Falls back to direct rel_path // match when the file hasn't been hashed yet. 
- let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, request.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); -- 2.52.0 From 5c9ee5652784785bd736c5152bdf9f82165b3074 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 14:59:00 -0400 Subject: [PATCH 11/17] Fix agentic reel audit issues: midnight bug, DAO wiring, dead code, DST timezone, validation Blocking fixes: - secs_until_next_run_hour: same-hour now returns 0 instead of 24h - capture_prefs: called at both handler return points, never fails request - capture_prefs: resolves library param, upserts to user_ai_prefs via DAO - Scheduler: uses AppState DAOs instead of separate connections - Pregen dedup: uses resolved library param instead of hardcoded 'all' - run_readonly_tool_loop: added #[allow(dead_code)] (used in main.rs only) - run_readonly_tool_loop: removed dead messages.push() call - InsightGenerator: added exif_dao() getter for scheduler reuse Medium fixes: - Input validation: run_hour clamped 0-23, week_dow clamped 0-6 - DST-sensitive timezone: fixed_tz_offset() with env var config Low fixes: - Documented REEL_PREGEN_MAX_TOOL_ITERS and REEL_PREGEN_TZ_FIXED_MINUTES - Removed dead test_app_state function and unused imports Also fix: UpsertUserAiPrefs import path, chrono::Local::with_ymd_and_hms requires TimeZone trait + .single(), unwrap_or_else closure simplification --- .env.example | 5 + src/ai/insight_generator.rs | 13 +- src/main.rs | 18 +-- src/reels/mod.rs | 295 ++++++++++++++++-------------------- 4 files changed, 151 insertions(+), 180 deletions(-) diff --git a/.env.example b/.env.example index bafc0c8..a7bd7e5 100644 --- a/.env.example +++ b/.env.example @@ -150,8 +150,13 @@ SEARCH_RAG_RERANK=0 # Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to # the server's local timezone. 
# REEL_PREGEN_TZ_OFFSET_MINUTES= +# Fixed timezone offset — overrides auto-detect to avoid DST shifts. +# When set, both the DB fallback and env fallback use this value. +# REEL_PREGEN_TZ_FIXED_MINUTES=-480 # Voice ID for narration (e.g., "grandma"). Falls back to the value # stored in the user_ai_prefs DB row when set. # REEL_PREGEN_VOICE= # Library filter: a library id (e.g. "1") or "all" for every library. # REEL_PREGEN_LIBRARY=all +# Max agentic tool iterations for pre-gen scripter. Default 8. +# REEL_PREGEN_MAX_TOOL_ITERS=8 diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 4871c2e..4ff8494 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -217,6 +217,13 @@ impl InsightGenerator { &self.insight_dao } + /// Accessor for the EXIF DAO (used by the reel scheduler to resolve + /// GPS enrichment without creating a separate DB connection). + #[allow(dead_code)] + pub fn exif_dao(&self) -> &Arc>> { + &self.exif_dao + } + /// Whether the optional Apollo Places integration is wired up. Drives /// tool-definition gating (no point offering `get_personal_place_at` /// when Apollo is unreachable) — exposed publicly so `insight_chat` @@ -4509,6 +4516,9 @@ Return ONLY the summary, nothing else."#, /// /// Calls `execute_tool` with empty file/image context; enabled tools /// never read those fields. + /// + /// Only used by the `reels` module (compiled in `main.rs`, not `lib.rs`), + /// so the `#[allow(dead_code)]` suppresses the lib-target warning. 
#[allow(dead_code)] pub(crate) async fn run_readonly_tool_loop( &self, @@ -4592,8 +4602,7 @@ Return ONLY the summary, nothing else."#, .chat() .chat_with_tools(messages.clone(), vec![]) .await?; - final_content = final_response.content.clone(); - messages.push(final_response); + final_content = final_response.content; } Ok(final_content) diff --git a/src/main.rs b/src/main.rs index e3ded45..dd2868f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -268,23 +268,7 @@ fn main() -> std::io::Result<()> { } // Spawn the nightly pre-generation scheduler (Section D). - { - use crate::database::{ - InsightDao, SqliteInsightDao, SqliteUserAiPrefsDao, UserAiPrefsDao, - }; - - let insight_dao: Arc>> = - Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); - let prefs_dao: Arc>> = - Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))); - - reels::spawn_pregen_scheduler( - app_state.clone(), - web::Data::new(insight_dao), - web::Data::new(prefs_dao), - ) - .await; - } + reels::spawn_pregen_scheduler(app_state.clone()).await; HttpServer::new(move || { let user_dao = SqliteUserDao::new(); diff --git a/src/reels/mod.rs b/src/reels/mod.rs index c51822c..1fc5b3b 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -56,9 +56,46 @@ pub fn normalize_library_key(libs: &[Library], param: Option<&str>) -> String { } } +/// Best-effort: mirror the latest client reel params into `user_ai_prefs` +/// so the nightly pre-gen scheduler can pick them up. Never fails the +/// caller regardless of DB errors. +fn capture_prefs( + app_state: &AppState, + prefs_dao: &web::Data>>>, + req: &web::Json, + library_param: Option<&str>, +) -> Result<(), anyhow::Error> { + use crate::database::models::UpsertUserAiPrefs; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs(); + let library = match library_param { + Some(p) if !p.is_empty() => { + // Resolve to the actual library id for the DB row. 
+ normalize_library_key(&app_state.libraries, Some(p)) + } + _ => "all".to_string(), + }; + let mut dao = prefs_dao.lock().expect("lock"); + let ctx = opentelemetry::Context::new(); + dao.upsert_prefs( + &ctx, + &UpsertUserAiPrefs { + voice: req.voice.clone().filter(|s| !s.is_empty()), + tz_offset_minutes: Some( + req.timezone_offset_minutes + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()), + ), + library: Some(library), + updated_at: now as i64, + }, + ) + .map_err(|e| anyhow::anyhow!("failed to upsert user_ai_prefs: {e}")) +} + /// Which scripting strategy to use for the reel narration. #[derive(Clone, Copy)] -#[allow(dead_code)] pub enum ScripterMode { /// Fast path: single LLM call via the direct client. Fast, @@ -348,6 +385,7 @@ pub async fn create_reel_handler( app_state: web::Data, exif_dao: web::Data>>, insight_dao: web::Data>>, + prefs_dao: web::Data>>>, ) -> impl Responder { let span_context = extract_context_from_request(&http_request); @@ -416,7 +454,9 @@ pub async fn create_reel_handler( abort: None, }, ); - return HttpResponse::Accepted().json(ReelJobCreatedResponse { + // Capture params for passive prefs mirror (best-effort, never fails). + let _ = capture_prefs(&app_state, &prefs_dao, &req, req.library.as_deref()); + HttpResponse::Accepted().json(ReelJobCreatedResponse { job_id: job_id.to_string(), status: ReelJobStatus::Done, }); @@ -474,6 +514,9 @@ pub async fn create_reel_handler( }); with_job(job_id, |job| job.abort = Some(handle.abort_handle())); + // Capture params for passive prefs mirror (best-effort, never fails). 
+ let _ = capture_prefs(&app_state, &prefs_dao, &req, req.library.as_deref()); + HttpResponse::Accepted().json(ReelJobCreatedResponse { job_id: job_id.to_string(), status: ReelJobStatus::Queued, @@ -861,18 +904,22 @@ fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option u32 { std::env::var("REEL_PREGEN_HOUR") .ok() - .and_then(|v| v.parse().ok()) + .and_then(|v| v.trim().parse().ok()) + .filter(|h| *h <= 23) .unwrap_or(3) } /// Env: "1" (default, Monday). Day of week for weekly pre-gen (0=Sun, 1=Mon, ...). +/// Clamped to 0-6; invalid values fall back to default. fn pregen_week_dow() -> u32 { std::env::var("REEL_PREGEN_WEEK_DOW") .ok() - .and_then(|v| v.parse().ok()) + .and_then(|v| v.trim().parse().ok()) + .filter(|d| *d <= 6) .unwrap_or(1) } @@ -881,8 +928,10 @@ fn pregen_week_dow() -> u32 { /// DST shifts. pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime, run_hour: u32) -> u64 { let now_hour = now.hour(); - let diff = if now_hour >= run_hour { + let diff = if now_hour > run_hour { 24 - now_hour + run_hour + } else if now_hour == run_hour { + 0 } else { run_hour - now_hour }; @@ -891,26 +940,22 @@ pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime, run /// Load pre-gen parameters: tries the user_ai_prefs DB row first, falls back /// to env vars, then to server-local defaults. 
-fn load_pregen_params( - prefs_dao: &web::Data>>>, -) -> (i32, Option, String) { +fn load_pregen_params(app_state: &AppState) -> (i32, Option, String) { // Try DB row first - if let Ok(mut dao) = prefs_dao.lock() { + if let Ok(mut dao) = app_state.user_ai_prefs_dao.lock() { let ctx = opentelemetry::Context::new(); if let Ok(Some(prefs)) = dao.get_prefs(&ctx) { - let tz = prefs - .tz_offset_minutes - .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()); + let tz = prefs.tz_offset_minutes.unwrap_or_else(fixed_tz_offset); let voice = prefs.voice; let library = prefs.library.unwrap_or_else(|| "all".to_string()); return (tz, voice, library); } } - // Fall back to env + // Fall back to env (explicit offset overrides auto-detect) let tz = std::env::var("REEL_PREGEN_TZ_OFFSET_MINUTES") .ok() .and_then(|v| v.parse().ok()) - .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()); + .unwrap_or_else(fixed_tz_offset); let voice = std::env::var("REEL_PREGEN_VOICE").ok(); let library = std::env::var("REEL_PREGEN_LIBRARY") .ok() @@ -918,12 +963,19 @@ fn load_pregen_params( (tz, voice, library) } +/// Fixed timezone offset: reads `REEL_PREGEN_TZ_FIXED_MINUTES` (e.g. "-480" +/// for US Pacific Standard Time) when set, falling back to the system local offset. Using +/// a fixed offset avoids DST shifts changing the pre-gen schedule halfway +/// through the year. +fn fixed_tz_offset() -> i32 { + std::env::var("REEL_PREGEN_TZ_FIXED_MINUTES") + .ok() + .and_then(|v| v.trim().parse().ok()) + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()) +} + /// Spawn the nightly pre-generation scheduler. Runs behind `REEL_PREGEN_ENABLED`. 
-pub(crate) async fn spawn_pregen_scheduler( - app_state: web::Data, - insight_dao: web::Data>>>, - prefs_dao: web::Data>>>, -) { +pub(crate) async fn spawn_pregen_scheduler(app_state: web::Data) { if std::env::var("REEL_PREGEN_ENABLED").ok() != Some("1".to_string()) { log::info!("Reel pre-generation scheduler disabled (REEL_PREGEN_ENABLED != 1)"); return; @@ -942,7 +994,7 @@ pub(crate) async fn spawn_pregen_scheduler( log::debug!("Next pre-gen run in {}s", sleep_secs); tokio::time::sleep(std::time::Duration::from_secs(sleep_secs)).await; - if let Err(e) = run_pregen_batch(&app_state, &insight_dao, &prefs_dao).await { + if let Err(e) = run_pregen_batch(&app_state).await { log::error!("Reel pre-generation batch failed: {}", e); } } @@ -950,11 +1002,7 @@ pub(crate) async fn spawn_pregen_scheduler( } /// Run the pre-generation batch for all applicable spans. -async fn run_pregen_batch( - app_state: &AppState, - insight_dao: &web::Data>>>, - prefs_dao: &web::Data>>>, -) -> anyhow::Result<()> { +async fn run_pregen_batch(app_state: &AppState) -> anyhow::Result<()> { let now = chrono::Local::now(); let weekday = now.weekday().num_days_from_sunday(); // 0=Sun, 1=Mon, ... let day_of_month = now.day(); @@ -967,11 +1015,10 @@ async fn run_pregen_batch( spans.push("month"); } - let (tz, voice, library) = load_pregen_params(prefs_dao); + let (tz, voice, library) = load_pregen_params(app_state); for span in spans { - if let Err(e) = pregen_one(app_state, insight_dao, span, tz, voice.clone(), &library).await - { + if let Err(e) = pregen_one(app_state, span, tz, voice.clone(), &library).await { log::error!("Pre-gen failed for span={}: {}", span, e); } } @@ -982,7 +1029,6 @@ async fn run_pregen_batch( /// Pre-generate a single reel for the given span. 
async fn pregen_one( app_state: &AppState, - insight_dao: &web::Data>>>, span: &str, tz: i32, voice: Option, @@ -1006,11 +1052,10 @@ async fn pregen_one( max_segments: 24, }; - let exif_dao: Arc>> = Arc::new(StdMutex::new(Box::new( - crate::database::SqliteExifDao::new(), - ))); + let exif_dao = app_state.insight_generator.exif_dao(); + let insight_dao = app_state.insight_generator.insight_dao(); let ctx = opentelemetry::Context::new(); - let (planned, reel_meta) = match selector::resolve(app_state, &exif_dao, &ctx, &selector) { + let (planned, reel_meta) = match selector::resolve(app_state, exif_dao, &ctx, &selector) { Ok((p, m)) => (p, m), Err(e) => { log::warn!("Pre-gen resolve failed for span={}: {}", span, e); @@ -1042,7 +1087,7 @@ async fn pregen_one( let is_fresh = { let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); - dao.exists_fresh(&ctx, span, "all", RENDER_VERSION as i32, min_generated_at) + dao.exists_fresh(&ctx, span, library, RENDER_VERSION as i32, min_generated_at) .unwrap_or(false) }; @@ -1074,7 +1119,7 @@ async fn pregen_one( &ctx, &crate::database::models::InsertablePrecomputedReel { span: span.to_string(), - library_key: "all".to_string(), + library_key: library.to_string(), cache_key: key.clone(), output_path: mp4_path.to_string_lossy().to_string(), title, @@ -1094,7 +1139,7 @@ async fn pregen_one( let (title, mp4) = produce_reel( app_state, insight_dao, - &exif_dao, + exif_dao, planned, reel_meta, voice.clone(), @@ -1110,7 +1155,7 @@ async fn pregen_one( &ctx, &crate::database::models::InsertablePrecomputedReel { span: span.to_string(), - library_key: "all".to_string(), + library_key: library.to_string(), cache_key: key.clone(), output_path: mp4.to_string_lossy().to_string(), title, @@ -1129,9 +1174,8 @@ async fn pregen_one( #[cfg(test)] mod tests { use super::*; - use crate::ai::face_client::FaceClient; use crate::libraries::Library; - use crate::video::actors::StreamActor; + use chrono::TimeZone; fn photo(p: &str, lib: i32) 
-> SegmentMedia { SegmentMedia::Photo { @@ -1156,128 +1200,6 @@ mod tests { } } - /// Minimal AppState for tests that only need library lookup. - #[allow(dead_code)] - fn test_app_state() -> AppState { - use crate::ai::InsightGenerator; - use crate::ai::insight_chat::{ChatLockMap, InsightChatService}; - use crate::ai::turn_registry::TurnRegistry; - use crate::ai::{OllamaClient, SmsApiClient}; - use crate::database::{ - ExifDao, InsightDao, InsightGenerationJobDao, PreviewDao, SqliteExifDao, - SqliteInsightDao, SqliteInsightGenerationJobDao, SqlitePreviewDao, - }; - use crate::faces; - use crate::state::AppState; - use crate::tags::SqliteTagDao; - use actix::Actor; - use std::sync::Mutex; - - let temp_dir = tempfile::tempdir().expect("Failed to create temp directory"); - let base_path = temp_dir.path().to_path_buf(); - let base_path_str = base_path.to_string_lossy().to_string(); - - let test_lib = Library { - id: crate::libraries::PRIMARY_LIBRARY_ID, - name: "main".to_string(), - root_path: base_path_str.clone(), - enabled: true, - excluded_dirs: Vec::new(), - }; - - let ollama = OllamaClient::new( - "http://localhost:11434".to_string(), - None, - "llama3.2".to_string(), - None, - ); - let sms_client = SmsApiClient::new("http://localhost:8000".to_string(), None); - let apollo_client = crate::ai::apollo_client::ApolloClient::new(None); - - let insight_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); - let exif_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new(SqliteExifDao::new()))); - let daily_summary_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteDailySummaryDao::new(), - ))); - let insight_generator = InsightGenerator::new( - ollama.clone(), - None, - None, - sms_client.clone(), - apollo_client.clone(), - insight_dao.clone(), - exif_dao.clone(), - daily_summary_dao, - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteCalendarEventDao::new(), - 
))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteLocationHistoryDao::new(), - ))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteSearchHistoryDao::new(), - ))), - std::sync::Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))), - std::sync::Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new()))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteKnowledgeDao::new(), - ))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqlitePersonaDao::new(), - ))), - vec![test_lib.clone()], - ); - - let chat_locks: ChatLockMap = - std::sync::Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new())); - let insight_chat = std::sync::Arc::new(InsightChatService::new( - std::sync::Arc::new(insight_generator.clone()), - insight_dao.clone(), - chat_locks, - )); - let turn_registry = std::sync::Arc::new(TurnRegistry::new(300)); - let preview_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new()))); - let insight_job_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))); - let insight_job_handles: std::sync::Arc< - Mutex>, - > = std::sync::Arc::new(Mutex::new(std::collections::HashMap::new())); - - AppState::new( - std::sync::Arc::new(StreamActor {}.start()), - vec![test_lib], - base_path_str.clone(), - base_path_str.clone(), - base_path_str.clone(), - base_path_str.clone(), - Vec::new(), - ollama, - None, - Vec::new(), - None, - Vec::new(), - sms_client, - insight_generator, - insight_chat, - turn_registry, - preview_dao, - FaceClient::new(None), - crate::ai::clip_client::ClipClient::new(None), - insight_job_dao, - insight_job_handles, - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqlitePrecomputedReelDao::new(), - ))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteUserAiPrefsDao::new(), - ))), - ) - } - #[test] fn cache_key_is_stable_for_same_inputs() { let media = 
vec![photo("a.jpg", 1), photo("b.jpg", 1)]; @@ -1443,4 +1365,55 @@ mod tests { }]; assert_eq!(normalize_library_key(&libs, Some("missing")), "all"); } + + #[test] + fn secs_until_next_run_hour_same_hour_returns_zero() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 3, 30, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 3), 0); + } + + #[test] + fn secs_until_next_run_hour_future_today_returns_remaining() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 10, 0, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 14), 4 * 3600); + } + + #[test] + fn secs_until_next_run_hour_past_today_wraps() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 20, 0, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 3), (24 - 20 + 3) * 3600); + } + + #[test] + fn secs_until_next_run_hour_midnight() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 0, 0, 0) + .single() + .expect("valid datetime"); + // 0:00, run at 3 → 3 hours + assert_eq!(secs_until_next_run_hour(dt, 3), 3 * 3600); + // 0:00, run at 0 → 0 (immediate) + assert_eq!(secs_until_next_run_hour(dt, 0), 0); + } + + #[test] + fn secs_until_next_run_hour_last_hour() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 23, 30, 0) + .single() + .expect("valid datetime"); + // 23:30, run at 23 → 0 (still in hour 23) + assert_eq!(secs_until_next_run_hour(dt, 23), 0); + // 23:30, run at 0 → 1 hour + assert_eq!(secs_until_next_run_hour(dt, 0), 3600); + } } -- 2.52.0 From e4d8d374fb80f93475e6afc5dea920f8b39c589d Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 15:14:36 -0400 Subject: [PATCH 12/17] Reels pre-gen: fix runtime breakers from review (1-5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 
Drop the unregistered prefs_dao/reel_dao web::Data extractors from create_reel_handler / precomputed_reel_handler and read the DAOs off AppState instead (consistent with the scheduler). Missing app_data would have 500'd every POST /reels and /reels/precomputed at runtime. 2. Restore the dropped 'return' in the cache-hit branch — without it a cache hit fell through, overwrote the Done job with Queued, and re-ran the whole TTS+render pipeline on every request. 3. Make secs_until_next_run_hour minute/second-accurate so a batch that finishes inside the run hour sleeps ~24h instead of busy-looping (wake, re-run, sleep 0) for the rest of the hour. Tests updated. 4. Prune photo/user-bound tools (get_file_tags, get_faces_in_photo, recall_facts_for_photo, recall_facts_for_entity) from the agentic reel scripter's allow-list — they no-op/error with the empty file/user context and only burn iterations. 5. Align AGENTIC_SYSTEM_PROMPT's advertised tool list with the actual (pruned) allow-list. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 72 +++++++++++++++++++++++++-------------------- src/reels/script.rs | 20 ++++++++----- 2 files changed, 52 insertions(+), 40 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 1fc5b3b..7fc71b0 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -18,7 +18,7 @@ pub mod selector; use std::collections::HashMap; use std::path::{Path, PathBuf}; -use std::sync::{Arc, LazyLock, Mutex, Mutex as StdMutex}; +use std::sync::{LazyLock, Mutex, Mutex as StdMutex}; use std::time::{Duration, Instant}; use actix_files::NamedFile; @@ -30,7 +30,7 @@ use serde_json::json; use uuid::Uuid; use crate::data::Claims; -use crate::database::{ExifDao, InsightDao, PrecomputedReelDao, UserAiPrefsDao}; +use crate::database::{ExifDao, InsightDao}; use crate::libraries::{Library, resolve_library_param}; use crate::memories::MemoriesSpan; use crate::otel::extract_context_from_request; @@ -61,7 +61,6 @@ pub fn 
normalize_library_key(libs: &[Library], param: Option<&str>) -> String { /// caller regardless of DB errors. fn capture_prefs( app_state: &AppState, - prefs_dao: &web::Data>>>, req: &web::Json, library_param: Option<&str>, ) -> Result<(), anyhow::Error> { @@ -77,7 +76,7 @@ fn capture_prefs( } _ => "all".to_string(), }; - let mut dao = prefs_dao.lock().expect("lock"); + let mut dao = app_state.user_ai_prefs_dao.lock().expect("lock"); let ctx = opentelemetry::Context::new(); dao.upsert_prefs( &ctx, @@ -385,7 +384,6 @@ pub async fn create_reel_handler( app_state: web::Data, exif_dao: web::Data>>, insight_dao: web::Data>>, - prefs_dao: web::Data>>>, ) -> impl Responder { let span_context = extract_context_from_request(&http_request); @@ -455,8 +453,8 @@ pub async fn create_reel_handler( }, ); // Capture params for passive prefs mirror (best-effort, never fails). - let _ = capture_prefs(&app_state, &prefs_dao, &req, req.library.as_deref()); - HttpResponse::Accepted().json(ReelJobCreatedResponse { + let _ = capture_prefs(&app_state, &req, req.library.as_deref()); + return HttpResponse::Accepted().json(ReelJobCreatedResponse { job_id: job_id.to_string(), status: ReelJobStatus::Done, }); @@ -515,7 +513,7 @@ pub async fn create_reel_handler( with_job(job_id, |job| job.abort = Some(handle.abort_handle())); // Capture params for passive prefs mirror (best-effort, never fails). 
- let _ = capture_prefs(&app_state, &prefs_dao, &req, req.library.as_deref()); + let _ = capture_prefs(&app_state, &req, req.library.as_deref()); HttpResponse::Accepted().json(ReelJobCreatedResponse { job_id: job_id.to_string(), @@ -584,7 +582,6 @@ pub async fn precomputed_reel_handler( _claims: Claims, query: web::Query>, app_state: web::Data, - reel_dao: web::Data>>, ) -> impl Responder { let span = query.get("span").map(|s| s.as_str()).unwrap_or("day"); let library_key = normalize_library_key( @@ -605,7 +602,10 @@ pub async fn precomputed_reel_handler( let min_generated_at = now - (max_age_hours * 3600); let ctx = opentelemetry::Context::new(); - let mut dao = reel_dao.lock().expect("Unable to lock PrecomputedReelDao"); + let mut dao = app_state + .precomputed_reel_dao + .lock() + .expect("Unable to lock PrecomputedReelDao"); // Fast existence gate: is there a fresh row at all? if !dao @@ -923,19 +923,23 @@ fn pregen_week_dow() -> u32 { .unwrap_or(1) } -/// Pure: seconds until the next run of `run_hour` given the current local time. -/// Handles same-day vs wrap-around. Recomputed each loop iteration to absorb -/// DST shifts. +/// Pure: seconds until the next `run_hour:00:00` strictly after `now`. +/// +/// Minute/second-accurate (not just hour-granular): when `now` is already at or +/// past the target this wraps to the same hour tomorrow, so a batch that +/// finishes inside the run hour sleeps ~24h rather than busy-looping (waking, +/// re-running, and re-sleeping 0s) for the rest of that hour. The tradeoff is +/// that booting at or after `run_hour` waits until the next day. Recomputed each +/// loop iteration from `Local::now()` so DST shifts are absorbed. 
pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime, run_hour: u32) -> u64 { - let now_hour = now.hour(); - let diff = if now_hour > run_hour { - 24 - now_hour + run_hour - } else if now_hour == run_hour { - 0 + let now_secs = now.hour() * 3600 + now.minute() * 60 + now.second(); + let target_secs = run_hour * 3600; + let diff = if target_secs > now_secs { + target_secs - now_secs } else { - run_hour - now_hour + 86_400 - now_secs + target_secs }; - (diff * 3600) as u64 + diff as u64 } /// Load pre-gen parameters: tries the user_ai_prefs DB row first, falls back @@ -1367,21 +1371,25 @@ mod tests { } #[test] - fn secs_until_next_run_hour_same_hour_returns_zero() { + fn secs_until_next_run_hour_within_run_hour_wraps_to_tomorrow() { + // 03:30, run 3 → already past today's 03:00, so wait until tomorrow + // 03:00 (23h30m). Crucially NOT 0 — that would busy-loop the scheduler + // for the rest of the hour. let dt = chrono::Local .with_ymd_and_hms(2026, 6, 13, 3, 30, 0) .single() .expect("valid datetime"); - assert_eq!(secs_until_next_run_hour(dt, 3), 0); + assert_eq!(secs_until_next_run_hour(dt, 3), 23 * 3600 + 30 * 60); } #[test] - fn secs_until_next_run_hour_future_today_returns_remaining() { + fn secs_until_next_run_hour_future_today_counts_minutes() { + // 10:15 → 14:00 is 3h45m, not a whole-hour 4h (minutes count). 
let dt = chrono::Local - .with_ymd_and_hms(2026, 6, 13, 10, 0, 0) + .with_ymd_and_hms(2026, 6, 13, 10, 15, 0) .single() .expect("valid datetime"); - assert_eq!(secs_until_next_run_hour(dt, 14), 4 * 3600); + assert_eq!(secs_until_next_run_hour(dt, 14), 3 * 3600 + 45 * 60); } #[test] @@ -1401,19 +1409,19 @@ mod tests { .expect("valid datetime"); // 0:00, run at 3 → 3 hours assert_eq!(secs_until_next_run_hour(dt, 3), 3 * 3600); - // 0:00, run at 0 → 0 (immediate) - assert_eq!(secs_until_next_run_hour(dt, 0), 0); + // 0:00 exactly, run at 0 → wraps to next midnight (not 0, so no busy loop) + assert_eq!(secs_until_next_run_hour(dt, 0), 86_400); } #[test] - fn secs_until_next_run_hour_last_hour() { + fn secs_until_next_run_hour_just_before_target() { + // 23:30, run 0 → 30 minutes to midnight (minute-accurate, not 1h). let dt = chrono::Local .with_ymd_and_hms(2026, 6, 13, 23, 30, 0) .single() .expect("valid datetime"); - // 23:30, run at 23 → 0 (still in hour 23) - assert_eq!(secs_until_next_run_hour(dt, 23), 0); - // 23:30, run at 0 → 1 hour - assert_eq!(secs_until_next_run_hour(dt, 0), 3600); + assert_eq!(secs_until_next_run_hour(dt, 0), 30 * 60); + // 23:30, run 23 → already past today's 23:00, wait until tomorrow. + assert_eq!(secs_until_next_run_hour(dt, 23), 86_400 - 30 * 60); } } diff --git a/src/reels/script.rs b/src/reels/script.rs index 202a22c..858efd1 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -53,10 +53,12 @@ Be concrete and grounded in the details given; never invent names, places, or \ events that aren't supported. Keep each line to one or two short sentences that \ can be read aloud in a few seconds. Avoid generic filler like \"what a \ wonderful day\" — if you have little to go on, simply describe the moment \ -plainly.\n\nYou may call read-only tools (search_messages, get_file_tags, \ -reverse_geocode, get_current_datetime, recall_entities, recall_facts_for_photo, \ -recall_facts_for_entity) to ground each line in real context. 
Never invent \ -details. Return ONLY the JSON object, no prose or code fences."; +plainly.\n\nYou may call read-only tools (search_rag, search_messages, \ +get_sms_messages, get_calendar_events, get_location_history, reverse_geocode, \ +get_personal_place_at, recall_entities, get_current_datetime) to ground each \ +line in real context — e.g. reverse_geocode a moment's GPS to name the place, \ +or check the calendar/messages around its date. Never invent details. Return \ +ONLY the JSON object, no prose or code fences."; /// Maximum agentic tool iterations for pre-generation. Tunable via /// `REEL_PREGEN_MAX_TOOL_ITERS` (default 8). @@ -317,19 +319,21 @@ pub async fn generate_script_agentic( // then filter out write tools. let gate = generator.current_gate_opts_for_persona(false, None); let all_tools = InsightGenerator::build_tool_definitions(gate); + // Whole-reel calls have no single photo and no authenticated user, so the + // loop runs execute_tool with empty file/image context and user_id=0. Only + // tools that work without that context are useful here — photo/user-bound + // tools (get_file_tags, get_faces_in_photo, recall_facts_for_photo, + // recall_facts_for_entity) would just no-op or error, burning iterations, + // so they're excluded. 
let read_only_names: std::collections::HashSet<&str> = [ "search_rag", "search_messages", "get_sms_messages", "get_calendar_events", "get_location_history", - "get_file_tags", - "get_faces_in_photo", "reverse_geocode", "get_personal_place_at", "recall_entities", - "recall_facts_for_photo", - "recall_facts_for_entity", "get_current_datetime", ] .into_iter() -- 2.52.0 From ca007a618d50c3f58794280d02f56d05054e0ce3 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 15:19:41 -0400 Subject: [PATCH 13/17] Reels pre-gen: record true media count + real upsert for user_ai_prefs - pregen_one recorded media_count as planned.len() (beat count); record the actual media item total (media.len(), photos + clips) in both the cache-hit and freshly-rendered ledger paths. Drops the redundant photo_count binding. - Replace upsert_prefs's insert-then-catch-error-then-update dance with a single atomic INSERT ... ON CONFLICT(id) DO UPDATE. Explicit id=1 makes the conflict target deterministic; explicit column .set((...)) keeps None -> NULL overwrite semantics so the row mirrors the latest request exactly, and genuine insert errors surface instead of being swallowed. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/database/user_ai_prefs_dao.rs | 46 ++++++++++++++----------------- src/reels/mod.rs | 7 +++-- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/src/database/user_ai_prefs_dao.rs b/src/database/user_ai_prefs_dao.rs index d58a56c..129ef0c 100644 --- a/src/database/user_ai_prefs_dao.rs +++ b/src/database/user_ai_prefs_dao.rs @@ -84,32 +84,26 @@ impl UserAiPrefsDao for SqliteUserAiPrefsDao { .lock() .expect("Unable to lock UserAiPrefsDao"); - // SQLite: INSERT on first call, UPDATE on subsequent calls. - // The first INSERT creates the row with id=1 (auto-increment). - // Subsequent calls UPDATE the existing row. 
- let result = diesel::insert_into(dsl::user_ai_prefs) - .values(prefs) - .execute(connection.deref_mut()); - - match result { - Ok(_) => { - // First insert succeeded. - Ok(()) - } - Err(_e) => { - // Insert failed (likely due to duplicate key). Update instead. - diesel::update(dsl::user_ai_prefs.filter(dsl::id.eq(1))) - .set(( - dsl::voice.eq(&prefs.voice), - dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes), - dsl::library.eq(&prefs.library), - dsl::updated_at.eq(&prefs.updated_at), - )) - .execute(connection.deref_mut()) - .map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?; - Ok(()) - } - } + // Single-row table (id=1): one atomic upsert. The explicit id=1 + // makes the conflict target deterministic so the second call + // updates in place rather than tripping the CHECK(id=1) constraint, + // and real insert errors surface instead of being swallowed into a + // separate update branch. The columns are set explicitly (rather + // than via AsChangeset) so a None field overwrites to NULL — the + // row mirrors the latest request exactly, not a merge of past ones. + diesel::insert_into(dsl::user_ai_prefs) + .values((dsl::id.eq(1), prefs)) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::voice.eq(&prefs.voice), + dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes), + dsl::library.eq(&prefs.library), + dsl::updated_at.eq(&prefs.updated_at), + )) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?; + Ok(()) }) .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) } diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 7fc71b0..059ce43 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -1075,6 +1075,8 @@ async fn pregen_one( // Flatten every media item across beats (in order) into the cache key. let media: Vec = planned.iter().flat_map(|b| b.media.clone()).collect(); let key = cache_key(&selector, &media, voice.as_deref()); + // Total media items shown (photos + clips), not beat count. 
+ let media_count = media.len() as i32; // Dedup: check if fresh ledger row exists let now = std::time::SystemTime::now() @@ -1127,7 +1129,7 @@ async fn pregen_one( cache_key: key.clone(), output_path: mp4_path.to_string_lossy().to_string(), title, - media_count: planned.len() as i32, + media_count, render_version: RENDER_VERSION as i32, tz_offset_minutes: tz, voice: voice.clone(), @@ -1139,7 +1141,6 @@ async fn pregen_one( // Generate the reel log::info!("Generating precomputed reel for span={}, key={}", span, key); - let photo_count = planned.len() as i32; let (title, mp4) = produce_reel( app_state, insight_dao, @@ -1163,7 +1164,7 @@ async fn pregen_one( cache_key: key.clone(), output_path: mp4.to_string_lossy().to_string(), title, - media_count: photo_count, + media_count, render_version: RENDER_VERSION as i32, tz_offset_minutes: tz, voice: voice.clone(), -- 2.52.0 From 19fc1bbdf81ff1d6a1d2388b1efd0434c1686706 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 23:12:54 -0400 Subject: [PATCH 14/17] Reels pre-gen: use DEFAULT_MAX_SEGMENTS so cache keys match on-demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pregen_one hardcoded max_segments: 24 while create_reel_handler defaults to DEFAULT_MAX_SEGMENTS (40). Since the cache key encodes the raw max_segments, the pre-generated reel's key never matched the client's on-demand request, so POST /reels cache-hit an older max=40 reel and the agentic pre-gen file was left orphaned. Align to DEFAULT_MAX_SEGMENTS (as the plan specified) so the on-demand cache-hit path serves the pre-gen reel. Content is unchanged — the actual beat count is duration-budgeted either way; only the key descriptor differed. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 059ce43..2a4e6e9 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -1053,7 +1053,11 @@ async fn pregen_one( } else { Some(library.to_string()) }, - max_segments: 24, + // Must match the on-demand default (create_reel_handler) so the cache + // key — which encodes the raw max_segments — lines up and the on-demand + // cache-hit path serves this pre-generated reel. The client sends no + // max_segments, so it defaults to DEFAULT_MAX_SEGMENTS there too. + max_segments: selector::DEFAULT_MAX_SEGMENTS, }; let exif_dao = app_state.insight_generator.exif_dao(); -- 2.52.0 From b52b1eb32334e70446835d526da6558675895f25 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 23:14:39 -0400 Subject: [PATCH 15/17] Reels pre-gen: make dedup cache-key-aware so key changes regenerate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit exists_fresh only matched (span, library, render_version, age), so a cache-key change that doesn't bump RENDER_VERSION (e.g. the max_segments alignment, or any future selection-logic tweak) left last night's ledger row looking 'fresh' — the nightly run would skip and the orphaned reel would persist. Dedup now compares the stored cache_key to the freshly computed key (and confirms the mp4 exists), so a changed key forces a regen within the freshness window. exists_fresh stays as the HTTP endpoint's fast gate. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 2a4e6e9..c5d7492 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -1095,14 +1095,29 @@ async fn pregen_one( }; let min_generated_at = now - (max_age_hours as i64 * 3600); - let is_fresh = { + // Skip only when a fresh ledger row points at THIS exact cache key (same + // media, params, render version) and its file still exists. Comparing the + // stored cache_key — not just (span, library) — means a key change from + // selection-logic/params drift that doesn't bump RENDER_VERSION still forces + // a regen within the freshness window, instead of leaving a stale row that + // points at an orphaned reel. + let already_current = { let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); - dao.exists_fresh(&ctx, span, library, RENDER_VERSION as i32, min_generated_at) - .unwrap_or(false) + matches!( + dao.latest_for(&ctx, span, library), + Ok(Some(row)) + if row.cache_key == key + && row.render_version == RENDER_VERSION as i32 + && row.generated_at >= min_generated_at + ) && reel_mp4_path(app_state, &key).exists() }; - if is_fresh { - log::info!("Fresh precomputed reel exists for span={}, skipping", span); + if already_current { + log::info!( + "Fresh precomputed reel already current for span={} key={}, skipping", + span, + key + ); return Ok(()); } -- 2.52.0 From 664b3694f8088db76b734790dd236f4b8cd3e9b9 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 23:16:14 -0400 Subject: [PATCH 16/17] Reels pre-gen: always render the agentic reel, don't adopt on-demand mp4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Past the key-aware dedup, any mp4 already at the cache key was not pre-generated by us (no matching ledger row) — typically an on-demand fast-scripted reel sharing the key after the 
max_segments alignment. Adopting it recorded a ledger row pointing at the fast reel, silently defeating agentic pre-gen. Drop the adopt-existing-mp4 shortcut and always produce_reel (atomic overwrite). Worst case is one redundant re-render if a prior run crashed between render and ledger write. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 45 +++++++-------------------------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index c5d7492..95769ad 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -1121,44 +1121,13 @@ async fn pregen_one( return Ok(()); } - // Check if MP4 already on disk (from a previous run that crashed after render) - let mp4_path = reel_mp4_path(app_state, &key); - if mp4_path.exists() { - log::info!( - "Precomputed reel MP4 already exists for key={}, recording ledger and skipping render", - key - ); - // Read title from sidecar if available - let sidecar_path = mp4_path.with_extension("json"); - let title = if sidecar_path.exists() { - let sidecar = tokio::fs::read_to_string(&sidecar_path).await.ok(); - sidecar - .and_then(|s| serde_json::from_str::(&s).ok()) - .map(|s| s.title) - .unwrap_or_else(|| format!("{} reel", span)) - } else { - format!("{} reel", span) - }; - let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); - reel_dao.record_reel( - &ctx, - &crate::database::models::InsertablePrecomputedReel { - span: span.to_string(), - library_key: library.to_string(), - cache_key: key.clone(), - output_path: mp4_path.to_string_lossy().to_string(), - title, - media_count, - render_version: RENDER_VERSION as i32, - tz_offset_minutes: tz, - voice: voice.clone(), - generated_at: now, - }, - )?; - return Ok(()); - } - - // Generate the reel + // Past the key-aware dedup above, any MP4 already at this key was NOT + // pre-generated by us (it has no matching ledger row) — most likely an + // on-demand fast-scripted reel that happens to share the 
key. Don't adopt + // it: regenerate so the precomputed reel is the agentic one. produce_reel + // publishes atomically, overwriting whatever is there. (The narrow + // render-succeeded-but-ledger-write-failed crash window just costs one + // redundant re-render next run.) log::info!("Generating precomputed reel for span={}, key={}", span, key); let (title, mp4) = produce_reel( app_state, -- 2.52.0 From 7e21213181be66ed092a2f48e877be408c895ee6 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 23:27:32 -0400 Subject: [PATCH 17/17] Reels: bound disk/ledger growth (pre-gen prune + on-demand cache sweep) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nothing reaped reels before, so the on-disk cache and ledger grew unbounded — each night's daily reel is a new ~4MB file + ledger row that's stale within ~26h. - Pre-gen self-prune: after recording a reel, prune_superseded keeps the newest PREGEN_KEEP_PER_SCOPE (2) rows per (span, library) and unlinks the superseded reels' mp4+sidecar. Caps the ledger/disk at ~spans×libraries×2. - On-disk sweeper (spawn_reel_cache_sweeper): every 24h, removes reel mp4s with no ledger row and no live job older than REEL_CACHE_MAX_AGE_DAYS (7) — bounding the on-demand cache, which has no ledger row and otherwise grows forever — plus crashed-render cruft (.mp4.tmp/.concat.txt/orphan sidecars). Runs regardless of REEL_PREGEN_ENABLED; disable with REEL_CACHE_SWEEP_ENABLED=0. - New DAO methods prune_superseded + all_cache_keys (with tests); env knobs documented in .env.example. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .env.example | 7 + src/database/precomputed_reel_dao.rs | 118 +++++++++++++++++ src/main.rs | 2 + src/reels/mod.rs | 186 ++++++++++++++++++++++++--- 4 files changed, 296 insertions(+), 17 deletions(-) diff --git a/.env.example b/.env.example index a7bd7e5..2e431bc 100644 --- a/.env.example +++ b/.env.example @@ -160,3 +160,10 @@ SEARCH_RAG_RERANK=0 # REEL_PREGEN_LIBRARY=all # Max agentic tool iterations for pre-gen scripter. Default 8. # REEL_PREGEN_MAX_TOOL_ITERS=8 +# +# On-disk reel cache sweep (runs every 24h, independent of pre-gen). Removes +# reel MP4s with no ledger row + no live job that are older than the max age — +# i.e. the on-demand cache, which otherwise grows forever. Set to 0 to disable. +# REEL_CACHE_SWEEP_ENABLED=1 +# Age (days) before an unreferenced reel MP4 is swept. Default 7. +# REEL_CACHE_MAX_AGE_DAYS=7 diff --git a/src/database/precomputed_reel_dao.rs b/src/database/precomputed_reel_dao.rs index 7acc098..b66573b 100644 --- a/src/database/precomputed_reel_dao.rs +++ b/src/database/precomputed_reel_dao.rs @@ -41,6 +41,23 @@ pub trait PrecomputedReelDao: Sync + Send { render_version: i32, min_generated_at: i64, ) -> Result; + + /// Delete all but the newest `keep` rows for (span, library_key), returning + /// the deleted rows so the caller can unlink their output files. Used by the + /// nightly job to retire superseded reels (e.g. yesterday's daily). + #[allow(dead_code)] + fn prune_superseded( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + keep: usize, + ) -> Result, DbError>; + + /// Every cache_key currently in the ledger. Used by the on-disk cache sweep + /// to protect files a ledger row still points at. 
+ #[allow(dead_code)] + fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result, DbError>; } pub struct SqlitePrecomputedReelDao { @@ -148,6 +165,60 @@ impl PrecomputedReelDao for SqlitePrecomputedReelDao { }) .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) } + + fn prune_superseded( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + keep: usize, + ) -> Result, DbError> { + trace_db_call(context, "delete", "prune_superseded", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + // Newest first; everything past `keep` is superseded. The table + // holds at most a handful of rows per (span, library), so loading + // and slicing in Rust is cheaper than a correlated subquery. + let mut rows: Vec = dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .order(dsl::generated_at.desc()) + .load::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to load reels for prune: {}", e))?; + + let stale = rows.split_off(rows.len().min(keep)); + if !stale.is_empty() { + let ids: Vec = stale.iter().map(|r| r.id).collect(); + diesel::delete(dsl::precomputed_reels.filter(dsl::id.eq_any(ids))) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to delete superseded reels: {}", e))?; + } + Ok(stale) + }) + .map_err(|e| DbError::log(DbErrorKind::UpdateError, e)) + } + + fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result, DbError> { + trace_db_call(context, "query", "all_cache_keys", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + dsl::precomputed_reels + .select(dsl::cache_key) + .load::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to load cache keys: {}", e)) + }) + .map_err(|e| 
DbError::log(DbErrorKind::QueryError, e)) + } } #[cfg(test)] @@ -318,4 +389,51 @@ mod tests { assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap()); assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap()); } + + #[test] + fn prune_superseded_keeps_newest_and_returns_deleted() { + let mut dao = setup_dao(); + let ctx = ctx(); + // Three day/lib1 reels at increasing timestamps, plus an unrelated one. + for (i, key) in ["k1", "k2", "k3"].iter().enumerate() { + dao.record_reel( + &ctx, + &InsertablePrecomputedReel { + cache_key: key.to_string(), + generated_at: 1_000_000 + i as i64 * 1000, + ..sample_row() + }, + ) + .unwrap(); + } + let other = InsertablePrecomputedReel { + library_key: "2".to_string(), + cache_key: "other".to_string(), + ..sample_row() + }; + dao.record_reel(&ctx, &other).unwrap(); + + // Keep the newest 2 of (day, "1"); k1 (oldest) is superseded. + let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap(); + assert_eq!(deleted.len(), 1); + assert_eq!(deleted[0].cache_key, "k1"); + + // The newest 2 survive; the other-library row is untouched. + let keys = dao.all_cache_keys(&ctx).unwrap(); + assert_eq!(keys.len(), 3); + assert!(keys.contains(&"k2".to_string())); + assert!(keys.contains(&"k3".to_string())); + assert!(keys.contains(&"other".to_string())); + assert!(!keys.contains(&"k1".to_string())); + } + + #[test] + fn prune_superseded_noop_when_within_keep() { + let mut dao = setup_dao(); + let ctx = ctx(); + dao.record_reel(&ctx, &sample_row()).unwrap(); + let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap(); + assert!(deleted.is_empty()); + assert_eq!(dao.all_cache_keys(&ctx).unwrap().len(), 1); + } } diff --git a/src/main.rs b/src/main.rs index dd2868f..e420d8b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -269,6 +269,8 @@ fn main() -> std::io::Result<()> { // Spawn the nightly pre-generation scheduler (Section D). 
reels::spawn_pregen_scheduler(app_state.clone()).await; + // Spawn the on-disk reel-cache sweeper (bounds pre-gen + on-demand reels). + reels::spawn_reel_cache_sweeper(app_state.clone()).await; HttpServer::new(move || { let user_dao = SqliteUserDao::new(); diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 95769ad..afe2ced 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -46,6 +46,21 @@ const REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS: u64 = 192; /// Maximum age for a precomputed month reel. const REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS: u64 = 768; +/// How many precomputed reels to keep per (span, library). The newest is the +/// one served; one extra is a grace window so a regen mid-flight (or a client +/// that started a fetch just before the swap) isn't left without a file. +const PREGEN_KEEP_PER_SCOPE: usize = 2; + +/// On-disk reel cache sweep: an unreferenced reel MP4 older than this is +/// removed. Catches the on-demand cache (which has no ledger row and otherwise +/// grows forever) and any pre-gen orphans. Tunable via `REEL_CACHE_MAX_AGE_DAYS`. +const REEL_CACHE_MAX_AGE_DAYS_DEFAULT: u64 = 7; +/// Interval between on-disk cache sweeps. +const REEL_CACHE_SWEEP_INTERVAL_SECS: u64 = 24 * 3600; +/// Transient render artifacts (`.mp4.tmp`, `.concat.txt`, orphaned sidecars) +/// older than this are leftovers from a crashed render and safe to remove. +const REEL_TMP_MAX_AGE_SECS: u64 = 3600; + /// Resolve a library request parameter to a stable key string. /// Returns the library's id as a string when found, or `"all"` when /// the param is absent or the lookup fails. 
@@ -1142,28 +1157,165 @@ async fn pregen_one( ) .await?; - // Record to ledger - let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); - reel_dao.record_reel( - &ctx, - &crate::database::models::InsertablePrecomputedReel { - span: span.to_string(), - library_key: library.to_string(), - cache_key: key.clone(), - output_path: mp4.to_string_lossy().to_string(), - title, - media_count, - render_version: RENDER_VERSION as i32, - tz_offset_minutes: tz, - voice: voice.clone(), - generated_at: now, - }, - )?; + // Record to ledger, then retire superseded reels for this (span, library) + // — yesterday's daily, an older render-version, etc. — keeping a small + // grace window. Done under one lock so the prune sees the row we just wrote. + let superseded = { + let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); + reel_dao.record_reel( + &ctx, + &crate::database::models::InsertablePrecomputedReel { + span: span.to_string(), + library_key: library.to_string(), + cache_key: key.clone(), + output_path: mp4.to_string_lossy().to_string(), + title, + media_count, + render_version: RENDER_VERSION as i32, + tz_offset_minutes: tz, + voice: voice.clone(), + generated_at: now, + }, + )?; + reel_dao + .prune_superseded(&ctx, span, library, PREGEN_KEEP_PER_SCOPE) + .unwrap_or_default() + }; + for row in &superseded { + delete_reel_files(&row.output_path); + } + if !superseded.is_empty() { + log::info!( + "Pruned {} superseded precomputed reel(s) for span={}", + superseded.len(), + span + ); + } log::info!("Precomputed reel generated for span={}, key={}", span, key); Ok(()) } +// --- On-disk cache sweep ----------------------------------------------------- + +/// Best-effort unlink of a reel's MP4 and its `.json` sidecar. 
+fn delete_reel_files(mp4_output_path: &str) { + let mp4 = Path::new(mp4_output_path); + let _ = std::fs::remove_file(mp4); + let _ = std::fs::remove_file(mp4.with_extension("json")); +} + +/// Max age (seconds) before an unreferenced reel MP4 is swept. +fn reel_cache_max_age_secs() -> u64 { + std::env::var("REEL_CACHE_MAX_AGE_DAYS") + .ok() + .and_then(|v| v.trim().parse::().ok()) + .filter(|d| *d > 0) + .unwrap_or(REEL_CACHE_MAX_AGE_DAYS_DEFAULT) + * 86_400 +} + +/// Spawn the periodic on-disk reel-cache sweeper. Runs independently of the +/// pre-gen scheduler because the on-demand cache grows whether or not pre-gen +/// is enabled. Disable with `REEL_CACHE_SWEEP_ENABLED=0`. +pub(crate) async fn spawn_reel_cache_sweeper(app_state: web::Data) { + if std::env::var("REEL_CACHE_SWEEP_ENABLED").ok().as_deref() == Some("0") { + log::info!("Reel cache sweeper disabled (REEL_CACHE_SWEEP_ENABLED=0)"); + return; + } + tokio::spawn(async move { + // Settle after startup, then sweep on a fixed cadence. + tokio::time::sleep(Duration::from_secs(300)).await; + loop { + let removed = sweep_reel_cache(&app_state); + if removed > 0 { + log::info!("Reel cache sweep removed {removed} stale file(s)"); + } + tokio::time::sleep(Duration::from_secs(REEL_CACHE_SWEEP_INTERVAL_SECS)).await; + } + }); +} + +/// One sweep of `reels_path`. Removes: stale render artifacts (`.mp4.tmp`, +/// `.concat.txt`, orphaned sidecars) from crashed runs; and reel MP4s that no +/// ledger row references, that no live job points at, and that are older than +/// the cache max age (the on-demand cache, which has no ledger row). Returns the +/// number of files removed. Best-effort — any IO error on one entry is skipped. 
+fn sweep_reel_cache(app_state: &AppState) -> usize { + let dir = Path::new(&app_state.reels_path); + let read_dir = match std::fs::read_dir(dir) { + Ok(rd) => rd, + Err(_) => return 0, // dir not created yet → nothing to sweep + }; + + // Files a ledger row still points at (current pre-gen reels). + let protected: std::collections::HashSet = { + let ctx = opentelemetry::Context::new(); + let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); + dao.all_cache_keys(&ctx) + .unwrap_or_default() + .into_iter() + .collect() + }; + // Outputs of live in-memory jobs (a Done reel a client may still be fetching). + let active: std::collections::HashSet = { + let jobs = REEL_JOBS.lock().unwrap(); + jobs.values() + .filter_map(|j| j.output_path.as_ref()) + .map(|p| p.to_string_lossy().to_string()) + .collect() + }; + + let now = std::time::SystemTime::now(); + let max_age = Duration::from_secs(reel_cache_max_age_secs()); + let tmp_max_age = Duration::from_secs(REEL_TMP_MAX_AGE_SECS); + let mut removed = 0usize; + + for entry in read_dir.flatten() { + let path = entry.path(); + let Some(name) = path.file_name().and_then(|n| n.to_str()) else { + continue; + }; + let age = entry + .metadata() + .and_then(|m| m.modified()) + .ok() + .and_then(|t| now.duration_since(t).ok()) + .unwrap_or_default(); + + // Transient render artifacts from a crashed run. + if name.ends_with(".mp4.tmp") || name.ends_with(".concat.txt") { + if age > tmp_max_age && std::fs::remove_file(&path).is_ok() { + removed += 1; + } + continue; + } + + // Reel MP4: keep if referenced (ledger or live job) or still recent. + if let Some(key) = name.strip_suffix(".mp4") { + let p = path.to_string_lossy().to_string(); + if protected.contains(key) || active.contains(&p) || age < max_age { + continue; + } + if std::fs::remove_file(&path).is_ok() { + let _ = std::fs::remove_file(path.with_extension("json")); + removed += 1; + } + continue; + } + + // Orphaned sidecar (its MP4 is gone). 
+ if name.ends_with(".json") + && !path.with_extension("mp4").exists() + && age > tmp_max_age + && std::fs::remove_file(&path).is_ok() + { + removed += 1; + } + } + removed +} + #[cfg(test)] mod tests { use super::*; -- 2.52.0