From e3f731b3b26eb7f136868921a01ba2370366de4d Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 22:31:08 -0400 Subject: [PATCH 01/26] Add memory-reel backend: on-demand narrated photo slideshow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New POST /reels + GET /reels/{id} (+ /video) build an MP4 slideshow of a memory span (day/week/month), narrated by the LLM in a cloned voice. Pipeline (src/reels/): a selector resolves which photos + reel metadata, the scripter writes one narration line per photo via a single LLM call (reusing each photo's cached insight as context — no fresh vision calls, so reel generation stays off the GPU's vision slot), each line is synthesized to speech, and the renderer assembles stills + narration via ffmpeg. Jobs run in the background (mirroring the TTS speech-job registry) since a reel takes minutes; the finished MP4 is cached on disk keyed by the selection so a repeat request is instant. The segment model is media-typed (Photo today) so a video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in without reworking the pipeline. Ken Burns motion is implemented but defaulted off pending a visual check on the GPU box. Supporting changes: - memories: extract gather_memory_items() so the reel selector reuses the exact window/exclusion/tz/sort logic behind /memories. - ai::tts: add synthesize_serialized() so reel narration honors the same single-GPU permit + write lease as user TTS requests. - video::ffmpeg: make get_duration_seconds() pub for narration timing. - AppState: reels_path (REELS_DIRECTORY, defaults beside preview clips). Pure logic (cache key, script parsing, ffmpeg arg/filter construction, even sampling, segment timing) is unit-tested (26 tests). The runtime path (ffmpeg render, TTS, LLM) needs a real run on the GPU host to verify end-to-end — not exercisable in CI. 
Co-Authored-By: Claude Fable 5 --- src/ai/tts.rs | 31 +++ src/main.rs | 4 + src/memories.rs | 86 ++++-- src/reels/mod.rs | 625 ++++++++++++++++++++++++++++++++++++++++++ src/reels/render.rs | 338 +++++++++++++++++++++++ src/reels/script.rs | 289 +++++++++++++++++++ src/reels/selector.rs | 252 +++++++++++++++++ src/state.rs | 18 ++ src/video/ffmpeg.rs | 2 +- 9 files changed, 1615 insertions(+), 30 deletions(-) create mode 100644 src/reels/mod.rs create mode 100644 src/reels/render.rs create mode 100644 src/reels/script.rs create mode 100644 src/reels/selector.rs diff --git a/src/ai/tts.rs b/src/ai/tts.rs index 08d9dcd..4e7544c 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -23,6 +23,7 @@ use std::time::{Duration, Instant}; use tokio::sync::Semaphore; use uuid::Uuid; +use crate::ai::llamacpp::LlamaCppClient; use crate::data::Claims; use crate::file_types::{is_audio_file, is_video_file}; use crate::files::is_valid_full_path; @@ -473,6 +474,36 @@ pub struct TtsJobStatusResponse { pub error: Option, } +/// Synthesize speech honoring the global single-GPU serialization +/// (`TTS_PERMIT`) and the GPU write lease, exactly as the speech-job path does. +/// Queues on the permit rather than fast-failing, so callers wait their turn +/// instead of contending. Text is run through the same markdown/emoji cleanup + +/// pronunciation pipeline as the HTTP handlers. Reused by the memory-reel +/// pipeline to narrate each segment without racing a user's TTS request on the +/// Chatterbox GPU. +pub async fn synthesize_serialized( + client: &LlamaCppClient, + text: &str, + voice: Option<&str>, + format: &str, +) -> anyhow::Result> { + let prepared = prepare_for_tts(text); + if prepared.is_empty() { + anyhow::bail!("nothing to synthesize after cleanup"); + } + // Queue rather than fast-fail (mirrors create_speech_job_handler). 
+ let _permit = TTS_PERMIT + .acquire() + .await + .map_err(|_| anyhow::anyhow!("TTS permit closed"))?; + // Wait for the LLM side to release the GPU before the request timeout + // starts (see ai::gpu). + let _gpu = crate::ai::gpu::tts_lease().await; + client + .text_to_speech(&prepared, voice, format, None, None, None) + .await +} + /// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses. /// Returns 202 + a job id immediately; the synth queues on the single GPU /// permit (instead of fast-failing 429) and the client polls the job until diff --git a/src/main.rs b/src/main.rs index 8b56efd..b059e9b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -62,6 +62,7 @@ mod knowledge; mod memories; mod otel; mod personas; +mod reels; mod service; #[cfg(test)] mod testhelpers; @@ -344,6 +345,9 @@ fn main() -> std::io::Result<()> { .service(handlers::image::clear_image_date) .service(handlers::image::get_full_exif) .service(memories::list_memories) + .service(reels::create_reel_handler) + .service(reels::reel_status_handler) + .service(reels::reel_video_handler) .service(ai::generate_insight_handler) .service(ai::generate_agentic_insight_handler) .service(ai::generation_status_handler) diff --git a/src/memories.rs b/src/memories.rs index 4b1682b..c877981 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -349,12 +349,6 @@ pub async fn list_memories( opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); let span_mode = q.span.unwrap_or(MemoriesSpan::Day); - let span_token = match span_mode { - MemoriesSpan::Day => "day", - MemoriesSpan::Week => "week", - MemoriesSpan::Month => "month", - }; - let years_back: i32 = DEFAULT_YEARS_BACK; // The SQL filter expects a signed offset in minutes from UTC; default // 0 (UTC) when the client didn't send a hint. 
We also keep a chrono @@ -366,18 +360,66 @@ pub async fn list_memories( .timezone_offset_minutes .and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60)); - debug!( - "list_memories: span={:?} tz_offset_min={} years_back={}", - span_mode, tz_offset_minutes, years_back - ); - - let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { - Ok(lib) => lib, + let items = match gather_memory_items( + &app_state, + &exif_dao, + &span_context, + span_mode, + tz_offset_minutes, + client_timezone, + q.library.as_deref(), + ) { + Ok(items) => items, Err(msg) => { warn!("Rejecting /memories request: {}", msg); return HttpResponse::BadRequest().body(msg); } }; + + span.add_event( + "memories_scanned", + vec![ + KeyValue::new("span", format!("{:?}", span_mode)), + KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()), + KeyValue::new("result_count", items.len().to_string()), + KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), + KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), + ], + ); + span.set_status(Status::Ok); + + HttpResponse::Ok().json(MemoriesResponse { items }) +} + +/// Resolve an "on this day/week/month across past years" window into an +/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the +/// memory-reel selector so both honour the same library resolution, per-library +/// exclusions, timezone handling, and sort order. Returns `Err(message)` only +/// when the `library` param is invalid (callers map that to 400); per-library +/// query/lock failures are logged and skipped, matching the handler's +/// best-effort behaviour. 
+pub fn gather_memory_items( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + span_mode: MemoriesSpan, + tz_offset_minutes: i32, + client_timezone: Option, + library_param: Option<&str>, +) -> Result, String> { + let span_token = match span_mode { + MemoriesSpan::Day => "day", + MemoriesSpan::Week => "week", + MemoriesSpan::Month => "month", + }; + let years_back: i32 = DEFAULT_YEARS_BACK; + + debug!( + "gather_memory_items: span={:?} tz_offset_min={} years_back={}", + span_mode, tz_offset_minutes, years_back + ); + + let library = crate::libraries::resolve_library_param(app_state, library_param)?; let libraries_to_scan: Vec<&crate::libraries::Library> = match library { Some(lib) => vec![lib], None => app_state.libraries.iter().collect(), @@ -394,7 +436,7 @@ pub async fn list_memories( let rows = match exif_dao.lock() { Ok(mut dao) => match dao.get_memories_in_window( - &span_context, + span_context, lib.id, span_token, years_back, @@ -469,21 +511,7 @@ pub async fn list_memories( } } - let items: Vec = memories_with_dates.into_iter().map(|(m, _)| m).collect(); - - span.add_event( - "memories_scanned", - vec![ - KeyValue::new("span", format!("{:?}", span_mode)), - KeyValue::new("years_back", years_back.to_string()), - KeyValue::new("result_count", items.len().to_string()), - KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), - KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), - ], - ); - span.set_status(Status::Ok); - - HttpResponse::Ok().json(MemoriesResponse { items }) + Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect()) } #[cfg(test)] diff --git a/src/reels/mod.rs b/src/reels/mod.rs new file mode 100644 index 0000000..fe270f8 --- /dev/null +++ b/src/reels/mod.rs @@ -0,0 +1,625 @@ +//! Memory reels: render an MP4 slideshow of a selection of photos with an +//! LLM-written, voice-cloned narration over it. +//! +//! 
Pipeline: a [`selector`] resolves *which* photos (and the reel metadata), +//! the [`script`] module writes per-photo narration via the LLM, each line is +//! synthesized to speech, and [`render`] assembles the stills + narration into +//! one MP4. Jobs run in the background (mirroring the TTS speech-job registry) +//! because a reel takes minutes; the finished MP4 is cached on disk keyed by +//! the selection so a repeat request is instant. +//! +//! Phase 1 is on-demand and photos-only. The segment model is media-typed so a +//! video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in +//! without reworking the pipeline. + +pub mod render; +pub mod script; +pub mod selector; + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::{LazyLock, Mutex as StdMutex}; +use std::time::{Duration, Instant}; + +use actix_files::NamedFile; +use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web}; +use chrono::DateTime; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use std::sync::Mutex; +use uuid::Uuid; + +use crate::data::Claims; +use crate::database::{ExifDao, InsightDao}; +use crate::memories::MemoriesSpan; +use crate::otel::extract_context_from_request; +use crate::state::AppState; +use selector::ReelSelector; + +/// The media behind one reel segment. Photos-only for now; a `Clip` variant +/// (a section of a source video) is the phase-2 extension point. +#[derive(Debug, Clone)] +pub enum SegmentMedia { + Photo { rel_path: String, library_id: i32 }, +} + +/// A segment before narration: which photo, when it was taken, and any cached +/// insight to feed the scripter. +#[derive(Debug, Clone)] +pub struct PlannedSegment { + pub media: SegmentMedia, + pub date: Option, + pub insight_title: Option, + pub insight_summary: Option, +} + +impl PlannedSegment { + /// Human date for the prompt, e.g. "June 12, 2019". `None` when undated. 
+ pub fn date_label(&self) -> Option { + let ts = self.date?; + let dt = DateTime::from_timestamp(ts, 0)?; + Some(dt.format("%B %-d, %Y").to_string()) + } +} + +/// Reel-wide metadata the scripter uses for framing. +#[derive(Debug, Clone)] +pub struct ReelMeta { + pub span: MemoriesSpan, + pub years: Vec, +} + +impl ReelMeta { + /// Natural-language phrase for the span, e.g. "on this day". + pub fn span_phrase(&self) -> &'static str { + match self.span { + MemoriesSpan::Day => "on this day", + MemoriesSpan::Week => "this week", + MemoriesSpan::Month => "this month", + } + } +} + +// --- Job registry ------------------------------------------------------------ +// +// In-memory, same shape as the TTS speech-job registry: a reel takes minutes, +// too long to hold one HTTP request from a phone. POST /reels returns a job id; +// the client polls GET /reels/{id} until the video URL appears. The heavy +// artifact (the MP4) lives on disk, not in this map — jobs only carry status + +// the output path. State is intentionally not durable across restarts; the +// on-disk cache is what makes a repeat request cheap, not the registry. + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ReelJobStatus { + Queued, + Running, + Done, + Error, +} + +impl ReelJobStatus { + fn is_terminal(self) -> bool { + matches!(self, Self::Done | Self::Error) + } +} + +struct ReelJob { + status: ReelJobStatus, + /// Coarse progress label for the client ("scripting", "narrating", …). + stage: &'static str, + title: Option, + output_path: Option, + error: Option, + created_at: Instant, + finished_at: Option, + abort: Option, +} + +/// Finished jobs linger so a client that lost connectivity can still collect +/// the result; anything older than MAX_AGE is dropped (aborted first if somehow +/// still running). Swept lazily on each create. 
+const REEL_JOB_RESULT_TTL: Duration = Duration::from_secs(30 * 60); +const REEL_JOB_MAX_AGE: Duration = Duration::from_secs(60 * 60); + +static REEL_JOBS: LazyLock>> = + LazyLock::new(|| StdMutex::new(HashMap::new())); + +fn sweep_stale_jobs(jobs: &mut HashMap, now: Instant) { + jobs.retain(|_, job| { + let result_expired = job + .finished_at + .is_some_and(|t| now.duration_since(t) >= REEL_JOB_RESULT_TTL); + let too_old = now.duration_since(job.created_at) >= REEL_JOB_MAX_AGE; + if too_old && let Some(h) = job.abort.take() { + h.abort(); + } + !(result_expired || too_old) + }); +} + +fn with_job(id: Uuid, f: impl FnOnce(&mut ReelJob) -> R) -> Option { + REEL_JOBS.lock().unwrap().get_mut(&id).map(f) +} + +fn set_stage(id: Uuid, stage: &'static str) { + with_job(id, |job| { + if !job.status.is_terminal() { + job.status = ReelJobStatus::Running; + job.stage = stage; + } + }); +} + +/// Move a job to a terminal state (first terminal write wins). +fn finish_job( + id: Uuid, + status: ReelJobStatus, + title: Option, + output_path: Option, + error: Option, +) { + with_job(id, |job| { + if job.status.is_terminal() { + return; + } + job.status = status; + job.stage = match status { + ReelJobStatus::Done => "done", + _ => "error", + }; + job.title = title; + job.output_path = output_path; + job.error = error; + job.finished_at = Some(Instant::now()); + job.abort = None; + }); +} + +// --- On-disk cache ----------------------------------------------------------- + +/// Render version: bump to invalidate every cached reel after a rendering / +/// scripting change that should produce a fresh result. +const RENDER_VERSION: u32 = 1; + +/// Cache key over everything that determines *which* media and *how* it's +/// voiced — but not the (non-deterministic) narration text. Same inputs → same +/// MP4 served instantly. blake3 keeps it filesystem-safe and collision-free. 
+fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str>) -> String { + let mut buf = format!( + "v{}|{}|voice={}|", + RENDER_VERSION, + selector.descriptor(), + voice.unwrap_or("default") + ); + for m in media { + match m { + SegmentMedia::Photo { + rel_path, + library_id, + } => buf.push_str(&format!("{library_id}:{rel_path}|")), + } + } + blake3::hash(buf.as_bytes()).to_hex().to_string() +} + +fn reel_mp4_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.mp4")) +} + +fn reel_sidecar_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.json")) +} + +#[derive(Serialize, Deserialize)] +struct ReelSidecar { + title: String, +} + +// --- HTTP types -------------------------------------------------------------- + +#[derive(Debug, Deserialize)] +pub struct CreateReelRequest { + #[serde(default)] + pub span: Option, + #[serde(default)] + pub timezone_offset_minutes: Option, + #[serde(default)] + pub library: Option, + /// Cloned TTS voice for the narration; server default when omitted. + #[serde(default)] + pub voice: Option, + /// Cap on photos in the reel (clamped server-side). + #[serde(default)] + pub max_segments: Option, +} + +#[derive(Debug, Serialize)] +pub struct ReelJobCreatedResponse { + pub job_id: String, + pub status: ReelJobStatus, +} + +#[derive(Debug, Serialize)] +pub struct ReelStatusResponse { + pub job_id: String, + pub status: ReelJobStatus, + pub stage: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub video_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +// --- Handlers ---------------------------------------------------------------- + +/// POST /reels — start (or instantly serve from cache) a memory reel for the +/// requested span. 
Returns 202 + a job id; the client polls GET /reels/{id}. +#[post("/reels")] +pub async fn create_reel_handler( + http_request: HttpRequest, + _claims: Claims, + req: web::Json, + app_state: web::Data, + exif_dao: web::Data>>, + insight_dao: web::Data>>, +) -> impl Responder { + let span_context = extract_context_from_request(&http_request); + + if app_state.llamacpp.is_none() { + return HttpResponse::ServiceUnavailable().json(json!({ + "error": "Reel narration needs the LLM/TTS backend (set LLAMA_SWAP_URL)" + })); + } + + let span = req.span.unwrap_or(MemoriesSpan::Day); + let max_segments = req.max_segments.unwrap_or(selector::DEFAULT_MAX_SEGMENTS); + let selector = ReelSelector::Memories { + span, + tz_offset_minutes: req.timezone_offset_minutes.unwrap_or(0), + library: req.library.clone(), + max_segments, + }; + + // Cheap pass: resolve the media set for the cache key and the emptiness + // check. Insight enrichment + scripting happen in the background job. + let (planned, meta) = match selector::resolve(&app_state, &exif_dao, &span_context, &selector) { + Ok(r) => r, + Err(msg) => return HttpResponse::BadRequest().body(msg), + }; + if planned.is_empty() { + return HttpResponse::UnprocessableEntity().json(json!({ + "error": "No photo memories found for this span" + })); + } + + let media: Vec = planned.iter().map(|p| p.media.clone()).collect(); + let voice = req.voice.clone().filter(|s| !s.is_empty()); + let key = cache_key(&selector, &media, voice.as_deref()); + + let job_id = Uuid::new_v4(); + + // Cache hit: register an already-Done job pointing at the existing MP4 so + // the client's first poll returns the video URL immediately. 
+ let mp4 = reel_mp4_path(&app_state, &key); + if mp4.exists() { + let title = std::fs::read(reel_sidecar_path(&app_state, &key)) + .ok() + .and_then(|b| serde_json::from_slice::(&b).ok()) + .map(|s| s.title); + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Done, + stage: "done", + title, + output_path: Some(mp4), + error: None, + created_at: Instant::now(), + finished_at: Some(Instant::now()), + abort: None, + }, + ); + return HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Done, + }); + } + + { + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Queued, + stage: "queued", + title: None, + output_path: None, + error: None, + created_at: Instant::now(), + finished_at: None, + abort: None, + }, + ); + } + + let state = app_state.clone(); + let insight_dao = insight_dao.clone(); + let handle = tokio::spawn(async move { + match run_reel_job(&state, &insight_dao, job_id, planned, meta, voice, &key).await { + Ok((title, path)) => { + finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None) + } + Err(e) => { + log::error!("reel job {job_id} failed: {e:?}"); + finish_job( + job_id, + ReelJobStatus::Error, + None, + None, + Some(format!("{e}")), + ) + } + } + }); + with_job(job_id, |job| job.abort = Some(handle.abort_handle())); + + HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Queued, + }) +} + +/// GET /reels/{id} — poll a reel job. Done jobs carry a `video_url`. 
+#[get("/reels/{id}")] +pub async fn reel_status_handler(_claims: Claims, path: web::Path) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let resp = with_job(id, |job| ReelStatusResponse { + job_id: id_str.clone(), + status: job.status, + stage: job.stage.to_string(), + title: job.title.clone(), + video_url: matches!(job.status, ReelJobStatus::Done) + .then(|| format!("/reels/{id_str}/video")), + error: job.error.clone(), + }); + match resp { + Some(r) => HttpResponse::Ok().json(r), + None => HttpResponse::NotFound().json(json!({ "error": "job not found or expired" })), + } +} + +/// GET /reels/{id}/video — stream the finished MP4 (supports range requests via +/// NamedFile, so the mobile player can seek). +#[get("/reels/{id}/video")] +pub async fn reel_video_handler( + _claims: Claims, + request: HttpRequest, + path: web::Path, +) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let output = with_job(id, |job| job.output_path.clone()).flatten(); + let Some(path) = output else { + return HttpResponse::NotFound().json(json!({ "error": "reel not ready" })); + }; + match NamedFile::open(&path) { + Ok(file) => file.into_response(&request), + Err(e) => { + log::error!("opening reel mp4 {path:?} failed: {e:?}"); + HttpResponse::NotFound().json(json!({ "error": "reel file missing" })) + } + } +} + +// --- Pipeline ---------------------------------------------------------------- + +/// Run the full reel pipeline: enrich → script → narrate → render → concat, +/// then publish the MP4 into the cache. Returns (title, mp4_path). 
+async fn run_reel_job( + app_state: &AppState, + insight_dao: &Mutex>, + job_id: Uuid, + mut planned: Vec, + meta: ReelMeta, + voice: Option, + key: &str, +) -> anyhow::Result<(String, PathBuf)> { + use anyhow::{Context, anyhow}; + + let client = app_state + .llamacpp + .as_ref() + .ok_or_else(|| anyhow!("TTS/LLM backend not configured"))? + .clone(); + + // 1. Enrich with cached insights, then script (one LLM call). + set_stage(job_id, "scripting"); + let span_context = opentelemetry::Context::new(); + selector::enrich(insight_dao, &span_context, &mut planned); + let script = script::generate_script(&client, &meta, &planned).await?; + + // 2. Narrate each line to speech and 3. render each photo segment. A + // segment whose audio or render fails is skipped (logged) rather than + // sinking the whole reel — handles an odd HEIC/corrupt file gracefully. + set_stage(job_id, "narrating"); + let work = tempfile::tempdir().context("creating reel work dir")?; + let nvenc = render::is_nvenc_available().await; + let opts = render::SegmentOpts { + nvenc, + ..Default::default() + }; + + let mut segment_files: Vec = Vec::new(); + for (i, (seg, line)) in planned.iter().zip(script.lines.iter()).enumerate() { + let image_path = match resolve_image_path(app_state, &seg.media) { + Some(p) => p, + None => { + log::warn!("reel {job_id}: skipping segment {i}, image path unresolved"); + continue; + } + }; + + let audio_bytes = + match crate::ai::tts::synthesize_serialized(&client, line, voice.as_deref(), "wav") + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}"); + continue; + } + }; + let audio_path = work.path().join(format!("narration_{i:03}.wav")); + if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { + log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}"); + continue; + } + + let narration_secs = + crate::video::ffmpeg::get_duration_seconds(&audio_path.to_string_lossy()) + .await + 
.ok() + .flatten() + .unwrap_or(render::MIN_SEGMENT_SECONDS); + let duration = render::segment_duration(narration_secs); + + set_stage(job_id, "rendering"); + let seg_out = work.path().join(format!("seg_{i:03}.mp4")); + if let Err(e) = + render::render_segment(&image_path, &audio_path, &seg_out, duration, &opts).await + { + log::warn!("reel {job_id}: skipping segment {i}, render failed: {e}"); + continue; + } + segment_files.push(seg_out.to_string_lossy().to_string()); + } + + if segment_files.is_empty() { + return Err(anyhow!("no segments rendered successfully")); + } + + // 4. Concat into the cache. Write to a temp name in the reels dir, then + // rename atomically (same filesystem) so a reader never sees a partial. + std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?; + let final_path = reel_mp4_path(app_state, key); + let tmp_path = final_path.with_extension("mp4.tmp"); + render::concat_segments(&segment_files, &tmp_path).await?; + std::fs::rename(&tmp_path, &final_path).context("publishing reel mp4")?; + + // Sidecar carries the title so a future cache hit can return it without + // re-running the pipeline. + let sidecar = serde_json::to_vec(&ReelSidecar { + title: script.title.clone(), + }) + .context("serializing reel sidecar")?; + let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar); + + Ok((script.title, final_path)) +} + +/// Resolve a photo segment's library-relative path to a validated absolute +/// path under its library root. 
+fn resolve_image_path(app_state: &AppState, media: &SegmentMedia) -> Option { + let SegmentMedia::Photo { + rel_path, + library_id, + } = media; + let lib = app_state.library_by_id(*library_id)?; + crate::files::is_valid_full_path(&lib.root_path, rel_path, false) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn photo(p: &str, lib: i32) -> SegmentMedia { + SegmentMedia::Photo { + rel_path: p.to_string(), + library_id: lib, + } + } + + fn day_selector() -> ReelSelector { + ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + } + } + + #[test] + fn cache_key_is_stable_for_same_inputs() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let k1 = cache_key(&day_selector(), &media, Some("grandma")); + let k2 = cache_key(&day_selector(), &media, Some("grandma")); + assert_eq!(k1, k2); + // 64-hex blake3. + assert_eq!(k1.len(), 64); + assert!(k1.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn cache_key_changes_with_media_order_voice_and_selector() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let reordered = vec![photo("b.jpg", 1), photo("a.jpg", 1)]; + let base = cache_key(&day_selector(), &media, Some("grandma")); + // Order matters (the reel sequence differs). + assert_ne!( + base, + cache_key(&day_selector(), &reordered, Some("grandma")) + ); + // Voice matters. + assert_ne!(base, cache_key(&day_selector(), &media, Some("dad"))); + assert_ne!(base, cache_key(&day_selector(), &media, None)); + // Span matters. 
+ let week = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + }; + assert_ne!(base, cache_key(&week, &media, Some("grandma"))); + } + + #[test] + fn span_phrase_maps_each_span() { + let mk = |span| ReelMeta { + span, + years: vec![], + }; + assert_eq!(mk(MemoriesSpan::Day).span_phrase(), "on this day"); + assert_eq!(mk(MemoriesSpan::Week).span_phrase(), "this week"); + assert_eq!(mk(MemoriesSpan::Month).span_phrase(), "this month"); + } + + #[test] + fn date_label_formats_or_none() { + let seg = PlannedSegment { + media: photo("a.jpg", 1), + date: Some(1_560_384_000), // 2019-06-13 UTC + insight_title: None, + insight_summary: None, + }; + assert!(seg.date_label().unwrap().contains("2019")); + + let undated = PlannedSegment { + media: photo("a.jpg", 1), + date: None, + insight_title: None, + insight_summary: None, + }; + assert_eq!(undated.date_label(), None); + } +} diff --git a/src/reels/render.rs b/src/reels/render.rs new file mode 100644 index 0000000..ca39515 --- /dev/null +++ b/src/reels/render.rs @@ -0,0 +1,338 @@ +//! ffmpeg assembly for memory reels. +//! +//! Two-stage, per-segment design: each segment is rendered to its own +//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments +//! are joined with the concat demuxer (stream copy, no re-encode). Rendering +//! per segment — rather than one monster filtergraph — keeps each ffmpeg +//! invocation simple to reason about, parallelizes naturally, and means a +//! video-clip segment type (phase 2) slots in as just a different per-segment +//! builder without touching the concat stage. +//! +//! The arg builders are pure (`Vec` out) so the exact ffmpeg command +//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure. 
+ +use anyhow::{Context, Result, bail}; +use std::path::Path; +use tokio::process::Command; + +/// Re-exported so the reel pipeline reaches NVENC detection through this module +/// rather than depending on `video::ffmpeg` directly. +pub use crate::video::ffmpeg::is_nvenc_available; + +/// Reel canvas. Landscape matches the majority of camera photos; portrait +/// shots are letterboxed by the `pad` in [`segment_filter`] rather than +/// cropped, so faces never get cut off. +pub const REEL_WIDTH: u32 = 1920; +pub const REEL_HEIGHT: u32 = 1080; +pub const REEL_FPS: u32 = 30; + +/// A still's screen time is its narration length plus a short breath, with a +/// floor so a terse line still lingers. No ceiling: the segment always covers +/// the full narration so speech is never truncated — the scripter is asked to +/// keep lines short instead. +pub const MIN_SEGMENT_SECONDS: f64 = 2.5; +const NARRATION_TAIL_SECONDS: f64 = 0.6; + +/// Screen time for a photo segment given its narration audio length. +pub fn segment_duration(narration_secs: f64) -> f64 { + let d = narration_secs + NARRATION_TAIL_SECONDS; + if d.is_finite() && d > MIN_SEGMENT_SECONDS { + d + } else { + MIN_SEGMENT_SECONDS + } +} + +/// Options controlling per-segment rendering. `ken_burns` adds a slow zoom for +/// motion; it's defaulted off until the effect is eyeballed on the GPU box, +/// since a wrong zoompan expression reads as jitter and can't be verified here. +#[derive(Debug, Clone, Copy)] +pub struct SegmentOpts { + pub width: u32, + pub height: u32, + pub fps: u32, + pub nvenc: bool, + pub ken_burns: bool, +} + +impl Default for SegmentOpts { + fn default() -> Self { + Self { + width: REEL_WIDTH, + height: REEL_HEIGHT, + fps: REEL_FPS, + nvenc: false, + ken_burns: false, + } + } +} + +/// Video filter for a photo segment: fit the image inside the canvas +/// (preserving aspect, padding the rest), normalize SAR/fps/pixel format, and +/// optionally apply a gentle Ken Burns zoom. 
+pub fn segment_filter(opts: &SegmentOpts, duration: f64) -> String { + let (w, h, fps) = (opts.width, opts.height, opts.fps); + if opts.ken_burns { + // Upscale first so zoompan samples from a larger frame (avoids + // shimmer), drift the zoom from 1.0→~1.12 across the segment, hold the + // crop centered, then settle to the canvas. + let frames = (duration * fps as f64).round().max(1.0) as u64; + format!( + "scale={w}*2:{h}*2:force_original_aspect_ratio=increase,\ + crop={w}*2:{h}*2,\ + zoompan=z='min(zoom+0.0009,1.12)':d={frames}:\ + x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={w}x{h}:fps={fps},\ + setsar=1,format=yuv420p" + ) + } else { + format!( + "scale={w}:{h}:force_original_aspect_ratio=decrease,\ + pad={w}:{h}:(ow-iw)/2:(oh-ih)/2,\ + setsar=1,fps={fps},format=yuv420p" + ) + } +} + +fn video_encoder_args(nvenc: bool) -> Vec { + if nvenc { + // p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path. + [ + "-c:v", + "h264_nvenc", + "-preset", + "p4", + "-cq", + "23", + "-pix_fmt", + "yuv420p", + ] + } else { + [ + "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p", + ] + } + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Build the ffmpeg args that render one photo segment: a still looped for +/// `duration` seconds with its narration muxed in. The narration is padded +/// with trailing silence (`apad`) so short lines don't end the segment early; +/// `-t` bounds both streams to the segment length. 
+pub fn build_segment_args( + image_path: &str, + audio_path: &str, + out_path: &str, + duration: f64, + opts: &SegmentOpts, +) -> Vec { + let mut args: Vec = vec!["-y".into()]; + if opts.nvenc { + args.extend(["-hwaccel".into(), "cuda".into()]); + } + args.extend([ + "-loop".into(), + "1".into(), + "-i".into(), + image_path.into(), + "-i".into(), + audio_path.into(), + "-filter_complex".into(), + format!("[0:v]{}[v];[1:a]apad[a]", segment_filter(opts, duration)), + "-map".into(), + "[v]".into(), + "-map".into(), + "[a]".into(), + "-t".into(), + format!("{duration:.3}"), + ]); + args.extend(video_encoder_args(opts.nvenc)); + args.extend( + ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"] + .iter() + .map(|s| s.to_string()), + ); + args.push(out_path.into()); + args +} + +/// Build the concat-demuxer args that join rendered segments losslessly. +/// `+faststart` moves the moov atom up front so the reel streams immediately +/// on the mobile client. +pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec { + [ + "-y", + "-f", + "concat", + "-safe", + "0", + "-i", + list_path, + "-c", + "copy", + "-movflags", + "+faststart", + out_path, + ] + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Render the concat list file body. Each line points the demuxer at one +/// segment; single quotes in paths are escaped per ffmpeg's concat syntax. +pub fn build_concat_list(segment_paths: &[String]) -> String { + let mut out = String::new(); + for p in segment_paths { + let escaped = p.replace('\'', r"'\''"); + out.push_str(&format!("file '{escaped}'\n")); + } + out +} + +async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> { + let output = Command::new("ffmpeg") + .args(args) + .output() + .await + .with_context(|| format!("spawning ffmpeg for {what}"))?; + if !output.status.success() { + bail!( + "ffmpeg {what} failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + Ok(()) +} + +/// Render one photo segment to `out_path`. 
+pub async fn render_segment( + image_path: &Path, + audio_path: &Path, + out_path: &Path, + duration: f64, + opts: &SegmentOpts, +) -> Result<()> { + let args = build_segment_args( + &image_path.to_string_lossy(), + &audio_path.to_string_lossy(), + &out_path.to_string_lossy(), + duration, + opts, + ); + run_ffmpeg(&args, "segment render").await +} + +/// Join rendered segments into the final reel. Writes the concat list into the +/// same directory as the output so relative paths and cleanup stay local. +pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> { + let list_path = out_path.with_extension("concat.txt"); + let body = build_concat_list(segment_paths); + tokio::fs::write(&list_path, body) + .await + .context("writing concat list")?; + let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy()); + let result = run_ffmpeg(&args, "concat").await; + let _ = tokio::fs::remove_file(&list_path).await; + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn segment_duration_floors_short_lines() { + // A one-word narration still lingers at the floor. + assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS); + assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS); + } + + #[test] + fn segment_duration_covers_full_narration_plus_tail() { + // No ceiling: a long line gets its full length so speech isn't cut. 
+ assert!((segment_duration(5.0) - 5.6).abs() < 1e-9); + assert!((segment_duration(20.0) - 20.6).abs() < 1e-9); + } + + #[test] + fn segment_duration_rejects_nonfinite() { + assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS); + assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS); + } + + #[test] + fn static_filter_fits_and_pads_without_cropping() { + let f = segment_filter(&SegmentOpts::default(), 4.0); + assert!(f.contains("force_original_aspect_ratio=decrease")); + assert!(f.contains("pad=1920:1080")); + assert!(f.contains("format=yuv420p")); + // No zoompan when ken_burns is off. + assert!(!f.contains("zoompan")); + } + + #[test] + fn ken_burns_filter_uses_duration_scaled_frame_count() { + let opts = SegmentOpts { + ken_burns: true, + ..SegmentOpts::default() + }; + // 4s * 30fps = 120 frames in the zoompan d= term. + let f = segment_filter(&opts, 4.0); + assert!(f.contains("zoompan")); + assert!(f.contains("d=120:")); + assert!(f.contains("s=1920x1080")); + } + + #[test] + fn segment_args_loop_still_and_bound_with_t() { + let args = build_segment_args( + "/img.jpg", + "/a.wav", + "/out.mp4", + 4.0, + &SegmentOpts::default(), + ); + let joined = args.join(" "); + assert!(joined.contains("-loop 1 -i /img.jpg")); + assert!(joined.contains("-i /a.wav")); + assert!(joined.contains("apad")); + assert!(joined.contains("-t 4.000")); + assert!(joined.contains("libx264")); + assert!(joined.ends_with("/out.mp4")); + } + + #[test] + fn segment_args_use_nvenc_and_cuda_when_enabled() { + let opts = SegmentOpts { + nvenc: true, + ..SegmentOpts::default() + }; + let args = build_segment_args("/img.jpg", "/a.wav", "/out.mp4", 3.0, &opts); + let joined = args.join(" "); + assert!(joined.contains("-hwaccel cuda")); + assert!(joined.contains("h264_nvenc")); + assert!(!joined.contains("libx264")); + } + + #[test] + fn concat_args_stream_copy_with_faststart() { + let args = build_concat_args("/tmp/list.txt", "/out.mp4"); + let joined = args.join(" "); + 
assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt")); + assert!(joined.contains("-c copy")); + assert!(joined.contains("+faststart")); + } + + #[test] + fn concat_list_escapes_single_quotes() { + let body = build_concat_list(&[ + "/tmp/seg_000.mp4".into(), + "/tmp/own's dir/seg_001.mp4".into(), + ]); + assert!(body.contains("file '/tmp/seg_000.mp4'\n")); + // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax. + assert!(body.contains(r"own'\''s")); + } +} diff --git a/src/reels/script.rs b/src/reels/script.rs new file mode 100644 index 0000000..1cf3189 --- /dev/null +++ b/src/reels/script.rs @@ -0,0 +1,289 @@ +//! Narration scripting for memory reels. +//! +//! One LLM call turns the planned segments (each carrying its date and, where +//! available, its cached insight) into a short first-person narration line per +//! photo plus a title for the reel. We reuse the cached insight summary as the +//! richest per-photo signal rather than re-running vision at reel time — that +//! keeps reel generation off the GPU's vision slot entirely. +//! +//! The prompt builder and response parser are pure so the contract is +//! unit-testable; `generate_script` wires them to the LLM client. + +use anyhow::{Context, Result}; +use std::sync::Arc; + +use super::{PlannedSegment, ReelMeta}; +use crate::ai::llamacpp::LlamaCppClient; +use crate::ai::llm_client::LlmClient; + +/// The narration for a whole reel: a title and one line per segment, in order. +#[derive(Debug, Clone, PartialEq)] +pub struct ReelScript { + pub title: String, + pub lines: Vec, +} + +const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \ +slideshow of someone's own photos set to a spoken voiceover. Write warm, \ +specific, first-person narration as if the person is gently looking back on \ +their own memories. Be concrete and grounded in the details given; never \ +invent names, places, or events that aren't supported. 
Keep each line to one \ +or two short sentences that can be read aloud in a few seconds. Avoid generic \ +filler like \"what a wonderful day\" — if you have little to go on, simply \ +describe the moment plainly."; + +/// Build the (system, user) prompt pair for the scripter. The user message +/// describes each segment in order and asks for strict JSON back. +pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (String, String) { + let mut user = String::new(); + user.push_str(&format!( + "These are {} photos surfaced as memories {}.\n\n", + planned.len(), + meta.span_phrase() + )); + if !meta.years.is_empty() { + let years: Vec = meta.years.iter().map(|y| y.to_string()).collect(); + user.push_str(&format!("They span the years: {}.\n\n", years.join(", "))); + } + user.push_str("Photos, in the order they will appear:\n"); + for (i, seg) in planned.iter().enumerate() { + user.push_str(&format!("\n[{}]", i + 1)); + if let Some(date) = seg.date_label() { + user.push_str(&format!(" {date}")); + } + user.push('\n'); + match (&seg.insight_title, &seg.insight_summary) { + (Some(t), Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {t} — {s}\n")); + } + (Some(t), _) => user.push_str(&format!(" Known context: {t}\n")), + (_, Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {s}\n")); + } + _ => user.push_str(" (no extra context — narrate plainly from the date)\n"), + } + } + user.push_str(&format!( + "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\ + {{\"title\": \"\", \"segments\": [\"\", \ + \"\", ... ]}}\n\ + The \"segments\" array MUST have exactly {} items, one per photo in order.", + planned.len() + )); + (SYSTEM_PROMPT.to_string(), user) +} + +/// Parse the model's response into a script with exactly `n` lines. Tolerant of +/// code fences and surrounding prose, and of both `segments: [".."]` and +/// `segments: [{"narration": ".."}]` shapes. 
Missing/extra lines are padded or +/// truncated so the caller always gets `n` aligned to the segments. +pub fn parse_script_response(raw: &str, n: usize) -> ReelScript { + let fallback_line = "A moment worth remembering."; + let value = extract_json_object(raw); + + let title = value + .as_ref() + .and_then(|v| v.get("title")) + .and_then(|t| t.as_str()) + .map(clean_text) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "Memories".to_string()); + + let mut lines: Vec = value + .as_ref() + .and_then(|v| v.get("segments")) + .and_then(|s| s.as_array()) + .map(|arr| { + arr.iter() + .map(|item| { + let text = item + .as_str() + .map(|s| s.to_string()) + .or_else(|| { + item.get("narration") + .and_then(|n| n.as_str()) + .map(|s| s.to_string()) + }) + .unwrap_or_default(); + clean_text(&text) + }) + .collect() + }) + .unwrap_or_default(); + + // Align to exactly n: drop extras, pad shortfalls with a neutral line so + // every photo still gets spoken audio. + lines.truncate(n); + while lines.len() < n { + lines.push(fallback_line.to_string()); + } + for line in lines.iter_mut() { + if line.is_empty() { + *line = fallback_line.to_string(); + } + } + + ReelScript { title, lines } +} + +/// Pull the first balanced top-level JSON object out of a possibly-noisy model +/// response (code fences, leading prose). Returns None if nothing parses. +fn extract_json_object(raw: &str) -> Option { + // Fast path: the whole thing is valid JSON. + if let Ok(v) = serde_json::from_str::(raw.trim()) { + return Some(v); + } + // Otherwise scan for the first '{' ... matching '}' span, ignoring braces + // inside strings. 
+ let bytes = raw.as_bytes(); + let start = raw.find('{')?; + let mut depth = 0i32; + let mut in_str = false; + let mut escaped = false; + for i in start..bytes.len() { + let c = bytes[i] as char; + if in_str { + if escaped { + escaped = false; + } else if c == '\\' { + escaped = true; + } else if c == '"' { + in_str = false; + } + continue; + } + match c { + '"' => in_str = true, + '{' => depth += 1, + '}' => { + depth -= 1; + if depth == 0 { + return serde_json::from_str(&raw[start..=i]).ok(); + } + } + _ => {} + } + } + None +} + +/// Collapse whitespace and strip stray markdown/quote decorations a model +/// sometimes leaves around a line. +fn clean_text(s: &str) -> String { + let trimmed = s.trim().trim_matches('"').trim(); + trimmed.split_whitespace().collect::>().join(" ") +} + +/// Generate the reel script via the LLM. Text-only (no images) — the per-photo +/// context comes from cached insights. The call takes the GPU read lease +/// internally (see `LlamaCppClient::generate`). 
+pub async fn generate_script( + client: &Arc, + meta: &ReelMeta, + planned: &[PlannedSegment], +) -> Result { + let (system, user) = build_script_messages(meta, planned); + let raw = client + .generate(&user, Some(&system), None) + .await + .context("LLM script generation failed")?; + Ok(parse_script_response(&raw, planned.len())) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::memories::MemoriesSpan; + + fn meta() -> ReelMeta { + ReelMeta { + span: MemoriesSpan::Day, + years: vec![2019, 2021], + } + } + + fn planned(n: usize) -> Vec { + (0..n) + .map(|i| PlannedSegment { + media: super::super::SegmentMedia::Photo { + rel_path: format!("p{i}.jpg"), + library_id: 1, + }, + date: Some(1_560_000_000 + i as i64 * 86_400), + insight_title: None, + insight_summary: None, + }) + .collect() + } + + #[test] + fn prompt_states_exact_segment_count_and_span() { + let (sys, user) = build_script_messages(&meta(), &planned(3)); + assert!(sys.contains("memory reel")); + assert!(user.contains("3 photos")); + assert!(user.contains("on this day")); + assert!(user.contains("exactly 3 items")); + // Each photo gets an indexed entry. + assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]")); + } + + #[test] + fn prompt_includes_insight_context_when_present() { + let mut p = planned(1); + p[0].insight_title = Some("Lake house weekend".into()); + p[0].insight_summary = Some("Swimming with the dogs.".into()); + let (_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("Lake house weekend — Swimming with the dogs.")); + } + + #[test] + fn parse_plain_json_object() { + let raw = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#; + let script = parse_script_response(raw, 2); + assert_eq!(script.title, "Summer Days"); + assert_eq!(script.lines, vec!["First line.", "Second line."]); + } + + #[test] + fn parse_tolerates_code_fences_and_prose() { + let raw = "Sure! 
Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!"; + let script = parse_script_response(raw, 2); + assert_eq!(script.title, "Trip"); + assert_eq!(script.lines, vec!["A.", "B."]); + } + + #[test] + fn parse_accepts_object_segment_shape() { + let raw = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#; + let script = parse_script_response(raw, 2); + assert_eq!(script.lines, vec!["One.", "Two."]); + } + + #[test] + fn parse_pads_short_and_truncates_long_to_n() { + // Model returned 1 line but we have 3 segments → pad with neutral lines. + let short = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3); + assert_eq!(short.lines.len(), 3); + assert_eq!(short.lines[0], "Only one."); + assert!(!short.lines[1].is_empty()); + + // Model returned 3 but we have 2 → truncate. + let long = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2); + assert_eq!(long.lines, vec!["a", "b"]); + } + + #[test] + fn parse_falls_back_on_garbage() { + let script = parse_script_response("the model said no", 2); + assert_eq!(script.title, "Memories"); + assert_eq!(script.lines.len(), 2); + assert!(script.lines.iter().all(|l| !l.is_empty())); + } + + #[test] + fn parse_blank_line_replaced_with_fallback() { + let script = parse_script_response(r#"{"title":"T","segments":[" ","Real."]}"#, 2); + assert!(!script.lines[0].is_empty()); + assert_eq!(script.lines[1], "Real."); + } +} diff --git a/src/reels/selector.rs b/src/reels/selector.rs new file mode 100644 index 0000000..0a53ee5 --- /dev/null +++ b/src/reels/selector.rs @@ -0,0 +1,252 @@ +//! Reel selectors: resolve "what goes in the reel" into an ordered media set +//! plus the metadata the scripter needs. The renderer and scripter are +//! selector-agnostic, so adding tag- or date-range-based reels later means +//! adding a variant here, not touching the pipeline. +//! +//! 
Resolution is split in two so the handler can compute a cache key (and +//! short-circuit on a cache hit) without the per-photo insight lookups: +//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and +//! runs in the background job. + +use std::path::Path; +use std::sync::Mutex; + +use chrono::{DateTime, Datelike, FixedOffset}; + +use super::{PlannedSegment, ReelMeta, SegmentMedia}; +use crate::database::{ExifDao, InsightDao}; +use crate::file_types::is_image_file; +use crate::memories::{self, MemoriesSpan}; +use crate::state::AppState; + +/// Default and hard caps on how many photos a reel covers. The cap bounds the +/// LLM/TTS/ffmpeg work per reel; when a span has more, [`sample_evenly`] keeps +/// a representative spread across the years rather than just the oldest. +pub const DEFAULT_MAX_SEGMENTS: usize = 24; +pub const HARD_MAX_SEGMENTS: usize = 40; + +/// What a reel is built from. v1 ships the memories (on this day/week/month) +/// selector; tag and date-range variants slot in here later. +#[derive(Debug, Clone)] +pub enum ReelSelector { + Memories { + span: MemoriesSpan, + tz_offset_minutes: i32, + library: Option, + max_segments: usize, + }, +} + +impl ReelSelector { + /// Stable string identity for the cache key. Captures everything that + /// changes *which* media is selected (but not the non-deterministic + /// narration, which can't be part of a pre-render key). + pub fn descriptor(&self) -> String { + match self { + ReelSelector::Memories { + span, + tz_offset_minutes, + library, + max_segments, + } => format!( + "memories:span={:?}:tz={}:lib={}:max={}", + span, + tz_offset_minutes, + library.as_deref().unwrap_or("all"), + max_segments + ), + } + } +} + +/// Pick at most `max` items spread evenly across the input, always keeping the +/// first and last. Returns the input unchanged when it already fits. 
+pub fn sample_evenly(items: &[T], max: usize) -> Vec { + if max == 0 { + return Vec::new(); + } + if items.len() <= max { + return items.to_vec(); + } + if max == 1 { + return vec![items[0].clone()]; + } + let last = items.len() - 1; + (0..max) + .map(|i| { + // Spread indices 0..=last across max picks, endpoints included. + let idx = (i * last + (max - 1) / 2) / (max - 1); + items[idx.min(last)].clone() + }) + .collect() +} + +/// Cheap pass: resolve the selector into an ordered list of media (no insight +/// lookups yet) plus reel metadata. `Err` only on an invalid library param. +pub fn resolve( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + selector: &ReelSelector, +) -> Result<(Vec, ReelMeta), String> { + match selector { + ReelSelector::Memories { + span, + tz_offset_minutes, + library, + max_segments, + } => { + let client_tz = FixedOffset::east_opt(tz_offset_minutes * 60); + let items = memories::gather_memory_items( + app_state, + exif_dao, + span_context, + *span, + *tz_offset_minutes, + client_tz, + library.as_deref(), + )?; + + // Phase 1 is photos-only: drop videos (a clip segment type lands + // in phase 2). Filter before sampling so the spread is over the + // photos that will actually appear. + let items: Vec = items + .into_iter() + .filter(|it| is_image_file(Path::new(&it.path))) + .collect(); + + let cap = (*max_segments).clamp(1, HARD_MAX_SEGMENTS); + let items = sample_evenly(&items, cap); + + let years = distinct_years(&items, client_tz); + let meta = ReelMeta { span: *span, years }; + + let planned = items + .into_iter() + .map(|it| PlannedSegment { + media: SegmentMedia::Photo { + rel_path: it.path, + library_id: it.library_id, + }, + date: it.created, + insight_title: None, + insight_summary: None, + }) + .collect(); + Ok((planned, meta)) + } + } +} + +/// Distinct calendar years represented by the selected media, in the client's +/// timezone, ascending. 
Used to tell the scripter how far back the reel reaches. +fn distinct_years(items: &[memories::MemoryItem], tz: Option) -> Vec { + let mut years: Vec = items + .iter() + .filter_map(|it| it.created) + .filter_map(|ts| DateTime::from_timestamp(ts, 0)) + .map(|dt| match tz { + Some(off) => dt.with_timezone(&off).year(), + None => dt.year(), + }) + .collect(); + years.sort_unstable(); + years.dedup(); + years +} + +/// Background pass: fill each segment's cached insight (title + summary) where +/// one exists. Best-effort — a missing or errored lookup leaves the fields +/// `None` and the scripter narrates from the date alone. +pub fn enrich( + insight_dao: &Mutex>, + span_context: &opentelemetry::Context, + planned: &mut [PlannedSegment], +) { + let Ok(mut dao) = insight_dao.lock() else { + return; + }; + for seg in planned.iter_mut() { + let rel_path = match &seg.media { + SegmentMedia::Photo { rel_path, .. } => rel_path, + }; + if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) { + seg.insight_title = Some(insight.title); + seg.insight_summary = Some(insight.summary); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sample_evenly_returns_all_when_under_cap() { + let v = vec![1, 2, 3]; + assert_eq!(sample_evenly(&v, 5), vec![1, 2, 3]); + assert_eq!(sample_evenly(&v, 3), vec![1, 2, 3]); + } + + #[test] + fn sample_evenly_keeps_endpoints_and_spreads() { + let v: Vec = (0..100).collect(); + let picked = sample_evenly(&v, 5); + assert_eq!(picked.len(), 5); + assert_eq!(picked[0], 0); // first kept + assert_eq!(*picked.last().unwrap(), 99); // last kept + // Strictly increasing, no dupes. 
+ assert!(picked.windows(2).all(|w| w[0] < w[1])); + } + + #[test] + fn sample_evenly_handles_one_and_zero() { + let v: Vec = (0..10).collect(); + assert_eq!(sample_evenly(&v, 1), vec![0]); + assert!(sample_evenly(&v, 0).is_empty()); + } + + #[test] + fn descriptor_is_stable_and_distinguishes_inputs() { + let a = ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + let b = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + assert_eq!(a.descriptor(), a.clone().descriptor()); + assert_ne!(a.descriptor(), b.descriptor()); + assert!(a.descriptor().contains("lib=all")); + } + + #[test] + fn distinct_years_dedupes_and_sorts() { + let items = vec![ + memories::MemoryItem { + path: "a".into(), + created: Some(1_560_000_000), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "b".into(), + created: Some(1_560_086_400), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "c".into(), + created: Some(1_623_000_000), // 2021 + modified: None, + library_id: 1, + }, + ]; + assert_eq!(distinct_years(&items, None), vec![2019, 2021]); + } +} diff --git a/src/state.rs b/src/state.rs index e678ad1..bf894f3 100644 --- a/src/state.rs +++ b/src/state.rs @@ -53,6 +53,10 @@ pub struct AppState { pub video_path: String, pub gif_path: String, pub preview_clips_path: String, + /// Directory for cached memory-reel MP4s (+ title sidecars). Derived from + /// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips. + /// Created lazily by the reel pipeline on first render. + pub reels_path: String, pub excluded_dirs: Vec, pub ollama: OllamaClient, /// `None` when `OPENROUTER_API_KEY` is not configured. 
Consulted only @@ -141,6 +145,19 @@ impl AppState { preview_dao, ); + // Reels cache dir: explicit env, else a `reels` sibling of the preview + // clips dir (a known-writable, test-safe location). Not created here — + // the reel pipeline does `create_dir_all` before its first write, so + // construction (incl. tests) never touches the filesystem. + let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| { + std::path::Path::new(&preview_clips_path) + .parent() + .map(|p| p.join("reels")) + .unwrap_or_else(|| std::path::PathBuf::from("reels")) + .to_string_lossy() + .to_string() + }); + let library_health = libraries::new_health_map(&libraries_vec); let live_libraries = Arc::new(RwLock::new(libraries_vec.clone())); Self { @@ -155,6 +172,7 @@ impl AppState { video_path, gif_path, preview_clips_path, + reels_path, excluded_dirs, ollama, openrouter, diff --git a/src/video/ffmpeg.rs b/src/video/ffmpeg.rs index d385cac..019bd86 100644 --- a/src/video/ffmpeg.rs +++ b/src/video/ffmpeg.rs @@ -231,7 +231,7 @@ impl Ffmpeg { /// a hard failure — previously the `parse::` on empty stdout produced /// "cannot parse float from empty string" and poisoned the preview-clip row /// with status=failed, which the watcher would re-queue every full scan. -async fn get_duration_seconds(input_file: &str) -> Result> { +pub async fn get_duration_seconds(input_file: &str) -> Result> { if let Some(d) = probe_duration(input_file, "format=duration").await? { return Ok(Some(d)); } -- 2.52.0 From 42453d5786a42613e6b57fc75983abbe42fd241a Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 22:56:48 -0400 Subject: [PATCH 02/26] Fix reel concat: force -f mp4 for the .tmp output path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The concat stage wrote to .mp4.tmp (for an atomic publish-rename), but ffmpeg infers the muxer from the output extension and can't map .tmp to a format — "Unable to choose an output format". 
Force the mp4 muxer explicitly so the temp extension is irrelevant. Segment render, NVENC, TTS, and scripting were already working end-to-end; this was the only failure, at the final join. Co-Authored-By: Claude Fable 5 --- src/reels/render.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/reels/render.rs b/src/reels/render.rs index ca39515..9643309 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -159,7 +159,9 @@ pub fn build_segment_args( /// Build the concat-demuxer args that join rendered segments losslessly. /// `+faststart` moves the moov atom up front so the reel streams immediately -/// on the mobile client. +/// on the mobile client. The output muxer is forced with `-f mp4` because we +/// write to a `.tmp` path (atomic publish) whose extension ffmpeg can't map to +/// a format on its own. pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec { [ "-y", @@ -173,6 +175,8 @@ pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec { "copy", "-movflags", "+faststart", + "-f", + "mp4", out_path, ] .iter() @@ -317,12 +321,19 @@ mod tests { } #[test] - fn concat_args_stream_copy_with_faststart() { - let args = build_concat_args("/tmp/list.txt", "/out.mp4"); + fn concat_args_stream_copy_with_faststart_and_forced_muxer() { + // Output goes to a .tmp path, so the muxer must be forced — ffmpeg + // can't infer mp4 from the extension (the bug this guards against). + let args = build_concat_args("/tmp/list.txt", "/out.mp4.tmp"); let joined = args.join(" "); assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt")); assert!(joined.contains("-c copy")); assert!(joined.contains("+faststart")); + assert!(joined.contains("-f mp4")); + // The forced muxer must come before the output path. 
+ let f_mp4 = args.windows(2).position(|w| w == ["-f", "mp4"]).unwrap(); + let out = args.iter().position(|a| a == "/out.mp4.tmp").unwrap(); + assert!(f_mp4 < out); } #[test] -- 2.52.0 From 7715a7a905015faa977eb0a45c0670f022120f20 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 23:10:26 -0400 Subject: [PATCH 03/26] Reels: portrait canvas with blurred fill, fade transitions, warmer TTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the "image is tiny" problem: a 1920x1080 landscape reel letterboxes to a ~25%-height band on a portrait phone. Switch to a portrait 1080x1920 canvas and fill it per photo with a blurred, zoomed copy of the image behind the sharp fitted photo — so the frame is always full regardless of the photo's orientation, with no black bars and no cropping of the subject. Add a quick 0.35s fade in/out baked into each segment so concatenated photos dip smoothly instead of hard-cutting (fade-out lands in the narration's silent tail, so speech isn't clipped). Drop the unused Ken Burns branch — motion can return deliberately later. Warm up the narration a touch: thread Chatterbox's `exaggeration` through synthesize_serialized and default reels to 0.7 (tunable via REEL_TTS_EXAGGERATION). Bump RENDER_VERSION so existing landscape reels re-render. 
Co-Authored-By: Claude Fable 5 --- src/ai/tts.rs | 6 ++- src/reels/mod.rs | 38 ++++++++++---- src/reels/render.rs | 123 +++++++++++++++++++++++++------------------- 3 files changed, 101 insertions(+), 66 deletions(-) diff --git a/src/ai/tts.rs b/src/ai/tts.rs index 4e7544c..a9a610a 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -486,11 +486,15 @@ pub async fn synthesize_serialized( text: &str, voice: Option<&str>, format: &str, + exaggeration: Option, ) -> anyhow::Result> { let prepared = prepare_for_tts(text); if prepared.is_empty() { anyhow::bail!("nothing to synthesize after cleanup"); } + // Clamp to Chatterbox's documented range, matching the HTTP handlers + // (which clamp before forwarding; this path bypasses them). + let exaggeration = exaggeration.map(|x| x.clamp(0.25, 2.0)); // Queue rather than fast-fail (mirrors create_speech_job_handler). let _permit = TTS_PERMIT .acquire() @@ -500,7 +504,7 @@ pub async fn synthesize_serialized( // starts (see ai::gpu). let _gpu = crate::ai::gpu::tts_lease().await; client - .text_to_speech(&prepared, voice, format, None, None, None) + .text_to_speech(&prepared, voice, format, exaggeration, None, None) .await } diff --git a/src/reels/mod.rs b/src/reels/mod.rs index fe270f8..9956984 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -180,7 +180,18 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 1; +const RENDER_VERSION: u32 = 2; + +/// Narration expressiveness — Chatterbox's `exaggeration` knob. A modest bump +/// over the ~0.5 default warms up otherwise-flat narration; tune via +/// `REEL_TTS_EXAGGERATION` (0.25–2.0). 
+fn reel_tts_exaggeration() -> f32 { + std::env::var("REEL_TTS_EXAGGERATION") + .ok() + .and_then(|s| s.trim().parse::().ok()) + .filter(|x| x.is_finite()) + .unwrap_or(0.7) +} /// Cache key over everything that determines *which* media and *how* it's /// voiced — but not the (non-deterministic) narration text. Same inputs → same @@ -470,16 +481,21 @@ async fn run_reel_job( } }; - let audio_bytes = - match crate::ai::tts::synthesize_serialized(&client, line, voice.as_deref(), "wav") - .await - { - Ok(b) => b, - Err(e) => { - log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}"); - continue; - } - }; + let audio_bytes = match crate::ai::tts::synthesize_serialized( + &client, + line, + voice.as_deref(), + "wav", + Some(reel_tts_exaggeration()), + ) + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}"); + continue; + } + }; let audio_path = work.path().join(format!("narration_{i:03}.wav")); if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}"); diff --git a/src/reels/render.rs b/src/reels/render.rs index 9643309..e40fc3d 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -19,11 +19,13 @@ use tokio::process::Command; /// rather than depending on `video::ffmpeg` directly. pub use crate::video::ffmpeg::is_nvenc_available; -/// Reel canvas. Landscape matches the majority of camera photos; portrait -/// shots are letterboxed by the `pad` in [`segment_filter`] rather than -/// cropped, so faces never get cut off. -pub const REEL_WIDTH: u32 = 1920; -pub const REEL_HEIGHT: u32 = 1080; +/// Reel canvas. Portrait, because reels are watched on a phone held upright — +/// a landscape canvas letterboxes to a thin ~25%-height band there. 
Each photo +/// is fitted sharp and centered over a blurred, zoomed copy of itself (see +/// [`segment_filtergraph`]) so the frame is always filled regardless of the +/// photo's orientation, without cropping the subject. +pub const REEL_WIDTH: u32 = 1080; +pub const REEL_HEIGHT: u32 = 1920; pub const REEL_FPS: u32 = 30; /// A still's screen time is its narration length plus a short breath, with a @@ -33,6 +35,11 @@ pub const REEL_FPS: u32 = 30; pub const MIN_SEGMENT_SECONDS: f64 = 2.5; const NARRATION_TAIL_SECONDS: f64 = 0.6; +/// Quick fade in/out baked into each segment so concatenated photos dip +/// smoothly instead of hard-cutting. The fade-out lands inside the narration's +/// silent tail, so speech is never clipped. +const FADE_SECONDS: f64 = 0.35; + /// Screen time for a photo segment given its narration audio length. pub fn segment_duration(narration_secs: f64) -> f64 { let d = narration_secs + NARRATION_TAIL_SECONDS; @@ -43,16 +50,13 @@ pub fn segment_duration(narration_secs: f64) -> f64 { } } -/// Options controlling per-segment rendering. `ken_burns` adds a slow zoom for -/// motion; it's defaulted off until the effect is eyeballed on the GPU box, -/// since a wrong zoompan expression reads as jitter and can't be verified here. +/// Options controlling per-segment rendering. #[derive(Debug, Clone, Copy)] pub struct SegmentOpts { pub width: u32, pub height: u32, pub fps: u32, pub nvenc: bool, - pub ken_burns: bool, } impl Default for SegmentOpts { @@ -62,35 +66,38 @@ impl Default for SegmentOpts { height: REEL_HEIGHT, fps: REEL_FPS, nvenc: false, - ken_burns: false, } } } -/// Video filter for a photo segment: fit the image inside the canvas -/// (preserving aspect, padding the rest), normalize SAR/fps/pixel format, and -/// optionally apply a gentle Ken Burns zoom. -pub fn segment_filter(opts: &SegmentOpts, duration: f64) -> String { +/// Full `filter_complex` for one photo segment, producing labelled `[v]` (video) +/// and `[a]` (audio) outputs. 
Input 0 is the looped still, input 1 the +/// narration. +/// +/// Video: split the still into a background and foreground. The background is +/// scaled to *cover* the canvas and heavily blurred; the foreground is scaled to +/// *fit* inside it and overlaid centered. This fills the portrait frame for any +/// photo orientation — no black bars, no cropping of the subject — then a quick +/// fade in/out softens the cut to the next segment. +/// +/// Audio: pad the narration with trailing silence so a short line doesn't end +/// the segment early; `-t` bounds it to the segment duration. +pub fn segment_filtergraph(opts: &SegmentOpts, duration: f64) -> String { let (w, h, fps) = (opts.width, opts.height, opts.fps); - if opts.ken_burns { - // Upscale first so zoompan samples from a larger frame (avoids - // shimmer), drift the zoom from 1.0→~1.12 across the segment, hold the - // crop centered, then settle to the canvas. - let frames = (duration * fps as f64).round().max(1.0) as u64; - format!( - "scale={w}*2:{h}*2:force_original_aspect_ratio=increase,\ - crop={w}*2:{h}*2,\ - zoompan=z='min(zoom+0.0009,1.12)':d={frames}:\ - x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={w}x{h}:fps={fps},\ - setsar=1,format=yuv420p" - ) - } else { - format!( - "scale={w}:{h}:force_original_aspect_ratio=decrease,\ - pad={w}:{h}:(ow-iw)/2:(oh-ih)/2,\ - setsar=1,fps={fps},format=yuv420p" - ) - } + // Fade-out begins one fade-length before the end; clamp so a floor-length + // segment still gets a valid (non-negative) start time. 
+ let fade_out_start = (duration - FADE_SECONDS).max(0.0); + format!( + "[0:v]split=2[bg][fg];\ + [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\ + crop={w}:{h},boxblur=20:2[bgb];\ + [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\ + [bgb][fgs]overlay=(W-w)/2:(H-h)/2,\ + fade=t=in:st=0:d={FADE_SECONDS},\ + fade=t=out:st={fade_out_start:.3}:d={FADE_SECONDS},\ + setsar=1,fps={fps},format=yuv420p[v];\ + [1:a]apad[a]" + ) } fn video_encoder_args(nvenc: bool) -> Vec { @@ -117,9 +124,9 @@ fn video_encoder_args(nvenc: bool) -> Vec { } /// Build the ffmpeg args that render one photo segment: a still looped for -/// `duration` seconds with its narration muxed in. The narration is padded -/// with trailing silence (`apad`) so short lines don't end the segment early; -/// `-t` bounds both streams to the segment length. +/// `duration` seconds, filled to the portrait canvas with a blurred backdrop +/// (see [`segment_filtergraph`]) and the narration muxed in. `-t` bounds both +/// streams to the segment length. pub fn build_segment_args( image_path: &str, audio_path: &str, @@ -139,7 +146,7 @@ pub fn build_segment_args( "-i".into(), audio_path.into(), "-filter_complex".into(), - format!("[0:v]{}[v];[1:a]apad[a]", segment_filter(opts, duration)), + segment_filtergraph(opts, duration), "-map".into(), "[v]".into(), "-map".into(), @@ -267,26 +274,34 @@ mod tests { } #[test] - fn static_filter_fits_and_pads_without_cropping() { - let f = segment_filter(&SegmentOpts::default(), 4.0); - assert!(f.contains("force_original_aspect_ratio=decrease")); - assert!(f.contains("pad=1920:1080")); - assert!(f.contains("format=yuv420p")); - // No zoompan when ken_burns is off. - assert!(!f.contains("zoompan")); + fn filtergraph_fills_portrait_with_blurred_bg_and_fitted_fg() { + let g = segment_filtergraph(&SegmentOpts::default(), 4.0); + // Background covers + blurs; foreground fits and is centered over it. 
+ assert!(g.contains("split=2[bg][fg]")); + assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase")); + assert!(g.contains("crop=1080:1920")); + assert!(g.contains("boxblur")); + assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease")); + assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); + // Produces the labelled outputs build_segment_args maps. + assert!(g.contains("[v]")); + assert!(g.contains("[1:a]apad[a]")); + assert!(g.contains("format=yuv420p")); } #[test] - fn ken_burns_filter_uses_duration_scaled_frame_count() { - let opts = SegmentOpts { - ken_burns: true, - ..SegmentOpts::default() - }; - // 4s * 30fps = 120 frames in the zoompan d= term. - let f = segment_filter(&opts, 4.0); - assert!(f.contains("zoompan")); - assert!(f.contains("d=120:")); - assert!(f.contains("s=1920x1080")); + fn filtergraph_fades_in_and_out_within_duration() { + // 4s segment, 0.35s fade → fade-out starts at 3.65s. + let g = segment_filtergraph(&SegmentOpts::default(), 4.0); + assert!(g.contains("fade=t=in:st=0:d=0.35")); + assert!(g.contains("fade=t=out:st=3.650:d=0.35")); + } + + #[test] + fn filtergraph_fade_out_start_never_negative_at_floor() { + // A floor-length segment shorter than a fade still yields st >= 0. + let g = segment_filtergraph(&SegmentOpts::default(), 0.2); + assert!(g.contains("fade=t=out:st=0.000:d=0.35")); } #[test] -- 2.52.0 From 740fc4d84151ff55b571f99338367521fad133a5 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 23:20:52 -0400 Subject: [PATCH 04/26] Reels: fix steppy fade (fps before fade) and ease the expression bump The fade looked steppy/low-frame-rate because the filtergraph normalized fps AFTER the fade filters: the brightness ramp was sampled at the looped still's coarse input cadence, then duplicated up to 30fps. Move fps ahead of the fades, pin the still's input framerate (-framerate), and force CFR output (-r) so the dip ramps across a full 30 frames and plays steadily. 
Ease narration expressiveness from 0.7 to 0.6 (still tunable via REEL_TTS_EXAGGERATION). Bump RENDER_VERSION so existing reels re-render. Co-Authored-By: Claude Fable 5 --- src/reels/mod.rs | 10 +++++----- src/reels/render.rs | 30 ++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 9956984..4cfe24b 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -180,17 +180,17 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 2; +const RENDER_VERSION: u32 = 3; -/// Narration expressiveness — Chatterbox's `exaggeration` knob. A modest bump -/// over the ~0.5 default warms up otherwise-flat narration; tune via -/// `REEL_TTS_EXAGGERATION` (0.25–2.0). +/// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump +/// over the ~0.5 default warms up otherwise-flat narration without over-acting; +/// tune via `REEL_TTS_EXAGGERATION` (0.25–2.0). fn reel_tts_exaggeration() -> f32 { std::env::var("REEL_TTS_EXAGGERATION") .ok() .and_then(|s| s.trim().parse::<f32>().ok()) .filter(|x| x.is_finite()) - .unwrap_or(0.7) + .unwrap_or(0.6) } /// Cache key over everything that determines *which* media and *how* it's diff --git a/src/reels/render.rs b/src/reels/render.rs index e40fc3d..3cca6ac 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -87,15 +87,20 @@ pub fn segment_filtergraph(opts: &SegmentOpts, duration: f64) -> String { // Fade-out begins one fade-length before the end; clamp so a floor-length // segment still gets a valid (non-negative) start time. let fade_out_start = (duration - FADE_SECONDS).max(0.0); + // `fps` is normalized BEFORE the fades so the brightness ramp is computed + // on a true {fps}-frame timeline.
If fps came after, the fade would be + // sampled at the looped still's coarse input cadence and then duplicated up + // to {fps}, which reads as a steppy / low-frame-rate dip. format!( "[0:v]split=2[bg][fg];\ [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\ crop={w}:{h},boxblur=20:2[bgb];\ [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\ [bgb][fgs]overlay=(W-w)/2:(H-h)/2,\ + fps={fps},\ fade=t=in:st=0:d={FADE_SECONDS},\ fade=t=out:st={fade_out_start:.3}:d={FADE_SECONDS},\ - setsar=1,fps={fps},format=yuv420p[v];\ + setsar=1,format=yuv420p[v];\ [1:a]apad[a]" ) } @@ -134,11 +139,16 @@ pub fn build_segment_args( duration: f64, opts: &SegmentOpts, ) -> Vec { + let fps = opts.fps.to_string(); let mut args: Vec = vec!["-y".into()]; if opts.nvenc { args.extend(["-hwaccel".into(), "cuda".into()]); } args.extend([ + // Read the looped still at the target rate so frames exist for the + // fade to ramp across (paired with the in-graph `fps` and CFR output). + "-framerate".into(), + fps.clone(), "-loop".into(), "1".into(), "-i".into(), @@ -153,6 +163,10 @@ pub fn build_segment_args( "[a]".into(), "-t".into(), format!("{duration:.3}"), + // Force constant frame rate so the segment (and the concatenated reel) + // plays at a steady {fps} rather than a variable cadence. + "-r".into(), + fps, ]); args.extend(video_encoder_args(opts.nvenc)); args.extend( @@ -297,6 +311,16 @@ mod tests { assert!(g.contains("fade=t=out:st=3.650:d=0.35")); } + #[test] + fn filtergraph_normalizes_fps_before_fading() { + // The fps filter must precede the fades, else the brightness ramp is + // sampled at the still's coarse cadence and looks steppy. 
+ let g = segment_filtergraph(&SegmentOpts::default(), 4.0); + let fps_at = g.find("fps=30").expect("fps in graph"); + let fade_at = g.find("fade=t=in").expect("fade in graph"); + assert!(fps_at < fade_at); + } + #[test] fn filtergraph_fade_out_start_never_negative_at_floor() { // A floor-length segment shorter than a fade still yields st >= 0. @@ -314,10 +338,12 @@ mod tests { &SegmentOpts::default(), ); let joined = args.join(" "); - assert!(joined.contains("-loop 1 -i /img.jpg")); + assert!(joined.contains("-framerate 30 -loop 1 -i /img.jpg")); assert!(joined.contains("-i /a.wav")); assert!(joined.contains("apad")); assert!(joined.contains("-t 4.000")); + // Constant frame rate forced on the output. + assert!(joined.contains("-r 30")); assert!(joined.contains("libx264")); assert!(joined.ends_with("/out.mp4")); } -- 2.52.0 From 6e90f24307209561f626d30a39af2e09afcb97e0 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 23:43:18 -0400 Subject: [PATCH 05/26] Reels: burst beats + duration budget for week/month, plus step logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructures a reel around beats — one narration line over one or more photos — instead of one line per photo. A single-photo beat is a held shot; a multi-photo beat is a quick burst that flashes through several moments of an event while the line is read. So a week/month reel can show everything it spans without a narrated (and timed) segment per photo. Selection (selector.rs): - Duration budget: cap the number of narrated beats to ~REEL_TARGET_SECONDS (default 90, env-tunable) so week/month reels don't run minutes long. - Event clustering by time gap; when there are more events than the beat budget, adjacent events merge so the whole span stays covered. Each beat bursts up to MAX_BURST_PHOTOS (an even spread), so a 40-shot dinner contributes a handful of quick frames, not forty narrated seconds. 
Render (render.rs): a beat renders its photos as a concat of per-photo fills (blurred-bg portrait, fps-before-fade) under one muxed narration; burst photos get a snappier fade. beat_durations splits the narration across the photos, stretching only if a long burst would flash too fast. Adds high-level info logs across the steps (request → script → per-beat narrate/render → join → done with elapsed) for visibility. Bumps RENDER_VERSION to re-render cached reels. Co-Authored-By: Claude Fable 5 --- src/reels/mod.rs | 131 ++++++++++++++----- src/reels/render.rs | 296 ++++++++++++++++++++++++++++-------------- src/reels/script.rs | 95 +++++++++----- src/reels/selector.rs | 262 ++++++++++++++++++++++++++++++++----- 4 files changed, 580 insertions(+), 204 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 4cfe24b..be3f52d 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -36,24 +36,27 @@ use crate::otel::extract_context_from_request; use crate::state::AppState; use selector::ReelSelector; -/// The media behind one reel segment. Photos-only for now; a `Clip` variant -/// (a section of a source video) is the phase-2 extension point. +/// The media behind one shot. Photos-only for now; a `Clip` variant (a section +/// of a source video) is the phase-2 extension point. #[derive(Debug, Clone)] pub enum SegmentMedia { Photo { rel_path: String, library_id: i32 }, } -/// A segment before narration: which photo, when it was taken, and any cached -/// insight to feed the scripter. +/// A beat: one narration line over one or more photos. A single-photo beat is a +/// held shot; a multi-photo beat is a quick burst that flashes through several +/// moments of the same event while the line is read — so a week/month reel can +/// *show* everything it spans without a narration line (and the seconds that +/// come with it) per photo. 
#[derive(Debug, Clone)] -pub struct PlannedSegment { - pub media: SegmentMedia, +pub struct PlannedBeat { + pub photos: Vec, pub date: Option, pub insight_title: Option, pub insight_summary: Option, } -impl PlannedSegment { +impl PlannedBeat { /// Human date for the prompt, e.g. "June 12, 2019". `None` when undated. pub fn date_label(&self) -> Option { let ts = self.date?; @@ -180,7 +183,7 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 3; +const RENDER_VERSION: u32 = 4; /// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump /// over the ~0.5 default warms up otherwise-flat narration without over-acting; @@ -306,16 +309,25 @@ pub async fn create_reel_handler( })); } - let media: Vec = planned.iter().map(|p| p.media.clone()).collect(); + // Flatten every photo across beats (in order) into the cache key — the key + // tracks exactly which photos appear and in what sequence. + let media: Vec = planned.iter().flat_map(|b| b.photos.clone()).collect(); let voice = req.voice.clone().filter(|s| !s.is_empty()); let key = cache_key(&selector, &media, voice.as_deref()); let job_id = Uuid::new_v4(); + log::info!( + "reel {job_id}: request span={:?} → {} beats, {} photos", + span, + planned.len(), + media.len() + ); // Cache hit: register an already-Done job pointing at the existing MP4 so // the client's first poll returns the video URL immediately. 
let mp4 = reel_mp4_path(&app_state, &key); if mp4.exists() { + log::info!("reel {job_id}: cache hit, serving existing reel"); let title = std::fs::read(reel_sidecar_path(&app_state, &key)) .ok() .and_then(|b| serde_json::from_slice::(&b).ok()) @@ -358,6 +370,7 @@ pub async fn create_reel_handler( }, ); } + log::info!("reel {job_id}: queued for generation"); let state = app_state.clone(); let insight_dao = insight_dao.clone(); @@ -441,45 +454,73 @@ async fn run_reel_job( app_state: &AppState, insight_dao: &Mutex>, job_id: Uuid, - mut planned: Vec, + mut planned: Vec, meta: ReelMeta, voice: Option, key: &str, ) -> anyhow::Result<(String, PathBuf)> { use anyhow::{Context, anyhow}; + let started = Instant::now(); + let total_photos: usize = planned.iter().map(|b| b.photos.len()).sum(); + log::info!( + "reel {job_id}: starting — span {:?}, {} beats, {} photos, voice={}", + meta.span, + planned.len(), + total_photos, + voice.as_deref().unwrap_or("default") + ); + let client = app_state .llamacpp .as_ref() .ok_or_else(|| anyhow!("TTS/LLM backend not configured"))? .clone(); - // 1. Enrich with cached insights, then script (one LLM call). + // 1. Enrich each beat with its lead photo's cached insight, then script + // (one LLM call → one narration line per beat). set_stage(job_id, "scripting"); + log::info!("reel {job_id}: scripting narration via LLM…"); let span_context = opentelemetry::Context::new(); selector::enrich(insight_dao, &span_context, &mut planned); let script = script::generate_script(&client, &meta, &planned).await?; + log::info!( + "reel {job_id}: scripted \"{}\" ({} lines)", + script.title, + script.lines.len() + ); - // 2. Narrate each line to speech and 3. render each photo segment. A - // segment whose audio or render fails is skipped (logged) rather than - // sinking the whole reel — handles an odd HEIC/corrupt file gracefully. + // 2. Narrate each beat's line and 3. render the beat (its photos shown in + // sequence under that one narration). 
A beat whose audio or render fails + // is skipped (logged) rather than sinking the whole reel — handles an + // odd HEIC/corrupt file gracefully. set_stage(job_id, "narrating"); let work = tempfile::tempdir().context("creating reel work dir")?; let nvenc = render::is_nvenc_available().await; + log::info!( + "reel {job_id}: narrating + rendering {} beats (encoder: {})", + planned.len(), + if nvenc { "nvenc" } else { "cpu" } + ); let opts = render::SegmentOpts { nvenc, ..Default::default() }; - let mut segment_files: Vec = Vec::new(); - for (i, (seg, line)) in planned.iter().zip(script.lines.iter()).enumerate() { - let image_path = match resolve_image_path(app_state, &seg.media) { - Some(p) => p, - None => { - log::warn!("reel {job_id}: skipping segment {i}, image path unresolved"); - continue; - } - }; + let beat_total = planned.len(); + let mut beat_files: Vec = Vec::new(); + for (i, (beat, line)) in planned.iter().zip(script.lines.iter()).enumerate() { + // Resolve all of the beat's photos to absolute paths; drop any that + // don't resolve. An empty beat is skipped. 
+ let image_paths: Vec = beat + .photos + .iter() + .filter_map(|m| resolve_image_path(app_state, m)) + .collect(); + if image_paths.is_empty() { + log::warn!("reel {job_id}: skipping beat {i}, no image paths resolved"); + continue; + } let audio_bytes = match crate::ai::tts::synthesize_serialized( &client, @@ -492,13 +533,13 @@ async fn run_reel_job( { Ok(b) => b, Err(e) => { - log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}"); + log::warn!("reel {job_id}: skipping beat {i}, TTS failed: {e}"); continue; } }; let audio_path = work.path().join(format!("narration_{i:03}.wav")); if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { - log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}"); + log::warn!("reel {job_id}: skipping beat {i}, writing audio failed: {e}"); continue; } @@ -508,25 +549,37 @@ async fn run_reel_job( .ok() .flatten() .unwrap_or(render::MIN_SEGMENT_SECONDS); - let duration = render::segment_duration(narration_secs); set_stage(job_id, "rendering"); - let seg_out = work.path().join(format!("seg_{i:03}.mp4")); + log::info!( + "reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s", + i + 1, + beat_total, + image_paths.len(), + narration_secs + ); + let beat_out = work.path().join(format!("beat_{i:03}.mp4")); if let Err(e) = - render::render_segment(&image_path, &audio_path, &seg_out, duration, &opts).await + render::render_beat(&image_paths, &audio_path, &beat_out, narration_secs, &opts).await { - log::warn!("reel {job_id}: skipping segment {i}, render failed: {e}"); + log::warn!("reel {job_id}: skipping beat {i}, render failed: {e}"); continue; } - segment_files.push(seg_out.to_string_lossy().to_string()); + beat_files.push(beat_out.to_string_lossy().to_string()); } + let segment_files = beat_files; if segment_files.is_empty() { - return Err(anyhow!("no segments rendered successfully")); + return Err(anyhow!("no beats rendered successfully")); } // 4. Concat into the cache. 
Write to a temp name in the reels dir, then // rename atomically (same filesystem) so a reader never sees a partial. + set_stage(job_id, "rendering"); + log::info!( + "reel {job_id}: joining {} rendered beats into the final reel", + segment_files.len() + ); std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?; let final_path = reel_mp4_path(app_state, key); let tmp_path = final_path.with_extension("mp4.tmp"); @@ -541,6 +594,12 @@ async fn run_reel_job( .context("serializing reel sidecar")?; let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar); + log::info!( + "reel {job_id}: done in {:.1}s — {} beats → {}", + started.elapsed().as_secs_f64(), + segment_files.len(), + final_path.display() + ); Ok((script.title, final_path)) } @@ -622,16 +681,16 @@ mod tests { #[test] fn date_label_formats_or_none() { - let seg = PlannedSegment { - media: photo("a.jpg", 1), + let beat = PlannedBeat { + photos: vec![photo("a.jpg", 1)], date: Some(1_560_384_000), // 2019-06-13 UTC insight_title: None, insight_summary: None, }; - assert!(seg.date_label().unwrap().contains("2019")); + assert!(beat.date_label().unwrap().contains("2019")); - let undated = PlannedSegment { - media: photo("a.jpg", 1), + let undated = PlannedBeat { + photos: vec![photo("a.jpg", 1)], date: None, insight_title: None, insight_summary: None, diff --git a/src/reels/render.rs b/src/reels/render.rs index 3cca6ac..a36f6f1 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -22,25 +22,31 @@ pub use crate::video::ffmpeg::is_nvenc_available; /// Reel canvas. Portrait, because reels are watched on a phone held upright — /// a landscape canvas letterboxes to a thin ~25%-height band there. 
Each photo /// is fitted sharp and centered over a blurred, zoomed copy of itself (see -/// [`segment_filtergraph`]) so the frame is always filled regardless of the +/// [`photo_filter_chain`]) so the frame is always filled regardless of the /// photo's orientation, without cropping the subject. pub const REEL_WIDTH: u32 = 1080; pub const REEL_HEIGHT: u32 = 1920; pub const REEL_FPS: u32 = 30; -/// A still's screen time is its narration length plus a short breath, with a -/// floor so a terse line still lingers. No ceiling: the segment always covers -/// the full narration so speech is never truncated — the scripter is asked to -/// keep lines short instead. +/// A beat's screen time is its narration length plus a short breath, with a +/// floor so a terse line still lingers. No ceiling: the beat always covers the +/// full narration so speech is never truncated — the scripter is asked to keep +/// lines short instead. pub const MIN_SEGMENT_SECONDS: f64 = 2.5; const NARRATION_TAIL_SECONDS: f64 = 0.6; -/// Quick fade in/out baked into each segment so concatenated photos dip -/// smoothly instead of hard-cutting. The fade-out lands inside the narration's -/// silent tail, so speech is never clipped. -const FADE_SECONDS: f64 = 0.35; +/// Fade durations baked into each photo. A held (single-photo) beat gets a +/// gentle dip; burst photos get a snappier fade so the montage feels quick. +const SINGLE_FADE_SECONDS: f64 = 0.35; +const BURST_FADE_SECONDS: f64 = 0.15; -/// Screen time for a photo segment given its narration audio length. +/// Floor on how long each burst photo stays up, so a long line over many photos +/// doesn't flash them subliminally. If the narration is too short to give every +/// photo this much, the beat is stretched to fit. +const MIN_BURST_PHOTO_SECONDS: f64 = 0.6; + +/// Base screen time for a beat given its narration length: narration + breath, +/// floored. Used as the lower bound on a beat's total duration. 
pub fn segment_duration(narration_secs: f64) -> f64 { let d = narration_secs + NARRATION_TAIL_SECONDS; if d.is_finite() && d > MIN_SEGMENT_SECONDS { @@ -50,6 +56,29 @@ pub fn segment_duration(narration_secs: f64) -> f64 { } } +/// Split a beat into per-photo durations. The beat lasts at least its narration +/// (so speech isn't cut) and at least `n × MIN_BURST_PHOTO_SECONDS` (so a fast +/// burst stays legible); the photos share that total evenly. Returns +/// `(total_seconds, per_photo_seconds)`. +pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec) { + let n = n_photos.max(1); + let base = segment_duration(narration_secs); + let min_total = n as f64 * MIN_BURST_PHOTO_SECONDS; + let total = if base > min_total { base } else { min_total }; + let each = total / n as f64; + (total, vec![each; n]) +} + +/// Fade length to use for a beat of `n_photos` (gentle when held, snappy in a +/// burst). +fn fade_for(n_photos: usize) -> f64 { + if n_photos > 1 { + BURST_FADE_SECONDS + } else { + SINGLE_FADE_SECONDS + } +} + /// Options controlling per-segment rendering. #[derive(Debug, Clone, Copy)] pub struct SegmentOpts { @@ -70,38 +99,49 @@ impl Default for SegmentOpts { } } -/// Full `filter_complex` for one photo segment, producing labelled `[v]` (video) -/// and `[a]` (audio) outputs. Input 0 is the looped still, input 1 the -/// narration. +/// Filter chain for one photo (input `idx`) producing the labelled output +/// `[v{idx}]`. Splits the still into a background and foreground: the background +/// is scaled to *cover* the canvas and heavily blurred; the foreground is +/// scaled to *fit* and overlaid centered. This fills the portrait frame for any +/// photo orientation — no black bars, no cropping of the subject — then a fade +/// in/out softens the cut. Intermediate labels are suffixed with `idx` so +/// several chains coexist in one `filter_complex`. /// -/// Video: split the still into a background and foreground. 
The background is -/// scaled to *cover* the canvas and heavily blurred; the foreground is scaled to -/// *fit* inside it and overlaid centered. This fills the portrait frame for any -/// photo orientation — no black bars, no cropping of the subject — then a quick -/// fade in/out softens the cut to the next segment. -/// -/// Audio: pad the narration with trailing silence so a short line doesn't end -/// the segment early; `-t` bounds it to the segment duration. -pub fn segment_filtergraph(opts: &SegmentOpts, duration: f64) -> String { +/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a +/// true {fps}-frame timeline; otherwise the fade is sampled at the looped +/// still's coarse cadence and duplicated up, which reads as a steppy dip. +fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String { let (w, h, fps) = (opts.width, opts.height, opts.fps); - // Fade-out begins one fade-length before the end; clamp so a floor-length - // segment still gets a valid (non-negative) start time. - let fade_out_start = (duration - FADE_SECONDS).max(0.0); - // `fps` is normalized BEFORE the fades so the brightness ramp is computed - // on a true {fps}-frame timeline. If fps came after, the fade would be - // sampled at the looped still's coarse input cadence and then duplicated up - // to {fps}, which reads as a steppy / low-frame-rate dip. 
+ let fade_out_start = (duration - fade).max(0.0); format!( - "[0:v]split=2[bg][fg];\ - [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\ - crop={w}:{h},boxblur=20:2[bgb];\ - [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\ - [bgb][fgs]overlay=(W-w)/2:(H-h)/2,\ + "[{idx}:v]split=2[bg{idx}][fg{idx}];\ + [bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\ + crop={w}:{h},boxblur=20:2[bgb{idx}];\ + [fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\ + [bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\ fps={fps},\ - fade=t=in:st=0:d={FADE_SECONDS},\ - fade=t=out:st={fade_out_start:.3}:d={FADE_SECONDS},\ - setsar=1,format=yuv420p[v];\ - [1:a]apad[a]" + fade=t=in:st=0:d={fade},\ + fade=t=out:st={fade_out_start:.3}:d={fade},\ + setsar=1,format=yuv420p[v{idx}]" + ) +} + +/// Full `filter_complex` for a beat of `per_photo` durations: one chain per +/// photo, concatenated into `[v]`, with the narration (the last input, index +/// `per_photo.len()`) padded with trailing silence into `[a]`. A single-photo +/// beat degenerates to one chain + `concat=n=1` (a passthrough). +pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String { + let n = per_photo.len().max(1); + let fade = fade_for(n); + let chains: Vec = per_photo + .iter() + .enumerate() + .map(|(i, &d)| photo_filter_chain(i, opts, d, fade)) + .collect(); + let concat_inputs: String = (0..n).map(|i| format!("[v{i}]")).collect(); + format!( + "{chains};{concat_inputs}concat=n={n}:v=1:a=0[v];[{n}:a]apad[a]", + chains = chains.join(";") ) } @@ -128,15 +168,16 @@ fn video_encoder_args(nvenc: bool) -> Vec { .collect() } -/// Build the ffmpeg args that render one photo segment: a still looped for -/// `duration` seconds, filled to the portrait canvas with a blurred backdrop -/// (see [`segment_filtergraph`]) and the narration muxed in. `-t` bounds both -/// streams to the segment length. 
-pub fn build_segment_args( - image_path: &str, +/// Build the ffmpeg args that render one beat: each photo looped for its slice +/// of the beat (filled to the portrait canvas with a blurred backdrop), the +/// slices concatenated, and the single narration muxed over the whole thing. +/// `total` bounds the output (and the apad'd audio) to the beat length. +pub fn build_beat_args( + image_paths: &[String], audio_path: &str, out_path: &str, - duration: f64, + per_photo: &[f64], + total: f64, opts: &SegmentOpts, ) -> Vec { let fps = opts.fps.to_string(); @@ -144,26 +185,33 @@ pub fn build_segment_args( if opts.nvenc { args.extend(["-hwaccel".into(), "cuda".into()]); } + // One looped-still input per photo, each bounded to its slice by an input + // `-t`; reading at the target `-framerate` gives the fades real frames to + // ramp across. + for (path, &dur) in image_paths.iter().zip(per_photo.iter()) { + args.extend([ + "-framerate".into(), + fps.clone(), + "-loop".into(), + "1".into(), + "-t".into(), + format!("{dur:.3}"), + "-i".into(), + path.clone(), + ]); + } args.extend([ - // Read the looped still at the target rate so frames exist for the - // fade to ramp across (paired with the in-graph `fps` and CFR output). - "-framerate".into(), - fps.clone(), - "-loop".into(), - "1".into(), - "-i".into(), - image_path.into(), "-i".into(), audio_path.into(), "-filter_complex".into(), - segment_filtergraph(opts, duration), + beat_filtergraph(opts, per_photo), "-map".into(), "[v]".into(), "-map".into(), "[a]".into(), "-t".into(), - format!("{duration:.3}"), - // Force constant frame rate so the segment (and the concatenated reel) + format!("{total:.3}"), + // Force constant frame rate so the beat (and the concatenated reel) // plays at a steady {fps} rather than a variable cadence. "-r".into(), fps, @@ -231,22 +279,33 @@ async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> { Ok(()) } -/// Render one photo segment to `out_path`. 
-pub async fn render_segment( - image_path: &Path, +/// Render one beat to `out_path`: its photos shown in sequence (a held shot for +/// one photo, a quick burst for several) under the single narration in +/// `audio_path`, whose measured length sets the beat's pacing. +pub async fn render_beat( + image_paths: &[std::path::PathBuf], audio_path: &Path, out_path: &Path, - duration: f64, + narration_secs: f64, opts: &SegmentOpts, ) -> Result<()> { - let args = build_segment_args( - &image_path.to_string_lossy(), + if image_paths.is_empty() { + bail!("render_beat called with no images"); + } + let (total, per_photo) = beat_durations(narration_secs, image_paths.len()); + let paths: Vec = image_paths + .iter() + .map(|p| p.to_string_lossy().to_string()) + .collect(); + let args = build_beat_args( + &paths, &audio_path.to_string_lossy(), &out_path.to_string_lossy(), - duration, + &per_photo, + total, opts, ); - run_ffmpeg(&args, "segment render").await + run_ffmpeg(&args, "beat render").await } /// Join rendered segments into the final reel. Writes the concat list into the @@ -288,73 +347,108 @@ mod tests { } #[test] - fn filtergraph_fills_portrait_with_blurred_bg_and_fitted_fg() { - let g = segment_filtergraph(&SegmentOpts::default(), 4.0); - // Background covers + blurs; foreground fits and is centered over it. - assert!(g.contains("split=2[bg][fg]")); + fn beat_durations_single_photo_matches_base() { + let (total, per) = beat_durations(4.0, 1); + assert!((total - 4.6).abs() < 1e-9); // narration + tail + assert_eq!(per.len(), 1); + assert!((per[0] - 4.6).abs() < 1e-9); + } + + #[test] + fn beat_durations_burst_splits_evenly() { + // 5 photos, narration 4.6s base → ~0.92s each (above the 0.6 floor). 
+ let (total, per) = beat_durations(4.0, 5); + assert!((total - 4.6).abs() < 1e-9); + assert_eq!(per.len(), 5); + assert!((per.iter().sum::() - total).abs() < 1e-9); + assert!(per.iter().all(|&d| d >= MIN_BURST_PHOTO_SECONDS)); + } + + #[test] + fn beat_durations_stretches_when_narration_too_short_for_burst() { + // Floor narration (2.5s) over 10 photos would be 0.25s each — below the + // legibility floor, so the beat stretches to 10 × 0.6 = 6s. + let (total, per) = beat_durations(0.0, 10); + assert!((total - 6.0).abs() < 1e-9); + assert!(per.iter().all(|&d| (d - 0.6).abs() < 1e-9)); + } + + #[test] + fn beat_filtergraph_single_photo_fills_portrait_and_holds() { + let (_t, per) = beat_durations(4.0, 1); + let g = beat_filtergraph(&SegmentOpts::default(), &per); + assert!(g.contains("[0:v]split=2[bg0][fg0]")); assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase")); assert!(g.contains("crop=1080:1920")); - assert!(g.contains("boxblur")); assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease")); assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); - // Produces the labelled outputs build_segment_args maps. - assert!(g.contains("[v]")); + // Single photo → concat of one, gentle fade, audio is input 1. + assert!(g.contains("concat=n=1:v=1:a=0[v]")); + assert!(g.contains("d=0.35")); // SINGLE_FADE assert!(g.contains("[1:a]apad[a]")); - assert!(g.contains("format=yuv420p")); } #[test] - fn filtergraph_fades_in_and_out_within_duration() { - // 4s segment, 0.35s fade → fade-out starts at 3.65s. - let g = segment_filtergraph(&SegmentOpts::default(), 4.0); - assert!(g.contains("fade=t=in:st=0:d=0.35")); - assert!(g.contains("fade=t=out:st=3.650:d=0.35")); + fn beat_filtergraph_burst_chains_concats_and_snappy_fade() { + let (_t, per) = beat_durations(4.0, 3); + let g = beat_filtergraph(&SegmentOpts::default(), &per); + // One chain per photo with index-suffixed labels. 
+ assert!(g.contains("[0:v]split") && g.contains("[1:v]split") && g.contains("[2:v]split")); + // Concatenated in order, audio is the 4th input (index 3). + assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]")); + assert!(g.contains("[3:a]apad[a]")); + // Burst uses the snappier fade. + assert!(g.contains("d=0.15")); + assert!(!g.contains("d=0.35")); } #[test] - fn filtergraph_normalizes_fps_before_fading() { - // The fps filter must precede the fades, else the brightness ramp is - // sampled at the still's coarse cadence and looks steppy. - let g = segment_filtergraph(&SegmentOpts::default(), 4.0); + fn beat_filtergraph_normalizes_fps_before_fading() { + // fps must precede the fades on every chain (else the dip looks steppy). + let (_t, per) = beat_durations(4.0, 1); + let g = beat_filtergraph(&SegmentOpts::default(), &per); let fps_at = g.find("fps=30").expect("fps in graph"); let fade_at = g.find("fade=t=in").expect("fade in graph"); assert!(fps_at < fade_at); } #[test] - fn filtergraph_fade_out_start_never_negative_at_floor() { - // A floor-length segment shorter than a fade still yields st >= 0. - let g = segment_filtergraph(&SegmentOpts::default(), 0.2); - assert!(g.contains("fade=t=out:st=0.000:d=0.35")); - } - - #[test] - fn segment_args_loop_still_and_bound_with_t() { - let args = build_segment_args( - "/img.jpg", - "/a.wav", + fn beat_args_one_input_per_photo_plus_audio_bound_by_total() { + let (total, per) = beat_durations(4.0, 2); + let args = build_beat_args( + &["/a.jpg".into(), "/b.jpg".into()], + "/n.wav", "/out.mp4", - 4.0, + &per, + total, &SegmentOpts::default(), ); let joined = args.join(" "); - assert!(joined.contains("-framerate 30 -loop 1 -i /img.jpg")); - assert!(joined.contains("-i /a.wav")); - assert!(joined.contains("apad")); - assert!(joined.contains("-t 4.000")); - // Constant frame rate forced on the output. + // A looped-still input per photo, each with its slice -t, then the audio. 
+ assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg")); + assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg")); + assert!(joined.contains("-i /n.wav")); + // Output bounded to the beat total and forced CFR. + assert!(joined.contains("-t 4.600")); assert!(joined.contains("-r 30")); - assert!(joined.contains("libx264")); assert!(joined.ends_with("/out.mp4")); } #[test] - fn segment_args_use_nvenc_and_cuda_when_enabled() { + fn beat_args_use_nvenc_and_cuda_when_enabled() { let opts = SegmentOpts { nvenc: true, ..SegmentOpts::default() }; - let args = build_segment_args("/img.jpg", "/a.wav", "/out.mp4", 3.0, &opts); + let (total, per) = beat_durations(3.0, 1); + let args = build_beat_args( + &["/img.jpg".into()], + "/a.wav", + "/out.mp4", + &per, + total, + &opts, + ); let joined = args.join(" "); assert!(joined.contains("-hwaccel cuda")); assert!(joined.contains("h264_nvenc")); diff --git a/src/reels/script.rs b/src/reels/script.rs index 1cf3189..85fff7c 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -1,10 +1,11 @@ //! Narration scripting for memory reels. //! -//! One LLM call turns the planned segments (each carrying its date and, where +//! One LLM call turns the planned beats (each carrying its date and, where //! available, its cached insight) into a short first-person narration line per -//! photo plus a title for the reel. We reuse the cached insight summary as the -//! richest per-photo signal rather than re-running vision at reel time — that -//! keeps reel generation off the GPU's vision slot entirely. +//! beat plus a title for the reel. A beat may show several photos in a quick +//! burst, so a line narrates the *moment*, not a single frame. We reuse the +//! cached insight summary as the richest signal rather than re-running vision +//! at reel time — that keeps reel generation off the GPU's vision slot. //! //! The prompt builder and response parser are pure so the contract is //! 
unit-testable; `generate_script` wires them to the LLM client. @@ -12,11 +13,11 @@ use anyhow::{Context, Result}; use std::sync::Arc; -use super::{PlannedSegment, ReelMeta}; +use super::{PlannedBeat, ReelMeta}; use crate::ai::llamacpp::LlamaCppClient; use crate::ai::llm_client::LlmClient; -/// The narration for a whole reel: a title and one line per segment, in order. +/// The narration for a whole reel: a title and one line per beat, in order. #[derive(Debug, Clone, PartialEq)] pub struct ReelScript { pub title: String, @@ -26,33 +27,38 @@ pub struct ReelScript { const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \ slideshow of someone's own photos set to a spoken voiceover. Write warm, \ specific, first-person narration as if the person is gently looking back on \ -their own memories. Be concrete and grounded in the details given; never \ -invent names, places, or events that aren't supported. Keep each line to one \ -or two short sentences that can be read aloud in a few seconds. Avoid generic \ -filler like \"what a wonderful day\" — if you have little to go on, simply \ -describe the moment plainly."; +their own memories. Each line plays over one moment, which may be a quick burst \ +of several photos, so narrate the moment as a whole rather than a single frame. \ +Be concrete and grounded in the details given; never invent names, places, or \ +events that aren't supported. Keep each line to one or two short sentences that \ +can be read aloud in a few seconds. Avoid generic filler like \"what a \ +wonderful day\" — if you have little to go on, simply describe the moment \ +plainly."; /// Build the (system, user) prompt pair for the scripter. The user message -/// describes each segment in order and asks for strict JSON back. -pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (String, String) { +/// describes each beat in order and asks for strict JSON back. 
+pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) { let mut user = String::new(); user.push_str(&format!( - "These are {} photos surfaced as memories {}.\n\n", - planned.len(), + "This reel has {} moments surfaced as memories {}.\n\n", + beats.len(), meta.span_phrase() )); if !meta.years.is_empty() { let years: Vec = meta.years.iter().map(|y| y.to_string()).collect(); user.push_str(&format!("They span the years: {}.\n\n", years.join(", "))); } - user.push_str("Photos, in the order they will appear:\n"); - for (i, seg) in planned.iter().enumerate() { + user.push_str("Moments, in the order they will appear:\n"); + for (i, beat) in beats.iter().enumerate() { user.push_str(&format!("\n[{}]", i + 1)); - if let Some(date) = seg.date_label() { + if let Some(date) = beat.date_label() { user.push_str(&format!(" {date}")); } + if beat.photos.len() > 1 { + user.push_str(&format!(" (a burst of {} photos)", beat.photos.len())); + } user.push('\n'); - match (&seg.insight_title, &seg.insight_summary) { + match (&beat.insight_title, &beat.insight_summary) { (Some(t), Some(s)) if !s.trim().is_empty() => { user.push_str(&format!(" Known context: {t} — {s}\n")); } @@ -65,10 +71,10 @@ pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (St } user.push_str(&format!( "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\ - {{\"title\": \"\", \"segments\": [\"\", \ - \"\", ... ]}}\n\ - The \"segments\" array MUST have exactly {} items, one per photo in order.", - planned.len() + {{\"title\": \"\", \"segments\": [\"\", \ + \"\", ... ]}}\n\ + The \"segments\" array MUST have exactly {} items, one per moment in order.", + beats.len() )); (SYSTEM_PROMPT.to_string(), user) } @@ -174,20 +180,20 @@ fn clean_text(s: &str) -> String { trimmed.split_whitespace().collect::>().join(" ") } -/// Generate the reel script via the LLM. Text-only (no images) — the per-photo +/// Generate the reel script via the LLM. 
Text-only (no images) — the per-beat /// context comes from cached insights. The call takes the GPU read lease /// internally (see `LlamaCppClient::generate`). pub async fn generate_script( client: &Arc, meta: &ReelMeta, - planned: &[PlannedSegment], + beats: &[PlannedBeat], ) -> Result { - let (system, user) = build_script_messages(meta, planned); + let (system, user) = build_script_messages(meta, beats); let raw = client .generate(&user, Some(&system), None) .await .context("LLM script generation failed")?; - Ok(parse_script_response(&raw, planned.len())) + Ok(parse_script_response(&raw, beats.len())) } #[cfg(test)] @@ -202,13 +208,13 @@ mod tests { } } - fn planned(n: usize) -> Vec { + fn planned(n: usize) -> Vec { (0..n) - .map(|i| PlannedSegment { - media: super::super::SegmentMedia::Photo { + .map(|i| PlannedBeat { + photos: vec![super::super::SegmentMedia::Photo { rel_path: format!("p{i}.jpg"), library_id: 1, - }, + }], date: Some(1_560_000_000 + i as i64 * 86_400), insight_title: None, insight_summary: None, @@ -217,16 +223,37 @@ mod tests { } #[test] - fn prompt_states_exact_segment_count_and_span() { + fn prompt_states_exact_moment_count_and_span() { let (sys, user) = build_script_messages(&meta(), &planned(3)); assert!(sys.contains("memory reel")); - assert!(user.contains("3 photos")); + assert!(user.contains("3 moments")); assert!(user.contains("on this day")); assert!(user.contains("exactly 3 items")); - // Each photo gets an indexed entry. + // Each moment gets an indexed entry. 
assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]")); } + #[test] + fn prompt_notes_burst_photo_count() { + let mut p = planned(1); + p[0].photos = vec![ + super::super::SegmentMedia::Photo { + rel_path: "a.jpg".into(), + library_id: 1, + }, + super::super::SegmentMedia::Photo { + rel_path: "b.jpg".into(), + library_id: 1, + }, + super::super::SegmentMedia::Photo { + rel_path: "c.jpg".into(), + library_id: 1, + }, + ]; + let (_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("a burst of 3 photos")); + } + #[test] fn prompt_includes_insight_context_when_present() { let mut p = planned(1); diff --git a/src/reels/selector.rs b/src/reels/selector.rs index 0a53ee5..fb83e38 100644 --- a/src/reels/selector.rs +++ b/src/reels/selector.rs @@ -13,18 +13,51 @@ use std::sync::Mutex; use chrono::{DateTime, Datelike, FixedOffset}; -use super::{PlannedSegment, ReelMeta, SegmentMedia}; +use super::{PlannedBeat, ReelMeta, SegmentMedia}; use crate::database::{ExifDao, InsightDao}; use crate::file_types::is_image_file; use crate::memories::{self, MemoriesSpan}; use crate::state::AppState; -/// Default and hard caps on how many photos a reel covers. The cap bounds the -/// LLM/TTS/ffmpeg work per reel; when a span has more, [`sample_evenly`] keeps -/// a representative spread across the years rather than just the oldest. -pub const DEFAULT_MAX_SEGMENTS: usize = 24; +/// Default and hard caps on how many photos a reel covers. The default is an +/// upper bound on the request; the effective count is usually smaller, set by +/// the duration budget (see [`budget_segments`]). The hard cap bounds work per +/// reel regardless. +pub const DEFAULT_MAX_SEGMENTS: usize = 40; pub const HARD_MAX_SEGMENTS: usize = 40; +/// Target reel length. Week and especially month spans can surface hundreds of +/// photos; at a few seconds of narration each, a naive reel runs minutes. We +/// cap the segment count to keep the reel near this length. 
Tunable via +/// `REEL_TARGET_SECONDS`. +const DEFAULT_TARGET_REEL_SECONDS: f64 = 90.0; + +/// Rough average wall-time per photo segment (a short narration line + the +/// silent tail). Only used to turn the duration target into a segment count; +/// the real per-segment time is the measured narration length. +const EST_SECONDS_PER_SEGMENT: f64 = 5.0; + +/// Time gap that separates one "event/moment" from the next when clustering a +/// span's photos. Photos within a few hours are treated as the same occasion +/// (and across years/days the gaps are far larger, so each instance clusters +/// on its own). 4 hours splits e.g. a morning hike from an evening dinner. +const EVENT_GAP_SECONDS: i64 = 4 * 3600; + +fn target_reel_seconds() -> f64 { + std::env::var("REEL_TARGET_SECONDS") + .ok() + .and_then(|s| s.trim().parse::().ok()) + .filter(|x| x.is_finite() && *x > 0.0) + .unwrap_or(DEFAULT_TARGET_REEL_SECONDS) +} + +/// How many photo segments fit the duration budget, bounded by the request's +/// max and the hard cap. This is what keeps week/month reels from running long. +pub fn budget_segments(requested_max: usize) -> usize { + let by_budget = (target_reel_seconds() / EST_SECONDS_PER_SEGMENT).floor() as usize; + by_budget.min(requested_max).clamp(1, HARD_MAX_SEGMENTS) +} + /// What a reel is built from. v1 ships the memories (on this day/week/month) /// selector; tag and date-range variants slot in here later. #[derive(Debug, Clone)] @@ -81,6 +114,104 @@ pub fn sample_evenly(items: &[T], max: usize) -> Vec { .collect() } +/// Group time-sorted items into events by gap: a new event starts whenever the +/// jump from the previous photo exceeds `gap_seconds`. Preserves order; items +/// without a timestamp extend the current event. 
+fn cluster_by_gap( + items: &[memories::MemoryItem], + gap_seconds: i64, +) -> Vec> { + let mut clusters: Vec> = Vec::new(); + let mut prev_ts: Option = None; + for it in items { + let starts_new = match (prev_ts, it.created) { + (Some(p), Some(c)) => c - p > gap_seconds, + _ => false, + }; + if starts_new || clusters.is_empty() { + clusters.push(Vec::new()); + } + clusters.last_mut().unwrap().push(it.clone()); + if let Some(c) = it.created { + prev_ts = Some(c); + } + } + clusters +} + +/// Most photos a single beat will flash through. Bounds the burst so one huge +/// event doesn't dominate, and keeps each photo on screen long enough to +/// register at the per-beat narration length (see render's beat timing). +pub const MAX_BURST_PHOTOS: usize = 10; + +/// Merge a list of (time-ordered) event clusters into exactly `n` contiguous +/// groups, so a span with more events than the beat budget still covers the +/// whole timeline — adjacent events fold together into one beat rather than +/// getting dropped. `n` must be ≥ 1 and ≤ clusters.len(). +fn partition_into_groups( + clusters: Vec>, + n: usize, +) -> Vec> { + let c = clusters.len(); + let mut clusters = clusters.into_iter(); + (0..n) + .map(|j| { + // Even contiguous split of c clusters into n groups. + let start = j * c / n; + let end = (j + 1) * c / n; + let take = end.saturating_sub(start).max(1); + (0..take) + .flat_map(|_| clusters.next().into_iter().flatten()) + .collect() + }) + .collect() +} + +/// Turn a span's photos into `n_beats` beats. Clusters photos into events by +/// time gap; if there are more events than beats, adjacent events are merged so +/// the whole span is still covered. Each beat then flashes up to +/// `max_burst` photos (an even spread of its group) under one narration line — +/// so a week/month reel *shows* all its moments without a narrated (and timed) +/// segment per photo. 
+pub fn form_beats( + items: &[memories::MemoryItem], + n_beats: usize, + max_burst: usize, +) -> Vec { + if n_beats == 0 || items.is_empty() { + return Vec::new(); + } + let clusters = cluster_by_gap(items, EVENT_GAP_SECONDS); + // One beat per event when they fit; otherwise fold adjacent events together + // into exactly n_beats groups. + let groups = if clusters.len() <= n_beats { + clusters + } else { + partition_into_groups(clusters, n_beats) + }; + + groups + .into_iter() + .filter(|g| !g.is_empty()) + .map(|group| { + let shown = sample_evenly(&group, max_burst); + let date = shown.first().and_then(|it| it.created); + PlannedBeat { + photos: shown + .into_iter() + .map(|it| SegmentMedia::Photo { + rel_path: it.path, + library_id: it.library_id, + }) + .collect(), + date, + insight_title: None, + insight_summary: None, + } + }) + .collect() +} + /// Cheap pass: resolve the selector into an ordered list of media (no insight /// lookups yet) plus reel metadata. `Err` only on an invalid library param. pub fn resolve( @@ -88,7 +219,7 @@ pub fn resolve( exif_dao: &Mutex>, span_context: &opentelemetry::Context, selector: &ReelSelector, -) -> Result<(Vec, ReelMeta), String> { +) -> Result<(Vec, ReelMeta), String> { match selector { ReelSelector::Memories { span, @@ -108,32 +239,23 @@ pub fn resolve( )?; // Phase 1 is photos-only: drop videos (a clip segment type lands - // in phase 2). Filter before sampling so the spread is over the - // photos that will actually appear. + // in phase 2). let items: Vec = items .into_iter() .filter(|it| is_image_file(Path::new(&it.path))) .collect(); - let cap = (*max_segments).clamp(1, HARD_MAX_SEGMENTS); - let items = sample_evenly(&items, cap); - + // Years are derived from the whole span (what the reel represents), + // before the budget narrows it down to beats. 
let years = distinct_years(&items, client_tz); let meta = ReelMeta { span: *span, years }; - let planned = items - .into_iter() - .map(|it| PlannedSegment { - media: SegmentMedia::Photo { - rel_path: it.path, - library_id: it.library_id, - }, - date: it.created, - insight_title: None, - insight_summary: None, - }) - .collect(); - Ok((planned, meta)) + // The budget caps the number of narrated beats (≈ reel length); + // each beat then bursts through several photos, so the reel covers + // the span's moments without running minutes long. + let n_beats = budget_segments(*max_segments); + let beats = form_beats(&items, n_beats, MAX_BURST_PHOTOS); + Ok((beats, meta)) } } } @@ -155,24 +277,24 @@ fn distinct_years(items: &[memories::MemoryItem], tz: Option) -> Ve years } -/// Background pass: fill each segment's cached insight (title + summary) where -/// one exists. Best-effort — a missing or errored lookup leaves the fields -/// `None` and the scripter narrates from the date alone. +/// Background pass: fill each beat's cached insight (title + summary) from its +/// lead photo, where one exists. Best-effort — a missing or errored lookup +/// leaves the fields `None` and the scripter narrates from the date alone. pub fn enrich( insight_dao: &Mutex>, span_context: &opentelemetry::Context, - planned: &mut [PlannedSegment], + beats: &mut [PlannedBeat], ) { let Ok(mut dao) = insight_dao.lock() else { return; }; - for seg in planned.iter_mut() { - let rel_path = match &seg.media { - SegmentMedia::Photo { rel_path, .. } => rel_path, + for beat in beats.iter_mut() { + let Some(SegmentMedia::Photo { rel_path, .. 
}) = beat.photos.first() else { + continue; }; if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) { - seg.insight_title = Some(insight.title); - seg.insight_summary = Some(insight.summary); + beat.insight_title = Some(insight.title); + beat.insight_summary = Some(insight.summary); } } } @@ -249,4 +371,78 @@ mod tests { ]; assert_eq!(distinct_years(&items, None), vec![2019, 2021]); } + + // Build an item at a given unix timestamp (seconds). + fn item_at(ts: i64, name: &str) -> memories::MemoryItem { + memories::MemoryItem { + path: format!("{name}.jpg"), + created: Some(ts), + modified: None, + library_id: 1, + } + } + + #[test] + fn budget_segments_caps_to_duration_target() { + // 90s / 5s ≈ 18, bounded by the request max and hard cap. + assert_eq!(budget_segments(40), 18); + assert_eq!(budget_segments(5), 5); // request asked for fewer + assert_eq!(budget_segments(1000), 18); // hard cap / budget wins + } + + #[test] + fn cluster_by_gap_splits_on_large_jumps() { + // Two photos minutes apart, then one a day later → two events. + let items = vec![ + item_at(1_000_000, "a"), + item_at(1_000_300, "b"), // +5 min → same event + item_at(1_100_000, "c"), // +~27h → new event + ]; + let clusters = cluster_by_gap(&items, EVENT_GAP_SECONDS); + assert_eq!(clusters.len(), 2); + assert_eq!(clusters[0].len(), 2); + assert_eq!(clusters[1].len(), 1); + } + + #[test] + fn form_beats_one_beat_per_event_when_they_fit() { + // Three well-separated events, budget of 10 → three beats, each holding + // all of its (few) photos. 
+ let items = vec![ + item_at(0, "a"), + item_at(50, "b"), // same event as a + item_at(1_000_000, "c"), + item_at(2_000_000, "d"), + ]; + let beats = form_beats(&items, 10, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 3); + assert_eq!(beats[0].photos.len(), 2); // burst of the first event + assert_eq!(beats[1].photos.len(), 1); + assert_eq!(beats[2].photos.len(), 1); + } + + #[test] + fn form_beats_merges_events_when_over_budget() { + // Six distinct events but only two beats → adjacent events fold in, and + // every event's photos still appear (capped by the burst max). + let items: Vec = (0..6) + .map(|i| item_at(i as i64 * 1_000_000, &format!("e{i}"))) + .collect(); + let beats = form_beats(&items, 2, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 2); + let shown: usize = beats.iter().map(|b| b.photos.len()).sum(); + assert_eq!(shown, 6); // all six moments still shown across two beats + } + + #[test] + fn form_beats_caps_burst_to_max() { + // One dense event of 30 photos, generous budget → a single beat that + // bursts at most MAX_BURST_PHOTOS, not all 30. 
+ let items: Vec = (0..30) + .map(|i| item_at(i as i64, &format!("p{i}"))) + .collect(); + let beats = form_beats(&items, 18, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 1); + assert_eq!(beats[0].photos.len(), MAX_BURST_PHOTOS); + } } -- 2.52.0 From 299e32b014f230f83f622ee0e16b3deb8e7048d8 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 12 Jun 2026 23:45:24 -0400 Subject: [PATCH 06/26] Bump version to 1.4.0 Co-Authored-By: Claude Fable 5 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a35a7d2..9455f5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2051,7 +2051,7 @@ dependencies = [ [[package]] name = "image-api" -version = "1.3.0" +version = "1.4.0" dependencies = [ "actix", "actix-cors", diff --git a/Cargo.toml b/Cargo.toml index 3b3a08a..860e6ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "image-api" -version = "1.3.0" +version = "1.4.0" authors = ["Cameron Cordes "] edition = "2024" -- 2.52.0 From 65793a2dda9747aaa03547c7c35fb4509d60c5ac Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 00:02:51 -0400 Subject: [PATCH 07/26] Reels: mixed-media (video clip beats) + faster burst fade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Videos in a span now appear as clip beats: the first few seconds of the video (capped at CLIP_SECONDS=5, and to the source length) filled to the portrait canvas like photos, with its live audio ducked under the narration (amix at 0.35). If the narration outlasts the clip, the last frame is held (tpad); clips with no audio track just play under narration. Selection splits the beat budget between photo beats and clip beats — clips get up to half (≥1 when present), photos the rest — then merges both back into chronological order. 
SegmentMedia gains a Clip variant; beats carry `media` (photos or one clip) and the cache key tags P/C so a path used as a still vs a clip differ. Also drops the burst fade from 0.15s to 0.08s so a quick burst reads clearly differently from a held shot. Bumps RENDER_VERSION. The clip filtergraph (fill + duck-mix + last-frame hold) is unit-tested but, like the rest of the ffmpeg path, wants a real render check on the GPU host. Co-Authored-By: Claude Fable 5 --- src/reels/mod.rs | 161 ++++++++++++++++++++++--------- src/reels/render.rs | 218 +++++++++++++++++++++++++++++++++++++++++- src/reels/script.rs | 21 +++- src/reels/selector.rs | 170 +++++++++++++++++++++++++------- 4 files changed, 479 insertions(+), 91 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index be3f52d..17def48 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -36,21 +36,40 @@ use crate::otel::extract_context_from_request; use crate::state::AppState; use selector::ReelSelector; -/// The media behind one shot. Photos-only for now; a `Clip` variant (a section -/// of a source video) is the phase-2 extension point. +/// The media behind one shot: a still photo, or a short section of a source +/// video (played with its live audio ducked under the narration). Both carry +/// just the library-relative path; the renderer applies fixed clip framing +/// (start/length) from constants. #[derive(Debug, Clone)] pub enum SegmentMedia { Photo { rel_path: String, library_id: i32 }, + Clip { rel_path: String, library_id: i32 }, } -/// A beat: one narration line over one or more photos. A single-photo beat is a -/// held shot; a multi-photo beat is a quick burst that flashes through several -/// moments of the same event while the line is read — so a week/month reel can -/// *show* everything it spans without a narration line (and the seconds that -/// come with it) per photo. +impl SegmentMedia { + fn rel_path(&self) -> &str { + match self { + SegmentMedia::Photo { rel_path, .. 
} | SegmentMedia::Clip { rel_path, .. } => rel_path, + } + } + fn library_id(&self) -> i32 { + match self { + SegmentMedia::Photo { library_id, .. } | SegmentMedia::Clip { library_id, .. } => { + *library_id + } + } + } +} + +/// A beat: one narration line over its media. A photo beat holds one still (a +/// held shot) or several (a quick burst that flashes through moments of an +/// event while the line is read). A clip beat holds a single video clip. Either +/// way one narration line covers the whole beat, so a week/month reel can +/// *show* everything it spans without a narration line — and the seconds that +/// come with it — per item. #[derive(Debug, Clone)] pub struct PlannedBeat { - pub photos: Vec, + pub media: Vec, pub date: Option, pub insight_title: Option, pub insight_summary: Option, @@ -63,6 +82,11 @@ impl PlannedBeat { let dt = DateTime::from_timestamp(ts, 0)?; Some(dt.format("%B %-d, %Y").to_string()) } + + /// True when this beat is a single video clip (vs one or more photos). + pub fn is_clip(&self) -> bool { + matches!(self.media.as_slice(), [SegmentMedia::Clip { .. }]) + } } /// Reel-wide metadata the scripter uses for framing. @@ -183,7 +207,7 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 4; +const RENDER_VERSION: u32 = 5; /// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump /// over the ~0.5 default warms up otherwise-flat narration without over-acting; @@ -207,12 +231,13 @@ fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str voice.unwrap_or("default") ); for m in media { - match m { - SegmentMedia::Photo { - rel_path, - library_id, - } => buf.push_str(&format!("{library_id}:{rel_path}|")), - } + // Tag photo vs clip so the same path used as a still and as a video + // clip produce different keys. + let tag = match m { + SegmentMedia::Photo { .. 
} => 'P', + SegmentMedia::Clip { .. } => 'C', + }; + buf.push_str(&format!("{tag}{}:{}|", m.library_id(), m.rel_path())); } blake3::hash(buf.as_bytes()).to_hex().to_string() } @@ -309,9 +334,9 @@ pub async fn create_reel_handler( })); } - // Flatten every photo across beats (in order) into the cache key — the key - // tracks exactly which photos appear and in what sequence. - let media: Vec = planned.iter().flat_map(|b| b.photos.clone()).collect(); + // Flatten every media item across beats (in order) into the cache key — the + // key tracks exactly which photos/clips appear and in what sequence. + let media: Vec = planned.iter().flat_map(|b| b.media.clone()).collect(); let voice = req.voice.clone().filter(|s| !s.is_empty()); let key = cache_key(&selector, &media, voice.as_deref()); @@ -462,7 +487,7 @@ async fn run_reel_job( use anyhow::{Context, anyhow}; let started = Instant::now(); - let total_photos: usize = planned.iter().map(|b| b.photos.len()).sum(); + let total_photos: usize = planned.iter().map(|b| b.media.len()).sum(); log::info!( "reel {job_id}: starting — span {:?}, {} beats, {} photos, voice={}", meta.span, @@ -510,15 +535,15 @@ async fn run_reel_job( let beat_total = planned.len(); let mut beat_files: Vec = Vec::new(); for (i, (beat, line)) in planned.iter().zip(script.lines.iter()).enumerate() { - // Resolve all of the beat's photos to absolute paths; drop any that - // don't resolve. An empty beat is skipped. - let image_paths: Vec = beat - .photos + // Resolve the beat's media to absolute paths; drop any that don't + // resolve. An empty beat is skipped. 
+ let paths: Vec = beat + .media .iter() - .filter_map(|m| resolve_image_path(app_state, m)) + .filter_map(|m| resolve_media_path(app_state, m)) .collect(); - if image_paths.is_empty() { - log::warn!("reel {job_id}: skipping beat {i}, no image paths resolved"); + if paths.is_empty() { + log::warn!("reel {job_id}: skipping beat {i}, no media paths resolved"); continue; } @@ -551,17 +576,26 @@ async fn run_reel_job( .unwrap_or(render::MIN_SEGMENT_SECONDS); set_stage(job_id, "rendering"); - log::info!( - "reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s", - i + 1, - beat_total, - image_paths.len(), - narration_secs - ); let beat_out = work.path().join(format!("beat_{i:03}.mp4")); - if let Err(e) = - render::render_beat(&image_paths, &audio_path, &beat_out, narration_secs, &opts).await - { + let render_result = if beat.is_clip() { + log::info!( + "reel {job_id}: beat {}/{} — video clip, narration {:.1}s", + i + 1, + beat_total, + narration_secs + ); + render::render_clip_beat(&paths[0], &audio_path, &beat_out, narration_secs, &opts).await + } else { + log::info!( + "reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s", + i + 1, + beat_total, + paths.len(), + narration_secs + ); + render::render_beat(&paths, &audio_path, &beat_out, narration_secs, &opts).await + }; + if let Err(e) = render_result { log::warn!("reel {job_id}: skipping beat {i}, render failed: {e}"); continue; } @@ -603,15 +637,12 @@ async fn run_reel_job( Ok((script.title, final_path)) } -/// Resolve a photo segment's library-relative path to a validated absolute -/// path under its library root. 
-fn resolve_image_path(app_state: &AppState, media: &SegmentMedia) -> Option { - let SegmentMedia::Photo { - rel_path, - library_id, - } = media; - let lib = app_state.library_by_id(*library_id)?; - crate::files::is_valid_full_path(&lib.root_path, rel_path, false) +/// Resolve a media item's library-relative path to a validated absolute path +/// under its library root (works for both photos and clips). +fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option { + let lib = app_state.library_by_id(media.library_id())?; + let rel = media.rel_path().to_string(); + crate::files::is_valid_full_path(&lib.root_path, &rel, false) } #[cfg(test)] @@ -625,6 +656,13 @@ mod tests { } } + fn clip(p: &str, lib: i32) -> SegmentMedia { + SegmentMedia::Clip { + rel_path: p.to_string(), + library_id: lib, + } + } + fn day_selector() -> ReelSelector { ReelSelector::Memories { span: MemoriesSpan::Day, @@ -668,6 +706,35 @@ mod tests { assert_ne!(base, cache_key(&week, &media, Some("grandma"))); } + #[test] + fn cache_key_distinguishes_photo_from_clip() { + // Same path/library used as a still vs a video clip must differ. 
+ let as_photo = vec![photo("v.mp4", 1)]; + let as_clip = vec![clip("v.mp4", 1)]; + assert_ne!( + cache_key(&day_selector(), &as_photo, None), + cache_key(&day_selector(), &as_clip, None) + ); + } + + #[test] + fn is_clip_only_for_single_clip_beat() { + let clip_beat = PlannedBeat { + media: vec![clip("v.mp4", 1)], + date: None, + insight_title: None, + insight_summary: None, + }; + let photo_beat = PlannedBeat { + media: vec![photo("a.jpg", 1), photo("b.jpg", 1)], + date: None, + insight_title: None, + insight_summary: None, + }; + assert!(clip_beat.is_clip()); + assert!(!photo_beat.is_clip()); + } + #[test] fn span_phrase_maps_each_span() { let mk = |span| ReelMeta { @@ -682,7 +749,7 @@ mod tests { #[test] fn date_label_formats_or_none() { let beat = PlannedBeat { - photos: vec![photo("a.jpg", 1)], + media: vec![photo("a.jpg", 1)], date: Some(1_560_384_000), // 2019-06-13 UTC insight_title: None, insight_summary: None, @@ -690,7 +757,7 @@ mod tests { assert!(beat.date_label().unwrap().contains("2019")); let undated = PlannedBeat { - photos: vec![photo("a.jpg", 1)], + media: vec![photo("a.jpg", 1)], date: None, insight_title: None, insight_summary: None, diff --git a/src/reels/render.rs b/src/reels/render.rs index a36f6f1..4e9431b 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -36,9 +36,15 @@ pub const MIN_SEGMENT_SECONDS: f64 = 2.5; const NARRATION_TAIL_SECONDS: f64 = 0.6; /// Fade durations baked into each photo. A held (single-photo) beat gets a -/// gentle dip; burst photos get a snappier fade so the montage feels quick. +/// gentle dip; burst photos get a much snappier fade so the difference between +/// a held shot and a quick burst is obvious. const SINGLE_FADE_SECONDS: f64 = 0.35; -const BURST_FADE_SECONDS: f64 = 0.15; +const BURST_FADE_SECONDS: f64 = 0.08; + +/// Video-clip framing. A clip plays at most this long, with its live audio +/// ducked to `CLIP_DUCK_VOLUME` under the narration. 
+pub const CLIP_SECONDS: f64 = 5.0; +const CLIP_DUCK_VOLUME: f64 = 0.35; /// Floor on how long each burst photo stays up, so a long line over many photos /// doesn't flash them subliminally. If the narration is too short to give every @@ -308,6 +314,162 @@ pub async fn render_beat( run_ffmpeg(&args, "beat render").await } +// --- Video-clip beats -------------------------------------------------------- + +/// Video chain for a clip beat: fill the clip to the portrait canvas (blurred +/// backdrop, same look as photos), normalize fps, hold the last frame if the +/// narration outlasts the clip (`tpad`), then fade. Produces `[v]`. +fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String { + let (w, h, fps) = (opts.width, opts.height, opts.fps); + let fade = SINGLE_FADE_SECONDS; + let hold = (beat_total - clip_dur).max(0.0); + let fade_out_start = (beat_total - fade).max(0.0); + // Freeze the final frame to cover narration that runs past the clip. + let tpad = if hold > 0.05 { + format!(",tpad=stop_mode=clone:stop_duration={hold:.3}") + } else { + String::new() + }; + format!( + "[0:v]split=2[bg][fg];\ + [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\ + crop={w}:{h},boxblur=20:2[bgb];\ + [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\ + [bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\ + fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\ + setsar=1,format=yuv420p[v]" + ) +} + +/// Audio chain for a clip beat. With a clip audio track, duck it under the +/// narration and mix; without one, just the narration. Produces `[a]`. +fn clip_audio_filter(has_audio: bool) -> String { + if has_audio { + format!( + "[0:a]volume={CLIP_DUCK_VOLUME}[duck];[1:a]apad[narr];\ + [duck][narr]amix=inputs=2:duration=longest:normalize=0[a]" + ) + } else { + "[1:a]apad[a]".to_string() + } +} + +/// Full `filter_complex` for a clip beat (input 0 = clip, input 1 = narration). 
+pub fn clip_beat_filtergraph( + opts: &SegmentOpts, + clip_dur: f64, + beat_total: f64, + has_audio: bool, +) -> String { + format!( + "{};{}", + clip_video_filter(opts, clip_dur, beat_total), + clip_audio_filter(has_audio) + ) +} + +/// Build the ffmpeg args for a clip beat: the first `clip_dur` seconds of the +/// source video, filled to the portrait canvas with its live audio ducked under +/// the narration, bounded to `beat_total`. +pub fn build_clip_beat_args( + clip_path: &str, + audio_path: &str, + out_path: &str, + clip_dur: f64, + beat_total: f64, + has_audio: bool, + opts: &SegmentOpts, +) -> Vec { + let fps = opts.fps.to_string(); + let mut args: Vec = vec!["-y".into()]; + if opts.nvenc { + args.extend(["-hwaccel".into(), "cuda".into()]); + } + args.extend([ + // Input `-t` limits the clip to its window; audio has none (apad fills). + "-t".into(), + format!("{clip_dur:.3}"), + "-i".into(), + clip_path.into(), + "-i".into(), + audio_path.into(), + "-filter_complex".into(), + clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio), + "-map".into(), + "[v]".into(), + "-map".into(), + "[a]".into(), + "-t".into(), + format!("{beat_total:.3}"), + "-r".into(), + fps, + ]); + args.extend(video_encoder_args(opts.nvenc)); + args.extend( + ["-c:a", "aac", "-b:a", "160k", "-ar", "48000"] + .iter() + .map(|s| s.to_string()), + ); + args.push(out_path.into()); + args +} + +/// Whether a media file has at least one audio stream (so a clip beat knows +/// whether to mix in live audio). Defaults to `false` on any probe failure. +pub async fn has_audio_stream(path: &str) -> bool { + Command::new("ffprobe") + .args([ + "-v", + "error", + "-select_streams", + "a", + "-show_entries", + "stream=index", + "-of", + "csv=p=0", + path, + ]) + .output() + .await + .map(|out| !out.stdout.is_empty()) + .unwrap_or(false) +} + +/// Render one clip beat: a section of `clip_path` (capped at [`CLIP_SECONDS`], +/// and to the source length) under the narration in `audio_path`. 
The beat +/// lasts at least the narration, freezing the clip's last frame if needed. +pub async fn render_clip_beat( + clip_path: &Path, + audio_path: &Path, + out_path: &Path, + narration_secs: f64, + opts: &SegmentOpts, +) -> Result<()> { + let clip_str = clip_path.to_string_lossy().to_string(); + // Clamp the clip to its own length so a short video isn't padded to the cap. + let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str) + .await + .ok() + .flatten(); + let clip_dur = match source_dur { + Some(d) if d > 0.0 && d < CLIP_SECONDS => d, + _ => CLIP_SECONDS, + }; + let beat_total = clip_dur.max(segment_duration(narration_secs)); + let has_audio = has_audio_stream(&clip_str).await; + + let args = build_clip_beat_args( + &clip_str, + &audio_path.to_string_lossy(), + &out_path.to_string_lossy(), + clip_dur, + beat_total, + has_audio, + opts, + ); + run_ffmpeg(&args, "clip beat render").await +} + /// Join rendered segments into the final reel. Writes the concat list into the /// same directory as the output so relative paths and cleanup stay local. pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> { @@ -397,8 +559,8 @@ mod tests { // Concatenated in order, audio is the 4th input (index 3). assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]")); assert!(g.contains("[3:a]apad[a]")); - // Burst uses the snappier fade. - assert!(g.contains("d=0.15")); + // Burst uses the much snappier fade (vs 0.35 for a held shot). + assert!(g.contains("d=0.08")); assert!(!g.contains("d=0.35")); } @@ -455,6 +617,54 @@ mod tests { assert!(!joined.contains("libx264")); } + #[test] + fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() { + // 5s clip, 7s beat → 2s freeze of the last frame, ducked-audio mix. 
+ let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true); + assert!(g.contains("tpad=stop_mode=clone:stop_duration=2.000")); + assert!(g.contains("volume=0.35")); + assert!(g.contains("amix=inputs=2")); + assert!(g.contains("[1:a]apad[narr]")); + // Fill applied to the clip too. + assert!(g.contains("boxblur")); + assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); + } + + #[test] + fn clip_filter_no_tpad_when_clip_covers_the_beat() { + // Clip at least as long as the beat → no freeze. + let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true); + assert!(!g.contains("tpad")); + } + + #[test] + fn clip_filter_narration_only_without_clip_audio() { + let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false); + assert!(!g.contains("amix")); + assert!(!g.contains("volume=")); + assert!(g.contains("[1:a]apad[a]")); + } + + #[test] + fn clip_beat_args_bound_clip_and_output() { + let args = build_clip_beat_args( + "/v.mp4", + "/n.wav", + "/out.mp4", + 5.0, + 6.6, + true, + &SegmentOpts::default(), + ); + let joined = args.join(" "); + // Input -t bounds the clip read; output -t bounds the beat. 
+ assert!(joined.contains("-t 5.000 -i /v.mp4")); + assert!(joined.contains("-i /n.wav")); + assert!(joined.contains("-t 6.600")); + assert!(joined.contains("-r 30")); + assert!(joined.ends_with("/out.mp4")); + } + #[test] fn concat_args_stream_copy_with_faststart_and_forced_muxer() { // Output goes to a .tmp path, so the muxer must be forced — ffmpeg diff --git a/src/reels/script.rs b/src/reels/script.rs index 85fff7c..5be3d64 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -54,8 +54,10 @@ pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, if let Some(date) = beat.date_label() { user.push_str(&format!(" {date}")); } - if beat.photos.len() > 1 { - user.push_str(&format!(" (a burst of {} photos)", beat.photos.len())); + if beat.is_clip() { + user.push_str(" (a video clip)"); + } else if beat.media.len() > 1 { + user.push_str(&format!(" (a burst of {} photos)", beat.media.len())); } user.push('\n'); match (&beat.insight_title, &beat.insight_summary) { @@ -211,7 +213,7 @@ mod tests { fn planned(n: usize) -> Vec { (0..n) .map(|i| PlannedBeat { - photos: vec![super::super::SegmentMedia::Photo { + media: vec![super::super::SegmentMedia::Photo { rel_path: format!("p{i}.jpg"), library_id: 1, }], @@ -236,7 +238,7 @@ mod tests { #[test] fn prompt_notes_burst_photo_count() { let mut p = planned(1); - p[0].photos = vec![ + p[0].media = vec![ super::super::SegmentMedia::Photo { rel_path: "a.jpg".into(), library_id: 1, @@ -254,6 +256,17 @@ mod tests { assert!(user.contains("a burst of 3 photos")); } + #[test] + fn prompt_marks_clip_beats() { + let mut p = planned(1); + p[0].media = vec![super::super::SegmentMedia::Clip { + rel_path: "v.mp4".into(), + library_id: 1, + }]; + let (_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("a video clip")); + } + #[test] fn prompt_includes_insight_context_when_present() { let mut p = planned(1); diff --git a/src/reels/selector.rs b/src/reels/selector.rs index 
fb83e38..d096f6d 100644 --- a/src/reels/selector.rs +++ b/src/reels/selector.rs @@ -15,7 +15,7 @@ use chrono::{DateTime, Datelike, FixedOffset}; use super::{PlannedBeat, ReelMeta, SegmentMedia}; use crate::database::{ExifDao, InsightDao}; -use crate::file_types::is_image_file; +use crate::file_types::{is_image_file, is_video_file}; use crate::memories::{self, MemoriesSpan}; use crate::state::AppState; @@ -167,13 +167,13 @@ fn partition_into_groups( .collect() } -/// Turn a span's photos into `n_beats` beats. Clusters photos into events by +/// Turn photo items into `n_beats` photo beats. Clusters photos into events by /// time gap; if there are more events than beats, adjacent events are merged so -/// the whole span is still covered. Each beat then flashes up to -/// `max_burst` photos (an even spread of its group) under one narration line — -/// so a week/month reel *shows* all its moments without a narrated (and timed) +/// the whole span is still covered. Each beat then flashes up to `max_burst` +/// photos (an even spread of its group) under one narration line — so a +/// week/month reel *shows* all its moments without a narrated (and timed) /// segment per photo. -pub fn form_beats( +fn form_photo_beats( items: &[memories::MemoryItem], n_beats: usize, max_burst: usize, @@ -197,7 +197,7 @@ pub fn form_beats( let shown = sample_evenly(&group, max_burst); let date = shown.first().and_then(|it| it.created); PlannedBeat { - photos: shown + media: shown .into_iter() .map(|it| SegmentMedia::Photo { rel_path: it.path, @@ -212,6 +212,62 @@ pub fn form_beats( .collect() } +/// Split the beat budget between photo beats and video-clip beats. Clips are +/// individually valuable (motion + live audio) so they get up to half the +/// budget (at least one if any exist); photos take the rest. With only one +/// kind present, it gets the whole budget. 
+fn split_beat_budget(n_photos: usize, n_videos: usize, n_beats: usize) -> (usize, usize) { + if n_videos == 0 { + return (n_beats, 0); + } + if n_photos == 0 { + return (0, n_beats.min(n_videos)); + } + let clip_beats = n_videos.min((n_beats / 2).max(1)); + let photo_beats = n_beats.saturating_sub(clip_beats); + (photo_beats, clip_beats) +} + +/// Build the reel's beats from a span's photos and videos under a beat budget. +/// Videos become one-clip beats (sampled across time if there are more than the +/// clip budget); photos cluster into burst beats. The two are merged back into +/// chronological order so the reel reads as the span unfolded. +pub fn form_beats( + photos: &[memories::MemoryItem], + videos: &[memories::MemoryItem], + n_beats: usize, + max_burst: usize, +) -> Vec { + if n_beats == 0 { + return Vec::new(); + } + let (photo_budget, clip_budget) = split_beat_budget(photos.len(), videos.len(), n_beats); + + let mut beats = form_photo_beats(photos, photo_budget, max_burst); + + // One clip beat per chosen video, spread across the span's videos. + for v in sample_evenly(videos, clip_budget) { + beats.push(PlannedBeat { + media: vec![SegmentMedia::Clip { + rel_path: v.path, + library_id: v.library_id, + }], + date: v.created, + insight_title: None, + insight_summary: None, + }); + } + + // Merge photo and clip beats back into chronological order (undated last). + beats.sort_by(|a, b| match (a.date, b.date) { + (Some(x), Some(y)) => x.cmp(&y), + (Some(_), None) => std::cmp::Ordering::Less, + (None, Some(_)) => std::cmp::Ordering::Greater, + (None, None) => std::cmp::Ordering::Equal, + }); + beats +} + /// Cheap pass: resolve the selector into an ordered list of media (no insight /// lookups yet) plus reel metadata. `Err` only on an invalid library param. pub fn resolve( @@ -238,23 +294,24 @@ pub fn resolve( library.as_deref(), )?; - // Phase 1 is photos-only: drop videos (a clip segment type lands - // in phase 2). 
- let items: Vec = items - .into_iter() - .filter(|it| is_image_file(Path::new(&it.path))) - .collect(); - - // Years are derived from the whole span (what the reel represents), - // before the budget narrows it down to beats. + // Split into photos and video clips; anything that's neither is + // dropped. Years span both, computed before the budget narrows it. let years = distinct_years(&items, client_tz); let meta = ReelMeta { span: *span, years }; + let (photos, videos): (Vec<_>, Vec<_>) = items + .into_iter() + .filter(|it| { + is_image_file(Path::new(&it.path)) || is_video_file(Path::new(&it.path)) + }) + .partition(|it| is_image_file(Path::new(&it.path))); + // The budget caps the number of narrated beats (≈ reel length); - // each beat then bursts through several photos, so the reel covers - // the span's moments without running minutes long. + // photo beats then burst through several photos and video beats + // play a short clip, so the reel covers the span without running + // minutes long. let n_beats = budget_segments(*max_segments); - let beats = form_beats(&items, n_beats, MAX_BURST_PHOTOS); + let beats = form_beats(&photos, &videos, n_beats, MAX_BURST_PHOTOS); Ok((beats, meta)) } } @@ -289,10 +346,13 @@ pub fn enrich( return; }; for beat in beats.iter_mut() { - let Some(SegmentMedia::Photo { rel_path, .. }) = beat.photos.first() else { - continue; + let rel_path = match beat.media.first() { + Some(SegmentMedia::Photo { rel_path, .. } | SegmentMedia::Clip { rel_path, .. }) => { + rel_path.clone() + } + None => continue, }; - if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) { + if let Ok(Some(insight)) = dao.get_insight(span_context, &rel_path) { beat.insight_title = Some(insight.title); beat.insight_summary = Some(insight.summary); } @@ -372,15 +432,18 @@ mod tests { assert_eq!(distinct_years(&items, None), vec![2019, 2021]); } - // Build an item at a given unix timestamp (seconds). 
- fn item_at(ts: i64, name: &str) -> memories::MemoryItem { + // Build an item at a given unix timestamp (seconds) with a chosen extension. + fn item_ext(ts: i64, name: &str, ext: &str) -> memories::MemoryItem { memories::MemoryItem { - path: format!("{name}.jpg"), + path: format!("{name}.{ext}"), created: Some(ts), modified: None, library_id: 1, } } + fn item_at(ts: i64, name: &str) -> memories::MemoryItem { + item_ext(ts, name, "jpg") + } #[test] fn budget_segments_caps_to_duration_target() { @@ -405,7 +468,7 @@ mod tests { } #[test] - fn form_beats_one_beat_per_event_when_they_fit() { + fn photo_beats_one_per_event_when_they_fit() { // Three well-separated events, budget of 10 → three beats, each holding // all of its (few) photos. let items = vec![ @@ -414,35 +477,70 @@ mod tests { item_at(1_000_000, "c"), item_at(2_000_000, "d"), ]; - let beats = form_beats(&items, 10, MAX_BURST_PHOTOS); + let beats = form_photo_beats(&items, 10, MAX_BURST_PHOTOS); assert_eq!(beats.len(), 3); - assert_eq!(beats[0].photos.len(), 2); // burst of the first event - assert_eq!(beats[1].photos.len(), 1); - assert_eq!(beats[2].photos.len(), 1); + assert_eq!(beats[0].media.len(), 2); // burst of the first event + assert_eq!(beats[1].media.len(), 1); + assert_eq!(beats[2].media.len(), 1); } #[test] - fn form_beats_merges_events_when_over_budget() { + fn photo_beats_merge_events_when_over_budget() { // Six distinct events but only two beats → adjacent events fold in, and // every event's photos still appear (capped by the burst max). 
let items: Vec = (0..6) .map(|i| item_at(i as i64 * 1_000_000, &format!("e{i}"))) .collect(); - let beats = form_beats(&items, 2, MAX_BURST_PHOTOS); + let beats = form_photo_beats(&items, 2, MAX_BURST_PHOTOS); assert_eq!(beats.len(), 2); - let shown: usize = beats.iter().map(|b| b.photos.len()).sum(); + let shown: usize = beats.iter().map(|b| b.media.len()).sum(); assert_eq!(shown, 6); // all six moments still shown across two beats } #[test] - fn form_beats_caps_burst_to_max() { + fn photo_beats_cap_burst_to_max() { // One dense event of 30 photos, generous budget → a single beat that // bursts at most MAX_BURST_PHOTOS, not all 30. let items: Vec = (0..30) .map(|i| item_at(i as i64, &format!("p{i}"))) .collect(); - let beats = form_beats(&items, 18, MAX_BURST_PHOTOS); + let beats = form_photo_beats(&items, 18, MAX_BURST_PHOTOS); assert_eq!(beats.len(), 1); - assert_eq!(beats[0].photos.len(), MAX_BURST_PHOTOS); + assert_eq!(beats[0].media.len(), MAX_BURST_PHOTOS); + } + + #[test] + fn split_beat_budget_handles_each_mix() { + // Only photos / only videos → that kind gets the whole budget. + assert_eq!(split_beat_budget(10, 0, 18), (18, 0)); + assert_eq!(split_beat_budget(0, 10, 18), (0, 10)); // capped at n_videos + assert_eq!(split_beat_budget(0, 30, 18), (0, 18)); // capped at budget + // Mixed → clips up to half (≥1), photos the rest. + assert_eq!(split_beat_budget(100, 100, 18), (9, 9)); + assert_eq!(split_beat_budget(100, 1, 18), (17, 1)); // few videos + } + + #[test] + fn form_beats_mixes_clip_and_photo_beats_in_time_order() { + let photos = vec![item_at(0, "p0"), item_at(2_000_000, "p1")]; + // A video between the two photo events (in time). + let videos = vec![item_ext(1_000_000, "v0", "mp4")]; + let beats = form_beats(&photos, &videos, 10, MAX_BURST_PHOTOS); + // Two photo events + one clip = three beats, chronological. 
+ assert_eq!(beats.len(), 3); + assert!(!beats[0].is_clip()); // p0 @ t=0 + assert!(beats[1].is_clip()); // v0 @ t=1e6 + assert!(!beats[2].is_clip()); // p1 @ t=2e6 + assert!(matches!(beats[1].media[0], SegmentMedia::Clip { .. })); + } + + #[test] + fn form_beats_videos_only_become_clip_beats() { + let videos: Vec = (0..3) + .map(|i| item_ext(i as i64 * 1_000_000, &format!("v{i}"), "mov")) + .collect(); + let beats = form_beats(&[], &videos, 10, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 3); + assert!(beats.iter().all(|b| b.is_clip())); } } -- 2.52.0 From f5581edf5ef95e131536cd3ac556bd1f7086bbc3 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 00:07:41 -0400 Subject: [PATCH 08/26] =?UTF-8?q?Reels:=20ease=20burst=20fade=200.08s=20?= =?UTF-8?q?=E2=86=92=200.12s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0.08s read as too abrupt; 0.12s keeps the burst clearly snappier than the 0.35s held-shot fade without jarring. Bumps RENDER_VERSION. Co-Authored-By: Claude Fable 5 --- src/reels/mod.rs | 2 +- src/reels/render.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 17def48..c6bfd68 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -207,7 +207,7 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 5; +const RENDER_VERSION: u32 = 6; /// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump /// over the ~0.5 default warms up otherwise-flat narration without over-acting; diff --git a/src/reels/render.rs b/src/reels/render.rs index 4e9431b..c8ddc04 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -39,7 +39,7 @@ const NARRATION_TAIL_SECONDS: f64 = 0.6; /// gentle dip; burst photos get a much snappier fade so the difference between /// a held shot and a quick burst is obvious. 
const SINGLE_FADE_SECONDS: f64 = 0.35; -const BURST_FADE_SECONDS: f64 = 0.08; +const BURST_FADE_SECONDS: f64 = 0.12; /// Video-clip framing. A clip plays at most this long, with its live audio /// ducked to `CLIP_DUCK_VOLUME` under the narration. @@ -560,7 +560,7 @@ mod tests { assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]")); assert!(g.contains("[3:a]apad[a]")); // Burst uses the much snappier fade (vs 0.35 for a held shot). - assert!(g.contains("d=0.08")); + assert!(g.contains("d=0.12")); assert!(!g.contains("d=0.35")); } -- 2.52.0 From b30c8c16d08d6081c67d8a7bf6c19ffb588ce2f7 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 11:00:01 -0400 Subject: [PATCH 09/26] Reels: clips play through the beat instead of freezing early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A clip beat capped playback at CLIP_SECONDS and filled the rest of the narration with a tpad freeze-frame, so a clip stopped dead on its last frame for a second or two before the transition — a glitchy pause that stills don't have. Extract clip_beat_plan: the clip now plays for as much of its beat as the source footage covers, and we freeze only when the source is genuinely shorter than the narration. Bump RENDER_VERSION. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 2 +- src/reels/render.rs | 64 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 9 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index c6bfd68..32635a9 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -207,7 +207,7 @@ fn finish_job( /// Render version: bump to invalidate every cached reel after a rendering / /// scripting change that should produce a fresh result. -const RENDER_VERSION: u32 = 6; +const RENDER_VERSION: u32 = 7; /// Narration expressiveness — Chatterbox's `exaggeration` knob. 
A slight bump /// over the ~0.5 default warms up otherwise-flat narration without over-acting; diff --git a/src/reels/render.rs b/src/reels/render.rs index c8ddc04..221df5f 100644 --- a/src/reels/render.rs +++ b/src/reels/render.rs @@ -41,8 +41,10 @@ const NARRATION_TAIL_SECONDS: f64 = 0.6; const SINGLE_FADE_SECONDS: f64 = 0.35; const BURST_FADE_SECONDS: f64 = 0.12; -/// Video-clip framing. A clip plays at most this long, with its live audio -/// ducked to `CLIP_DUCK_VOLUME` under the narration. +/// Video-clip framing. Fallback cap on how much of a clip we read when the +/// source length can't be probed; with a known length, a clip instead plays for +/// as much of its beat as its footage allows (see [`clip_beat_plan`]). Its live +/// audio is ducked to `CLIP_DUCK_VOLUME` under the narration. pub const CLIP_SECONDS: f64 = 5.0; const CLIP_DUCK_VOLUME: f64 = 0.35; @@ -316,6 +318,28 @@ pub async fn render_beat( // --- Video-clip beats -------------------------------------------------------- +/// Decide how long the clip plays and how long the whole beat lasts, from the +/// source video's length (if known) and the narration length. Returns +/// `(clip_dur, beat_total)`. +/// +/// The beat always lasts long enough for the full narration. The clip plays for +/// as much of that beat as its footage covers — so the motion fills the screen +/// time rather than stopping early. We only freeze the last frame (the +/// `beat_total - clip_dur` gap, handled by `tpad` in [`clip_video_filter`]) when +/// the source video is genuinely shorter than the narration. Capping clip +/// playback at a fixed length while the narration ran longer was what produced +/// the second-or-two freeze that read as a glitchy pause before the transition. +pub fn clip_beat_plan(source_dur: Option, narration_secs: f64) -> (f64, f64) { + let want = segment_duration(narration_secs); + let clip_dur = match source_dur { + // Known length: play up to the whole beat, but never past the source. 
+ Some(d) if d > 0.0 => d.min(want), + // Unknown length: read up to the fallback cap; tpad covers any shortfall. + _ => want.min(CLIP_SECONDS), + }; + (clip_dur, want.max(clip_dur)) +} + /// Video chain for a clip beat: fill the clip to the portrait canvas (blurred /// backdrop, same look as photos), normalize fps, hold the last frame if the /// narration outlasts the clip (`tpad`), then fade. Produces `[v]`. @@ -446,16 +470,13 @@ pub async fn render_clip_beat( opts: &SegmentOpts, ) -> Result<()> { let clip_str = clip_path.to_string_lossy().to_string(); - // Clamp the clip to its own length so a short video isn't padded to the cap. + // Play the clip for as much of the beat as its footage covers; freeze only + // when the source is genuinely shorter than the narration (see clip_beat_plan). let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str) .await .ok() .flatten(); - let clip_dur = match source_dur { - Some(d) if d > 0.0 && d < CLIP_SECONDS => d, - _ => CLIP_SECONDS, - }; - let beat_total = clip_dur.max(segment_duration(narration_secs)); + let (clip_dur, beat_total) = clip_beat_plan(source_dur, narration_secs); let has_audio = has_audio_stream(&clip_str).await; let args = build_clip_beat_args( @@ -630,6 +651,33 @@ mod tests { assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); } + #[test] + fn clip_beat_plan_plays_clip_through_the_whole_beat_when_source_is_long() { + // 30s source, 4s narration → beat is narration+tail (4.6), and the clip + // plays that whole 4.6s of motion: no freeze (clip_dur == beat_total). + let (clip_dur, beat_total) = clip_beat_plan(Some(30.0), 4.0); + assert!((beat_total - 4.6).abs() < 1e-9); + assert!((clip_dur - 4.6).abs() < 1e-9); + assert!((beat_total - clip_dur).abs() < 1e-9); // no hold + } + + #[test] + fn clip_beat_plan_freezes_only_when_source_shorter_than_narration() { + // 2s source under a 4s narration → play all 2s, freeze the remainder. 
+ let (clip_dur, beat_total) = clip_beat_plan(Some(2.0), 4.0); + assert!((clip_dur - 2.0).abs() < 1e-9); + assert!((beat_total - 4.6).abs() < 1e-9); + assert!(beat_total - clip_dur > 2.0); // unavoidable freeze gap + } + + #[test] + fn clip_beat_plan_caps_read_when_source_length_unknown() { + // Probe failed: read up to the fallback cap, beat still covers narration. + let (clip_dur, beat_total) = clip_beat_plan(None, 8.0); + assert!((clip_dur - CLIP_SECONDS).abs() < 1e-9); + assert!((beat_total - 8.6).abs() < 1e-9); + } + #[test] fn clip_filter_no_tpad_when_clip_covers_the_beat() { // Clip at least as long as the beat → no freeze. -- 2.52.0 From f707353807327d3ab3a88c039308ba953be0159a Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 14:29:34 -0400 Subject: [PATCH 10/26] feat: nightly agentic pre-generation of memory reels Implement end-to-end nightly pre-generation of memory reels with agentic scripting that grounds narration in calendar, location, messages, and RAG. Sections A-E from the plan: A. Extract produce_reel pipeline core from run_reel_job with ScripterMode::Fast/Agentic and progress callbacks. B. Agentic scripter: factor run_readonly_tool_loop from the insight generator, build read-only tool gate, prompt builder with GPS, and generate_script_agentic with fallback to fast path. C. Precomputed reels ledger (SQLite table + DAO), GET /reels/precomputed handler with validity gate, GET /reels/by-key/{key}/video streaming, and normalize_library_key helper. D. Nightly scheduler: spawn_pregen_scheduler with configurable hour, run_pregen_batch (day/week/month spans), pregen_one with dedup and disk-check, secs_until_next_run_hour time math. E. user_ai_prefs passive mirror table + DAO for param capture in create_reel_handler and replay in the scheduler. Also fixes resolve_library_param signature to take &[Library] and adds resolve_library_param_state wrapper for AppState callers. 
New files: migrations/2026-06-13-000000_add_precomputed_reels/, migrations/2026-06-13-000010_add_user_ai_prefs/, src/database/precomputed_reel_dao.rs, src/database/user_ai_prefs_dao.rs --- .env.example | 16 + .../down.sql | 2 + .../up.sql | 14 + .../down.sql | 1 + .../up.sql | 7 + src/ai/handlers.rs | 91 ++- src/ai/insight_generator.rs | 102 +++ src/ai/tts.rs | 2 +- src/database/mod.rs | 4 + src/database/models.rs | 56 +- src/database/precomputed_reel_dao.rs | 321 ++++++++ src/database/schema.rs | 28 + src/database/user_ai_prefs_dao.rs | 212 +++++ src/duplicates.rs | 6 +- src/faces.rs | 19 +- src/files.rs | 18 +- src/handlers/image.rs | 16 +- src/handlers/video.rs | 9 +- src/libraries.rs | 79 +- src/main.rs | 21 + src/memories.rs | 2 +- src/reels/mod.rs | 735 +++++++++++++++++- src/reels/script.rs | 159 +++- src/reels/selector.rs | 24 +- src/state.rs | 32 +- src/tags.rs | 2 +- 26 files changed, 1825 insertions(+), 153 deletions(-) create mode 100644 migrations/2026-06-13-000000_add_precomputed_reels/down.sql create mode 100644 migrations/2026-06-13-000000_add_precomputed_reels/up.sql create mode 100644 migrations/2026-06-13-000010_add_user_ai_prefs/down.sql create mode 100644 migrations/2026-06-13-000010_add_user_ai_prefs/up.sql create mode 100644 src/database/precomputed_reel_dao.rs create mode 100644 src/database/user_ai_prefs_dao.rs diff --git a/.env.example b/.env.example index a45fdd5..bafc0c8 100644 --- a/.env.example +++ b/.env.example @@ -139,3 +139,19 @@ CLIP_REQUEST_TIMEOUT_SEC=60 # ── RAG / search ──────────────────────────────────────────────────────── # Set to `1` to enable cross-encoder reranking on /search results. SEARCH_RAG_RERANK=0 + +# ── Nightly reel pre-generation (Phase 3+) ────────────────────────────── +# Set to `1` to enable the scheduler. Disabled by default. +# REEL_PREGEN_ENABLED=1 +# Hour (0-23) when the nightly batch fires. Default 3 AM. +# REEL_PREGEN_HOUR=3 +# Day of week for weekly reels (0=Sun, 1=Mon, …). Default Monday. 
+# REEL_PREGEN_WEEK_DOW=1 +# Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to +# the server's local timezone. +# REEL_PREGEN_TZ_OFFSET_MINUTES= +# Voice ID for narration (e.g., "grandma"). When unset, falls back to +# the voice stored in the user_ai_prefs DB row, if that row has one. +# REEL_PREGEN_VOICE= +# Library filter: a library id (e.g. "1") or "all" for every library. +# REEL_PREGEN_LIBRARY=all diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/down.sql b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql new file mode 100644 index 0000000..91863c2 --- /dev/null +++ b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql @@ -0,0 +1,2 @@ +DROP INDEX IF EXISTS idx_precomputed_reels_span_library; +DROP TABLE IF EXISTS precomputed_reels; diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/up.sql b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql new file mode 100644 index 0000000..ba49b72 --- /dev/null +++ b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql @@ -0,0 +1,14 @@ +CREATE TABLE precomputed_reels ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + span TEXT NOT NULL, + library_key TEXT NOT NULL, + cache_key TEXT NOT NULL, + output_path TEXT NOT NULL, + title TEXT NOT NULL, + media_count INT NOT NULL, + render_version INT NOT NULL DEFAULT 1, + tz_offset_minutes INT NOT NULL, + voice TEXT, + generated_at BIGINT NOT NULL +); +CREATE INDEX idx_precomputed_reels_span_library ON precomputed_reels(span, library_key, generated_at DESC); diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql new file mode 100644 index 0000000..83b82a3 --- /dev/null +++ b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS user_ai_prefs; diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql new file mode 100644 index 0000000..fd8f6f2 --- /dev/null +++
b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql @@ -0,0 +1,7 @@ +CREATE TABLE user_ai_prefs ( + id INTEGER PRIMARY KEY CHECK(id=1), + voice TEXT, + tz_offset_minutes INTEGER, + library TEXT, + updated_at BIGINT NOT NULL +); diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index cb21b14..c6bc212 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -120,7 +120,7 @@ pub async fn generation_status_handler( } if let Some(ref fp) = query.path { - let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -218,10 +218,11 @@ pub async fn cancel_generation_handler( } if let Some(ref fp) = request.file_path { - let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) - .ok() - .flatten() - .unwrap_or_else(|| app_state.primary_library()); + let library = + libraries::resolve_library_param_state(&app_state, request.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); let normalized = normalize_path(fp); // Get active job ids first, then cancel in DB, then abort tasks @@ -580,7 +581,7 @@ pub async fn get_insight_handler( // Expand to rel_paths sharing content so an insight generated under // library 1 still shows when the same photo is viewed from library 2. 
- let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -1218,15 +1219,16 @@ pub async fn chat_turn_handler( let mut span = tracer.start_with_context("http.insights.chat", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; // Service-token claims (sub: "service:apollo") fall through to // user_id=1 — the operator convention. 
Mobile/web clients have a @@ -1344,15 +1346,16 @@ pub async fn chat_rewind_handler( request: web::Json, app_state: web::Data, ) -> impl Responder { - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; match app_state .insight_chat @@ -1393,7 +1396,7 @@ pub async fn chat_history_handler( // cross-library lookup when the scoped one misses, so a photo // with no insight in this library but one in another still // surfaces (the "show this photo's primary insight" merge case). 
- let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -1444,15 +1447,16 @@ pub async fn chat_stream_handler( request: web::Json, app_state: web::Data, ) -> HttpResponse { - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; // Service-token sub falls through to user_id=1 (see chat_turn_handler). 
let user_id = claims.sub.parse::().unwrap_or(1); @@ -1589,15 +1593,16 @@ pub async fn turn_async_handler( let mut span = tracer.start_with_context("http.insights.chat_turn_async", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; let user_id = claims.sub.parse::().unwrap_or(1); diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 3673c43..4871c2e 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -4497,6 +4497,108 @@ Return ONLY the summary, nothing else."#, )) } + /// A read-only agentic tool loop: chat with tools until the model stops + /// calling them, then return the final content. + /// + /// This is the loop body extracted from + /// `generate_agentic_insight_for_photo` (lines 4316-4377) so it can be + /// reused by the reel-scripter without the photo-specific context + /// (image_base64, file_path, persona_id). The photo insight loop still + /// has its own copy because it threads image/file context through + /// `execute_tool`. + /// + /// Calls `execute_tool` with empty file/image context; enabled tools + /// never read those fields. 
+ #[allow(dead_code)] + pub(crate) async fn run_readonly_tool_loop( + &self, + backend: &ResolvedBackend, + mut messages: Vec, + tools: Vec, + max_iter: usize, + ) -> Result { + let mut final_content = String::new(); + + for iteration in 0..max_iter { + log::info!("Agentic iteration {}/{}", iteration + 1, max_iter); + + let (response, _prompt_tokens, _eval_tokens) = backend + .chat() + .chat_with_tools(messages.clone(), tools.clone()) + .await?; + + // Sanitize tool call arguments before pushing back into history. + // Some models occasionally return non-object arguments (bool, + // string, null) which Ollama rejects when they are re-sent in + // a subsequent request. + let mut response = response; + if let Some(ref mut tool_calls) = response.tool_calls { + for tc in tool_calls.iter_mut() { + if !tc.function.arguments.is_object() { + log::warn!( + "Tool '{}' returned non-object arguments ({:?}), normalising to {{}}", + tc.function.name, + tc.function.arguments + ); + tc.function.arguments = serde_json::Value::Object(Default::default()); + } + } + } + + messages.push(response.clone()); + + if let Some(ref tool_calls) = response.tool_calls + && !tool_calls.is_empty() + { + for tool_call in tool_calls { + log::info!( + "Agentic tool call [{}]: {} {}", + iteration, + tool_call.function.name, + tool_call.function.arguments + ); + let result = self + .execute_tool( + &tool_call.function.name, + &tool_call.function.arguments, + backend, + &None, + "", + 0, + "", + &opentelemetry::Context::new(), + ) + .await; + messages.push(ChatMessage::tool_result(result)); + } + continue; + } + + // No tool calls — this is the final answer + final_content = response.content; + break; + } + + // If loop exhausted without final answer, ask for one + if final_content.is_empty() { + log::info!( + "Agentic loop exhausted after {} iterations, requesting final answer", + max_iter + ); + messages.push(ChatMessage::user( + "Based on the context gathered, please write the final answer. 
Return ONLY the JSON object, no prose or code fences.", + )); + let (final_response, _, _) = backend + .chat() + .chat_with_tools(messages.clone(), vec![]) + .await?; + final_content = final_response.content.clone(); + messages.push(final_response); + } + + Ok(final_content) + } + /// Reverse geocode GPS coordinates to human-readable place names async fn reverse_geocode(&self, lat: f64, lon: f64) -> Option { let url = format!( diff --git a/src/ai/tts.rs b/src/ai/tts.rs index a9a610a..d6ef89d 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -1020,7 +1020,7 @@ pub async fn create_voice_from_library_handler( let voice_name = append_ref_window(&voice_name, ref_start, ref_duration.round().max(1.0) as u32); - let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(Some(l)) => l, Ok(None) => app_state.primary_library(), Err(msg) => { diff --git a/src/database/mod.rs b/src/database/mod.rs index d063bd0..981f6a4 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -51,10 +51,12 @@ pub mod knowledge_dao; pub mod location_dao; pub mod models; pub mod persona_dao; +pub mod precomputed_reel_dao; pub mod preview_dao; pub mod reconcile; pub mod schema; pub mod search_dao; +pub mod user_ai_prefs_dao; pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao}; pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao}; @@ -66,8 +68,10 @@ pub use knowledge_dao::{ }; pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao}; pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao}; +pub use precomputed_reel_dao::{PrecomputedReelDao, SqlitePrecomputedReelDao}; pub use preview_dao::{PreviewDao, SqlitePreviewDao}; pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao}; +pub use user_ai_prefs_dao::{SqliteUserAiPrefsDao, UserAiPrefsDao}; pub trait UserDao { fn 
create_user(&mut self, user: &str, password: &str) -> Option; diff --git a/src/database/models.rs b/src/database/models.rs index 62274e2..d3d5440 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -1,6 +1,7 @@ use crate::database::schema::{ entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs, - libraries, personas, photo_insights, users, video_preview_clips, + libraries, personas, photo_insights, precomputed_reels, user_ai_prefs, users, + video_preview_clips, }; use serde::Serialize; @@ -505,3 +506,56 @@ pub struct InsightGenerationJob { pub result_insight_id: Option, pub error_message: Option, } + +// --- Precomputed reels ------------------------------------------------------- + +#[derive(Insertable)] +#[diesel(table_name = precomputed_reels)] +pub struct InsertablePrecomputedReel { + pub span: String, + pub library_key: String, + pub cache_key: String, + pub output_path: String, + pub title: String, + pub media_count: i32, + pub render_version: i32, + pub tz_offset_minutes: i32, + pub voice: Option, + pub generated_at: i64, +} + +#[derive(Serialize, Queryable, Clone, Debug)] +pub struct PrecomputedReel { + pub id: i32, + pub span: String, + pub library_key: String, + pub cache_key: String, + pub output_path: String, + pub title: String, + pub media_count: i32, + pub render_version: i32, + pub tz_offset_minutes: i32, + pub voice: Option, + pub generated_at: i64, +} + +// --- User AI preferences (Section E) ---------------------------------------- + +#[derive(Queryable, Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)] +#[diesel(table_name = user_ai_prefs)] +pub struct UserAiPrefs { + pub id: i32, + pub voice: Option, + pub tz_offset_minutes: Option, + pub library: Option, + pub updated_at: i64, +} + +#[derive(Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)] +#[diesel(table_name = user_ai_prefs)] +pub struct UpsertUserAiPrefs { + pub voice: Option, + pub tz_offset_minutes: 
Option, + pub library: Option, + pub updated_at: i64, +} diff --git a/src/database/precomputed_reel_dao.rs b/src/database/precomputed_reel_dao.rs new file mode 100644 index 0000000..7acc098 --- /dev/null +++ b/src/database/precomputed_reel_dao.rs @@ -0,0 +1,321 @@ +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +use crate::database::models::{InsertablePrecomputedReel, PrecomputedReel}; +use crate::database::schema; +use crate::database::{DbError, DbErrorKind, connect}; +use crate::otel::trace_db_call; + +/// Ledger for precomputed memory reels. The nightly agentic job writes a +/// row after each successful render; the `GET /reels/precomputed` handler +/// reads it to gate on freshness and serve the cached MP4. +pub trait PrecomputedReelDao: Sync + Send { + /// Insert a precomputed reel row. Returns the new row's id. + /// Written by the nightly agentic job (Section D). + #[allow(dead_code)] + fn record_reel( + &mut self, + context: &opentelemetry::Context, + row: &InsertablePrecomputedReel, + ) -> Result; + + /// Find the latest precomputed reel for the given (span, library_key). + fn latest_for( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + ) -> Result, DbError>; + + /// Return true when a fresh precomputed reel exists for the given + /// (span, library_key, render_version) that was generated at or after + /// `min_generated_at`. Used as a fast existence gate before falling + /// back to `latest_for` (avoids a second query path). 
+ fn exists_fresh( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + render_version: i32, + min_generated_at: i64, + ) -> Result; +} + +pub struct SqlitePrecomputedReelDao { + connection: Arc>, +} + +impl Default for SqlitePrecomputedReelDao { + fn default() -> Self { + Self::new() + } +} + +impl SqlitePrecomputedReelDao { + pub fn new() -> Self { + Self { + connection: Arc::new(Mutex::new(connect())), + } + } + + #[cfg(test)] + pub fn from_connection(conn: Arc>) -> Self { + Self { connection: conn } + } +} + +impl PrecomputedReelDao for SqlitePrecomputedReelDao { + fn record_reel( + &mut self, + context: &opentelemetry::Context, + row: &InsertablePrecomputedReel, + ) -> Result { + trace_db_call(context, "insert", "record_reel", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + diesel::insert_into(dsl::precomputed_reels) + .values(row) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to insert reel: {}", e))?; + + dsl::precomputed_reels + .order(dsl::id.desc()) + .select(dsl::id) + .first::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to get reel id: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) + } + + fn latest_for( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "latest_for", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .order(dsl::generated_at.desc()) + .first::(connection.deref_mut()) + .optional() + .map_err(|e| anyhow::anyhow!("Failed to get latest reel: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn exists_fresh( + &mut 
self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + render_version: i32, + min_generated_at: i64, + ) -> Result { + trace_db_call(context, "query", "exists_fresh", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + let count: i64 = dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .filter(dsl::render_version.eq(render_version)) + .filter(dsl::generated_at.ge(min_generated_at)) + .count() + .get_result(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to check fresh reel: {}", e))?; + + Ok(count > 0) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use diesel::Connection; + use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations}; + + const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!(); + + fn setup_dao() -> SqlitePrecomputedReelDao { + let mut conn = SqliteConnection::establish(":memory:") + .expect("Unable to create in-memory db connection"); + conn.run_pending_migrations(DB_MIGRATIONS) + .expect("Failure running DB migrations"); + SqlitePrecomputedReelDao::from_connection(Arc::new(Mutex::new(conn))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + fn sample_row() -> InsertablePrecomputedReel { + InsertablePrecomputedReel { + span: "day".to_string(), + library_key: "1".to_string(), + cache_key: "abc123".to_string(), + output_path: "/tmp/reel.mp4".to_string(), + title: "Test Reel".to_string(), + media_count: 10, + render_version: 1, + tz_offset_minutes: 0, + voice: Some("default".to_string()), + generated_at: 1_000_000, + } + } + + #[test] + fn record_reel_inserts_and_returns_id() { + let mut dao = setup_dao(); + let ctx = ctx(); + let row = sample_row(); + + let id = dao.record_reel(&ctx, &row).unwrap(); + assert!(id > 0, "should return a 
positive id"); + } + + #[test] + fn record_reel_returns_increasing_ids() { + let mut dao = setup_dao(); + let ctx = ctx(); + let row = sample_row(); + + let id1 = dao.record_reel(&ctx, &row).unwrap(); + let id2 = dao.record_reel(&ctx, &row).unwrap(); + assert!(id2 > id1, "each insert should get a higher id"); + } + + #[test] + fn latest_for_returns_latest() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let row1 = InsertablePrecomputedReel { + generated_at: 1_000_000, + ..sample_row() + }; + let row2 = InsertablePrecomputedReel { + generated_at: 2_000_000, + ..sample_row() + }; + + dao.record_reel(&ctx, &row1).unwrap(); + dao.record_reel(&ctx, &row2).unwrap(); + + let latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap(); + assert_eq!(latest.generated_at, 2_000_000); + } + + #[test] + fn latest_for_scoped_by_span_and_library() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let day_row = InsertablePrecomputedReel { + span: "day".to_string(), + library_key: "1".to_string(), + generated_at: 1_000_000, + ..sample_row() + }; + let week_row = InsertablePrecomputedReel { + span: "week".to_string(), + library_key: "1".to_string(), + generated_at: 2_000_000, + ..sample_row() + }; + + dao.record_reel(&ctx, &day_row).unwrap(); + dao.record_reel(&ctx, &week_row).unwrap(); + + let day_latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap(); + assert_eq!(day_latest.span, "day"); + + let week_latest = dao.latest_for(&ctx, "week", "1").unwrap().unwrap(); + assert_eq!(week_latest.span, "week"); + + // Different library returns None + let missing = dao.latest_for(&ctx, "day", "99").unwrap(); + assert!(missing.is_none()); + } + + #[test] + fn latest_for_returns_none_when_no_rows() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let result = dao.latest_for(&ctx, "day", "1").unwrap(); + assert!(result.is_none()); + } + + #[test] + fn exists_fresh_returns_true_when_present() { + let mut dao = setup_dao(); + let ctx = ctx(); + + dao.record_reel(&ctx, 
&sample_row()).unwrap(); + + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap(); + assert!(exists, "should find the row we just inserted"); + } + + #[test] + fn exists_fresh_returns_false_when_missing() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap(); + assert!(!exists, "should not find anything in empty table"); + } + + #[test] + fn exists_fresh_respects_min_generated_at() { + let mut dao = setup_dao(); + let ctx = ctx(); + + dao.record_reel(&ctx, &sample_row()).unwrap(); + + // Below the threshold — should exist + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 500_000).unwrap(); + assert!(exists); + + // Above the threshold — should not exist + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 2_000_000).unwrap(); + assert!(!exists); + } + + #[test] + fn exists_fresh_respects_render_version() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let row_v1 = InsertablePrecomputedReel { + render_version: 1, + ..sample_row() + }; + dao.record_reel(&ctx, &row_v1).unwrap(); + + assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap()); + assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap()); + } +} diff --git a/src/database/schema.rs b/src/database/schema.rs index bf5791b..846542d 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -266,6 +266,16 @@ diesel::table! { } } +diesel::table! { + user_ai_prefs (id) { + id -> Integer, + voice -> Nullable, + tz_offset_minutes -> Nullable, + library -> Nullable, + updated_at -> BigInt, + } +} + diesel::table! { video_preview_clips (id) { id -> Integer, @@ -294,6 +304,22 @@ diesel::table! { } } +diesel::table! 
{ + precomputed_reels (id) { + id -> Integer, + span -> Text, + library_key -> Text, + cache_key -> Text, + output_path -> Text, + title -> Text, + media_count -> Integer, + render_version -> Integer, + tz_offset_minutes -> Integer, + voice -> Nullable, + generated_at -> BigInt, + } +} + diesel::joinable!(entity_facts -> photo_insights (source_insight_id)); diesel::joinable!(entity_photo_links -> entities (entity_id)); diesel::joinable!(entity_photo_links -> libraries (library_id)); @@ -322,9 +348,11 @@ diesel::allow_tables_to_appear_in_same_query!( personas, persons, photo_insights, + precomputed_reels, search_history, tagged_photo, tags, + user_ai_prefs, users, video_preview_clips, ); diff --git a/src/database/user_ai_prefs_dao.rs b/src/database/user_ai_prefs_dao.rs new file mode 100644 index 0000000..d58a56c --- /dev/null +++ b/src/database/user_ai_prefs_dao.rs @@ -0,0 +1,212 @@ +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +use crate::database::models::{UpsertUserAiPrefs, UserAiPrefs}; +use crate::database::schema; +use crate::database::{DbError, DbErrorKind, connect}; +use crate::otel::trace_db_call; + +/// Generic single-row table that passively mirrors the latest client AI +/// request parameters (voice, timezone, library). Read by the nightly +/// pre-generation scheduler (Section D) to pick up user preferences. +pub trait UserAiPrefsDao: Sync + Send { + /// Read the single row; `None` when it hasn't been populated yet. + fn get_prefs( + &mut self, + context: &opentelemetry::Context, + ) -> Result, DbError>; + + /// Upsert the single row (id is always 1). 
+ #[allow(dead_code)] + fn upsert_prefs( + &mut self, + context: &opentelemetry::Context, + prefs: &UpsertUserAiPrefs, + ) -> Result<(), DbError>; +} + +pub struct SqliteUserAiPrefsDao { + connection: Arc<Mutex<SqliteConnection>>, +} + +impl Default for SqliteUserAiPrefsDao { + fn default() -> Self { + Self::new() + } +} + +impl SqliteUserAiPrefsDao { + pub fn new() -> Self { + Self { + connection: Arc::new(Mutex::new(connect())), + } + } + + #[cfg(test)] + pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self { + Self { connection: conn } + } +} + +impl UserAiPrefsDao for SqliteUserAiPrefsDao { + fn get_prefs( + &mut self, + context: &opentelemetry::Context, + ) -> Result<Option<UserAiPrefs>, DbError> { + trace_db_call(context, "query", "get_prefs", |_span| { + use schema::user_ai_prefs::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock UserAiPrefsDao"); + + dsl::user_ai_prefs + .first::<UserAiPrefs>(connection.deref_mut()) + .optional() + .map_err(|e| anyhow::anyhow!("Failed to get prefs: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn upsert_prefs( + &mut self, + context: &opentelemetry::Context, + prefs: &UpsertUserAiPrefs, + ) -> Result<(), DbError> { + trace_db_call(context, "upsert", "upsert_prefs", |_span| { + use schema::user_ai_prefs::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock UserAiPrefsDao"); + + // SQLite: INSERT on first call, UPDATE on subsequent calls. + // The first INSERT creates the row with id=1 (SQLite assigns rowid 1 on the empty table; the column is not AUTOINCREMENT). + // Subsequent calls UPDATE the existing row. + let result = diesel::insert_into(dsl::user_ai_prefs) + .values(prefs) + .execute(connection.deref_mut()); + + match result { + Ok(_) => { + // First insert succeeded. + Ok(()) + } + Err(_e) => { + // Insert failed (expected once the row exists: the next rowid would be 2, violating CHECK(id=1); any other insert error also lands here). Update instead.
+ diesel::update(dsl::user_ai_prefs.filter(dsl::id.eq(1))) + .set(( + dsl::voice.eq(&prefs.voice), + dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes), + dsl::library.eq(&prefs.library), + dsl::updated_at.eq(&prefs.updated_at), + )) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?; + Ok(()) + } + } + }) + .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use diesel::Connection; + use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations}; + + const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!(); + + fn setup_dao() -> SqliteUserAiPrefsDao { + let mut conn = SqliteConnection::establish(":memory:") + .expect("Unable to create in-memory db connection"); + conn.run_pending_migrations(DB_MIGRATIONS) + .expect("Failure running DB migrations"); + SqliteUserAiPrefsDao::from_connection(Arc::new(Mutex::new(conn))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + #[test] + fn get_prefs_returns_none_when_empty() { + let mut dao = setup_dao(); + let result = dao.get_prefs(&ctx()).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn upsert_prefs_inserts_row() { + let mut dao = setup_dao(); + let now = 1_700_000_000i64; + let prefs = UpsertUserAiPrefs { + voice: Some("grandma".to_string()), + tz_offset_minutes: Some(-480), + library: Some("1".to_string()), + updated_at: now, + }; + dao.upsert_prefs(&ctx(), &prefs).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.id, 1); + assert_eq!(row.voice, Some("grandma".to_string())); + assert_eq!(row.tz_offset_minutes, Some(-480)); + assert_eq!(row.library, Some("1".to_string())); + assert_eq!(row.updated_at, now); + } + + #[test] + fn upsert_prefs_replaces_existing() { + let mut dao = setup_dao(); + let now1 = 1_700_000_000i64; + let now2 = 1_800_000_000i64; + + let prefs1 = UpsertUserAiPrefs { + voice: 
Some("grandma".to_string()), + tz_offset_minutes: Some(-480), + library: Some("1".to_string()), + updated_at: now1, + }; + dao.upsert_prefs(&ctx(), &prefs1).unwrap(); + + let prefs2 = UpsertUserAiPrefs { + voice: Some("dad".to_string()), + tz_offset_minutes: Some(-300), + library: None, + updated_at: now2, + }; + dao.upsert_prefs(&ctx(), &prefs2).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.voice, Some("dad".to_string())); + assert_eq!(row.tz_offset_minutes, Some(-300)); + assert!(row.library.is_none()); + assert_eq!(row.updated_at, now2); + } + + #[test] + fn upsert_partial_fields() { + let mut dao = setup_dao(); + let now = 1_700_000_000i64; + + let prefs = UpsertUserAiPrefs { + voice: None, + tz_offset_minutes: Some(-480), + library: None, + updated_at: now, + }; + dao.upsert_prefs(&ctx(), &prefs).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.tz_offset_minutes, Some(-480)); + assert!(row.voice.is_none()); + assert!(row.library.is_none()); + } +} diff --git a/src/duplicates.rs b/src/duplicates.rs index 372415b..32ed92b 100644 --- a/src/duplicates.rs +++ b/src/duplicates.rs @@ -234,7 +234,7 @@ async fn list_exact_handler( let span = global_tracer().start_with_context("duplicates.list_exact", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -265,7 +265,7 @@ async fn list_perceptual_handler( let span = global_tracer().start_with_context("duplicates.list_perceptual", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() 
.map(|l| l.id); @@ -449,7 +449,7 @@ async fn list_folder_pairs_handler( let span = global_tracer().start_with_context("duplicates.list_folder_pairs", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); diff --git a/src/faces.rs b/src/faces.rs index 3288aa3..f619966 100644 --- a/src/faces.rs +++ b/src/faces.rs @@ -1755,7 +1755,7 @@ async fn stats_handler( let span = global_tracer().start_with_context("faces.stats", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -1782,11 +1782,12 @@ async fn list_faces_handler( let normalized_path = normalize_path(&query.path); // resolve_library_param returns Option<&Library>; clone so the result // is owned (matching the primary_library fallback's type). 
- let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref()) - .ok() - .flatten() - .cloned() - .unwrap_or_else(|| app_state.primary_library().clone()); + let library: Library = + libraries::resolve_library_param_state(&app_state, query.library.as_deref()) + .ok() + .flatten() + .cloned() + .unwrap_or_else(|| app_state.primary_library().clone()); let mut dao = face_dao.lock().expect("face dao lock"); let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) { @@ -1870,7 +1871,7 @@ async fn create_face_handler( } let normalized_path = normalize_path(&body.path); - let library: Library = match libraries::resolve_library_param( + let library: Library = match libraries::resolve_library_param_state( &app_state, body.library.as_ref().map(|i| i.to_string()).as_deref(), ) { @@ -2192,7 +2193,7 @@ async fn list_persons_handler( let span = global_tracer().start_with_context("persons.list", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -2345,7 +2346,7 @@ async fn person_faces_handler( let context = extract_context_from_request(&request); let span = global_tracer().start_with_context("persons.faces", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); diff --git a/src/files.rs b/src/files.rs index 59cd49e..920540e 100644 --- a/src/files.rs +++ b/src/files.rs @@ -275,14 +275,14 @@ pub async fn list_photos( // Resolve the optional library filter. Unknown values return 400. 
A // `None` result means "union across all libraries" and downstream // walks iterate every configured library root. - let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) - { - Ok(lib) => lib, - Err(msg) => { - log::warn!("Rejecting /photos request: {}", msg); - return HttpResponse::BadRequest().body(msg); - } - }; + let library = + match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { + Ok(lib) => lib, + Err(msg) => { + log::warn!("Rejecting /photos request: {}", msg); + return HttpResponse::BadRequest().body(msg); + } + }; let span_context = opentelemetry::Context::current_with_span(span); @@ -1238,7 +1238,7 @@ pub async fn list_exif_summary( // Resolve the library filter up front so a bad id/name 400s before we // ever take the DAO mutex. None == union across all libraries. let library_filter = - match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) { + match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(lib) => lib.map(|l| l.id), Err(msg) => { span.set_status(Status::error(msg.clone())); diff --git a/src/handlers/image.rs b/src/handlers/image.rs index f0d2310..923fff3 100644 --- a/src/handlers/image.rs +++ b/src/handlers/image.rs @@ -53,7 +53,7 @@ pub async fn get_image( // Resolve library from query param; default to primary so clients that // don't yet send `library=` continue to work. 
- let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -492,7 +492,7 @@ pub async fn get_file_metadata( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -580,7 +580,7 @@ pub async fn set_image_gps( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = libraries::resolve_library_param(&app_state, body.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, body.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -746,7 +746,7 @@ pub async fn get_full_exif( let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("get_full_exif", &context); - let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -888,7 +888,8 @@ pub async fn set_image_date( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -941,7 +942,8 @@ pub async fn clear_image_date( let span_context = 
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -1001,7 +1003,7 @@ pub async fn upload_image( // Resolve the optional library selector. Absent → primary library // (backwards-compatible with clients that don't yet send `library=`). let target_library = - match libraries::resolve_library_param(&app_state, query.library.as_deref()) { + match libraries::resolve_library_param_state(&app_state, query.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { diff --git a/src/handlers/video.rs b/src/handlers/video.rs index f9f4e64..b56a67e 100644 --- a/src/handlers/video.rs +++ b/src/handlers/video.rs @@ -67,10 +67,11 @@ pub async fn generate_video( let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("generate_video", &context); - let preferred_library = libraries::resolve_library_param(&app_state, body.library.as_deref()) - .ok() - .flatten() - .unwrap_or_else(|| app_state.primary_library()); + let preferred_library = + libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); // Try the resolved library first, then fall back to any other library // that actually contains the file — handles union-mode requests where diff --git a/src/libraries.rs b/src/libraries.rs index 55bf5c1..377b442 100644 --- a/src/libraries.rs +++ b/src/libraries.rs @@ -291,11 +291,11 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { } /// Resolve a library request parameter (accepts numeric id as string or name) -/// against the configured libraries. 
Returns `Ok(None)` when the param is +/// against a list of libraries. Returns `Ok(None)` when the param is /// absent, meaning "span all libraries". Returns `Err` when a value is /// provided but does not match any library. pub fn resolve_library_param<'a>( - state: &'a AppState, + libs: &'a [Library], param: Option<&str>, ) -> Result, String> { let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else { @@ -303,18 +303,29 @@ pub fn resolve_library_param<'a>( }; if let Ok(id) = raw.parse::() { - return state - .library_by_id(id) + return libs + .iter() + .find(|l| l.id == id) .map(Some) .ok_or_else(|| format!("unknown library id: {}", id)); } - state - .library_by_name(raw) + libs.iter() + .find(|l| l.name == raw) .map(Some) .ok_or_else(|| format!("unknown library name: {}", raw)) } +/// Resolve a library request parameter against the AppState's libraries. +/// Returns `Ok(None)` when the param is absent, meaning "span all libraries". +/// Returns `Err` when a value is provided but does not match any library. +pub fn resolve_library_param_state<'a>( + state: &'a AppState, + param: Option<&str>, +) -> Result, String> { + resolve_library_param(&state.libraries, param) +} + /// Health of a library at a point in time. Probed at the top of each /// file-watcher tick. 
The `Stale` state is the "be conservative" signal: /// destructive paths (ingest writes, future move-handoff and orphan GC in @@ -662,12 +673,6 @@ mod tests { assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg")); } - fn state_with_libraries(libs: Vec) -> AppState { - let mut state = AppState::test_state(); - state.libraries = libs; - state - } - fn sample_libraries() -> Vec { vec![ Library { @@ -687,52 +692,52 @@ mod tests { ] } - #[actix_rt::test] - async fn resolve_library_param_absent_is_union() { - let state = state_with_libraries(sample_libraries()); - assert!(matches!(resolve_library_param(&state, None), Ok(None))); + #[test] + fn resolve_library_param_absent_is_union() { + let libs = sample_libraries(); + assert!(matches!(resolve_library_param(&libs, None), Ok(None))); } - #[actix_rt::test] - async fn resolve_library_param_empty_or_whitespace_is_union() { - let state = state_with_libraries(sample_libraries()); - assert!(matches!(resolve_library_param(&state, Some("")), Ok(None))); + #[test] + fn resolve_library_param_empty_or_whitespace_is_union() { + let libs = sample_libraries(); + assert!(matches!(resolve_library_param(&libs, Some("")), Ok(None))); assert!(matches!( - resolve_library_param(&state, Some(" ")), + resolve_library_param(&libs, Some(" ")), Ok(None) )); } - #[actix_rt::test] - async fn resolve_library_param_numeric_id_matches() { - let state = state_with_libraries(sample_libraries()); - let lib = resolve_library_param(&state, Some("7")) + #[test] + fn resolve_library_param_numeric_id_matches() { + let libs = sample_libraries(); + let lib = resolve_library_param(&libs, Some("7")) .expect("valid id") .expect("some library"); assert_eq!(lib.id, 7); assert_eq!(lib.name, "archive"); } - #[actix_rt::test] - async fn resolve_library_param_name_matches() { - let state = state_with_libraries(sample_libraries()); - let lib = resolve_library_param(&state, Some("main")) + #[test] + fn resolve_library_param_name_matches() { + let libs = 
sample_libraries(); + let lib = resolve_library_param(&libs, Some("main")) .expect("valid name") .expect("some library"); assert_eq!(lib.id, 1); } - #[actix_rt::test] - async fn resolve_library_param_unknown_id_errs() { - let state = state_with_libraries(sample_libraries()); - let err = resolve_library_param(&state, Some("999")).unwrap_err(); + #[test] + fn resolve_library_param_unknown_id_errs() { + let libs = sample_libraries(); + let err = resolve_library_param(&libs, Some("999")).unwrap_err(); assert!(err.contains("unknown library id")); } - #[actix_rt::test] - async fn resolve_library_param_unknown_name_errs() { - let state = state_with_libraries(sample_libraries()); - let err = resolve_library_param(&state, Some("missing")).unwrap_err(); + #[test] + fn resolve_library_param_unknown_name_errs() { + let libs = sample_libraries(); + let err = resolve_library_param(&libs, Some("missing")).unwrap_err(); assert!(err.contains("unknown library name")); } diff --git a/src/main.rs b/src/main.rs index b059e9b..e3ded45 100644 --- a/src/main.rs +++ b/src/main.rs @@ -267,6 +267,25 @@ fn main() -> std::io::Result<()> { } } + // Spawn the nightly pre-generation scheduler (Section D). 
+ { + use crate::database::{ + InsightDao, SqliteInsightDao, SqliteUserAiPrefsDao, UserAiPrefsDao, + }; + + let insight_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); + let prefs_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))); + + reels::spawn_pregen_scheduler( + app_state.clone(), + web::Data::new(insight_dao), + web::Data::new(prefs_dao), + ) + .await; + } + HttpServer::new(move || { let user_dao = SqliteUserDao::new(); let favorites_dao = SqliteFavoriteDao::new(); @@ -348,6 +367,8 @@ fn main() -> std::io::Result<()> { .service(reels::create_reel_handler) .service(reels::reel_status_handler) .service(reels::reel_video_handler) + .service(reels::precomputed_reel_handler) + .service(reels::precomputed_video_handler) .service(ai::generate_insight_handler) .service(ai::generate_agentic_insight_handler) .service(ai::generation_status_handler) diff --git a/src/memories.rs b/src/memories.rs index c877981..2b1f473 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -419,7 +419,7 @@ pub fn gather_memory_items( span_mode, tz_offset_minutes, years_back ); - let library = crate::libraries::resolve_library_param(app_state, library_param)?; + let library = crate::libraries::resolve_library_param_state(app_state, library_param)?; let libraries_to_scan: Vec<&crate::libraries::Library> = match library { Some(lib) => vec![lib], None => app_state.libraries.iter().collect(), diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 32635a9..c51822c 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -18,24 +18,59 @@ pub mod selector; use std::collections::HashMap; use std::path::{Path, PathBuf}; -use std::sync::{LazyLock, Mutex as StdMutex}; +use std::sync::{Arc, LazyLock, Mutex, Mutex as StdMutex}; use std::time::{Duration, Instant}; use actix_files::NamedFile; use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web}; -use chrono::DateTime; +use anyhow::{Context, anyhow}; +use chrono::{DateTime, Datelike, 
Timelike}; use serde::{Deserialize, Serialize}; use serde_json::json; -use std::sync::Mutex; use uuid::Uuid; use crate::data::Claims; -use crate::database::{ExifDao, InsightDao}; +use crate::database::{ExifDao, InsightDao, PrecomputedReelDao, UserAiPrefsDao}; +use crate::libraries::{Library, resolve_library_param}; use crate::memories::MemoriesSpan; use crate::otel::extract_context_from_request; use crate::state::AppState; use selector::ReelSelector; +// --- Precomputed reel age limits (hours) ------------------------------------- + +/// Maximum age for a precomputed day reel before it's considered stale. +const REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS: u64 = 26; +/// Maximum age for a precomputed week reel. +const REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS: u64 = 192; +/// Maximum age for a precomputed month reel. +const REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS: u64 = 768; + +/// Resolve a library request parameter to a stable key string. +/// Returns the library's id as a string when found, or `"all"` when +/// the param is absent or the lookup fails. +pub fn normalize_library_key(libs: &[Library], param: Option<&str>) -> String { + match resolve_library_param(libs, param) { + Ok(Some(lib)) => lib.id.to_string(), + _ => "all".to_string(), + } +} + +/// Which scripting strategy to use for the reel narration. +#[derive(Clone, Copy)] +#[allow(dead_code)] +pub enum ScripterMode { + /// Fast path: single LLM call via the direct client. + Fast, + /// Agentic path: resolves the backend through the InsightGenerator + /// (honouring LLM_BACKEND, model overrides, etc.). Falls back to + /// Fast on error so a scripting failure never sinks a reel. + Agentic, +} + +/// Progress callback type — receives a static-stage label. +pub type ProgressFn<'a> = dyn Fn(&'static str) + Send + Sync + 'a; + /// The media behind one shot: a still photo, or a short section of a source /// video (played with its live audio ducked under the narration). 
Both carry /// just the library-relative path; the renderer applies fixed clip framing @@ -73,6 +108,8 @@ pub struct PlannedBeat { pub date: Option, pub insight_title: Option, pub insight_summary: Option, + /// GPS coordinates of the lead media item, when available. + pub gps: Option<(f64, f64)>, } impl PlannedBeat { @@ -292,6 +329,13 @@ pub struct ReelStatusResponse { pub error: Option, } +/// Response shape for `GET /reels/precomputed`. +#[derive(Debug, Serialize)] +pub struct PrecomputedReelResponse { + pub video_url: String, + pub title: String, +} + // --- Handlers ---------------------------------------------------------------- /// POST /reels — start (or instantly serve from cache) a memory reel for the @@ -399,8 +443,20 @@ pub async fn create_reel_handler( let state = app_state.clone(); let insight_dao = insight_dao.clone(); + let exif_dao = exif_dao.clone(); let handle = tokio::spawn(async move { - match run_reel_job(&state, &insight_dao, job_id, planned, meta, voice, &key).await { + match run_reel_job( + &state, + &insight_dao, + &exif_dao, + job_id, + planned, + meta, + voice, + &key, + ) + .await + { Ok((title, path)) => { finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None) } @@ -471,25 +527,131 @@ pub async fn reel_video_handler( } } +/// GET /reels/precomputed?span=&library= +/// +/// Look up the latest precomputed reel for the given span and library key. +/// Validity gate (all must hold, else 404): +/// 1. `render_version == RENDER_VERSION` +/// 2. `output_path` exists on disk +/// 3. age <= max_age(span) (Day 26h, Week 8d, Month 32d) +/// +/// Returns `{ video_url: "/reels/by-key/{cache_key}/video", title }`. 
+#[get("/reels/precomputed")]
+pub async fn precomputed_reel_handler(
+    _claims: Claims,
+    query: web::Query<HashMap<String, String>>,
+    app_state: web::Data<AppState>,
+    reel_dao: web::Data<Mutex<Box<dyn PrecomputedReelDao>>>,
+) -> impl Responder {
+    // A missing `span` is treated as "day"; `normalize_library_key` maps a
+    // missing or unresolvable `library` to "all".
+    let span = query.get("span").map(String::as_str).unwrap_or("day");
+    let library_key = normalize_library_key(
+        &app_state.libraries,
+        query.get("library").map(String::as_str),
+    );
+
+    let now = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .expect("Time went backwards")
+        .as_secs() as i64;
+
+    // Any span other than "week"/"month" gets the day limit.
+    let max_age_hours = match span {
+        "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS as i64,
+        "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS as i64,
+        _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS as i64,
+    };
+    let min_generated_at = now - (max_age_hours * 3600);
+
+    let ctx = opentelemetry::Context::new();
+
+    // Keep the DAO guard scoped to the two lookups so the filesystem check
+    // below does not run with the mutex held.
+    let reel = {
+        let mut dao = reel_dao.lock().expect("Unable to lock PrecomputedReelDao");
+
+        // Fast existence gate: is there a fresh row at all? A DAO error is
+        // still answered with 404, but it is logged instead of vanishing.
+        let has_fresh = match dao.exists_fresh(
+            &ctx,
+            span,
+            &library_key,
+            RENDER_VERSION as i32,
+            min_generated_at,
+        ) {
+            Ok(found) => found,
+            Err(e) => {
+                log::warn!("precomputed reel freshness lookup failed: {e:?}");
+                false
+            }
+        };
+        if !has_fresh {
+            return HttpResponse::NotFound().json(json!({ "error": "no precomputed reel found" }));
+        }
+
+        // Fetch the latest row for full validity checks.
+        match dao.latest_for(&ctx, span, &library_key) {
+            Ok(Some(r)) => r,
+            Ok(None) => {
+                return HttpResponse::NotFound()
+                    .json(json!({ "error": "no precomputed reel found" }));
+            }
+            Err(e) => {
+                log::warn!("precomputed reel lookup failed: {e:?}");
+                return HttpResponse::NotFound()
+                    .json(json!({ "error": "no precomputed reel found" }));
+            }
+        }
+    };
+
+    // Validity gate 1: render version must match.
+    if reel.render_version != RENDER_VERSION as i32 {
+        return HttpResponse::NotFound()
+            .json(json!({ "error": "precomputed reel is stale (render version mismatch)" }));
+    }
+
+    // Validity gate 2: output_path must exist.
+    let output = std::path::Path::new(&reel.output_path);
+    if !output.exists() {
+        return HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" }));
+    }
+
+    // Validity gate 3: age <= max_age (re-checked via min_generated_at).
+    if reel.generated_at < min_generated_at {
+        return HttpResponse::NotFound().json(json!({ "error": "precomputed reel has expired" }));
+    }
+
+    HttpResponse::Ok().json(PrecomputedReelResponse {
+        video_url: format!("/reels/by-key/{}/video", reel.cache_key),
+        title: reel.title,
+    })
+}
+
+/// GET /reels/by-key/{key}/video — stream a precomputed reel MP4 by cache key.
+///
+/// The key comes straight from the URL and is turned into a filesystem path by
+/// `reel_mp4_path`, so keys that are empty or contain a path separator or `..`
+/// are rejected with 400 before any file is opened. A key whose MP4 cannot be
+/// opened yields 404.
+#[get("/reels/by-key/{key}/video")]
+pub async fn precomputed_video_handler(
+    _claims: Claims,
+    request: HttpRequest,
+    path: web::Path<String>,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    let key = path.into_inner();
+    // Untrusted input: refuse anything that could step out of the directory
+    // the key is resolved against.
+    let has_separator = key.contains(|c: char| matches!(c, '/' | '\\'));
+    if key.is_empty() || has_separator || key.contains("..") {
+        log::warn!("rejecting precomputed reel key {key:?}");
+        return HttpResponse::BadRequest().json(json!({ "error": "invalid reel key" }));
+    }
+
+    let mp4 = reel_mp4_path(&app_state, &key);
+    match NamedFile::open(&mp4) {
+        Ok(file) => file.into_response(&request),
+        Err(e) => {
+            log::error!("opening precomputed reel {key} failed: {e:?}");
+            HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" }))
+        }
+    }
+}
+
 // --- Pipeline ----------------------------------------------------------------
 
 /// Run the full reel pipeline: enrich → script → narrate → render → concat,
 /// then publish the MP4 into the cache. Returns (title, mp4_path).
-async fn run_reel_job(
+///
+/// The `scripter` parameter controls which narration-generation strategy is
+/// used (fast single-call vs. agentic backend resolution). On scripting
+/// failure in Agentic mode the pipeline falls back to the fast path so a
+/// single LLM failure never sinks a reel.
+pub(crate) async fn produce_reel( app_state: &AppState, insight_dao: &Mutex>, - job_id: Uuid, + exif_dao: &Mutex>, mut planned: Vec, meta: ReelMeta, voice: Option, key: &str, + scripter: ScripterMode, + progress: Option<&ProgressFn<'_>>, ) -> anyhow::Result<(String, PathBuf)> { - use anyhow::{Context, anyhow}; - let started = Instant::now(); let total_photos: usize = planned.iter().map(|b| b.media.len()).sum(); log::info!( - "reel {job_id}: starting — span {:?}, {} beats, {} photos, voice={}", + "reel produce_reel: starting — span {:?}, {} beats, {} photos, voice={}", meta.span, planned.len(), total_photos, @@ -499,18 +661,33 @@ async fn run_reel_job( let client = app_state .llamacpp .as_ref() - .ok_or_else(|| anyhow!("TTS/LLM backend not configured"))? + .ok_or_else(|| anyhow::anyhow!("TTS/LLM backend not configured"))? .clone(); // 1. Enrich each beat with its lead photo's cached insight, then script // (one LLM call → one narration line per beat). - set_stage(job_id, "scripting"); - log::info!("reel {job_id}: scripting narration via LLM…"); + emit_progress(progress, "scripting"); + log::info!("reel produce_reel: scripting narration via LLM…"); let span_context = opentelemetry::Context::new(); - selector::enrich(insight_dao, &span_context, &mut planned); - let script = script::generate_script(&client, &meta, &planned).await?; + selector::enrich(insight_dao, exif_dao, &span_context, &mut planned); + let script = match scripter { + ScripterMode::Fast => script::generate_script(&client, &meta, &planned).await?, + ScripterMode::Agentic => { + match script::generate_script_agentic(&app_state.insight_generator, &meta, &planned) + .await + { + Ok(s) => s, + Err(e) => { + log::warn!( + "reel produce_reel: agentic script failed, falling back to fast: {e}" + ); + script::generate_script(&client, &meta, &planned).await? 
+ } + } + } + }; log::info!( - "reel {job_id}: scripted \"{}\" ({} lines)", + "reel produce_reel: scripted \"{}\" ({} lines)", script.title, script.lines.len() ); @@ -519,11 +696,11 @@ async fn run_reel_job( // sequence under that one narration). A beat whose audio or render fails // is skipped (logged) rather than sinking the whole reel — handles an // odd HEIC/corrupt file gracefully. - set_stage(job_id, "narrating"); + emit_progress(progress, "narrating"); let work = tempfile::tempdir().context("creating reel work dir")?; let nvenc = render::is_nvenc_available().await; log::info!( - "reel {job_id}: narrating + rendering {} beats (encoder: {})", + "reel produce_reel: narrating + rendering {} beats (encoder: {})", planned.len(), if nvenc { "nvenc" } else { "cpu" } ); @@ -543,7 +720,7 @@ async fn run_reel_job( .filter_map(|m| resolve_media_path(app_state, m)) .collect(); if paths.is_empty() { - log::warn!("reel {job_id}: skipping beat {i}, no media paths resolved"); + log::warn!("reel produce_reel: skipping beat {i}, no media paths resolved"); continue; } @@ -558,13 +735,13 @@ async fn run_reel_job( { Ok(b) => b, Err(e) => { - log::warn!("reel {job_id}: skipping beat {i}, TTS failed: {e}"); + log::warn!("reel produce_reel: skipping beat {i}, TTS failed: {e}"); continue; } }; let audio_path = work.path().join(format!("narration_{i:03}.wav")); if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { - log::warn!("reel {job_id}: skipping beat {i}, writing audio failed: {e}"); + log::warn!("reel produce_reel: skipping beat {i}, writing audio failed: {e}"); continue; } @@ -575,11 +752,11 @@ async fn run_reel_job( .flatten() .unwrap_or(render::MIN_SEGMENT_SECONDS); - set_stage(job_id, "rendering"); + emit_progress(progress, "rendering"); let beat_out = work.path().join(format!("beat_{i:03}.mp4")); let render_result = if beat.is_clip() { log::info!( - "reel {job_id}: beat {}/{} — video clip, narration {:.1}s", + "reel produce_reel: beat {}/{} — video clip, 
narration {:.1}s", i + 1, beat_total, narration_secs @@ -587,7 +764,7 @@ async fn run_reel_job( render::render_clip_beat(&paths[0], &audio_path, &beat_out, narration_secs, &opts).await } else { log::info!( - "reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s", + "reel produce_reel: beat {}/{} — {} photo(s), narration {:.1}s", i + 1, beat_total, paths.len(), @@ -596,7 +773,7 @@ async fn run_reel_job( render::render_beat(&paths, &audio_path, &beat_out, narration_secs, &opts).await }; if let Err(e) = render_result { - log::warn!("reel {job_id}: skipping beat {i}, render failed: {e}"); + log::warn!("reel produce_reel: skipping beat {i}, render failed: {e}"); continue; } beat_files.push(beat_out.to_string_lossy().to_string()); @@ -609,9 +786,9 @@ async fn run_reel_job( // 4. Concat into the cache. Write to a temp name in the reels dir, then // rename atomically (same filesystem) so a reader never sees a partial. - set_stage(job_id, "rendering"); + emit_progress(progress, "rendering"); log::info!( - "reel {job_id}: joining {} rendered beats into the final reel", + "reel produce_reel: joining {} rendered beats into the final reel", segment_files.len() ); std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?; @@ -629,7 +806,7 @@ async fn run_reel_job( let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar); log::info!( - "reel {job_id}: done in {:.1}s — {} beats → {}", + "reel produce_reel: done in {:.1}s — {} beats → {}", started.elapsed().as_secs_f64(), segment_files.len(), final_path.display() @@ -637,6 +814,42 @@ async fn run_reel_job( Ok((script.title, final_path)) } +/// Emit a progress stage label via the optional callback. +fn emit_progress(progress: Option<&ProgressFn<'_>>, stage: &'static str) { + if let Some(p) = progress { + p(stage); + } +} + +/// Run the full reel pipeline and publish the MP4 into the cache. +/// Thin wrapper around [`produce_reel`] that wires up job-stage tracking. 
+async fn run_reel_job(
+    app_state: &AppState,
+    insight_dao: &Mutex<Box<dyn InsightDao>>,
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    job_id: Uuid,
+    planned: Vec<PlannedBeat>,
+    meta: ReelMeta,
+    voice: Option<String>,
+    key: &str,
+) -> anyhow::Result<(String, PathBuf)> {
+    // Forward every stage label reported by the pipeline to `set_stage` for
+    // this job.
+    let progress = move |stage: &'static str| {
+        set_stage(job_id, stage);
+    };
+    produce_reel(
+        app_state,
+        insight_dao,
+        exif_dao,
+        planned,
+        meta,
+        voice,
+        key,
+        ScripterMode::Fast,
+        Some(&progress),
+    )
+    .await
+}
+
 /// Resolve a media item's library-relative path to a validated absolute path
 /// under its library root (works for both photos and clips).
 fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option<PathBuf> {
@@ -645,9 +858,280 @@ fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option<PathBuf> {
+/// Env `REEL_PREGEN_HOUR`: local hour of day (0-23) at which the
+/// pre-generation batch runs. Unset, unparsable, or out-of-range values fall
+/// back to 3.
+fn pregen_run_hour() -> u32 {
+    std::env::var("REEL_PREGEN_HOUR")
+        .ok()
+        .and_then(|v| v.trim().parse::<u32>().ok())
+        .filter(|hour| *hour < 24)
+        .unwrap_or(3)
+}
+
+/// Env: "1" (default, Monday). Day of week for weekly pre-gen (0=Sun, 1=Mon, ...).
+/// Values outside 0-6 can never match a weekday, so they fall back to the
+/// default instead of silently disabling the weekly reel.
+fn pregen_week_dow() -> u32 {
+    std::env::var("REEL_PREGEN_WEEK_DOW")
+        .ok()
+        .and_then(|v| v.trim().parse::<u32>().ok())
+        .filter(|dow| *dow < 7)
+        .unwrap_or(1)
+}
+
+/// Pure: seconds until the next run of `run_hour` given the current local time.
+/// Handles same-day vs wrap-around. Recomputed each loop iteration to absorb
+/// DST shifts.
+///
+/// The result lands on `run_hour:00:00` of the wall clock: minutes and seconds
+/// of `now` are subtracted, so a wake-up late in the hour does not push every
+/// later run by the same amount. When `now` is exactly on the run hour the
+/// next run is a full day away. `run_hour` is taken modulo 24, and the result
+/// is always in `1..=86_400`.
+pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime<chrono::Local>, run_hour: u32) -> u64 {
+    const DAY_SECS: u64 = 24 * 3600;
+
+    // Seconds elapsed since local midnight (at most 86_399).
+    let elapsed =
+        u64::from(now.hour()) * 3600 + u64::from(now.minute()) * 60 + u64::from(now.second());
+    let target = u64::from(run_hour % 24) * 3600;
+
+    if target > elapsed {
+        target - elapsed
+    } else {
+        // Today's slot has passed (or is right now): aim for tomorrow's.
+        DAY_SECS - elapsed + target
+    }
+}
+
+/// Load pre-gen parameters: tries the user_ai_prefs DB row first, falls back
+/// to env vars, then to server-local defaults.
+fn load_pregen_params(
+    prefs_dao: &web::Data<Arc<Mutex<Box<dyn UserAiPrefsDao>>>>,
+) -> (i32, Option<String>, String) {
+    // `FixedOffset::local_minus_utc` reports seconds; the value returned here
+    // is consumed as `tz_offset_minutes`, so convert before using it as the
+    // default.
+    let local_offset_minutes = || chrono::Local::now().offset().local_minus_utc() / 60;
+
+    // Try DB row first. The fallback is per row, not per field: once a row
+    // exists its `voice` is used as-is (even when unset) and the env vars are
+    // not consulted. A poisoned lock or a failed/empty lookup falls through.
+    if let Ok(mut dao) = prefs_dao.lock() {
+        let ctx = opentelemetry::Context::new();
+        if let Ok(Some(prefs)) = dao.get_prefs(&ctx) {
+            let tz = prefs
+                .tz_offset_minutes
+                .unwrap_or_else(local_offset_minutes);
+            let voice = prefs.voice;
+            let library = prefs.library.unwrap_or_else(|| "all".to_string());
+            return (tz, voice, library);
+        }
+    }
+    // Fall back to env
+    let tz = std::env::var("REEL_PREGEN_TZ_OFFSET_MINUTES")
+        .ok()
+        .and_then(|v| v.parse().ok())
+        .unwrap_or_else(local_offset_minutes);
+    let voice = std::env::var("REEL_PREGEN_VOICE").ok();
+    let library = std::env::var("REEL_PREGEN_LIBRARY")
+        .ok()
+        .unwrap_or_else(|| "all".to_string());
+    (tz, voice, library)
+}
+
+/// Spawn the nightly pre-generation scheduler. Runs behind `REEL_PREGEN_ENABLED`.
+///
+/// Returns immediately in both cases: when the variable is not exactly `"1"`
+/// nothing is spawned; otherwise a detached tokio task loops forever, sleeping
+/// until the configured run hour and then running one batch. A failed batch is
+/// logged and the loop carries on to the next night.
+pub(crate) async fn spawn_pregen_scheduler(
+    app_state: web::Data<AppState>,
+    insight_dao: web::Data<Arc<Mutex<Box<dyn InsightDao>>>>,
+    prefs_dao: web::Data<Arc<Mutex<Box<dyn UserAiPrefsDao>>>>,
+) {
+    if std::env::var("REEL_PREGEN_ENABLED").ok().as_deref() != Some("1") {
+        log::info!("Reel pre-generation scheduler disabled (REEL_PREGEN_ENABLED != 1)");
+        return;
+    }
+
+    let run_hour = pregen_run_hour();
+    log::info!(
+        "Reel pre-generation scheduler enabled, running at hour {} local",
+        run_hour
+    );
+
+    tokio::spawn(async move {
+        loop {
+            // Recomputed from the wall clock on every pass rather than
+            // sleeping a fixed 24h.
+            let now = chrono::Local::now();
+            let sleep_secs = secs_until_next_run_hour(now, run_hour);
+            log::debug!("Next pre-gen run in {}s", sleep_secs);
+            tokio::time::sleep(std::time::Duration::from_secs(sleep_secs)).await;
+
+            if let Err(e) = run_pregen_batch(&app_state, &insight_dao, &prefs_dao).await {
+                log::error!("Reel pre-generation batch failed: {}", e);
+            }
+        }
+    });
+}
+
+/// Run the pre-generation batch for all applicable spans.
+///
+/// "day" is generated on every run, "week" only on the configured weekday
+/// (`pregen_week_dow`), and "month" only on the 1st; both are judged against
+/// the server's local date. A failure in one span is logged and does not stop
+/// the others, so this currently always returns `Ok(())`.
+async fn run_pregen_batch(
+    app_state: &AppState,
+    insight_dao: &web::Data<Arc<Mutex<Box<dyn InsightDao>>>>,
+    prefs_dao: &web::Data<Arc<Mutex<Box<dyn UserAiPrefsDao>>>>,
+) -> anyhow::Result<()> {
+    let now = chrono::Local::now();
+    let weekday = now.weekday().num_days_from_sunday(); // 0=Sun, 1=Mon, ...
+    let day_of_month = now.day();
+
+    let mut spans = vec!["day"];
+    if weekday == pregen_week_dow() {
+        spans.push("week");
+    }
+    if day_of_month == 1 {
+        spans.push("month");
+    }
+
+    let (tz, voice, library) = load_pregen_params(prefs_dao);
+
+    for span in spans {
+        if let Err(e) = pregen_one(app_state, insight_dao, span, tz, voice.clone(), &library).await
+        {
+            log::error!("Pre-gen failed for span={}: {}", span, e);
+        }
+    }
+
+    Ok(())
+}
+
+/// Pre-generate a single reel for the given span.
+///
+/// Resolves the selection, derives its cache key, and then:
+/// 1. skips when the latest ledger row for this span/library already points at
+///    the same cache key, is within the span's max age, matches
+///    `RENDER_VERSION`, and its file is still on disk;
+/// 2. otherwise reuses an MP4 already on disk for that key (only the ledger row
+///    is written), or
+/// 3. renders the reel through [`produce_reel`] in agentic mode.
+///
+/// The ledger row is stored under the same normalized library key that
+/// `GET /reels/precomputed` looks up. A selection that fails to resolve or is
+/// empty is logged and reported as `Ok(())`; render and ledger failures are
+/// returned as errors.
+async fn pregen_one(
+    app_state: &AppState,
+    insight_dao: &web::Data<Arc<Mutex<Box<dyn InsightDao>>>>,
+    span: &str,
+    tz: i32,
+    voice: Option<String>,
+    library: &str,
+) -> anyhow::Result<()> {
+    let memories_span = match span {
+        "week" => MemoriesSpan::Week,
+        "month" => MemoriesSpan::Month,
+        // "day" and anything unrecognized.
+        _ => MemoriesSpan::Day,
+    };
+    // "all" is the sentinel for "no library filter".
+    let library_param = (library != "all").then_some(library);
+
+    let selector = ReelSelector::Memories {
+        span: memories_span,
+        tz_offset_minutes: tz,
+        library: library_param.map(str::to_string),
+        max_segments: 24,
+    };
+
+    let exif_dao: Arc<StdMutex<Box<dyn ExifDao>>> = Arc::new(StdMutex::new(Box::new(
+        crate::database::SqliteExifDao::new(),
+    )));
+    let ctx = opentelemetry::Context::new();
+    let (planned, reel_meta) = match selector::resolve(app_state, &exif_dao, &ctx, &selector) {
+        Ok(resolved) => resolved,
+        Err(e) => {
+            log::warn!("Pre-gen resolve failed for span={}: {}", span, e);
+            return Ok(());
+        }
+    };
+
+    if planned.is_empty() {
+        log::info!("No beats for span={}, skipping", span);
+        return Ok(());
+    }
+
+    // Flatten every media item across beats (in order) into the cache key.
+    let media: Vec<SegmentMedia> = planned.iter().flat_map(|b| b.media.clone()).collect();
+    let key = cache_key(&selector, &media, voice.as_deref());
+    // The ledger counts media items, not beats (a beat can carry several).
+    let media_count = media.len() as i32;
+    // Same key form the GET /reels/precomputed handler queries with.
+    let library_key = normalize_library_key(&app_state.libraries, library_param);
+
+    // Errors instead of panics from here on: a panic would end the spawned
+    // scheduler task for good, an `Err` is logged and retried next run.
+    let now = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .context("system clock is before the Unix epoch")?
+        .as_secs() as i64;
+
+    let max_age_hours = match span {
+        "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS,
+        "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS,
+        _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS,
+    };
+    let min_generated_at = now - (max_age_hours as i64 * 3600);
+
+    // Dedup: only skip when the latest ledger row is for THIS selection.
+    // Checking freshness alone is not enough, because each max age is longer
+    // than its cadence (26h vs. a 24h run), so the previous run's row would
+    // always look fresh and every other run would be skipped.
+    let already_current = {
+        let mut dao = app_state
+            .precomputed_reel_dao
+            .lock()
+            .map_err(|_| anyhow!("precomputed reel DAO lock poisoned"))?;
+        match dao.latest_for(&ctx, span, &library_key) {
+            Ok(Some(row)) => {
+                row.cache_key == key
+                    && row.render_version == RENDER_VERSION as i32
+                    && row.generated_at >= min_generated_at
+                    && Path::new(&row.output_path).exists()
+            }
+            _ => false,
+        }
+    };
+
+    if already_current {
+        log::info!("Fresh precomputed reel exists for span={}, skipping", span);
+        return Ok(());
+    }
+
+    let mp4_path = reel_mp4_path(app_state, &key);
+    let (title, output_path) = if mp4_path.exists() {
+        // MP4 already on disk (from a previous run that crashed after render,
+        // or an on-demand request for the same selection).
+        log::info!(
+            "Precomputed reel MP4 already exists for key={}, recording ledger and skipping render",
+            key
+        );
+        // Read title from sidecar if available, through the same path helper
+        // `produce_reel` writes it with.
+        // NOTE(review): assumes the sidecar JSON has a top-level string
+        // `title` — confirm against the sidecar writer.
+        let sidecar = tokio::fs::read_to_string(reel_sidecar_path(app_state, &key))
+            .await
+            .ok();
+        let title = sidecar
+            .and_then(|raw| serde_json::from_str::<serde_json::Value>(&raw).ok())
+            .and_then(|doc| doc.get("title").and_then(|t| t.as_str()).map(str::to_owned))
+            .unwrap_or_else(|| format!("{} reel", span));
+        (title, mp4_path)
+    } else {
+        // Generate the reel
+        log::info!("Generating precomputed reel for span={}, key={}", span, key);
+        let rendered = produce_reel(
+            app_state,
+            insight_dao,
+            &exif_dao,
+            planned,
+            reel_meta,
+            voice.clone(),
+            &key,
+            ScripterMode::Agentic,
+            None,
+        )
+        .await?;
+        log::info!("Precomputed reel generated for span={}, key={}", span, key);
+        rendered
+    };
+
+    // Record to ledger (one write path for both branches above).
+    let mut reel_dao = app_state
+        .precomputed_reel_dao
+        .lock()
+        .map_err(|_| anyhow!("precomputed reel DAO lock poisoned"))?;
+    reel_dao.record_reel(
+        &ctx,
+        &crate::database::models::InsertablePrecomputedReel {
+            span: span.to_string(),
+            library_key,
+            cache_key: key,
+            output_path: output_path.to_string_lossy().to_string(),
+            title,
+            media_count,
+            render_version: RENDER_VERSION as i32,
+            tz_offset_minutes: tz,
+            voice,
+            generated_at: now,
+        },
+    )?;
+
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::ai::face_client::FaceClient;
+    use crate::libraries::Library;
+    use crate::video::actors::StreamActor;
 
     fn photo(p: &str, lib: i32) -> SegmentMedia {
         SegmentMedia::Photo {
@@ -672,6 +1156,128 @@ mod tests {
         }
     }
 
+    /// Minimal AppState for tests that only need library lookup.
+ #[allow(dead_code)] + fn test_app_state() -> AppState { + use crate::ai::InsightGenerator; + use crate::ai::insight_chat::{ChatLockMap, InsightChatService}; + use crate::ai::turn_registry::TurnRegistry; + use crate::ai::{OllamaClient, SmsApiClient}; + use crate::database::{ + ExifDao, InsightDao, InsightGenerationJobDao, PreviewDao, SqliteExifDao, + SqliteInsightDao, SqliteInsightGenerationJobDao, SqlitePreviewDao, + }; + use crate::faces; + use crate::state::AppState; + use crate::tags::SqliteTagDao; + use actix::Actor; + use std::sync::Mutex; + + let temp_dir = tempfile::tempdir().expect("Failed to create temp directory"); + let base_path = temp_dir.path().to_path_buf(); + let base_path_str = base_path.to_string_lossy().to_string(); + + let test_lib = Library { + id: crate::libraries::PRIMARY_LIBRARY_ID, + name: "main".to_string(), + root_path: base_path_str.clone(), + enabled: true, + excluded_dirs: Vec::new(), + }; + + let ollama = OllamaClient::new( + "http://localhost:11434".to_string(), + None, + "llama3.2".to_string(), + None, + ); + let sms_client = SmsApiClient::new("http://localhost:8000".to_string(), None); + let apollo_client = crate::ai::apollo_client::ApolloClient::new(None); + + let insight_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); + let exif_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new(SqliteExifDao::new()))); + let daily_summary_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteDailySummaryDao::new(), + ))); + let insight_generator = InsightGenerator::new( + ollama.clone(), + None, + None, + sms_client.clone(), + apollo_client.clone(), + insight_dao.clone(), + exif_dao.clone(), + daily_summary_dao, + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteCalendarEventDao::new(), + ))), + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteLocationHistoryDao::new(), + ))), + 
std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteSearchHistoryDao::new(), + ))), + std::sync::Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))), + std::sync::Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new()))), + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteKnowledgeDao::new(), + ))), + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqlitePersonaDao::new(), + ))), + vec![test_lib.clone()], + ); + + let chat_locks: ChatLockMap = + std::sync::Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new())); + let insight_chat = std::sync::Arc::new(InsightChatService::new( + std::sync::Arc::new(insight_generator.clone()), + insight_dao.clone(), + chat_locks, + )); + let turn_registry = std::sync::Arc::new(TurnRegistry::new(300)); + let preview_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new()))); + let insight_job_dao: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))); + let insight_job_handles: std::sync::Arc< + Mutex>, + > = std::sync::Arc::new(Mutex::new(std::collections::HashMap::new())); + + AppState::new( + std::sync::Arc::new(StreamActor {}.start()), + vec![test_lib], + base_path_str.clone(), + base_path_str.clone(), + base_path_str.clone(), + base_path_str.clone(), + Vec::new(), + ollama, + None, + Vec::new(), + None, + Vec::new(), + sms_client, + insight_generator, + insight_chat, + turn_registry, + preview_dao, + FaceClient::new(None), + crate::ai::clip_client::ClipClient::new(None), + insight_job_dao, + insight_job_handles, + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqlitePrecomputedReelDao::new(), + ))), + std::sync::Arc::new(Mutex::new(Box::new( + crate::database::SqliteUserAiPrefsDao::new(), + ))), + ) + } + #[test] fn cache_key_is_stable_for_same_inputs() { let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; @@ -724,12 +1330,14 @@ mod tests { date: None, insight_title: None, 
insight_summary: None, + gps: None, }; let photo_beat = PlannedBeat { media: vec![photo("a.jpg", 1), photo("b.jpg", 1)], date: None, insight_title: None, insight_summary: None, + gps: None, }; assert!(clip_beat.is_clip()); assert!(!photo_beat.is_clip()); @@ -753,6 +1361,7 @@ mod tests { date: Some(1_560_384_000), // 2019-06-13 UTC insight_title: None, insight_summary: None, + gps: None, }; assert!(beat.date_label().unwrap().contains("2019")); @@ -761,7 +1370,77 @@ mod tests { date: None, insight_title: None, insight_summary: None, + gps: None, }; assert_eq!(undated.date_label(), None); } + + #[test] + fn normalize_library_key_returns_id_when_found_numeric() { + let libs = vec![ + Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }, + Library { + id: 7, + name: "archive".to_string(), + root_path: "/tmp/archive".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }, + ]; + assert_eq!(normalize_library_key(&libs, Some("1")), "1"); + } + + #[test] + fn normalize_library_key_returns_id_when_found_by_name() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("main")), "1"); + } + + #[test] + fn normalize_library_key_returns_all_when_absent() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, None), "all"); + } + + #[test] + fn normalize_library_key_returns_all_when_empty() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("")), "all"); + } + + #[test] + fn normalize_library_key_returns_all_when_unknown() { + let libs = vec![Library { + 
id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("missing")), "all"); + } } diff --git a/src/reels/script.rs b/src/reels/script.rs index 5be3d64..202a22c 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -9,13 +9,20 @@ //! //! The prompt builder and response parser are pure so the contract is //! unit-testable; `generate_script` wires them to the LLM client. +//! +//! The agentic scripter (pre-generation) resolves the backend through the +//! InsightGenerator, builds a read-only tool set, and runs a tool loop to +//! ground the narration in retrieved context before asking for the final JSON. use anyhow::{Context, Result}; use std::sync::Arc; use super::{PlannedBeat, ReelMeta}; +use crate::ai::backend::{BackendKind, SamplingOverrides}; +use crate::ai::insight_generator::InsightGenerator; use crate::ai::llamacpp::LlamaCppClient; -use crate::ai::llm_client::LlmClient; +use crate::ai::llm_client::{LlmClient, Tool}; +use crate::ai::ollama::ChatMessage; /// The narration for a whole reel: a title and one line per beat, in order. #[derive(Debug, Clone, PartialEq)] @@ -35,6 +42,32 @@ can be read aloud in a few seconds. Avoid generic filler like \"what a \ wonderful day\" — if you have little to go on, simply describe the moment \ plainly."; +/// Agentic scripter system prompt: richer version that tells the model it may +/// call read-only tools to ground each line. +const AGENTIC_SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \ +slideshow of someone's own photos set to a spoken voiceover. Write warm, \ +specific, first-person narration as if the person is gently looking back on \ +their own memories. Each line plays over one moment, which may be a quick burst \ +of several photos, so narrate the moment as a whole rather than a single frame. 
\ +Be concrete and grounded in the details given; never invent names, places, or \ +events that aren't supported. Keep each line to one or two short sentences that \ +can be read aloud in a few seconds. Avoid generic filler like \"what a \ +wonderful day\" — if you have little to go on, simply describe the moment \ +plainly.\n\nYou may call read-only tools (search_messages, get_file_tags, \ +reverse_geocode, get_current_datetime, recall_entities, recall_facts_for_photo, \ +recall_facts_for_entity) to ground each line in real context. Never invent \ +details. Return ONLY the JSON object, no prose or code fences."; + +/// Maximum agentic tool iterations for pre-generation. Tunable via +/// `REEL_PREGEN_MAX_TOOL_ITERS` (default 8). +fn reel_pregen_max_tool_iters() -> usize { + std::env::var("REEL_PREGEN_MAX_TOOL_ITERS") + .ok() + .and_then(|s| s.trim().parse::().ok()) + .filter(|x| *x > 0) + .unwrap_or(8) +} + /// Build the (system, user) prompt pair for the scripter. The user message /// describes each beat in order and asks for strict JSON back. pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) { @@ -81,6 +114,61 @@ pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, (SYSTEM_PROMPT.to_string(), user) } +/// Build a richer (system, user) prompt pair for the agentic scripter. The +/// system prompt tells the model it may call read-only tools to ground each +/// line. The user message uses the same per-beat enumeration as +/// `build_script_messages` plus a GPS line per beat when available. 
+pub fn build_agentic_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> Vec { + let mut user = String::new(); + user.push_str(&format!( + "This reel has {} moments surfaced as memories {}.\n\n", + beats.len(), + meta.span_phrase() + )); + if !meta.years.is_empty() { + let years: Vec = meta.years.iter().map(|y| y.to_string()).collect(); + user.push_str(&format!("They span the years: {}.\n\n", years.join(", "))); + } + user.push_str("Moments, in the order they will appear:\n"); + for (i, beat) in beats.iter().enumerate() { + user.push_str(&format!("\n[{}]", i + 1)); + if let Some(date) = beat.date_label() { + user.push_str(&format!(" {date}")); + } + if beat.is_clip() { + user.push_str(" (a video clip)"); + } else if beat.media.len() > 1 { + user.push_str(&format!(" (a burst of {} photos)", beat.media.len())); + } + if let Some((lat, lon)) = beat.gps { + user.push_str(&format!("\n GPS: {:.4}, {:.4}", lat, lon)); + } + user.push('\n'); + match (&beat.insight_title, &beat.insight_summary) { + (Some(t), Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {t} — {s}\n")); + } + (Some(t), _) => user.push_str(&format!(" Known context: {t}\n")), + (_, Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {s}\n")); + } + _ => user.push_str(" (no extra context — narrate plainly from the date)\n"), + } + } + user.push_str(&format!( + "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\ + {{\"title\": \"\", \"segments\": [\"\", \ + \"\", ... ]}}\n\ + The \"segments\" array MUST have exactly {} items, one per moment in order.", + beats.len() + )); + + vec![ + ChatMessage::system(AGENTIC_SYSTEM_PROMPT.to_string()), + ChatMessage::user(user), + ] +} + /// Parse the model's response into a script with exactly `n` lines. Tolerant of /// code fences and surrounding prose, and of both `segments: [".."]` and /// `segments: [{"narration": ".."}]` shapes. 
Missing/extra lines are padded or @@ -198,6 +286,74 @@ pub async fn generate_script( Ok(parse_script_response(&raw, beats.len())) } +/// Agentic version of script generation: resolves the backend via the +/// InsightGenerator (honouring LLM_BACKEND, model overrides, etc.), builds +/// a read-only tool set, runs the tool loop, then parses the JSON response. +/// Returns the same ReelScript shape. On failure the caller may fall back to +/// `generate_script`. +pub async fn generate_script_agentic( + generator: &InsightGenerator, + meta: &ReelMeta, + beats: &[PlannedBeat], +) -> Result { + // 1. Resolve the backend. Bail if the local model lacks tool-calling. + let backend = generator + .resolve_backend( + BackendKind::Local, + &SamplingOverrides { + model: None, + num_ctx: None, + temperature: None, + top_p: None, + top_k: None, + min_p: None, + }, + ) + .await + .context("resolving backend for agentic script")?; + + // 2. Build the read-only tool set. Start from the persona gate (no + // persona context, so corrections are closed), force has_vision=false, + // then filter out write tools. + let gate = generator.current_gate_opts_for_persona(false, None); + let all_tools = InsightGenerator::build_tool_definitions(gate); + let read_only_names: std::collections::HashSet<&str> = [ + "search_rag", + "search_messages", + "get_sms_messages", + "get_calendar_events", + "get_location_history", + "get_file_tags", + "get_faces_in_photo", + "reverse_geocode", + "get_personal_place_at", + "recall_entities", + "recall_facts_for_photo", + "recall_facts_for_entity", + "get_current_datetime", + ] + .into_iter() + .collect(); + let tools: Vec = all_tools + .into_iter() + .filter(|t| read_only_names.contains(t.function.name.as_str())) + .collect(); + + // 3. Build the agentic prompt messages. + let messages = build_agentic_script_messages(meta, beats); + + // 4. Run the tool loop. 
+ let max_iter = reel_pregen_max_tool_iters(); + let raw = generator + .run_readonly_tool_loop(&backend, messages, tools, max_iter) + .await + .context("agentic tool loop failed")?; + + // 5. Strip any think-blocks the model may have emitted, then parse. + let raw = crate::ai::llm_client::strip_think_blocks(&raw); + Ok(parse_script_response(&raw, beats.len())) +} + #[cfg(test)] mod tests { use super::*; @@ -220,6 +376,7 @@ mod tests { date: Some(1_560_000_000 + i as i64 * 86_400), insight_title: None, insight_summary: None, + gps: None, }) .collect() } diff --git a/src/reels/selector.rs b/src/reels/selector.rs index d096f6d..a02cbb8 100644 --- a/src/reels/selector.rs +++ b/src/reels/selector.rs @@ -207,6 +207,7 @@ fn form_photo_beats( date, insight_title: None, insight_summary: None, + gps: None, } }) .collect() @@ -255,6 +256,7 @@ pub fn form_beats( date: v.created, insight_title: None, insight_summary: None, + gps: None, }); } @@ -334,15 +336,20 @@ fn distinct_years(items: &[memories::MemoryItem], tz: Option) -> Ve years } -/// Background pass: fill each beat's cached insight (title + summary) from its -/// lead photo, where one exists. Best-effort — a missing or errored lookup -/// leaves the fields `None` and the scripter narrates from the date alone. +/// Background pass: fill each beat's cached insight (title + summary) and +/// GPS coordinates from its lead photo, where one exists. Best-effort — a +/// missing or errored lookup leaves the fields `None` and the scripter +/// narrates from the date alone. 
pub fn enrich( insight_dao: &Mutex>, + exif_dao: &Mutex>, span_context: &opentelemetry::Context, beats: &mut [PlannedBeat], ) { - let Ok(mut dao) = insight_dao.lock() else { + let Ok(mut insight_dao) = insight_dao.lock() else { + return; + }; + let Ok(mut exif_dao) = exif_dao.lock() else { return; }; for beat in beats.iter_mut() { @@ -352,10 +359,17 @@ pub fn enrich( } None => continue, }; - if let Ok(Some(insight)) = dao.get_insight(span_context, &rel_path) { + if let Ok(Some(insight)) = insight_dao.get_insight(span_context, &rel_path) { beat.insight_title = Some(insight.title); beat.insight_summary = Some(insight.summary); } + // Enrich GPS from EXIF when the lead media is a photo. + if let Some(SegmentMedia::Photo { .. }) = beat.media.first() + && let Ok(Some(exif)) = exif_dao.get_exif(span_context, &rel_path) + && let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude) + { + beat.gps = Some((lat as f64, lon as f64)); + } } } diff --git a/src/state.rs b/src/state.rs index bf894f3..33e8e3f 100644 --- a/src/state.rs +++ b/src/state.rs @@ -8,9 +8,10 @@ use crate::ai::turn_registry::TurnRegistry; use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient}; use crate::database::{ CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, InsightGenerationJobDao, KnowledgeDao, - LocationHistoryDao, SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, - SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, SqliteKnowledgeDao, - SqliteLocationHistoryDao, SqliteSearchHistoryDao, connect, + LocationHistoryDao, PrecomputedReelDao, SearchHistoryDao, SqliteCalendarEventDao, + SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, + SqliteKnowledgeDao, SqliteLocationHistoryDao, SqlitePrecomputedReelDao, SqliteSearchHistoryDao, + SqliteUserAiPrefsDao, UserAiPrefsDao, connect, }; use crate::database::{PreviewDao, SqlitePreviewDao}; use crate::faces; @@ -88,6 +89,14 @@ pub struct AppState { pub clip_client: 
ClipClient, pub insight_job_dao: Arc>>, pub insight_job_handles: Arc>>, + /// Ledger for precomputed memory reels. Written by the nightly agentic + /// job (Section D); read by `GET /reels/precomputed` (Section C). + #[allow(dead_code)] + pub precomputed_reel_dao: Arc>>, + /// User AI preferences (voice, timezone, library). Mirrored by the + /// client; read by the nightly pre-generation scheduler. + #[allow(dead_code)] + pub user_ai_prefs_dao: Arc>>, } impl AppState { @@ -101,6 +110,7 @@ impl AppState { self.libraries.iter().find(|l| l.id == id) } + #[allow(dead_code)] pub fn library_by_name(&self, name: &str) -> Option<&Library> { self.libraries.iter().find(|l| l.name == name) } @@ -129,6 +139,8 @@ impl AppState { clip_client: ClipClient, insight_job_dao: Arc>>, insight_job_handles: Arc>>, + precomputed_reel_dao: Arc>>, + user_ai_prefs_dao: Arc>>, ) -> Self { assert!( !libraries_vec.is_empty(), @@ -187,6 +199,8 @@ impl AppState { clip_client, insight_job_dao, insight_job_handles, + precomputed_reel_dao, + user_ai_prefs_dao, } } @@ -267,6 +281,14 @@ impl Default for AppState { let insight_job_handles: Arc>> = Arc::new(Mutex::new(HashMap::new())); + // Initialize precomputed reel DAO (nightly pre-generation ledger) + let precomputed_reel_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))); + + // Initialize user AI preferences DAO (Section E) + let user_ai_prefs_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))); + // Load base path and ensure the primary library row reflects it. 
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env"); let mut seed_conn = connect(); @@ -344,6 +366,8 @@ impl Default for AppState { clip_client, insight_job_dao, insight_job_handles, + precomputed_reel_dao, + user_ai_prefs_dao, ) } } @@ -553,6 +577,8 @@ impl AppState { ClipClient::new(None), // disabled in test Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test Arc::new(Mutex::new(HashMap::new())), // placeholder for test + Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))), // placeholder for test + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))), // placeholder for test ) } } diff --git a/src/tags.rs b/src/tags.rs index f3e0135..3dc0859 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -168,7 +168,7 @@ async fn get_tags( // this file, so tags added under one library show up under the // others when they hold the same file. Falls back to direct rel_path // match when the file hasn't been hashed yet. 
- let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, request.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); -- 2.52.0 From 5c9ee5652784785bd736c5152bdf9f82165b3074 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 14:59:00 -0400 Subject: [PATCH 11/26] Fix agentic reel audit issues: midnight bug, DAO wiring, dead code, DST timezone, validation Blocking fixes: - secs_until_next_run_hour: same-hour now returns 0 instead of 24h - capture_prefs: called at both handler return points, never fails request - capture_prefs: resolves library param, upserts to user_ai_prefs via DAO - Scheduler: uses AppState DAOs instead of separate connections - Pregen dedup: uses resolved library param instead of hardcoded 'all' - run_readonly_tool_loop: added #[allow(dead_code)] (used in main.rs only) - run_readonly_tool_loop: removed dead messages.push() call - InsightGenerator: added exif_dao() getter for scheduler reuse Medium fixes: - Input validation: run_hour clamped 0-23, week_dow clamped 0-6 - DST-sensitive timezone: fixed_tz_offset() with env var config Low fixes: - Documented REEL_PREGEN_MAX_TOOL_ITERS and REEL_PREGEN_TZ_FIXED_MINUTES - Removed dead test_app_state function and unused imports Also fix: UpsertUserAiPrefs import path, chrono::Local::with_ymd_and_hms requires TimeZone trait + .single(), unwrap_or_else closure simplification --- .env.example | 5 + src/ai/insight_generator.rs | 13 +- src/main.rs | 18 +-- src/reels/mod.rs | 295 ++++++++++++++++-------------------- 4 files changed, 151 insertions(+), 180 deletions(-) diff --git a/.env.example b/.env.example index bafc0c8..a7bd7e5 100644 --- a/.env.example +++ b/.env.example @@ -150,8 +150,13 @@ SEARCH_RAG_RERANK=0 # Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to # the server's local timezone. 
# REEL_PREGEN_TZ_OFFSET_MINUTES= +# Fixed timezone offset — overrides auto-detect to avoid DST shifts. +# When set, both the DB fallback and env fallback use this value. +# REEL_PREGEN_TZ_FIXED_MINUTES=-480 # Voice ID for narration (e.g., "grandma"). Falls back to the value # stored in the user_ai_prefs DB row when set. # REEL_PREGEN_VOICE= # Library filter: a library id (e.g. "1") or "all" for every library. # REEL_PREGEN_LIBRARY=all +# Max agentic tool iterations for pre-gen scripter. Default 8. +# REEL_PREGEN_MAX_TOOL_ITERS=8 diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 4871c2e..4ff8494 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -217,6 +217,13 @@ impl InsightGenerator { &self.insight_dao } + /// Accessor for the EXIF DAO (used by the reel scheduler to resolve + /// GPS enrichment without creating a separate DB connection). + #[allow(dead_code)] + pub fn exif_dao(&self) -> &Arc>> { + &self.exif_dao + } + /// Whether the optional Apollo Places integration is wired up. Drives /// tool-definition gating (no point offering `get_personal_place_at` /// when Apollo is unreachable) — exposed publicly so `insight_chat` @@ -4509,6 +4516,9 @@ Return ONLY the summary, nothing else."#, /// /// Calls `execute_tool` with empty file/image context; enabled tools /// never read those fields. + /// + /// Only used by the `reels` module (compiled in `main.rs`, not `lib.rs`), + /// so the `#[allow(dead_code)]` suppresses the lib-target warning. 
#[allow(dead_code)] pub(crate) async fn run_readonly_tool_loop( &self, @@ -4592,8 +4602,7 @@ Return ONLY the summary, nothing else."#, .chat() .chat_with_tools(messages.clone(), vec![]) .await?; - final_content = final_response.content.clone(); - messages.push(final_response); + final_content = final_response.content; } Ok(final_content) diff --git a/src/main.rs b/src/main.rs index e3ded45..dd2868f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -268,23 +268,7 @@ fn main() -> std::io::Result<()> { } // Spawn the nightly pre-generation scheduler (Section D). - { - use crate::database::{ - InsightDao, SqliteInsightDao, SqliteUserAiPrefsDao, UserAiPrefsDao, - }; - - let insight_dao: Arc>> = - Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); - let prefs_dao: Arc>> = - Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))); - - reels::spawn_pregen_scheduler( - app_state.clone(), - web::Data::new(insight_dao), - web::Data::new(prefs_dao), - ) - .await; - } + reels::spawn_pregen_scheduler(app_state.clone()).await; HttpServer::new(move || { let user_dao = SqliteUserDao::new(); diff --git a/src/reels/mod.rs b/src/reels/mod.rs index c51822c..1fc5b3b 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -56,9 +56,46 @@ pub fn normalize_library_key(libs: &[Library], param: Option<&str>) -> String { } } +/// Best-effort: mirror the latest client reel params into `user_ai_prefs` +/// so the nightly pre-gen scheduler can pick them up. Never fails the +/// caller regardless of DB errors. +fn capture_prefs( + app_state: &AppState, + prefs_dao: &web::Data>>>, + req: &web::Json, + library_param: Option<&str>, +) -> Result<(), anyhow::Error> { + use crate::database::models::UpsertUserAiPrefs; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs(); + let library = match library_param { + Some(p) if !p.is_empty() => { + // Resolve to the actual library id for the DB row. 
+ normalize_library_key(&app_state.libraries, Some(p)) + } + _ => "all".to_string(), + }; + let mut dao = prefs_dao.lock().expect("lock"); + let ctx = opentelemetry::Context::new(); + dao.upsert_prefs( + &ctx, + &UpsertUserAiPrefs { + voice: req.voice.clone().filter(|s| !s.is_empty()), + tz_offset_minutes: Some( + req.timezone_offset_minutes + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()), + ), + library: Some(library), + updated_at: now as i64, + }, + ) + .map_err(|e| anyhow::anyhow!("failed to upsert user_ai_prefs: {e}")) +} + /// Which scripting strategy to use for the reel narration. #[derive(Clone, Copy)] -#[allow(dead_code)] pub enum ScripterMode { /// Fast path: single LLM call via the direct client. Fast, @@ -348,6 +385,7 @@ pub async fn create_reel_handler( app_state: web::Data, exif_dao: web::Data>>, insight_dao: web::Data>>, + prefs_dao: web::Data>>>, ) -> impl Responder { let span_context = extract_context_from_request(&http_request); @@ -416,7 +454,9 @@ pub async fn create_reel_handler( abort: None, }, ); - return HttpResponse::Accepted().json(ReelJobCreatedResponse { + // Capture params for passive prefs mirror (best-effort, never fails). + let _ = capture_prefs(&app_state, &prefs_dao, &req, req.library.as_deref()); + HttpResponse::Accepted().json(ReelJobCreatedResponse { job_id: job_id.to_string(), status: ReelJobStatus::Done, }); @@ -474,6 +514,9 @@ pub async fn create_reel_handler( }); with_job(job_id, |job| job.abort = Some(handle.abort_handle())); + // Capture params for passive prefs mirror (best-effort, never fails). 
+ let _ = capture_prefs(&app_state, &prefs_dao, &req, req.library.as_deref()); + HttpResponse::Accepted().json(ReelJobCreatedResponse { job_id: job_id.to_string(), status: ReelJobStatus::Queued, @@ -861,18 +904,22 @@ fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option u32 { std::env::var("REEL_PREGEN_HOUR") .ok() - .and_then(|v| v.parse().ok()) + .and_then(|v| v.trim().parse().ok()) + .filter(|h| *h <= 23) .unwrap_or(3) } /// Env: "1" (default, Monday). Day of week for weekly pre-gen (0=Sun, 1=Mon, ...). +/// Clamped to 0-6; invalid values fall back to default. fn pregen_week_dow() -> u32 { std::env::var("REEL_PREGEN_WEEK_DOW") .ok() - .and_then(|v| v.parse().ok()) + .and_then(|v| v.trim().parse().ok()) + .filter(|d| *d <= 6) .unwrap_or(1) } @@ -881,8 +928,10 @@ fn pregen_week_dow() -> u32 { /// DST shifts. pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime, run_hour: u32) -> u64 { let now_hour = now.hour(); - let diff = if now_hour >= run_hour { + let diff = if now_hour > run_hour { 24 - now_hour + run_hour + } else if now_hour == run_hour { + 0 } else { run_hour - now_hour }; @@ -891,26 +940,22 @@ pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime, run /// Load pre-gen parameters: tries the user_ai_prefs DB row first, falls back /// to env vars, then to server-local defaults. 
-fn load_pregen_params( - prefs_dao: &web::Data>>>, -) -> (i32, Option, String) { +fn load_pregen_params(app_state: &AppState) -> (i32, Option, String) { // Try DB row first - if let Ok(mut dao) = prefs_dao.lock() { + if let Ok(mut dao) = app_state.user_ai_prefs_dao.lock() { let ctx = opentelemetry::Context::new(); if let Ok(Some(prefs)) = dao.get_prefs(&ctx) { - let tz = prefs - .tz_offset_minutes - .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()); + let tz = prefs.tz_offset_minutes.unwrap_or_else(fixed_tz_offset); let voice = prefs.voice; let library = prefs.library.unwrap_or_else(|| "all".to_string()); return (tz, voice, library); } } - // Fall back to env + // Fall back to env (explicit offset overrides auto-detect) let tz = std::env::var("REEL_PREGEN_TZ_OFFSET_MINUTES") .ok() .and_then(|v| v.parse().ok()) - .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()); + .unwrap_or_else(fixed_tz_offset); let voice = std::env::var("REEL_PREGEN_VOICE").ok(); let library = std::env::var("REEL_PREGEN_LIBRARY") .ok() @@ -918,12 +963,19 @@ fn load_pregen_params( (tz, voice, library) } +/// Fixed timezone offset: reads `REEL_PREGEN_TZ_FIXED_MINUTES` (e.g. "-480" +/// for US Pacific Standard Time) when set, falling back to the system local offset. Using +/// a fixed offset avoids DST shifts changing the pre-gen schedule halfway +/// through the year. +fn fixed_tz_offset() -> i32 { + std::env::var("REEL_PREGEN_TZ_FIXED_MINUTES") + .ok() + .and_then(|v| v.trim().parse().ok()) + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()) +} + /// Spawn the nightly pre-generation scheduler. Runs behind `REEL_PREGEN_ENABLED`. 
-pub(crate) async fn spawn_pregen_scheduler( - app_state: web::Data, - insight_dao: web::Data>>>, - prefs_dao: web::Data>>>, -) { +pub(crate) async fn spawn_pregen_scheduler(app_state: web::Data) { if std::env::var("REEL_PREGEN_ENABLED").ok() != Some("1".to_string()) { log::info!("Reel pre-generation scheduler disabled (REEL_PREGEN_ENABLED != 1)"); return; @@ -942,7 +994,7 @@ pub(crate) async fn spawn_pregen_scheduler( log::debug!("Next pre-gen run in {}s", sleep_secs); tokio::time::sleep(std::time::Duration::from_secs(sleep_secs)).await; - if let Err(e) = run_pregen_batch(&app_state, &insight_dao, &prefs_dao).await { + if let Err(e) = run_pregen_batch(&app_state).await { log::error!("Reel pre-generation batch failed: {}", e); } } @@ -950,11 +1002,7 @@ pub(crate) async fn spawn_pregen_scheduler( } /// Run the pre-generation batch for all applicable spans. -async fn run_pregen_batch( - app_state: &AppState, - insight_dao: &web::Data>>>, - prefs_dao: &web::Data>>>, -) -> anyhow::Result<()> { +async fn run_pregen_batch(app_state: &AppState) -> anyhow::Result<()> { let now = chrono::Local::now(); let weekday = now.weekday().num_days_from_sunday(); // 0=Sun, 1=Mon, ... let day_of_month = now.day(); @@ -967,11 +1015,10 @@ async fn run_pregen_batch( spans.push("month"); } - let (tz, voice, library) = load_pregen_params(prefs_dao); + let (tz, voice, library) = load_pregen_params(app_state); for span in spans { - if let Err(e) = pregen_one(app_state, insight_dao, span, tz, voice.clone(), &library).await - { + if let Err(e) = pregen_one(app_state, span, tz, voice.clone(), &library).await { log::error!("Pre-gen failed for span={}: {}", span, e); } } @@ -982,7 +1029,6 @@ async fn run_pregen_batch( /// Pre-generate a single reel for the given span. 
async fn pregen_one( app_state: &AppState, - insight_dao: &web::Data>>>, span: &str, tz: i32, voice: Option, @@ -1006,11 +1052,10 @@ async fn pregen_one( max_segments: 24, }; - let exif_dao: Arc>> = Arc::new(StdMutex::new(Box::new( - crate::database::SqliteExifDao::new(), - ))); + let exif_dao = app_state.insight_generator.exif_dao(); + let insight_dao = app_state.insight_generator.insight_dao(); let ctx = opentelemetry::Context::new(); - let (planned, reel_meta) = match selector::resolve(app_state, &exif_dao, &ctx, &selector) { + let (planned, reel_meta) = match selector::resolve(app_state, exif_dao, &ctx, &selector) { Ok((p, m)) => (p, m), Err(e) => { log::warn!("Pre-gen resolve failed for span={}: {}", span, e); @@ -1042,7 +1087,7 @@ async fn pregen_one( let is_fresh = { let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); - dao.exists_fresh(&ctx, span, "all", RENDER_VERSION as i32, min_generated_at) + dao.exists_fresh(&ctx, span, library, RENDER_VERSION as i32, min_generated_at) .unwrap_or(false) }; @@ -1074,7 +1119,7 @@ async fn pregen_one( &ctx, &crate::database::models::InsertablePrecomputedReel { span: span.to_string(), - library_key: "all".to_string(), + library_key: library.to_string(), cache_key: key.clone(), output_path: mp4_path.to_string_lossy().to_string(), title, @@ -1094,7 +1139,7 @@ async fn pregen_one( let (title, mp4) = produce_reel( app_state, insight_dao, - &exif_dao, + exif_dao, planned, reel_meta, voice.clone(), @@ -1110,7 +1155,7 @@ async fn pregen_one( &ctx, &crate::database::models::InsertablePrecomputedReel { span: span.to_string(), - library_key: "all".to_string(), + library_key: library.to_string(), cache_key: key.clone(), output_path: mp4.to_string_lossy().to_string(), title, @@ -1129,9 +1174,8 @@ async fn pregen_one( #[cfg(test)] mod tests { use super::*; - use crate::ai::face_client::FaceClient; use crate::libraries::Library; - use crate::video::actors::StreamActor; + use chrono::TimeZone; fn photo(p: &str, lib: i32) 
-> SegmentMedia { SegmentMedia::Photo { @@ -1156,128 +1200,6 @@ mod tests { } } - /// Minimal AppState for tests that only need library lookup. - #[allow(dead_code)] - fn test_app_state() -> AppState { - use crate::ai::InsightGenerator; - use crate::ai::insight_chat::{ChatLockMap, InsightChatService}; - use crate::ai::turn_registry::TurnRegistry; - use crate::ai::{OllamaClient, SmsApiClient}; - use crate::database::{ - ExifDao, InsightDao, InsightGenerationJobDao, PreviewDao, SqliteExifDao, - SqliteInsightDao, SqliteInsightGenerationJobDao, SqlitePreviewDao, - }; - use crate::faces; - use crate::state::AppState; - use crate::tags::SqliteTagDao; - use actix::Actor; - use std::sync::Mutex; - - let temp_dir = tempfile::tempdir().expect("Failed to create temp directory"); - let base_path = temp_dir.path().to_path_buf(); - let base_path_str = base_path.to_string_lossy().to_string(); - - let test_lib = Library { - id: crate::libraries::PRIMARY_LIBRARY_ID, - name: "main".to_string(), - root_path: base_path_str.clone(), - enabled: true, - excluded_dirs: Vec::new(), - }; - - let ollama = OllamaClient::new( - "http://localhost:11434".to_string(), - None, - "llama3.2".to_string(), - None, - ); - let sms_client = SmsApiClient::new("http://localhost:8000".to_string(), None); - let apollo_client = crate::ai::apollo_client::ApolloClient::new(None); - - let insight_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); - let exif_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new(SqliteExifDao::new()))); - let daily_summary_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteDailySummaryDao::new(), - ))); - let insight_generator = InsightGenerator::new( - ollama.clone(), - None, - None, - sms_client.clone(), - apollo_client.clone(), - insight_dao.clone(), - exif_dao.clone(), - daily_summary_dao, - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteCalendarEventDao::new(), - 
))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteLocationHistoryDao::new(), - ))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteSearchHistoryDao::new(), - ))), - std::sync::Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))), - std::sync::Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new()))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteKnowledgeDao::new(), - ))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqlitePersonaDao::new(), - ))), - vec![test_lib.clone()], - ); - - let chat_locks: ChatLockMap = - std::sync::Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new())); - let insight_chat = std::sync::Arc::new(InsightChatService::new( - std::sync::Arc::new(insight_generator.clone()), - insight_dao.clone(), - chat_locks, - )); - let turn_registry = std::sync::Arc::new(TurnRegistry::new(300)); - let preview_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new()))); - let insight_job_dao: std::sync::Arc>> = - std::sync::Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))); - let insight_job_handles: std::sync::Arc< - Mutex>, - > = std::sync::Arc::new(Mutex::new(std::collections::HashMap::new())); - - AppState::new( - std::sync::Arc::new(StreamActor {}.start()), - vec![test_lib], - base_path_str.clone(), - base_path_str.clone(), - base_path_str.clone(), - base_path_str.clone(), - Vec::new(), - ollama, - None, - Vec::new(), - None, - Vec::new(), - sms_client, - insight_generator, - insight_chat, - turn_registry, - preview_dao, - FaceClient::new(None), - crate::ai::clip_client::ClipClient::new(None), - insight_job_dao, - insight_job_handles, - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqlitePrecomputedReelDao::new(), - ))), - std::sync::Arc::new(Mutex::new(Box::new( - crate::database::SqliteUserAiPrefsDao::new(), - ))), - ) - } - #[test] fn cache_key_is_stable_for_same_inputs() { let media = 
vec![photo("a.jpg", 1), photo("b.jpg", 1)]; @@ -1443,4 +1365,55 @@ mod tests { }]; assert_eq!(normalize_library_key(&libs, Some("missing")), "all"); } + + #[test] + fn secs_until_next_run_hour_same_hour_returns_zero() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 3, 30, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 3), 0); + } + + #[test] + fn secs_until_next_run_hour_future_today_returns_remaining() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 10, 0, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 14), 4 * 3600); + } + + #[test] + fn secs_until_next_run_hour_past_today_wraps() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 20, 0, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 3), (24 - 20 + 3) * 3600); + } + + #[test] + fn secs_until_next_run_hour_midnight() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 0, 0, 0) + .single() + .expect("valid datetime"); + // 0:00, run at 3 → 3 hours + assert_eq!(secs_until_next_run_hour(dt, 3), 3 * 3600); + // 0:00, run at 0 → 0 (immediate) + assert_eq!(secs_until_next_run_hour(dt, 0), 0); + } + + #[test] + fn secs_until_next_run_hour_last_hour() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 23, 30, 0) + .single() + .expect("valid datetime"); + // 23:30, run at 23 → 0 (still in hour 23) + assert_eq!(secs_until_next_run_hour(dt, 23), 0); + // 23:30, run at 0 → 1 hour + assert_eq!(secs_until_next_run_hour(dt, 0), 3600); + } } -- 2.52.0 From e4d8d374fb80f93475e6afc5dea920f8b39c589d Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 15:14:36 -0400 Subject: [PATCH 12/26] Reels pre-gen: fix runtime breakers from review (1-5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 
Drop the unregistered prefs_dao/reel_dao web::Data extractors from create_reel_handler / precomputed_reel_handler and read the DAOs off AppState instead (consistent with the scheduler). Missing app_data would have 500'd every POST /reels and /reels/precomputed at runtime. 2. Restore the dropped 'return' in the cache-hit branch — without it a cache hit fell through, overwrote the Done job with Queued, and re-ran the whole TTS+render pipeline on every request. 3. Make secs_until_next_run_hour minute/second-accurate so a batch that finishes inside the run hour sleeps ~24h instead of busy-looping (wake, re-run, sleep 0) for the rest of the hour. Tests updated. 4. Prune photo/user-bound tools (get_file_tags, get_faces_in_photo, recall_facts_for_photo, recall_facts_for_entity) from the agentic reel scripter's allow-list — they no-op/error with the empty file/user context and only burn iterations. 5. Align AGENTIC_SYSTEM_PROMPT's advertised tool list with the actual (pruned) allow-list. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 72 +++++++++++++++++++++++++-------------------- src/reels/script.rs | 20 ++++++++----- 2 files changed, 52 insertions(+), 40 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 1fc5b3b..7fc71b0 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -18,7 +18,7 @@ pub mod selector; use std::collections::HashMap; use std::path::{Path, PathBuf}; -use std::sync::{Arc, LazyLock, Mutex, Mutex as StdMutex}; +use std::sync::{LazyLock, Mutex, Mutex as StdMutex}; use std::time::{Duration, Instant}; use actix_files::NamedFile; @@ -30,7 +30,7 @@ use serde_json::json; use uuid::Uuid; use crate::data::Claims; -use crate::database::{ExifDao, InsightDao, PrecomputedReelDao, UserAiPrefsDao}; +use crate::database::{ExifDao, InsightDao}; use crate::libraries::{Library, resolve_library_param}; use crate::memories::MemoriesSpan; use crate::otel::extract_context_from_request; @@ -61,7 +61,6 @@ pub fn 
normalize_library_key(libs: &[Library], param: Option<&str>) -> String { /// caller regardless of DB errors. fn capture_prefs( app_state: &AppState, - prefs_dao: &web::Data>>>, req: &web::Json, library_param: Option<&str>, ) -> Result<(), anyhow::Error> { @@ -77,7 +76,7 @@ fn capture_prefs( } _ => "all".to_string(), }; - let mut dao = prefs_dao.lock().expect("lock"); + let mut dao = app_state.user_ai_prefs_dao.lock().expect("lock"); let ctx = opentelemetry::Context::new(); dao.upsert_prefs( &ctx, @@ -385,7 +384,6 @@ pub async fn create_reel_handler( app_state: web::Data, exif_dao: web::Data>>, insight_dao: web::Data>>, - prefs_dao: web::Data>>>, ) -> impl Responder { let span_context = extract_context_from_request(&http_request); @@ -455,8 +453,8 @@ pub async fn create_reel_handler( }, ); // Capture params for passive prefs mirror (best-effort, never fails). - let _ = capture_prefs(&app_state, &prefs_dao, &req, req.library.as_deref()); - HttpResponse::Accepted().json(ReelJobCreatedResponse { + let _ = capture_prefs(&app_state, &req, req.library.as_deref()); + return HttpResponse::Accepted().json(ReelJobCreatedResponse { job_id: job_id.to_string(), status: ReelJobStatus::Done, }); @@ -515,7 +513,7 @@ pub async fn create_reel_handler( with_job(job_id, |job| job.abort = Some(handle.abort_handle())); // Capture params for passive prefs mirror (best-effort, never fails). 
- let _ = capture_prefs(&app_state, &prefs_dao, &req, req.library.as_deref()); + let _ = capture_prefs(&app_state, &req, req.library.as_deref()); HttpResponse::Accepted().json(ReelJobCreatedResponse { job_id: job_id.to_string(), @@ -584,7 +582,6 @@ pub async fn precomputed_reel_handler( _claims: Claims, query: web::Query>, app_state: web::Data, - reel_dao: web::Data>>, ) -> impl Responder { let span = query.get("span").map(|s| s.as_str()).unwrap_or("day"); let library_key = normalize_library_key( @@ -605,7 +602,10 @@ pub async fn precomputed_reel_handler( let min_generated_at = now - (max_age_hours * 3600); let ctx = opentelemetry::Context::new(); - let mut dao = reel_dao.lock().expect("Unable to lock PrecomputedReelDao"); + let mut dao = app_state + .precomputed_reel_dao + .lock() + .expect("Unable to lock PrecomputedReelDao"); // Fast existence gate: is there a fresh row at all? if !dao @@ -923,19 +923,23 @@ fn pregen_week_dow() -> u32 { .unwrap_or(1) } -/// Pure: seconds until the next run of `run_hour` given the current local time. -/// Handles same-day vs wrap-around. Recomputed each loop iteration to absorb -/// DST shifts. +/// Pure: seconds until the next `run_hour:00:00` strictly after `now`. +/// +/// Minute/second-accurate (not just hour-granular): when `now` is already at or +/// past the target this wraps to the same hour tomorrow, so a batch that +/// finishes inside the run hour sleeps ~24h rather than busy-looping (waking, +/// re-running, and re-sleeping 0s) for the rest of that hour. The tradeoff is +/// that booting at or after `run_hour` waits until the next day. Recomputed each +/// loop iteration from `Local::now()` so DST shifts are absorbed. 
pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime, run_hour: u32) -> u64 { - let now_hour = now.hour(); - let diff = if now_hour > run_hour { - 24 - now_hour + run_hour - } else if now_hour == run_hour { - 0 + let now_secs = now.hour() * 3600 + now.minute() * 60 + now.second(); + let target_secs = run_hour * 3600; + let diff = if target_secs > now_secs { + target_secs - now_secs } else { - run_hour - now_hour + 86_400 - now_secs + target_secs }; - (diff * 3600) as u64 + diff as u64 } /// Load pre-gen parameters: tries the user_ai_prefs DB row first, falls back @@ -1367,21 +1371,25 @@ mod tests { } #[test] - fn secs_until_next_run_hour_same_hour_returns_zero() { + fn secs_until_next_run_hour_within_run_hour_wraps_to_tomorrow() { + // 03:30, run 3 → already past today's 03:00, so wait until tomorrow + // 03:00 (23h30m). Crucially NOT 0 — that would busy-loop the scheduler + // for the rest of the hour. let dt = chrono::Local .with_ymd_and_hms(2026, 6, 13, 3, 30, 0) .single() .expect("valid datetime"); - assert_eq!(secs_until_next_run_hour(dt, 3), 0); + assert_eq!(secs_until_next_run_hour(dt, 3), 23 * 3600 + 30 * 60); } #[test] - fn secs_until_next_run_hour_future_today_returns_remaining() { + fn secs_until_next_run_hour_future_today_counts_minutes() { + // 10:15 → 14:00 is 3h45m, not a whole-hour 4h (minutes count). 
let dt = chrono::Local - .with_ymd_and_hms(2026, 6, 13, 10, 0, 0) + .with_ymd_and_hms(2026, 6, 13, 10, 15, 0) .single() .expect("valid datetime"); - assert_eq!(secs_until_next_run_hour(dt, 14), 4 * 3600); + assert_eq!(secs_until_next_run_hour(dt, 14), 3 * 3600 + 45 * 60); } #[test] @@ -1401,19 +1409,19 @@ mod tests { .expect("valid datetime"); // 0:00, run at 3 → 3 hours assert_eq!(secs_until_next_run_hour(dt, 3), 3 * 3600); - // 0:00, run at 0 → 0 (immediate) - assert_eq!(secs_until_next_run_hour(dt, 0), 0); + // 0:00 exactly, run at 0 → wraps to next midnight (not 0, so no busy loop) + assert_eq!(secs_until_next_run_hour(dt, 0), 86_400); } #[test] - fn secs_until_next_run_hour_last_hour() { + fn secs_until_next_run_hour_just_before_target() { + // 23:30, run 0 → 30 minutes to midnight (minute-accurate, not 1h). let dt = chrono::Local .with_ymd_and_hms(2026, 6, 13, 23, 30, 0) .single() .expect("valid datetime"); - // 23:30, run at 23 → 0 (still in hour 23) - assert_eq!(secs_until_next_run_hour(dt, 23), 0); - // 23:30, run at 0 → 1 hour - assert_eq!(secs_until_next_run_hour(dt, 0), 3600); + assert_eq!(secs_until_next_run_hour(dt, 0), 30 * 60); + // 23:30, run 23 → already past today's 23:00, wait until tomorrow. + assert_eq!(secs_until_next_run_hour(dt, 23), 86_400 - 30 * 60); } } diff --git a/src/reels/script.rs b/src/reels/script.rs index 202a22c..858efd1 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -53,10 +53,12 @@ Be concrete and grounded in the details given; never invent names, places, or \ events that aren't supported. Keep each line to one or two short sentences that \ can be read aloud in a few seconds. Avoid generic filler like \"what a \ wonderful day\" — if you have little to go on, simply describe the moment \ -plainly.\n\nYou may call read-only tools (search_messages, get_file_tags, \ -reverse_geocode, get_current_datetime, recall_entities, recall_facts_for_photo, \ -recall_facts_for_entity) to ground each line in real context. 
Never invent \ -details. Return ONLY the JSON object, no prose or code fences."; +plainly.\n\nYou may call read-only tools (search_rag, search_messages, \ +get_sms_messages, get_calendar_events, get_location_history, reverse_geocode, \ +get_personal_place_at, recall_entities, get_current_datetime) to ground each \ +line in real context — e.g. reverse_geocode a moment's GPS to name the place, \ +or check the calendar/messages around its date. Never invent details. Return \ +ONLY the JSON object, no prose or code fences."; /// Maximum agentic tool iterations for pre-generation. Tunable via /// `REEL_PREGEN_MAX_TOOL_ITERS` (default 8). @@ -317,19 +319,21 @@ pub async fn generate_script_agentic( // then filter out write tools. let gate = generator.current_gate_opts_for_persona(false, None); let all_tools = InsightGenerator::build_tool_definitions(gate); + // Whole-reel calls have no single photo and no authenticated user, so the + // loop runs execute_tool with empty file/image context and user_id=0. Only + // tools that work without that context are useful here — photo/user-bound + // tools (get_file_tags, get_faces_in_photo, recall_facts_for_photo, + // recall_facts_for_entity) would just no-op or error, burning iterations, + // so they're excluded. 
let read_only_names: std::collections::HashSet<&str> = [ "search_rag", "search_messages", "get_sms_messages", "get_calendar_events", "get_location_history", - "get_file_tags", - "get_faces_in_photo", "reverse_geocode", "get_personal_place_at", "recall_entities", - "recall_facts_for_photo", - "recall_facts_for_entity", "get_current_datetime", ] .into_iter() -- 2.52.0 From ca007a618d50c3f58794280d02f56d05054e0ce3 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 15:19:41 -0400 Subject: [PATCH 13/26] Reels pre-gen: record true media count + real upsert for user_ai_prefs - pregen_one recorded media_count as planned.len() (beat count); record the actual media item total (media.len(), photos + clips) in both the cache-hit and freshly-rendered ledger paths. Drops the redundant photo_count binding. - Replace upsert_prefs's insert-then-catch-error-then-update dance with a single atomic INSERT ... ON CONFLICT(id) DO UPDATE. Explicit id=1 makes the conflict target deterministic; explicit column .set((...)) keeps None -> NULL overwrite semantics so the row mirrors the latest request exactly, and genuine insert errors surface instead of being swallowed. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/database/user_ai_prefs_dao.rs | 46 ++++++++++++++----------------- src/reels/mod.rs | 7 +++-- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/src/database/user_ai_prefs_dao.rs b/src/database/user_ai_prefs_dao.rs index d58a56c..129ef0c 100644 --- a/src/database/user_ai_prefs_dao.rs +++ b/src/database/user_ai_prefs_dao.rs @@ -84,32 +84,26 @@ impl UserAiPrefsDao for SqliteUserAiPrefsDao { .lock() .expect("Unable to lock UserAiPrefsDao"); - // SQLite: INSERT on first call, UPDATE on subsequent calls. - // The first INSERT creates the row with id=1 (auto-increment). - // Subsequent calls UPDATE the existing row. 
- let result = diesel::insert_into(dsl::user_ai_prefs) - .values(prefs) - .execute(connection.deref_mut()); - - match result { - Ok(_) => { - // First insert succeeded. - Ok(()) - } - Err(_e) => { - // Insert failed (likely due to duplicate key). Update instead. - diesel::update(dsl::user_ai_prefs.filter(dsl::id.eq(1))) - .set(( - dsl::voice.eq(&prefs.voice), - dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes), - dsl::library.eq(&prefs.library), - dsl::updated_at.eq(&prefs.updated_at), - )) - .execute(connection.deref_mut()) - .map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?; - Ok(()) - } - } + // Single-row table (id=1): one atomic upsert. The explicit id=1 + // makes the conflict target deterministic so the second call + // updates in place rather than tripping the CHECK(id=1) constraint, + // and real insert errors surface instead of being swallowed into a + // separate update branch. The columns are set explicitly (rather + // than via AsChangeset) so a None field overwrites to NULL — the + // row mirrors the latest request exactly, not a merge of past ones. + diesel::insert_into(dsl::user_ai_prefs) + .values((dsl::id.eq(1), prefs)) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::voice.eq(&prefs.voice), + dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes), + dsl::library.eq(&prefs.library), + dsl::updated_at.eq(&prefs.updated_at), + )) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?; + Ok(()) }) .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) } diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 7fc71b0..059ce43 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -1075,6 +1075,8 @@ async fn pregen_one( // Flatten every media item across beats (in order) into the cache key. let media: Vec = planned.iter().flat_map(|b| b.media.clone()).collect(); let key = cache_key(&selector, &media, voice.as_deref()); + // Total media items shown (photos + clips), not beat count. 
+ let media_count = media.len() as i32; // Dedup: check if fresh ledger row exists let now = std::time::SystemTime::now() @@ -1127,7 +1129,7 @@ async fn pregen_one( cache_key: key.clone(), output_path: mp4_path.to_string_lossy().to_string(), title, - media_count: planned.len() as i32, + media_count, render_version: RENDER_VERSION as i32, tz_offset_minutes: tz, voice: voice.clone(), @@ -1139,7 +1141,6 @@ async fn pregen_one( // Generate the reel log::info!("Generating precomputed reel for span={}, key={}", span, key); - let photo_count = planned.len() as i32; let (title, mp4) = produce_reel( app_state, insight_dao, @@ -1163,7 +1164,7 @@ async fn pregen_one( cache_key: key.clone(), output_path: mp4.to_string_lossy().to_string(), title, - media_count: photo_count, + media_count, render_version: RENDER_VERSION as i32, tz_offset_minutes: tz, voice: voice.clone(), -- 2.52.0 From 19fc1bbdf81ff1d6a1d2388b1efd0434c1686706 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 23:12:54 -0400 Subject: [PATCH 14/26] Reels pre-gen: use DEFAULT_MAX_SEGMENTS so cache keys match on-demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pregen_one hardcoded max_segments: 24 while create_reel_handler defaults to DEFAULT_MAX_SEGMENTS (40). Since the cache key encodes the raw max_segments, the pre-generated reel's key never matched the client's on-demand request, so POST /reels cache-hit an older max=40 reel and the agentic pre-gen file was left orphaned. Align to DEFAULT_MAX_SEGMENTS (as the plan specified) so the on-demand cache-hit path serves the pre-gen reel. Content is unchanged — the actual beat count is duration-budgeted either way; only the key descriptor differed. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 059ce43..2a4e6e9 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -1053,7 +1053,11 @@ async fn pregen_one( } else { Some(library.to_string()) }, - max_segments: 24, + // Must match the on-demand default (create_reel_handler) so the cache + // key — which encodes the raw max_segments — lines up and the on-demand + // cache-hit path serves this pre-generated reel. The client sends no + // max_segments, so it defaults to DEFAULT_MAX_SEGMENTS there too. + max_segments: selector::DEFAULT_MAX_SEGMENTS, }; let exif_dao = app_state.insight_generator.exif_dao(); -- 2.52.0 From b52b1eb32334e70446835d526da6558675895f25 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 23:14:39 -0400 Subject: [PATCH 15/26] Reels pre-gen: make dedup cache-key-aware so key changes regenerate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit exists_fresh only matched (span, library, render_version, age), so a cache-key change that doesn't bump RENDER_VERSION (e.g. the max_segments alignment, or any future selection-logic tweak) left last night's ledger row looking 'fresh' — the nightly run would skip and the orphaned reel would persist. Dedup now compares the stored cache_key to the freshly computed key (and confirms the mp4 exists), so a changed key forces a regen within the freshness window. exists_fresh stays as the HTTP endpoint's fast gate. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 2a4e6e9..c5d7492 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -1095,14 +1095,29 @@ async fn pregen_one( }; let min_generated_at = now - (max_age_hours as i64 * 3600); - let is_fresh = { + // Skip only when a fresh ledger row points at THIS exact cache key (same + // media, params, render version) and its file still exists. Comparing the + // stored cache_key — not just (span, library) — means a key change from + // selection-logic/params drift that doesn't bump RENDER_VERSION still forces + // a regen within the freshness window, instead of leaving a stale row that + // points at an orphaned reel. + let already_current = { let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); - dao.exists_fresh(&ctx, span, library, RENDER_VERSION as i32, min_generated_at) - .unwrap_or(false) + matches!( + dao.latest_for(&ctx, span, library), + Ok(Some(row)) + if row.cache_key == key + && row.render_version == RENDER_VERSION as i32 + && row.generated_at >= min_generated_at + ) && reel_mp4_path(app_state, &key).exists() }; - if is_fresh { - log::info!("Fresh precomputed reel exists for span={}, skipping", span); + if already_current { + log::info!( + "Fresh precomputed reel already current for span={} key={}, skipping", + span, + key + ); return Ok(()); } -- 2.52.0 From 664b3694f8088db76b734790dd236f4b8cd3e9b9 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 23:16:14 -0400 Subject: [PATCH 16/26] Reels pre-gen: always render the agentic reel, don't adopt on-demand mp4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Past the key-aware dedup, any mp4 already at the cache key was not pre-generated by us (no matching ledger row) — typically an on-demand fast-scripted reel sharing the key after the 
max_segments alignment. Adopting it recorded a ledger row pointing at the fast reel, silently defeating agentic pre-gen. Drop the adopt-existing-mp4 shortcut and always produce_reel (atomic overwrite). Worst case is one redundant re-render if a prior run crashed between render and ledger write. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/reels/mod.rs | 45 +++++++-------------------------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/src/reels/mod.rs b/src/reels/mod.rs index c5d7492..95769ad 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -1121,44 +1121,13 @@ async fn pregen_one( return Ok(()); } - // Check if MP4 already on disk (from a previous run that crashed after render) - let mp4_path = reel_mp4_path(app_state, &key); - if mp4_path.exists() { - log::info!( - "Precomputed reel MP4 already exists for key={}, recording ledger and skipping render", - key - ); - // Read title from sidecar if available - let sidecar_path = mp4_path.with_extension("json"); - let title = if sidecar_path.exists() { - let sidecar = tokio::fs::read_to_string(&sidecar_path).await.ok(); - sidecar - .and_then(|s| serde_json::from_str::(&s).ok()) - .map(|s| s.title) - .unwrap_or_else(|| format!("{} reel", span)) - } else { - format!("{} reel", span) - }; - let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); - reel_dao.record_reel( - &ctx, - &crate::database::models::InsertablePrecomputedReel { - span: span.to_string(), - library_key: library.to_string(), - cache_key: key.clone(), - output_path: mp4_path.to_string_lossy().to_string(), - title, - media_count, - render_version: RENDER_VERSION as i32, - tz_offset_minutes: tz, - voice: voice.clone(), - generated_at: now, - }, - )?; - return Ok(()); - } - - // Generate the reel + // Past the key-aware dedup above, any MP4 already at this key was NOT + // pre-generated by us (it has no matching ledger row) — most likely an + // on-demand fast-scripted reel that happens to share the 
key. Don't adopt + // it: regenerate so the precomputed reel is the agentic one. produce_reel + // publishes atomically, overwriting whatever is there. (The narrow + // render-succeeded-but-ledger-write-failed crash window just costs one + // redundant re-render next run.) log::info!("Generating precomputed reel for span={}, key={}", span, key); let (title, mp4) = produce_reel( app_state, -- 2.52.0 From 7e21213181be66ed092a2f48e877be408c895ee6 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sat, 13 Jun 2026 23:27:32 -0400 Subject: [PATCH 17/26] Reels: bound disk/ledger growth (pre-gen prune + on-demand cache sweep) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nothing reaped reels before, so the on-disk cache and ledger grew unbounded — each night's daily reel is a new ~4MB file + ledger row that's stale within ~26h. - Pre-gen self-prune: after recording a reel, prune_superseded keeps the newest PREGEN_KEEP_PER_SCOPE (2) rows per (span, library) and unlinks the superseded reels' mp4+sidecar. Caps the ledger/disk at ~spans×libraries×2. - On-disk sweeper (spawn_reel_cache_sweeper): every 24h, removes reel mp4s with no ledger row and no live job older than REEL_CACHE_MAX_AGE_DAYS (7) — bounding the on-demand cache, which has no ledger row and otherwise grows forever — plus crashed-render cruft (.mp4.tmp/.concat.txt/orphan sidecars). Runs regardless of REEL_PREGEN_ENABLED; disable with REEL_CACHE_SWEEP_ENABLED=0. - New DAO methods prune_superseded + all_cache_keys (with tests); env knobs documented in .env.example. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .env.example | 7 + src/database/precomputed_reel_dao.rs | 118 +++++++++++++++++ src/main.rs | 2 + src/reels/mod.rs | 186 ++++++++++++++++++++++++--- 4 files changed, 296 insertions(+), 17 deletions(-) diff --git a/.env.example b/.env.example index a7bd7e5..2e431bc 100644 --- a/.env.example +++ b/.env.example @@ -160,3 +160,10 @@ SEARCH_RAG_RERANK=0 # REEL_PREGEN_LIBRARY=all # Max agentic tool iterations for pre-gen scripter. Default 8. # REEL_PREGEN_MAX_TOOL_ITERS=8 +# +# On-disk reel cache sweep (runs every 24h, independent of pre-gen). Removes +# reel MP4s with no ledger row + no live job that are older than the max age — +# i.e. the on-demand cache, which otherwise grows forever. Set to 0 to disable. +# REEL_CACHE_SWEEP_ENABLED=1 +# Age (days) before an unreferenced reel MP4 is swept. Default 7. +# REEL_CACHE_MAX_AGE_DAYS=7 diff --git a/src/database/precomputed_reel_dao.rs b/src/database/precomputed_reel_dao.rs index 7acc098..b66573b 100644 --- a/src/database/precomputed_reel_dao.rs +++ b/src/database/precomputed_reel_dao.rs @@ -41,6 +41,23 @@ pub trait PrecomputedReelDao: Sync + Send { render_version: i32, min_generated_at: i64, ) -> Result; + + /// Delete all but the newest `keep` rows for (span, library_key), returning + /// the deleted rows so the caller can unlink their output files. Used by the + /// nightly job to retire superseded reels (e.g. yesterday's daily). + #[allow(dead_code)] + fn prune_superseded( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + keep: usize, + ) -> Result, DbError>; + + /// Every cache_key currently in the ledger. Used by the on-disk cache sweep + /// to protect files a ledger row still points at. 
+ #[allow(dead_code)] + fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result, DbError>; } pub struct SqlitePrecomputedReelDao { @@ -148,6 +165,60 @@ impl PrecomputedReelDao for SqlitePrecomputedReelDao { }) .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) } + + fn prune_superseded( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + keep: usize, + ) -> Result, DbError> { + trace_db_call(context, "delete", "prune_superseded", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + // Newest first; everything past `keep` is superseded. The table + // holds at most a handful of rows per (span, library), so loading + // and slicing in Rust is cheaper than a correlated subquery. + let mut rows: Vec = dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .order(dsl::generated_at.desc()) + .load::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to load reels for prune: {}", e))?; + + let stale = rows.split_off(rows.len().min(keep)); + if !stale.is_empty() { + let ids: Vec = stale.iter().map(|r| r.id).collect(); + diesel::delete(dsl::precomputed_reels.filter(dsl::id.eq_any(ids))) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to delete superseded reels: {}", e))?; + } + Ok(stale) + }) + .map_err(|e| DbError::log(DbErrorKind::UpdateError, e)) + } + + fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result, DbError> { + trace_db_call(context, "query", "all_cache_keys", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + dsl::precomputed_reels + .select(dsl::cache_key) + .load::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to load cache keys: {}", e)) + }) + .map_err(|e| 
DbError::log(DbErrorKind::QueryError, e)) + } } #[cfg(test)] @@ -318,4 +389,51 @@ mod tests { assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap()); assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap()); } + + #[test] + fn prune_superseded_keeps_newest_and_returns_deleted() { + let mut dao = setup_dao(); + let ctx = ctx(); + // Three day/lib1 reels at increasing timestamps, plus an unrelated one. + for (i, key) in ["k1", "k2", "k3"].iter().enumerate() { + dao.record_reel( + &ctx, + &InsertablePrecomputedReel { + cache_key: key.to_string(), + generated_at: 1_000_000 + i as i64 * 1000, + ..sample_row() + }, + ) + .unwrap(); + } + let other = InsertablePrecomputedReel { + library_key: "2".to_string(), + cache_key: "other".to_string(), + ..sample_row() + }; + dao.record_reel(&ctx, &other).unwrap(); + + // Keep the newest 2 of (day, "1"); k1 (oldest) is superseded. + let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap(); + assert_eq!(deleted.len(), 1); + assert_eq!(deleted[0].cache_key, "k1"); + + // The newest 2 survive; the other-library row is untouched. + let keys = dao.all_cache_keys(&ctx).unwrap(); + assert_eq!(keys.len(), 3); + assert!(keys.contains(&"k2".to_string())); + assert!(keys.contains(&"k3".to_string())); + assert!(keys.contains(&"other".to_string())); + assert!(!keys.contains(&"k1".to_string())); + } + + #[test] + fn prune_superseded_noop_when_within_keep() { + let mut dao = setup_dao(); + let ctx = ctx(); + dao.record_reel(&ctx, &sample_row()).unwrap(); + let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap(); + assert!(deleted.is_empty()); + assert_eq!(dao.all_cache_keys(&ctx).unwrap().len(), 1); + } } diff --git a/src/main.rs b/src/main.rs index dd2868f..e420d8b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -269,6 +269,8 @@ fn main() -> std::io::Result<()> { // Spawn the nightly pre-generation scheduler (Section D). 
reels::spawn_pregen_scheduler(app_state.clone()).await; + // Spawn the on-disk reel-cache sweeper (bounds pre-gen + on-demand reels). + reels::spawn_reel_cache_sweeper(app_state.clone()).await; HttpServer::new(move || { let user_dao = SqliteUserDao::new(); diff --git a/src/reels/mod.rs b/src/reels/mod.rs index 95769ad..afe2ced 100644 --- a/src/reels/mod.rs +++ b/src/reels/mod.rs @@ -46,6 +46,21 @@ const REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS: u64 = 192; /// Maximum age for a precomputed month reel. const REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS: u64 = 768; +/// How many precomputed reels to keep per (span, library). The newest is the +/// one served; one extra is a grace window so a regen mid-flight (or a client +/// that started a fetch just before the swap) isn't left without a file. +const PREGEN_KEEP_PER_SCOPE: usize = 2; + +/// On-disk reel cache sweep: an unreferenced reel MP4 older than this is +/// removed. Catches the on-demand cache (which has no ledger row and otherwise +/// grows forever) and any pre-gen orphans. Tunable via `REEL_CACHE_MAX_AGE_DAYS`. +const REEL_CACHE_MAX_AGE_DAYS_DEFAULT: u64 = 7; +/// Interval between on-disk cache sweeps. +const REEL_CACHE_SWEEP_INTERVAL_SECS: u64 = 24 * 3600; +/// Transient render artifacts (`.mp4.tmp`, `.concat.txt`, orphaned sidecars) +/// older than this are leftovers from a crashed render and safe to remove. +const REEL_TMP_MAX_AGE_SECS: u64 = 3600; + /// Resolve a library request parameter to a stable key string. /// Returns the library's id as a string when found, or `"all"` when /// the param is absent or the lookup fails. 
@@ -1142,28 +1157,165 @@ async fn pregen_one( ) .await?; - // Record to ledger - let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); - reel_dao.record_reel( - &ctx, - &crate::database::models::InsertablePrecomputedReel { - span: span.to_string(), - library_key: library.to_string(), - cache_key: key.clone(), - output_path: mp4.to_string_lossy().to_string(), - title, - media_count, - render_version: RENDER_VERSION as i32, - tz_offset_minutes: tz, - voice: voice.clone(), - generated_at: now, - }, - )?; + // Record to ledger, then retire superseded reels for this (span, library) + // — yesterday's daily, an older render-version, etc. — keeping a small + // grace window. Done under one lock so the prune sees the row we just wrote. + let superseded = { + let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); + reel_dao.record_reel( + &ctx, + &crate::database::models::InsertablePrecomputedReel { + span: span.to_string(), + library_key: library.to_string(), + cache_key: key.clone(), + output_path: mp4.to_string_lossy().to_string(), + title, + media_count, + render_version: RENDER_VERSION as i32, + tz_offset_minutes: tz, + voice: voice.clone(), + generated_at: now, + }, + )?; + reel_dao + .prune_superseded(&ctx, span, library, PREGEN_KEEP_PER_SCOPE) + .unwrap_or_default() + }; + for row in &superseded { + delete_reel_files(&row.output_path); + } + if !superseded.is_empty() { + log::info!( + "Pruned {} superseded precomputed reel(s) for span={}", + superseded.len(), + span + ); + } log::info!("Precomputed reel generated for span={}, key={}", span, key); Ok(()) } +// --- On-disk cache sweep ----------------------------------------------------- + +/// Best-effort unlink of a reel's MP4 and its `.json` sidecar. 
+fn delete_reel_files(mp4_output_path: &str) { + let mp4 = Path::new(mp4_output_path); + let _ = std::fs::remove_file(mp4); + let _ = std::fs::remove_file(mp4.with_extension("json")); +} + +/// Max age (seconds) before an unreferenced reel MP4 is swept. +fn reel_cache_max_age_secs() -> u64 { + std::env::var("REEL_CACHE_MAX_AGE_DAYS") + .ok() + .and_then(|v| v.trim().parse::().ok()) + .filter(|d| *d > 0) + .unwrap_or(REEL_CACHE_MAX_AGE_DAYS_DEFAULT) + * 86_400 +} + +/// Spawn the periodic on-disk reel-cache sweeper. Runs independently of the +/// pre-gen scheduler because the on-demand cache grows whether or not pre-gen +/// is enabled. Disable with `REEL_CACHE_SWEEP_ENABLED=0`. +pub(crate) async fn spawn_reel_cache_sweeper(app_state: web::Data) { + if std::env::var("REEL_CACHE_SWEEP_ENABLED").ok().as_deref() == Some("0") { + log::info!("Reel cache sweeper disabled (REEL_CACHE_SWEEP_ENABLED=0)"); + return; + } + tokio::spawn(async move { + // Settle after startup, then sweep on a fixed cadence. + tokio::time::sleep(Duration::from_secs(300)).await; + loop { + let removed = sweep_reel_cache(&app_state); + if removed > 0 { + log::info!("Reel cache sweep removed {removed} stale file(s)"); + } + tokio::time::sleep(Duration::from_secs(REEL_CACHE_SWEEP_INTERVAL_SECS)).await; + } + }); +} + +/// One sweep of `reels_path`. Removes: stale render artifacts (`.mp4.tmp`, +/// `.concat.txt`, orphaned sidecars) from crashed runs; and reel MP4s that no +/// ledger row references, that no live job points at, and that are older than +/// the cache max age (the on-demand cache, which has no ledger row). Returns the +/// number of files removed. Best-effort — any IO error on one entry is skipped. 
+fn sweep_reel_cache(app_state: &AppState) -> usize { + let dir = Path::new(&app_state.reels_path); + let read_dir = match std::fs::read_dir(dir) { + Ok(rd) => rd, + Err(_) => return 0, // dir not created yet → nothing to sweep + }; + + // Files a ledger row still points at (current pre-gen reels). + let protected: std::collections::HashSet = { + let ctx = opentelemetry::Context::new(); + let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); + dao.all_cache_keys(&ctx) + .unwrap_or_default() + .into_iter() + .collect() + }; + // Outputs of live in-memory jobs (a Done reel a client may still be fetching). + let active: std::collections::HashSet = { + let jobs = REEL_JOBS.lock().unwrap(); + jobs.values() + .filter_map(|j| j.output_path.as_ref()) + .map(|p| p.to_string_lossy().to_string()) + .collect() + }; + + let now = std::time::SystemTime::now(); + let max_age = Duration::from_secs(reel_cache_max_age_secs()); + let tmp_max_age = Duration::from_secs(REEL_TMP_MAX_AGE_SECS); + let mut removed = 0usize; + + for entry in read_dir.flatten() { + let path = entry.path(); + let Some(name) = path.file_name().and_then(|n| n.to_str()) else { + continue; + }; + let age = entry + .metadata() + .and_then(|m| m.modified()) + .ok() + .and_then(|t| now.duration_since(t).ok()) + .unwrap_or_default(); + + // Transient render artifacts from a crashed run. + if name.ends_with(".mp4.tmp") || name.ends_with(".concat.txt") { + if age > tmp_max_age && std::fs::remove_file(&path).is_ok() { + removed += 1; + } + continue; + } + + // Reel MP4: keep if referenced (ledger or live job) or still recent. + if let Some(key) = name.strip_suffix(".mp4") { + let p = path.to_string_lossy().to_string(); + if protected.contains(key) || active.contains(&p) || age < max_age { + continue; + } + if std::fs::remove_file(&path).is_ok() { + let _ = std::fs::remove_file(path.with_extension("json")); + removed += 1; + } + continue; + } + + // Orphaned sidecar (its MP4 is gone). 
+ if name.ends_with(".json") + && !path.with_extension("mp4").exists() + && age > tmp_max_age + && std::fs::remove_file(&path).is_ok() + { + removed += 1; + } + } + removed +} + #[cfg(test)] mod tests { use super::*; -- 2.52.0 From 50ed78084423f53bd5f79a3789637f129ccb50f2 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 00:44:16 -0400 Subject: [PATCH 18/26] =?UTF-8?q?Unified=20NL=20search=20Phase=201:=20NL?= =?UTF-8?q?=E2=86=92structured-query=20translator=20+=20forward=20geocodin?= =?UTF-8?q?g?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foundation for the /photos/search/unified endpoint (Phase 2). Two new, fully unit-tested pieces, not yet wired into a route (allow-until-wired, mirroring llm_client.rs): - ai/nl_query.rs: translate a free-text query into a StructuredQuery via one grounded LLM call. Two-stage — the model emits names/ISO dates, then a pure resolve step maps tag names against the real vocab and converts dates to unix seconds. Hallucinated (non-vocab) tags are surfaced in unmatched_tags rather than silently used as hard filters — the anti-noise guard. 12 tests. - geo::forward_geocode + bbox_to_circle: resolve a place name to a circle via Nominatim /search, collapsing the bounding box to centroid + circumscribing radius so "Portland" and "Italy" both map onto the existing gps circle filter with no schema change. Radius is the max centroid-to-corner distance (corners aren't equidistant on a sphere). 4 tests. fmt + clippy clean; 19 new tests pass. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ai/mod.rs | 1 + src/ai/nl_query.rs | 414 +++++++++++++++++++++++++++++++++++++++++++++ src/geo.rs | 180 ++++++++++++++++++++ 3 files changed, 595 insertions(+) create mode 100644 src/ai/nl_query.rs diff --git a/src/ai/mod.rs b/src/ai/mod.rs index c5302fb..7d0802e 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -10,6 +10,7 @@ pub mod insight_generator; pub mod llamacpp; pub mod llm_client; pub mod local_llm; +pub mod nl_query; pub mod ollama; pub mod openrouter; pub mod pronunciation; diff --git a/src/ai/nl_query.rs b/src/ai/nl_query.rs new file mode 100644 index 0000000..a94fc06 --- /dev/null +++ b/src/ai/nl_query.rs @@ -0,0 +1,414 @@ +//! Natural-language → structured-query translation for unified photo search. +//! +//! The unified search endpoint (`/photos/search/unified`, Phase 2) needs to +//! turn a free-text query like *"sunset photos in Italy from last summer"* +//! into the structured filter the existing `/photos` engine understands plus +//! a semantic term for CLIP ranking. That translation is a single grounded +//! LLM call, isolated here so it can be unit-tested without a network or the +//! full `InsightGenerator`. +//! +//! Two-stage design: +//! 1. The LLM emits a [`RawNlQuery`] — references are by *name* (tags) and +//! dates as ISO strings, never numeric ids it could hallucinate. +//! 2. [`resolve_raw_query`] maps names against the real tag vocabulary and +//! converts ISO dates to unix seconds, producing a [`StructuredQuery`]. +//! A tag the model invents that isn't in the vocab is surfaced in +//! `unmatched_tags` (the caller folds it back into the semantic term) +//! rather than silently dropped — this is the anti-noise guard. +//! +//! Geocoding of `place` and person filtering are intentionally *not* handled +//! here: `place` stays as text for the caller to forward-geocode (async, see +//! `geo::forward_geocode`), and person filtering is deferred until a +//! 
person→photos resolver exists. + +// Phase 1: this module is fully implemented and unit-tested, but its first +// consumer (the `/photos/search/unified` endpoint) lands in Phase 2. Mirrors +// llm_client.rs's allow-until-wired pattern so the bin target stays +// clippy-clean in the interim; remove when the endpoint is added. +#![allow(dead_code)] + +use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks}; +use anyhow::{Result, anyhow}; +use serde::{Deserialize, Serialize}; + +/// Raw query object as emitted by the LLM. Tag references are by name +/// (resolved against the real vocab in Rust); dates are ISO `YYYY-MM-DD`. +/// Every field is optional so a partial / minimal model response still +/// deserializes. +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +pub struct RawNlQuery { + /// Visual/scene description handed to CLIP for ranking. The descriptive + /// remainder after structured filters are peeled off. + #[serde(default)] + pub semantic: Option, + /// Tag names the photos must have. Matched case-insensitively against + /// the supplied vocabulary; non-matches land in `unmatched_tags`. + #[serde(default)] + pub tags: Vec, + /// Tag names the photos must NOT have. + #[serde(default)] + pub exclude_tags: Vec, + #[serde(default)] + pub camera_make: Option, + #[serde(default)] + pub camera_model: Option, + #[serde(default)] + pub lens_model: Option, + /// Free-text place/location name to forward-geocode (e.g. "Italy"). + #[serde(default)] + pub place: Option, + /// Inclusive start date, ISO `YYYY-MM-DD`. + #[serde(default)] + pub date_from: Option, + /// Inclusive end date, ISO `YYYY-MM-DD`. + #[serde(default)] + pub date_to: Option, + /// "photo" | "video" — normalized in [`resolve_raw_query`]. + #[serde(default)] + pub media_type: Option, +} + +/// Resolved structured query: tag names mapped to ids against the real +/// vocab, ISO dates converted to unix seconds. 
`place` stays as text for the +/// caller to forward-geocode into a gps circle. Serializable so the endpoint +/// can echo it back to the client as "this is how I read your query" +/// (editable filter chips). +#[derive(Debug, Clone, Default, PartialEq, Serialize)] +pub struct StructuredQuery { + pub semantic: Option, + pub tag_ids: Vec, + pub exclude_tag_ids: Vec, + /// Tag names the model produced that don't exist in the vocabulary. + /// The caller folds these back into the semantic term so the concept + /// isn't lost — and surfacing them keeps a hallucinated tag from + /// silently filtering the whole library to nothing. + pub unmatched_tags: Vec, + pub camera_make: Option, + pub camera_model: Option, + pub lens_model: Option, + /// Raw place name awaiting forward-geocoding by the caller. + pub place: Option, + pub date_from: Option, + pub date_to: Option, + /// Normalized to "photo" | "video"; `None` means no media-type filter. + pub media_type: Option, +} + +/// Convert an ISO `YYYY-MM-DD` date to a unix timestamp (seconds). With +/// `end_of_day`, returns 23:59:59 of that day so a `date_to` filter is +/// inclusive of the whole day; otherwise 00:00:00. Returns `None` for any +/// unparseable input (the filter is simply omitted rather than erroring). +pub fn iso_to_unix(date: &str, end_of_day: bool) -> Option { + let d = chrono::NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok()?; + let time = if end_of_day { + chrono::NaiveTime::from_hms_opt(23, 59, 59)? + } else { + chrono::NaiveTime::from_hms_opt(0, 0, 0)? + }; + Some(d.and_time(time).and_utc().timestamp()) +} + +/// Normalize a free-form media-type string to the engine's vocabulary. +/// Anything that isn't clearly photo or video (including "all") yields +/// `None` — no filter. 
+fn normalize_media_type(raw: &str) -> Option { + match raw.trim().to_lowercase().as_str() { + "photo" | "photos" | "image" | "images" | "picture" | "pictures" => { + Some("photo".to_string()) + } + "video" | "videos" | "movie" | "movies" | "clip" | "clips" => Some("video".to_string()), + _ => None, + } +} + +/// Resolve a raw LLM query against the real tag vocabulary, producing the +/// structured filter. Pure — no network, no LLM — so it carries the +/// correctness-critical mapping logic under unit test. +/// +/// `tag_vocab` is `(tag_id, tag_name)` pairs (the shape `TagDao::get_all_tags` +/// yields once the count is dropped). Matching is case-insensitive and exact +/// on the trimmed name. +pub fn resolve_raw_query(raw: RawNlQuery, tag_vocab: &[(i32, String)]) -> StructuredQuery { + // Case-insensitive name → id lookup. Built once per call. + let lookup: std::collections::HashMap = tag_vocab + .iter() + .map(|(id, name)| (name.trim().to_lowercase(), *id)) + .collect(); + + let resolve_names = |names: &[String], ids: &mut Vec, unmatched: &mut Vec| { + for name in names { + let key = name.trim().to_lowercase(); + if key.is_empty() { + continue; + } + match lookup.get(&key) { + Some(id) if !ids.contains(id) => ids.push(*id), + Some(_) => {} // duplicate, already collected + None => { + if !unmatched.iter().any(|u| u.eq_ignore_ascii_case(name)) { + unmatched.push(name.trim().to_string()); + } + } + } + } + }; + + let mut tag_ids = Vec::new(); + let mut unmatched_tags = Vec::new(); + resolve_names(&raw.tags, &mut tag_ids, &mut unmatched_tags); + + // Excluded tags that don't match a real tag are simply ignored — you + // can't exclude a tag that doesn't exist, and folding them into + // `semantic` would make no sense. 
+ let mut exclude_tag_ids = Vec::new(); + let mut exclude_unmatched = Vec::new(); + resolve_names( + &raw.exclude_tags, + &mut exclude_tag_ids, + &mut exclude_unmatched, + ); + + let clean = |s: Option| s.map(|v| v.trim().to_string()).filter(|v| !v.is_empty()); + + StructuredQuery { + semantic: clean(raw.semantic), + tag_ids, + exclude_tag_ids, + unmatched_tags, + camera_make: clean(raw.camera_make), + camera_model: clean(raw.camera_model), + lens_model: clean(raw.lens_model), + place: clean(raw.place), + date_from: raw.date_from.as_deref().and_then(|d| iso_to_unix(d, false)), + date_to: raw.date_to.as_deref().and_then(|d| iso_to_unix(d, true)), + media_type: raw.media_type.as_deref().and_then(normalize_media_type), + } +} + +/// Build the grounded system prompt. The model is told the current date (so +/// "last summer" resolves) and the exact tag vocabulary (so it uses real +/// tags or routes the concept to `semantic` instead of inventing one). +fn build_system_prompt(tag_vocab: &[(i32, String)], today: chrono::NaiveDate) -> String { + // Cap the vocab dump so a huge library doesn't blow the context window; + // the most-used tags are the ones a query is likely to reference. + const MAX_TAGS: usize = 400; + let mut names: Vec<&str> = tag_vocab.iter().map(|(_, n)| n.as_str()).collect(); + names.sort_unstable(); + names.dedup(); + let shown = names.len().min(MAX_TAGS); + let vocab = names[..shown].join(", "); + let truncation = if names.len() > MAX_TAGS { + format!(" (showing {MAX_TAGS} of {} tags)", names.len()) + } else { + String::new() + }; + + format!( + "You translate a user's natural-language photo-search request into a JSON \ +filter. Today's date is {today}. 
Respond with ONLY a JSON object, no prose, no \ +code fences.\n\n\ +Schema (all fields optional):\n\ +{{\n \ +\"semantic\": string|null, // visual scene/subject for image similarity search\n \ +\"tags\": string[], // ONLY names from the tag list below\n \ +\"exclude_tags\": string[], // ONLY names from the tag list below\n \ +\"camera_make\": string|null,\n \ +\"camera_model\": string|null,\n \ +\"lens_model\": string|null,\n \ +\"place\": string|null, // a location name to look up (city, country, landmark)\n \ +\"date_from\": \"YYYY-MM-DD\"|null, // inclusive\n \ +\"date_to\": \"YYYY-MM-DD\"|null, // inclusive\n \ +\"media_type\": \"photo\"|\"video\"|null\n\ +}}\n\n\ +Rules:\n\ +- Put descriptive/visual concepts (\"sunset\", \"crowded beach\", \"red car\") in \"semantic\".\n\ +- Only use \"tags\"/\"exclude_tags\" values that appear EXACTLY in the tag list. If a \ +concept isn't a listed tag, put it in \"semantic\" instead — never invent a tag.\n\ +- Resolve relative dates against today's date (\"last summer\", \"2023\", \"last month\").\n\ +- Put place/location names in \"place\" (not \"semantic\").\n\ +- Omit (use null / empty array) anything the request doesn't mention.\n\n\ +Available tags{truncation}: {vocab}" + ) +} + +/// Extract the JSON object from a model response that may include a leading +/// `<think>` block, code fences, or trailing prose. Strips the think block +/// first (so reasoning that mentions braces can't fool the scan), then +/// returns the substring from the first `{` to the last `}` inclusive — or +/// the trimmed text if no braces are found (which then fails to parse with a +/// clear error). +fn extract_json(raw: &str) -> String { + let s = strip_think_blocks(raw); + let start = s.find('{'); + let end = s.rfind('}'); + match (start, end) { + (Some(a), Some(b)) if b >= a => s[a..=b].to_string(), + _ => s.trim().to_string(), + } +} + +/// Parse a model response string into a [`StructuredQuery`], resolving names +/// against the vocab.
Separated from the LLM call so it's unit-testable. +pub fn parse_response(response: &str, tag_vocab: &[(i32, String)]) -> Result { + let json = extract_json(response); + let raw: RawNlQuery = serde_json::from_str(&json) + .map_err(|e| anyhow!("failed to parse NL query JSON: {e}; raw response: {response:?}"))?; + Ok(resolve_raw_query(raw, tag_vocab)) +} + +/// Translate a natural-language query into a [`StructuredQuery`] via one +/// grounded LLM call. The `client` is any configured backend (the unified +/// endpoint passes the resolved chat backend); `tag_vocab` grounds the tag +/// mapping; `today` anchors relative-date resolution. +pub async fn translate_nl_query( + client: &dyn LlmClient, + nl: &str, + tag_vocab: &[(i32, String)], + today: chrono::NaiveDate, +) -> Result { + let system = build_system_prompt(tag_vocab, today); + let messages = vec![ChatMessage::system(system), ChatMessage::user(nl)]; + let (msg, _, _) = client.chat_with_tools(messages, Vec::::new()).await?; + parse_response(&msg.content, tag_vocab) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn vocab() -> Vec<(i32, String)> { + vec![ + (1, "beach".to_string()), + (2, "Sunset".to_string()), // mixed case to exercise case-insensitivity + (3, "family".to_string()), + ] + } + + #[test] + fn iso_to_unix_start_and_end_of_day() { + // 2023-01-01 UTC midnight = 1672531200. + assert_eq!(iso_to_unix("2023-01-01", false), Some(1_672_531_200)); + // End of that day is 86399 seconds later. 
+ assert_eq!( + iso_to_unix("2023-01-01", true), + Some(1_672_531_200 + 86_399) + ); + } + + #[test] + fn iso_to_unix_rejects_garbage() { + assert_eq!(iso_to_unix("last summer", false), None); + assert_eq!(iso_to_unix("2023-13-99", false), None); + assert_eq!(iso_to_unix("", false), None); + } + + #[test] + fn resolve_matches_tags_case_insensitively() { + let raw = RawNlQuery { + tags: vec!["BEACH".to_string(), "sunset".to_string()], + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.tag_ids, vec![1, 2]); + assert!(q.unmatched_tags.is_empty()); + } + + #[test] + fn resolve_surfaces_unmatched_tags_not_silently_dropped() { + // A hallucinated / non-vocab tag must be surfaced so the caller can + // fold it into semantic — never silently used as a hard filter. + let raw = RawNlQuery { + tags: vec!["beach".to_string(), "golden hour".to_string()], + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.tag_ids, vec![1]); + assert_eq!(q.unmatched_tags, vec!["golden hour".to_string()]); + } + + #[test] + fn resolve_dedups_repeated_tags() { + let raw = RawNlQuery { + tags: vec![ + "beach".to_string(), + "Beach".to_string(), + "beach".to_string(), + ], + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.tag_ids, vec![1]); + } + + #[test] + fn resolve_normalizes_media_type_and_dates() { + let raw = RawNlQuery { + media_type: Some("Videos".to_string()), + date_from: Some("2023-06-01".to_string()), + date_to: Some("2023-06-30".to_string()), + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.media_type.as_deref(), Some("video")); + assert_eq!(q.date_from, iso_to_unix("2023-06-01", false)); + assert_eq!(q.date_to, iso_to_unix("2023-06-30", true)); + } + + #[test] + fn resolve_media_type_all_is_no_filter() { + let raw = RawNlQuery { + media_type: Some("all".to_string()), + ..Default::default() + }; + assert_eq!(resolve_raw_query(raw, 
&vocab()).media_type, None); + } + + #[test] + fn resolve_trims_and_empties_to_none() { + let raw = RawNlQuery { + semantic: Some(" ".to_string()), + camera_make: Some(" Fujifilm ".to_string()), + place: Some("".to_string()), + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.semantic, None); + assert_eq!(q.camera_make.as_deref(), Some("Fujifilm")); + assert_eq!(q.place, None); + } + + #[test] + fn parse_response_handles_code_fences_and_prose() { + let resp = "Here is the filter:\n```json\n{\"semantic\":\"sunset\",\"tags\":[\"beach\"]}\n```\nDone."; + let q = parse_response(resp, &vocab()).expect("parse"); + assert_eq!(q.semantic.as_deref(), Some("sunset")); + assert_eq!(q.tag_ids, vec![1]); + } + + #[test] + fn parse_response_handles_think_block_then_json() { + let resp = "user wants beach sunsets{\"tags\":[\"beach\",\"sunset\"]}"; + let q = parse_response(resp, &vocab()).expect("parse"); + assert_eq!(q.tag_ids, vec![1, 2]); + } + + #[test] + fn parse_response_errors_on_non_json() { + assert!(parse_response("I cannot help with that.", &vocab()).is_err()); + } + + #[test] + fn build_system_prompt_includes_date_and_vocab() { + let today = chrono::NaiveDate::from_ymd_opt(2026, 6, 14).unwrap(); + let prompt = build_system_prompt(&vocab(), today); + assert!( + prompt.contains("2026-06-14"), + "prompt should state today's date" + ); + assert!(prompt.contains("beach"), "prompt should list the vocab"); + assert!( + prompt.contains("never invent a tag"), + "prompt should warn against inventing tags" + ); + } +} diff --git a/src/geo.rs b/src/geo.rs index 46cc1dc..b7ef9d1 100644 --- a/src/geo.rs +++ b/src/geo.rs @@ -1,4 +1,5 @@ /// Geographic calculation utilities for GPS-based search +use serde::Deserialize; use std::f64; /// Calculate distance between two GPS coordinates using the Haversine formula. 
@@ -61,6 +62,148 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f ) } +/// A place resolved from a free-text query via forward geocoding. +/// +/// The filter pipeline searches a *circle* (`gps_lat`/`gps_lon`/ +/// `gps_radius_km`), but a place can be anything from a single address to +/// a whole country. We collapse Nominatim's bounding box into the smallest +/// circle that circumscribes it (see [`bbox_to_circle`]) so "Portland" and +/// "Italy" both map onto the existing circle filter without a schema change. +// Phase 1: forward geocoding is implemented and unit-tested here, but its +// first consumer (the `/photos/search/unified` endpoint) lands in Phase 2. +// allow-until-wired (mirrors llm_client.rs); remove when the endpoint is added. +#[allow(dead_code)] +#[derive(Debug, Clone, PartialEq)] +pub struct GeoPlace { + /// Nominatim's canonical name for the match (e.g. "Italia"). + pub display_name: String, + /// Centroid latitude in decimal degrees. + pub lat: f64, + /// Centroid longitude in decimal degrees. + pub lon: f64, + /// Radius (km) of a circle centred on the centroid that covers the + /// matched area. Floored to [`MIN_PLACE_RADIUS_KM`] so a point result + /// (whose bounding box is microscopic) still yields a usable circle. + pub radius_km: f64, +} + +/// Floor for a geocoded place's radius. Point results (a street address) +/// come back with a near-zero bounding box; without a floor the circle +/// filter would match nothing. +#[allow(dead_code)] +pub const MIN_PLACE_RADIUS_KM: f64 = 0.5; + +/// Collapse a bounding box into the centroid + circumscribing radius. +/// +/// Input is Nominatim's `boundingbox` order: `(south_lat, north_lat, +/// west_lon, east_lon)`. The radius is the *largest* great-circle distance +/// from the centroid to any of the four corners, so the resulting circle +/// fully covers the box. 
(The corners aren't equidistant on a sphere — +/// longitude lines converge toward the poles, so the equator-facing edge's +/// corners are farthest; taking the max guarantees coverage in either +/// hemisphere.) +/// +/// Pure and exact (no flooring) so it can be unit-tested directly; callers +/// apply [`MIN_PLACE_RADIUS_KM`] when turning the result into a filter. +#[allow(dead_code)] +pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) { + let center_lat = (south + north) / 2.0; + let center_lon = (west + east) / 2.0; + let radius_km = [(south, west), (south, east), (north, west), (north, east)] + .iter() + .map(|(clat, clon)| haversine_distance(center_lat, center_lon, *clat, *clon)) + .fold(0.0_f64, f64::max); + (center_lat, center_lon, radius_km) +} + +/// Raw Nominatim `/search` result. `lat`/`lon` arrive as strings and +/// `boundingbox` as a 4-element string array `[south, north, west, east]`. +#[allow(dead_code)] +#[derive(Deserialize)] +struct NominatimSearchResult { + lat: String, + lon: String, + display_name: String, + boundingbox: Option<[String; 4]>, +} + +/// Forward-geocode a free-text place name to a [`GeoPlace`] via the public +/// OpenStreetMap Nominatim `/search` endpoint. +/// +/// Mirrors `InsightGenerator::reverse_geocode`'s error posture: any network, +/// HTTP, or parse failure returns `None` rather than propagating, so a flaky +/// geocoder degrades the query to "no location filter" instead of failing it. +/// +/// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1 +/// request/second; callers doing this interactively should cache results. 
+#[allow(dead_code)] +pub async fn forward_geocode(query: &str) -> Option { + let q = query.trim(); + if q.is_empty() { + return None; + } + + let client = reqwest::Client::new(); + let response = match client + .get("https://nominatim.openstreetmap.org/search") + .query(&[("format", "json"), ("limit", "1"), ("q", q)]) + .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent + .send() + .await + { + Ok(resp) => resp, + Err(e) => { + log::warn!("Forward geocoding network error for {q:?}: {e}"); + return None; + } + }; + + if !response.status().is_success() { + log::warn!( + "Forward geocoding HTTP error for {q:?}: {}", + response.status() + ); + return None; + } + + let results: Vec = match response.json().await { + Ok(r) => r, + Err(e) => { + log::warn!("Forward geocoding JSON parse error for {q:?}: {e}"); + return None; + } + }; + + let top = results.into_iter().next()?; + let lat: f64 = top.lat.parse().ok()?; + let lon: f64 = top.lon.parse().ok()?; + + // Prefer the bounding box (handles large places); fall back to a + // point + floor radius when Nominatim omits it. + let (center_lat, center_lon, radius_km) = match &top.boundingbox { + Some([s, n, w, e]) => match (s.parse(), n.parse(), w.parse(), e.parse()) { + (Ok(s), Ok(n), Ok(w), Ok(e)) => bbox_to_circle(s, n, w, e), + _ => (lat, lon, 0.0), + }, + None => (lat, lon, 0.0), + }; + + let place = GeoPlace { + display_name: top.display_name, + lat: center_lat, + lon: center_lon, + radius_km: radius_km.max(MIN_PLACE_RADIUS_KM), + }; + log::info!( + "Forward geocoded {q:?} -> {} ({:.4}, {:.4}, r={:.1}km)", + place.display_name, + place.lat, + place.lon, + place.radius_km + ); + Some(place) +} + #[cfg(test)] mod tests { use super::*; @@ -118,4 +261,41 @@ mod tests { distance ); } + + #[test] + fn test_bbox_to_circle_centroid() { + // Symmetric box around (10, 20): centroid should land dead centre. 
+ let (lat, lon, radius) = bbox_to_circle(9.0, 11.0, 19.0, 21.0); + assert!((lat - 10.0).abs() < 1e-9, "centroid lat, got {lat}"); + assert!((lon - 20.0).abs() < 1e-9, "centroid lon, got {lon}"); + assert!(radius > 0.0, "radius should be positive, got {radius}"); + } + + #[test] + fn test_bbox_to_circle_covers_corner() { + // The radius must reach every corner of the box. Verify each + // centroid-to-corner distance is within the returned radius (corners + // are not equidistant on a sphere; the radius is the farthest one). + let (south, north, west, east) = (40.0, 42.0, -74.0, -72.0); + let (lat, lon, radius) = bbox_to_circle(south, north, west, east); + for (clat, clon) in [(south, west), (south, east), (north, west), (north, east)] { + let d = haversine_distance(lat, lon, clat, clon); + assert!( + d <= radius + 1e-6, + "corner ({clat},{clon}) at {d}km should be within radius {radius}km" + ); + } + } + + #[test] + fn test_bbox_to_circle_country_vs_city_scale() { + // A country-sized box yields a far larger radius than a city-sized + // one — confirming the bbox approach scales with place size. + let (_, _, country) = bbox_to_circle(35.5, 47.1, 6.6, 18.5); // ~Italy + let (_, _, city) = bbox_to_circle(45.4, 45.6, -122.8, -122.5); // ~Portland + assert!( + country > city * 10.0, + "country radius {country}km should dwarf city radius {city}km" + ); + } } -- 2.52.0 From e4c875f47346612a46b4f8bdf38d06a9d22928e6 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 01:03:43 -0400 Subject: [PATCH 19/26] Unified NL search Phase 2: /photos/search/unified endpoint Composes the two existing engines (Path A orchestration): - Translate NL -> StructuredQuery via local LLM, respecting LLM_BACKEND (resolve_backend(Local) -> ollama or llama-swap; no hybrid). - Forward-geocode the place name into a gps circle. - Structured filters (tags/EXIF/geo/date/media) build a candidate set of EXIF rows; CLIP ranks within it, joined by content_hash. 
Degenerate cases match existing behavior: semantic-only -> plain CLIP; filters-only -> date-sorted. - Echoes the interpreted query (incl. resolved place) for editable client chips. Refactor: extracted reusable cores from clip_search (score_photos, resolve_hits, parse_library_scope, score_error_response) shared by both endpoints. Removed the Phase 1 allow-until-wired attributes now that nl_query + geo are consumed. fmt + clippy clean; 23 backend tests pass (7 geo, 12 nl_query, 4 unified). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ai/nl_query.rs | 6 - src/clip_search.rs | 398 ++++++++++++++++++++----------------- src/geo.rs | 8 - src/lib.rs | 1 + src/main.rs | 8 + src/unified_search.rs | 452 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 675 insertions(+), 198 deletions(-) create mode 100644 src/unified_search.rs diff --git a/src/ai/nl_query.rs b/src/ai/nl_query.rs index a94fc06..d709322 100644 --- a/src/ai/nl_query.rs +++ b/src/ai/nl_query.rs @@ -21,12 +21,6 @@ //! `geo::forward_geocode`), and person filtering is deferred until a //! person→photos resolver exists. -// Phase 1: this module is fully implemented and unit-tested, but its first -// consumer (the `/photos/search/unified` endpoint) lands in Phase 2. Mirrors -// llm_client.rs's allow-until-wired pattern so the bin target stays -// clippy-clean in the interim; remove when the endpoint is added. 
-#![allow(dead_code)] - use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks}; use anyhow::{Result, anyhow}; use serde::{Deserialize, Serialize}; diff --git a/src/clip_search.rs b/src/clip_search.rs index 98ea96e..7b4510e 100644 --- a/src/clip_search.rs +++ b/src/clip_search.rs @@ -124,65 +124,161 @@ fn dot(a: &[f32], b: &[f32]) -> f32 { a.iter().zip(b.iter()).map(|(x, y)| x * y).sum() } -pub async fn search_photos( - state: web::Data, - exif_dao: web::Data>>, - query: web::Query, -) -> ActixResult { - let q_text = query.q.trim().to_string(); - if q_text.is_empty() { - return Ok(HttpResponse::BadRequest().json(SearchError { - error: "query parameter `q` is required".into(), - })); - } +/// Failure modes of [`score_photos`]. Carries enough to let each caller pick +/// an appropriate HTTP status (the CLIP service being down is a 502, a +/// disabled feature is a 503, a rejected query is a 400, a DB failure 500). +pub enum ScoreError { + /// CLIP search isn't configured at all (no Apollo endpoint). + Disabled, + /// The query was rejected by the encoder (client error). + Rejected(String), + /// The CLIP service is transiently unavailable (upstream error). + Unavailable(String), + /// The encoder returned an embedding we couldn't decode. + MalformedEmbedding, + /// A database / index load failure. + Internal(String), +} + +/// Result of scoring the whole library against a query embedding: the +/// resolved model version, how many embeddings were considered, and every +/// `(score, content_hash)` above threshold, sorted by descending score. +/// Pagination and path resolution are the caller's job (see [`resolve_hits`]) +/// so this core can be reused for both the plain search endpoint and the +/// unified endpoint (which filters by hash before paginating). +pub struct ScoredPhotos { + pub model_version: String, + pub considered: usize, + /// `(cosine_score, content_hash)` pairs, descending by score. 
+ pub hits: Vec<(f32, String)>, +} + +/// Encode `q_text` via CLIP and score it against every stored embedding in +/// the given library scope. Returns all matches above `threshold`, sorted by +/// descending similarity. Pure of HTTP concerns so it's shared by +/// `search_photos` and the unified search endpoint. +pub async fn score_photos( + state: &AppState, + exif_dao: &Mutex>, + q_text: &str, + library_ids: &[i32], + threshold: f32, + model_version: Option<&str>, +) -> Result { if !state.clip_client.is_enabled() { - return Ok(HttpResponse::ServiceUnavailable().json(SearchError { - error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(), - })); + return Err(ScoreError::Disabled); } - let limit = query.limit.clamp(1, 200); - let offset = query.offset; - let threshold = query.threshold.clamp(-1.0, 1.0); - - // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms - // on CPU. Bail with a clear error message if Apollo's down so the - // user sees "service unavailable" rather than empty results. - let query_resp = match state.clip_client.encode_text(&q_text).await { + // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms on CPU. 
+ let query_resp = match state.clip_client.encode_text(q_text).await { Ok(r) => r, - Err(ClipError::Permanent(e)) => { - return Ok(HttpResponse::BadRequest().json(SearchError { - error: format!("query rejected: {e}"), - })); - } - Err(ClipError::Transient(e)) => { - return Ok(HttpResponse::BadGateway().json(SearchError { - error: format!("CLIP service unavailable: {e}"), - })); - } - Err(ClipError::Disabled) => { - return Ok(HttpResponse::ServiceUnavailable().json(SearchError { - error: "CLIP service disabled".into(), - })); - } + Err(ClipError::Permanent(e)) => return Err(ScoreError::Rejected(e.to_string())), + Err(ClipError::Transient(e)) => return Err(ScoreError::Unavailable(e.to_string())), + Err(ClipError::Disabled) => return Err(ScoreError::Disabled), }; // decode_embedding works on raw bytes; the wire format is b64. let query_bytes = base64::engine::general_purpose::STANDARD .decode(query_resp.embedding.as_bytes()) .unwrap_or_default(); - let query_vec = match decode_embedding(&query_bytes) { - Some(v) => v, - None => { - return Ok(HttpResponse::BadGateway().json(SearchError { - error: "CLIP service returned a malformed query embedding".into(), - })); - } - }; + let query_vec = decode_embedding(&query_bytes).ok_or(ScoreError::MalformedEmbedding)?; - // 2. Decide which library scope to search. `library_ids` (multi) - // wins over the legacy `library` (single) when both are present; - // either / both empty falls back to "every enabled library". - let library_ids: Vec = if let Some(raw) = query.library_ids.as_deref() { + // 2. Pull the (hash, embedding) matrix under the dao lock, release + // before scoring. The caller-supplied `model_version` (or the live + // engine's) forces a strict join so a mid-flight model swap can't mix + // geometries. 
+ let ctx = opentelemetry::Context::current(); + let rows: Vec<(String, Vec)> = { + let mut dao = exif_dao.lock().expect("exif dao"); + dao.list_clip_index( + &ctx, + library_ids, + model_version.or(Some(&query_resp.model_version)), + ) + .map_err(|e| { + log::warn!("clip_search: list_clip_index failed: {:?}", e); + ScoreError::Internal("failed to load search index".into()) + })? + }; + let considered = rows.len(); + + // 3. Score. Keep all matches and sort at the end (~microseconds at 14k). + let mut hits: Vec<(f32, String)> = Vec::with_capacity(considered); + for (hash, blob) in rows { + let Some(emb) = decode_embedding(&blob) else { + continue; + }; + if emb.len() != query_vec.len() { + continue; + } + let sim = dot(&emb, &query_vec); + if sim < threshold { + continue; + } + hits.push((sim, hash)); + } + hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); + + Ok(ScoredPhotos { + model_version: query_resp.model_version, + considered, + hits, + }) +} + +/// Resolve a page of `(score, content_hash)` pairs back to [`SearchHit`]s +/// (each carrying `library_id` + `rel_path`). Hashes that no longer resolve +/// to a row are skipped. Shared by both endpoints. 
+pub fn resolve_hits( + exif_dao: &Mutex>, + scored: &[(f32, String)], +) -> Vec { + if scored.is_empty() { + return Vec::new(); + } + let ctx = opentelemetry::Context::current(); + let hashes: Vec = scored.iter().map(|(_, h)| h.clone()).collect(); + let mut dao = exif_dao.lock().expect("exif dao"); + let path_map = dao + .get_rel_paths_for_hashes(&ctx, &hashes) + .unwrap_or_else(|e| { + log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e); + std::collections::HashMap::new() + }); + + let mut results = Vec::with_capacity(scored.len()); + for (score, hash) in scored { + let row = match dao.find_by_content_hash(&ctx, hash) { + Ok(Some(r)) => r, + Ok(None) => continue, + Err(e) => { + log::warn!("clip_search: find_by_content_hash failed for {hash}: {e:?}"); + continue; + } + }; + // Prefer get_rel_paths_for_hashes's first entry (shares image_exif's + // natural order), falling back to the ImageExif row. + let rel_path = path_map + .get(hash) + .and_then(|paths| paths.first().cloned()) + .unwrap_or(row.file_path); + results.push(SearchHit { + library_id: row.library_id, + rel_path, + content_hash: hash.clone(), + score: *score, + }); + } + results +} + +/// Parse the `library_ids` (multi) / `library` (single) scope params into a +/// deduped id list. Empty = "every enabled library". Shared so the unified +/// endpoint scopes CLIP identically. 
+pub fn parse_library_scope( + library_ids: Option<&str>, + library: Option, +) -> Result, String> { + if let Some(raw) = library_ids { let mut out: Vec = Vec::new(); for piece in raw.split(',') { let trimmed = piece.trim(); @@ -195,158 +291,92 @@ pub async fn search_photos( out.push(id); } } - Err(_) => { - return Ok(HttpResponse::BadRequest().json(SearchError { - error: format!("invalid library_ids entry: {trimmed:?}"), - })); - } + Err(_) => return Err(format!("invalid library_ids entry: {trimmed:?}")), } } - out - } else if let Some(id) = query.library { - vec![id] + Ok(out) + } else if let Some(id) = library { + Ok(vec![id]) } else { - Vec::new() - }; + Ok(Vec::new()) + } +} - // 3. Pull the (hash, embedding) matrix. Lock contention here is - // bounded — one big SELECT under a mutex Arc> - // and then we release before scoring. If this becomes a hotspot - // we'll cache the decoded matrix in AppState with TTL. - let ctx = opentelemetry::Context::current(); - let rows: Vec<(String, Vec)> = { - let mut dao = exif_dao.lock().expect("exif dao"); - match dao.list_clip_index( - &ctx, - &library_ids, - query - .model_version - .as_deref() - .or(Some(&query_resp.model_version)), - ) { - Ok(r) => r, - Err(e) => { - log::warn!("clip_search: list_clip_index failed: {:?}", e); - return Ok(HttpResponse::InternalServerError().json(SearchError { - error: "failed to load search index".into(), - })); - } - } - }; - let considered = rows.len(); - if considered == 0 { - return Ok(HttpResponse::Ok().json(SearchResponse { - query: q_text, - model_version: query_resp.model_version, - threshold, - considered, - total_matching: 0, - offset, - results: Vec::new(), +pub async fn search_photos( + state: web::Data, + exif_dao: web::Data>>, + query: web::Query, +) -> ActixResult { + let q_text = query.q.trim().to_string(); + if q_text.is_empty() { + return Ok(HttpResponse::BadRequest().json(SearchError { + error: "query parameter `q` is required".into(), })); } - // 4. Score. 
Cap the loop's transient allocation; we keep all scores - // and sort at the end. With ~14k entries the sort is microseconds. - let mut scored: Vec<(f32, String)> = Vec::with_capacity(considered); - for (hash, blob) in rows { - let Some(emb) = decode_embedding(&blob) else { - continue; - }; - if emb.len() != query_vec.len() { - continue; - } - let sim = dot(&emb, &query_vec); - if sim < threshold { - continue; - } - scored.push((sim, hash)); - } - scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); - let total_matching = scored.len(); - // Pagination — slice the sorted list at `[offset, offset+limit)`. - // Offsets past the end produce empty pages rather than an error so - // the client can stop fetching naturally on "load more" past the end. - let scored: Vec<(f32, String)> = if offset >= total_matching { + let limit = query.limit.clamp(1, 200); + let offset = query.offset; + let threshold = query.threshold.clamp(-1.0, 1.0); + + let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) { + Ok(ids) => ids, + Err(msg) => return Ok(HttpResponse::BadRequest().json(SearchError { error: msg })), + }; + + let scored = match score_photos( + &state, + &exif_dao, + &q_text, + &library_ids, + threshold, + query.model_version.as_deref(), + ) + .await + { + Ok(s) => s, + Err(e) => return Ok(score_error_response(e)), + }; + + let total_matching = scored.hits.len(); + // Pagination — slice the sorted list at `[offset, offset+limit)`. Offsets + // past the end produce empty pages so "load more" stops naturally. 
+ let page: Vec<(f32, String)> = if offset >= total_matching { Vec::new() } else { let end = (offset + limit).min(total_matching); - scored[offset..end].to_vec() + scored.hits[offset..end].to_vec() }; - - if scored.is_empty() { - return Ok(HttpResponse::Ok().json(SearchResponse { - query: q_text, - model_version: query_resp.model_version, - threshold, - considered, - total_matching, - offset, - results: Vec::new(), - })); - } - - // 5. Resolve each surviving hash back to a `(library_id, rel_path)`. - // `get_rel_paths_by_hash` returns every rel_path; we pick the first - // one for the result. Apollo / the UI can fetch alternatives via - // /image/metadata when needed. - let hashes: Vec = scored.iter().map(|(_, h)| h.clone()).collect(); - let path_map = { - let mut dao = exif_dao.lock().expect("exif dao"); - match dao.get_rel_paths_for_hashes(&ctx, &hashes) { - Ok(m) => m, - Err(e) => { - log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e); - return Ok(HttpResponse::InternalServerError().json(SearchError { - error: "failed to resolve photo paths".into(), - })); - } - } - }; - - // We need (library_id, rel_path) — get_rel_paths_for_hashes only - // returns rel_paths. Cross-reference via find_by_content_hash to - // pick the library too. Single call per surviving hash; cheap at - // top-20. - let mut results = Vec::with_capacity(scored.len()); - { - let mut dao = exif_dao.lock().expect("exif dao"); - for (score, hash) in scored { - let row = match dao.find_by_content_hash(&ctx, &hash) { - Ok(Some(r)) => r, - Ok(None) => continue, - Err(e) => { - log::warn!( - "clip_search: find_by_content_hash failed for {}: {:?}", - hash, - e - ); - continue; - } - }; - // Prefer get_rel_paths_for_hashes's first entry if it - // exists (it shares semantics with `image_exif`'s natural - // order), falling back to the ImageExif row. 
- let rel_path = path_map - .get(&hash) - .and_then(|paths| paths.first().cloned()) - .unwrap_or(row.file_path); - results.push(SearchHit { - library_id: row.library_id, - rel_path, - content_hash: hash, - score, - }); - } - } + let results = resolve_hits(&exif_dao, &page); Ok(HttpResponse::Ok().json(SearchResponse { query: q_text, - model_version: query_resp.model_version, + model_version: scored.model_version, threshold, - considered, + considered: scored.considered, total_matching, offset, results, })) } + +/// Map a [`ScoreError`] to the HTTP response `search_photos` historically +/// returned for each failure mode. Reused by the unified endpoint. +pub fn score_error_response(e: ScoreError) -> HttpResponse { + match e { + ScoreError::Disabled => HttpResponse::ServiceUnavailable().json(SearchError { + error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(), + }), + ScoreError::Rejected(msg) => HttpResponse::BadRequest().json(SearchError { + error: format!("query rejected: {msg}"), + }), + ScoreError::Unavailable(msg) => HttpResponse::BadGateway().json(SearchError { + error: format!("CLIP service unavailable: {msg}"), + }), + ScoreError::MalformedEmbedding => HttpResponse::BadGateway().json(SearchError { + error: "CLIP service returned a malformed query embedding".into(), + }), + ScoreError::Internal(msg) => { + HttpResponse::InternalServerError().json(SearchError { error: msg }) + } + } +} diff --git a/src/geo.rs b/src/geo.rs index b7ef9d1..b54f609 100644 --- a/src/geo.rs +++ b/src/geo.rs @@ -69,10 +69,6 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f /// a whole country. We collapse Nominatim's bounding box into the smallest /// circle that circumscribes it (see [`bbox_to_circle`]) so "Portland" and /// "Italy" both map onto the existing circle filter without a schema change. 
-// Phase 1: forward geocoding is implemented and unit-tested here, but its -// first consumer (the `/photos/search/unified` endpoint) lands in Phase 2. -// allow-until-wired (mirrors llm_client.rs); remove when the endpoint is added. -#[allow(dead_code)] #[derive(Debug, Clone, PartialEq)] pub struct GeoPlace { /// Nominatim's canonical name for the match (e.g. "Italia"). @@ -90,7 +86,6 @@ pub struct GeoPlace { /// Floor for a geocoded place's radius. Point results (a street address) /// come back with a near-zero bounding box; without a floor the circle /// filter would match nothing. -#[allow(dead_code)] pub const MIN_PLACE_RADIUS_KM: f64 = 0.5; /// Collapse a bounding box into the centroid + circumscribing radius. @@ -105,7 +100,6 @@ pub const MIN_PLACE_RADIUS_KM: f64 = 0.5; /// /// Pure and exact (no flooring) so it can be unit-tested directly; callers /// apply [`MIN_PLACE_RADIUS_KM`] when turning the result into a filter. -#[allow(dead_code)] pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) { let center_lat = (south + north) / 2.0; let center_lon = (west + east) / 2.0; @@ -118,7 +112,6 @@ pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64 /// Raw Nominatim `/search` result. `lat`/`lon` arrive as strings and /// `boundingbox` as a 4-element string array `[south, north, west, east]`. -#[allow(dead_code)] #[derive(Deserialize)] struct NominatimSearchResult { lat: String, @@ -136,7 +129,6 @@ struct NominatimSearchResult { /// /// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1 /// request/second; callers doing this interactively should cache results. 
-#[allow(dead_code)] pub async fn forward_geocode(query: &str) -> Option { let q = query.trim(); if q.is_empty() { diff --git a/src/lib.rs b/src/lib.rs index 0ea7ddb..a228472 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,7 @@ pub mod tags; #[cfg(test)] pub mod testhelpers; pub mod thumbnails; +pub mod unified_search; pub mod utils; pub mod video; diff --git a/src/main.rs b/src/main.rs index e420d8b..7faa959 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,6 +54,7 @@ mod perceptual_hash; mod state; mod tags; mod thumbnails; +mod unified_search; mod utils; mod video; mod watcher; @@ -333,6 +334,13 @@ fn main() -> std::io::Result<()> { web::resource("/photos/search") .route(web::get().to(clip_search::search_photos)), ) + .service( + // Unified natural-language search: LLM translates the + // query into structured filters + a semantic term, then + // filters constrain and CLIP ranks. See src/unified_search.rs. + web::resource("/photos/search/unified") + .route(web::get().to(unified_search::unified_search::)), + ) .service(web::resource("/file/move").post(move_file::)) .service(handlers::image::get_image) .service(handlers::image::upload_image) diff --git a/src/unified_search.rs b/src/unified_search.rs new file mode 100644 index 0000000..a3187a4 --- /dev/null +++ b/src/unified_search.rs @@ -0,0 +1,452 @@ +//! `/photos/search/unified?q=` — unified NL photo search. +//! +//! One free-text box that composes the two existing engines instead of making +//! the user pick between them: +//! 1. A grounded local-LLM call ([`crate::ai::nl_query`]) translates the +//! query into a structured filter + a semantic term. +//! 2. Structured filters (tags / EXIF / geo / date / media-type) define the +//! candidate set; the semantic term ranks within it via CLIP. +//! +//! Path A (orchestration): we reuse `clip_search`'s scoring core and the +//! existing `ExifDao` / `TagDao` queries, joining on `content_hash`. EXIF rows +//! 
are the universal candidate carrier — each has `(library_id, file_path, +//! content_hash, date_taken)` — so the structured filter is just a predicate +//! over them, and the CLIP hits (which key on `content_hash`) intersect by +//! hash. No new schema, no surgery on `list_photos`. +//! +//! Degenerate cases collapse to the existing behavior: semantic-only → plain +//! CLIP search; filters-only → a date-sorted filtered listing. +//! +//! Person filtering is intentionally deferred (no person→photos resolver yet). + +use crate::AppState; +use crate::ai::backend::{BackendKind, SamplingOverrides}; +use crate::ai::nl_query::{StructuredQuery, translate_nl_query}; +use crate::clip_search::{ + SearchHit, parse_library_scope, resolve_hits, score_error_response, score_photos, +}; +use crate::data::Claims; +use crate::database::ExifDao; +use crate::file_types::{is_image_file, is_video_file}; +use crate::geo::{forward_geocode, gps_bounding_box, haversine_distance}; +use crate::tags::TagDao; +use actix_web::HttpResponse; +use actix_web::web::{Data, Query}; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; +use std::path::Path; +use std::sync::Mutex; + +#[derive(Debug, Deserialize)] +pub struct UnifiedQuery { + /// Natural-language query. Required; empty triggers 400. + pub q: String, + #[serde(default = "default_limit")] + pub limit: usize, + #[serde(default)] + pub offset: usize, + /// CLIP cosine floor for the semantic ranking stage. Same default as the + /// plain search endpoint. + #[serde(default = "default_threshold")] + pub threshold: f32, + /// Legacy single-library scope (see clip_search). + pub library: Option, + /// Multi-library scope, comma-separated ids. + pub library_ids: Option, +} + +fn default_limit() -> usize { + 20 +} +fn default_threshold() -> f32 { + 0.20 +} + +/// A geocoded place echoed back so the client can show / edit the location +/// filter it actually searched. 
+#[derive(Debug, Serialize)] +struct ResolvedPlace { + display_name: String, + lat: f64, + lon: f64, + radius_km: f64, +} + +/// How the server interpreted the NL query — echoed to the client to render +/// editable filter chips. tag ids map to the client's existing tag list. +#[derive(Debug, Serialize)] +struct Interpreted { + semantic: Option, + tag_ids: Vec, + exclude_tag_ids: Vec, + /// Words the model treated as tags that don't exist in the vocab; folded + /// into the semantic term and surfaced here so the UI can explain it. + unmatched_tags: Vec, + camera_make: Option, + camera_model: Option, + lens_model: Option, + date_from: Option, + date_to: Option, + media_type: Option, + place: Option, +} + +#[derive(Debug, Serialize)] +struct UnifiedResponse { + query: String, + interpreted: Interpreted, + /// CLIP model version used for ranking; `None` when the query had no + /// semantic term (filters-only). + model_version: Option, + /// Embeddings scored by CLIP (0 when filters-only). + considered: usize, + /// Matches before pagination. + total_matching: usize, + offset: usize, + results: Vec, +} + +#[derive(Debug, Serialize)] +struct ErrorBody { + error: String, +} + +fn bad_request(msg: impl Into) -> HttpResponse { + HttpResponse::BadRequest().json(ErrorBody { error: msg.into() }) +} + +/// Combine the model's semantic term with any tag words that didn't match the +/// vocab, so a hallucinated/non-vocab tag becomes a soft semantic signal +/// rather than being dropped. 
+fn effective_semantic(sq: &StructuredQuery) -> Option { + let mut parts: Vec = Vec::new(); + if let Some(s) = sq.semantic.as_deref() { + parts.push(s.to_string()); + } + parts.extend(sq.unmatched_tags.iter().cloned()); + if parts.is_empty() { + None + } else { + Some(parts.join(" ")) + } +} + +pub async fn unified_search( + _: Claims, + state: Data, + exif_dao: Data>>, + tag_dao: Data>, + query: Query, +) -> HttpResponse { + let nl = query.q.trim().to_string(); + if nl.is_empty() { + return bad_request("query parameter `q` is required"); + } + + let limit = query.limit.clamp(1, 200); + let offset = query.offset; + let threshold = query.threshold.clamp(-1.0, 1.0); + + let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) { + Ok(ids) => ids, + Err(msg) => return bad_request(msg), + }; + + let ctx = opentelemetry::Context::current(); + + // ── 1. Translate the NL query, grounded on the real tag vocabulary ── + let tag_vocab: Vec<(i32, String)> = { + let mut dao = tag_dao.lock().expect("tag dao"); + match dao.get_all_tags(&ctx, None) { + Ok(tags) => tags.into_iter().map(|(_, t)| (t.id, t.name)).collect(), + Err(e) => { + log::warn!("unified_search: get_all_tags failed: {e:?}"); + Vec::new() + } + } + }; + + // Respect env/config for the LLM backend (LLM_BACKEND → ollama or + // llama-swap); local only, no hybrid, per the feature's design. 
+ let overrides = SamplingOverrides { + model: None, + num_ctx: None, + temperature: None, + top_p: None, + top_k: None, + min_p: None, + }; + let backend = match state + .insight_generator + .resolve_backend(BackendKind::Local, &overrides) + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("unified_search: resolve_backend failed: {e:?}"); + return HttpResponse::ServiceUnavailable().json(ErrorBody { + error: "LLM backend unavailable".into(), + }); + } + }; + + let today = chrono::Utc::now().date_naive(); + let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await { + Ok(sq) => sq, + Err(e) => { + log::warn!("unified_search: translate_nl_query failed: {e:?}"); + return HttpResponse::BadGateway().json(ErrorBody { + error: "could not interpret the query".into(), + }); + } + }; + + // ── 2. Forward-geocode the place name into a gps circle ── + let resolved_place = match sq.place.as_deref() { + Some(p) => forward_geocode(p).await.map(|g| ResolvedPlace { + display_name: g.display_name, + lat: g.lat, + lon: g.lon, + radius_km: g.radius_km, + }), + None => None, + }; + let gps = resolved_place.as_ref().map(|p| (p.lat, p.lon, p.radius_km)); + + let semantic = effective_semantic(&sq); + + let has_exif_filter = sq.camera_make.is_some() + || sq.camera_model.is_some() + || sq.lens_model.is_some() + || sq.date_from.is_some() + || sq.date_to.is_some(); + let has_struct = + has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some(); + + // ── 3. Build the structured candidate set (EXIF rows passing every + // filter). Skipped entirely for a pure-semantic query. ── + let mut candidate: Vec = Vec::new(); + let mut allowed_hashes: HashSet = HashSet::new(); + if has_struct { + // Tag membership set (rel_path only — same cross-library imprecision + // as the existing /photos tag listing). ALL-mode: the photo must + // carry every named tag. 
+ let tag_set: Option> = if sq.tag_ids.is_empty() { + None + } else { + let mut dao = tag_dao.lock().expect("tag dao"); + match dao.get_files_with_all_tag_ids( + sq.tag_ids.clone(), + sq.exclude_tag_ids.clone(), + &ctx, + ) { + Ok(files) => Some(files.into_iter().map(|f| f.file_name).collect()), + Err(e) => { + log::warn!("unified_search: tag filter failed: {e:?}"); + Some(HashSet::new()) + } + } + }; + + // EXIF query handles camera/lens/gps-box/date. With no EXIF filters + // it returns the whole table, which we then narrow by the predicates + // below (tags / media / scope). Fine at personal-library scale. + let gps_bounds = gps.map(|(lat, lon, r)| gps_bounding_box(lat, lon, r)); + let rows = { + let mut dao = exif_dao.lock().expect("exif dao"); + dao.query_by_exif( + &ctx, + None, // scope filtered in-Rust to support multi-library + sq.camera_make.as_deref(), + sq.camera_model.as_deref(), + sq.lens_model.as_deref(), + gps_bounds, + sq.date_from, + sq.date_to, + ) + .unwrap_or_else(|e| { + log::warn!("unified_search: query_by_exif failed: {e:?}"); + Vec::new() + }) + }; + + candidate = rows + .into_iter() + .filter(|row| { + // Library scope. + if !library_ids.is_empty() && !library_ids.contains(&row.library_id) { + return false; + } + // Precise GPS distance (the EXIF query only did a coarse box). + if let Some((lat, lon, radius_km)) = gps { + match (row.gps_latitude, row.gps_longitude) { + (Some(plat), Some(plon)) => { + if haversine_distance(lat, lon, plat as f64, plon as f64) > radius_km { + return false; + } + } + _ => return false, + } + } + // Media type. + if let Some(mt) = sq.media_type.as_deref() { + let p = Path::new(&row.file_path); + let ok = if mt == "video" { + is_video_file(p) + } else { + is_image_file(p) + }; + if !ok { + return false; + } + } + // Tag membership. 
+ if let Some(ts) = &tag_set + && !ts.contains(&row.file_path) + { + return false; + } + true + }) + .collect(); + + allowed_hashes = candidate + .iter() + .filter_map(|r| r.content_hash.clone()) + .collect(); + } + + // ── 4. Rank ── + match semantic { + Some(ref sem) => { + // Semantic term present: CLIP-rank, then keep only hits that pass + // the structured filters (by content_hash). + let scored = + match score_photos(&state, &exif_dao, sem, &library_ids, threshold, None).await { + Ok(s) => s, + Err(e) => return score_error_response(e), + }; + let hits: Vec<(f32, String)> = if has_struct { + scored + .hits + .into_iter() + .filter(|(_, h)| allowed_hashes.contains(h)) + .collect() + } else { + scored.hits + }; + let total_matching = hits.len(); + let page = paginate(&hits, offset, limit); + let results = resolve_hits(&exif_dao, &page); + HttpResponse::Ok().json(UnifiedResponse { + query: nl, + interpreted: interpreted(&sq, resolved_place), + model_version: Some(scored.model_version), + considered: scored.considered, + total_matching, + offset, + results, + }) + } + None => { + // Filters-only: no semantic term. Require at least one filter, + // then return the candidate set newest-first. 
+ if !has_struct { + return bad_request("query had no searchable terms"); + } + candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken)); + let total_matching = candidate.len(); + let end = (offset + limit).min(total_matching); + let results: Vec = if offset >= total_matching { + Vec::new() + } else { + candidate[offset..end] + .iter() + .map(|r| SearchHit { + library_id: r.library_id, + rel_path: r.file_path.clone(), + content_hash: r.content_hash.clone().unwrap_or_default(), + score: 0.0, + }) + .collect() + }; + HttpResponse::Ok().json(UnifiedResponse { + query: nl, + interpreted: interpreted(&sq, resolved_place), + model_version: None, + considered: 0, + total_matching, + offset, + results, + }) + } + } +} + +/// Slice a sorted hit list at `[offset, offset+limit)`, tolerating +/// out-of-range offsets (empty page). +fn paginate(hits: &[(f32, String)], offset: usize, limit: usize) -> Vec<(f32, String)> { + if offset >= hits.len() { + return Vec::new(); + } + let end = (offset + limit).min(hits.len()); + hits[offset..end].to_vec() +} + +fn interpreted(sq: &StructuredQuery, place: Option) -> Interpreted { + Interpreted { + semantic: sq.semantic.clone(), + tag_ids: sq.tag_ids.clone(), + exclude_tag_ids: sq.exclude_tag_ids.clone(), + unmatched_tags: sq.unmatched_tags.clone(), + camera_make: sq.camera_make.clone(), + camera_model: sq.camera_model.clone(), + lens_model: sq.lens_model.clone(), + date_from: sq.date_from, + date_to: sq.date_to, + media_type: sq.media_type.clone(), + place, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ai::nl_query::StructuredQuery; + + #[test] + fn effective_semantic_combines_semantic_and_unmatched() { + let sq = StructuredQuery { + semantic: Some("sunset".into()), + unmatched_tags: vec!["golden hour".into()], + ..Default::default() + }; + assert_eq!( + effective_semantic(&sq).as_deref(), + Some("sunset golden hour") + ); + } + + #[test] + fn effective_semantic_none_when_empty() { + let sq = 
StructuredQuery::default(); + assert_eq!(effective_semantic(&sq), None); + } + + #[test] + fn effective_semantic_unmatched_only() { + let sq = StructuredQuery { + unmatched_tags: vec!["disco".into()], + ..Default::default() + }; + assert_eq!(effective_semantic(&sq).as_deref(), Some("disco")); + } + + #[test] + fn paginate_handles_out_of_range_offset() { + let hits = vec![(0.9, "a".to_string()), (0.8, "b".to_string())]; + assert_eq!(paginate(&hits, 5, 10).len(), 0); + assert_eq!(paginate(&hits, 0, 1).len(), 1); + assert_eq!(paginate(&hits, 1, 10).len(), 1); + } +} -- 2.52.0 From fcbd7e273389ebe322c4cd5d0f2e2769f8d3b6cb Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 01:19:53 -0400 Subject: [PATCH 20/26] Unified search: accept client model override (avoid model swapping) Add an optional `model` query param to /photos/search/unified, passed into resolve_backend's overrides. The client sends the user's currently-selected local model so the translation step reuses an already-loaded model instead of forcing a llama-swap eviction + cold start. Falls back to the configured default when absent. Still local only (no hybrid). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/unified_search.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/unified_search.rs b/src/unified_search.rs index a3187a4..7bb78dd 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -53,6 +53,11 @@ pub struct UnifiedQuery { pub library: Option, /// Multi-library scope, comma-separated ids. pub library_ids: Option, + /// Optional model override. The client passes the user's currently-selected + /// local model so the translation step reuses a model that's already loaded + /// (avoids a llama-swap eviction / cold start). Falls back to the configured + /// default local model when absent. Local only — no hybrid here. 
+ pub model: Option, } fn default_limit() -> usize { @@ -167,9 +172,12 @@ pub async fn unified_search( }; // Respect env/config for the LLM backend (LLM_BACKEND → ollama or - // llama-swap); local only, no hybrid, per the feature's design. + // llama-swap); local only, no hybrid, per the feature's design. The + // client-supplied model (the user's current selection) routes translation + // to an already-loaded model when possible; otherwise resolve_backend + // falls back to the configured default. let overrides = SamplingOverrides { - model: None, + model: query.model.clone().filter(|m| !m.is_empty()), num_ctx: None, temperature: None, top_p: None, -- 2.52.0 From e56235acc5871b41d6d67c1e1d2bdb0decee4f10 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 01:29:21 -0400 Subject: [PATCH 21/26] Unified search: stage-by-stage logging to debug empty results Log the translated query (semantic/tags/place/date/media + has_struct), the tag-filter file count, candidate-row + allowed-hash counts, and the CLIP considered/hits/after-filter counts. Pinpoints which stage drops results to zero (over-extracted filter, tag path mismatch, Any/All over-constraint, or CLIP threshold). info-level for now while debugging. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/unified_search.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/unified_search.rs b/src/unified_search.rs index 7bb78dd..d80feec 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -231,6 +231,22 @@ pub async fn unified_search( let has_struct = has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some(); + // Stage trace: what the model extracted + whether a structured filter is + // active. The chips show this to the user too, but logging it makes the + // "why no results" path debuggable from the server side. 
+ log::info!( + "unified_search: q={nl:?} semantic={:?} tag_ids={:?} exclude={:?} place={:?} gps={:?} date=({:?},{:?}) media={:?} unmatched={:?} has_struct={has_struct}", + sq.semantic, + sq.tag_ids, + sq.exclude_tag_ids, + resolved_place.as_ref().map(|p| p.display_name.as_str()), + gps, + sq.date_from, + sq.date_to, + sq.media_type, + sq.unmatched_tags, + ); + // ── 3. Build the structured candidate set (EXIF rows passing every // filter). Skipped entirely for a pure-semantic query. ── let mut candidate: Vec = Vec::new(); @@ -255,6 +271,11 @@ pub async fn unified_search( } } }; + log::info!( + "unified_search: tag_ids={:?} -> tag_set_files={:?}", + sq.tag_ids, + tag_set.as_ref().map(|s| s.len()) + ); // EXIF query handles camera/lens/gps-box/date. With no EXIF filters // it returns the whole table, which we then narrow by the predicates @@ -322,6 +343,11 @@ pub async fn unified_search( .iter() .filter_map(|r| r.content_hash.clone()) .collect(); + log::info!( + "unified_search: candidate_rows={} allowed_hashes={}", + candidate.len(), + allowed_hashes.len() + ); } // ── 4. 
Rank ── @@ -334,6 +360,8 @@ pub async fn unified_search( Ok(s) => s, Err(e) => return score_error_response(e), }; + let considered = scored.considered; + let clip_hits = scored.hits.len(); let hits: Vec<(f32, String)> = if has_struct { scored .hits @@ -343,6 +371,10 @@ pub async fn unified_search( } else { scored.hits }; + log::info!( + "unified_search: clip considered={considered} hits={clip_hits} after_struct_filter={}", + hits.len() + ); let total_matching = hits.len(); let page = paginate(&hits, offset, limit); let results = resolve_hits(&exif_dao, &page); @@ -364,6 +396,7 @@ pub async fn unified_search( } candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken)); let total_matching = candidate.len(); + log::info!("unified_search: filters-only matches={total_matching}"); let end = (offset + limit).min(total_matching); let results: Vec = if offset >= total_matching { Vec::new() -- 2.52.0 From 0a40e785284ce670ddcc8a90097d8db24dbc2869 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 01:58:48 -0400 Subject: [PATCH 22/26] Unified search: UNIFIED_SEARCH_MODEL env override for the translation step Pin the NL->structured translation to a small, fast model that can stay co-resident with CLIP (and the chat model) so it never evicts them on a tight VRAM budget. Precedence: UNIFIED_SEARCH_MODEL env > client-selected model > configured default. Logs the effective model (backend.model()) so model A/B tests are visible. Documented in .env.example. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .env.example | 10 ++++++++++ src/unified_search.rs | 21 ++++++++++++++++----- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index 2e431bc..64c31d3 100644 --- a/.env.example +++ b/.env.example @@ -80,6 +80,16 @@ AGENTIC_CHAT_MAX_ITERATIONS=6 # LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed # LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180 +# ── Unified search translation model (optional) ───────────────────────── +# /photos/search/unified runs one small LLM call to translate a natural- +# language query into structured filters + a semantic term, then CLIP-ranks. +# That step needs an LLM AND CLIP available at once. On a tight VRAM budget a +# large chat model can't co-reside with CLIP, so pin a small, fast model here +# (it can stay loaded alongside CLIP and the chat model). Precedence: +# UNIFIED_SEARCH_MODEL > the client's selected model > the configured default. +# Use the configured backend (LLM_BACKEND); local only — no hybrid. +# UNIFIED_SEARCH_MODEL=qwen3-0.6b + # ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ─────────────────── # TTS routes through the same llama-swap proxy (a Chatterbox model id), so it # only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp. diff --git a/src/unified_search.rs b/src/unified_search.rs index d80feec..bb6344c 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -172,12 +172,22 @@ pub async fn unified_search( }; // Respect env/config for the LLM backend (LLM_BACKEND → ollama or - // llama-swap); local only, no hybrid, per the feature's design. The - // client-supplied model (the user's current selection) routes translation - // to an already-loaded model when possible; otherwise resolve_backend - // falls back to the configured default. + // llama-swap); local only, no hybrid, per the feature's design. + // + // Translation-model precedence: + // 1. 
UNIFIED_SEARCH_MODEL env — pin a small, fast model that can stay + // co-resident with CLIP (and the chat model) so translation never + // evicts them. This is the recommended setup on a tight VRAM budget. + // 2. the client-selected model — routes translation to whatever the user + // already has loaded (no swap) when no dedicated model is pinned. + // 3. None → resolve_backend uses the configured default local model. + let translation_model = std::env::var("UNIFIED_SEARCH_MODEL") + .ok() + .filter(|m| !m.trim().is_empty()) + .or_else(|| query.model.clone()) + .filter(|m| !m.trim().is_empty()); let overrides = SamplingOverrides { - model: query.model.clone().filter(|m| !m.is_empty()), + model: translation_model, num_ctx: None, temperature: None, top_p: None, @@ -197,6 +207,7 @@ pub async fn unified_search( }); } }; + log::info!("unified_search: translating with model={}", backend.model()); let today = chrono::Utc::now().date_naive(); let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await { -- 2.52.0 From 6c315edacc0379c1ebbc1a761263496e3e207ac2 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 02:02:57 -0400 Subject: [PATCH 23/26] clip_client: log encode_text failures (URL + status/body or network error) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CLIP encode failure reason was only ever returned in the HTTP response body, never logged server-side, making 502s from /photos/search opaque. Log the underlying cause — network error to the URL, or the Apollo HTTP status + response body — so CLIP-service problems are diagnosable from the ImageApi log. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ai/clip_client.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ai/clip_client.rs b/src/ai/clip_client.rs index 85c66a7..3519e8b 100644 --- a/src/ai/clip_client.rs +++ b/src/ai/clip_client.rs @@ -191,11 +191,13 @@ impl ClipClient { let resp = match self.client.post(&url).json(&body).send().await { Ok(r) => r, Err(e) if e.is_timeout() || e.is_connect() => { + log::warn!("clip encode_text network error to {url}: {e}"); return Err(ClipError::Transient(anyhow::anyhow!( "clip client network: {e}" ))); } Err(e) => { + log::warn!("clip encode_text request error to {url}: {e}"); return Err(ClipError::Transient(anyhow::anyhow!( "clip client request: {e}" ))); @@ -210,6 +212,7 @@ impl ClipClient { return Ok(body); } let body_text = resp.text().await.unwrap_or_default(); + log::warn!("clip encode_text HTTP {status} from {url}: {body_text}"); Err(classify_error_response(status.as_u16(), &body_text)) } -- 2.52.0 From 6e5898e7663e5810afb0c8e16ec6b975dd81adf8 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 02:20:06 -0400 Subject: [PATCH 24/26] Unified search: rank within filtered set instead of pre-thresholding CLIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When structured filters are present they're the constraint and CLIP only ranks within the candidate set, so drop the global similarity threshold for that case. Previously the 0.2 whole-library threshold ran BEFORE intersecting with the filters, discarding filter-matching photos that scored just under it (e.g. a 2022 beach photo at 0.18) — producing after_struct_filter=0 even when matches existed. Plain semantic (no filters) keeps the user's threshold. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/unified_search.rs | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/unified_search.rs b/src/unified_search.rs index bb6344c..9eea405 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -364,13 +364,27 @@ pub async fn unified_search( // ── 4. Rank ── match semantic { Some(ref sem) => { - // Semantic term present: CLIP-rank, then keep only hits that pass - // the structured filters (by content_hash). - let scored = - match score_photos(&state, &exif_dao, sem, &library_ids, threshold, None).await { - Ok(s) => s, - Err(e) => return score_error_response(e), - }; + // When structured filters are present they ARE the constraint — + // CLIP only ranks within the candidate set. So drop the global + // similarity threshold (it's tuned for whole-library search and + // would pre-discard filter-matching photos that scored just under + // it — e.g. a 2022 beach photo at 0.18 — before the intersection + // ever runs). With no filters, keep the user's threshold for the + // plain semantic case. + let clip_threshold = if has_struct { -1.0 } else { threshold }; + let scored = match score_photos( + &state, + &exif_dao, + sem, + &library_ids, + clip_threshold, + None, + ) + .await + { + Ok(s) => s, + Err(e) => return score_error_response(e), + }; let considered = scored.considered; let clip_hits = scored.hits.len(); let hits: Vec<(f32, String)> = if has_struct { -- 2.52.0 From f2ab8d3740a0407116ad98ea36cc14642268aefb Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 02:25:24 -0400 Subject: [PATCH 25/26] Unified search: use ANY-mode tag matching, not ALL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ALL-mode over-constrains NL queries — the model maps several query words to tags and few photos carry every one, zeroing the candidate set. 
Switch to ANY (a photo matches if it has any named tag); the semantic CLIP ranking provides precision within that pool. Exclude tags still filter out. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/unified_search.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/unified_search.rs b/src/unified_search.rs index 9eea405..555773c 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -264,13 +264,15 @@ pub async fn unified_search( let mut allowed_hashes: HashSet = HashSet::new(); if has_struct { // Tag membership set (rel_path only — same cross-library imprecision - // as the existing /photos tag listing). ALL-mode: the photo must - // carry every named tag. + // as the existing /photos tag listing). ANY-mode: a photo matches if + // it carries any of the named tags. ALL-mode over-constrains NL + // queries (the model maps several words to tags and few photos carry + // them all); the semantic term does the precision work instead. let tag_set: Option> = if sq.tag_ids.is_empty() { None } else { let mut dao = tag_dao.lock().expect("tag dao"); - match dao.get_files_with_all_tag_ids( + match dao.get_files_with_any_tag_ids( sq.tag_ids.clone(), sq.exclude_tag_ids.clone(), &ctx, -- 2.52.0 From 48a1b753f0ec706c6a388a91aeddbec583723f19 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Wed, 17 Jun 2026 18:14:44 -0400 Subject: [PATCH 26/26] AI: add enable_thinking reasoning toggle plumbed to llama.cpp New optional SamplingOverride forwarded to llama-server as chat_template_kwargs.enable_thinking (gates Qwen3-style reasoning blocks). None leaves the template default; other backends ignore it. Wired through the agentic-insight and chat-turn request bodies/handlers. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ai/backend.rs | 6 ++++++ src/ai/handlers.rs | 15 +++++++++++++++ src/ai/insight_chat.rs | 9 +++++++++ src/ai/insight_generator.rs | 3 +++ src/ai/llamacpp.rs | 19 +++++++++++++++++++ src/bin/populate_knowledge.rs | 1 + src/reels/script.rs | 1 + src/unified_search.rs | 1 + 8 files changed, 55 insertions(+) diff --git a/src/ai/backend.rs b/src/ai/backend.rs index 0515f1c..dfcdd03 100644 --- a/src/ai/backend.rs +++ b/src/ai/backend.rs @@ -41,6 +41,10 @@ pub struct SamplingOverrides { pub top_p: Option, pub top_k: Option, pub min_p: Option, + /// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as + /// `chat_template_kwargs.enable_thinking`); other backends ignore it. + /// `None` leaves the model/template default in place. + pub enable_thinking: Option, } impl SamplingOverrides { @@ -124,6 +128,7 @@ mod tests { top_p: None, top_k: None, min_p: None, + enable_thinking: None, }; assert!(!empty.has_sampling()); @@ -134,6 +139,7 @@ mod tests { top_p: None, top_k: None, min_p: None, + enable_thinking: None, }; assert!(with_temp.has_sampling()); } diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index c6bc212..ae9f300 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -40,6 +40,12 @@ pub struct GeneratePhotoInsightRequest { pub top_k: Option, #[serde(default)] pub min_p: Option, + /// Reasoning toggle for thinking-capable models. Forwarded to the + /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored + /// by other backends and the non-agentic (Ollama) path. Only the agentic + /// endpoint routes through llama.cpp. None defers to the template default. + #[serde(default)] + pub enable_thinking: Option, /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision + /// OpenRouter chat). Only respected by the agentic endpoint. 
#[serde(default)] @@ -868,6 +874,7 @@ pub async fn generate_agentic_insight_handler( request.top_p, request.top_k, request.min_p, + request.enable_thinking, max_iterations, request.backend.clone(), fewshot_examples, @@ -1169,6 +1176,11 @@ pub struct ChatTurnHttpRequest { pub top_k: Option, #[serde(default)] pub min_p: Option, + /// Reasoning toggle for thinking-capable models. Forwarded to the + /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored + /// by other backends. None defers to the model/template default. + #[serde(default)] + pub enable_thinking: Option, #[serde(default)] pub max_iterations: Option, /// Per-turn system-prompt override. Ephemeral in append mode, @@ -1247,6 +1259,7 @@ pub async fn chat_turn_handler( top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, + enable_thinking: request.enable_thinking, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), @@ -1473,6 +1486,7 @@ pub async fn chat_stream_handler( top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, + enable_thinking: request.enable_thinking, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), @@ -1618,6 +1632,7 @@ pub async fn turn_async_handler( top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, + enable_thinking: request.enable_thinking, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index 84f2b32..af00731 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -70,6 +70,10 @@ pub struct ChatTurnRequest { pub top_p: Option, pub top_k: Option, pub min_p: Option, + /// Reasoning toggle for thinking-capable models. Forwarded to the + /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored + /// by other backends. 
None defers to the model/template default. + pub enable_thinking: Option, pub max_iterations: Option, /// Per-turn system-prompt override. In append mode (default), applied /// ephemerally — original system message restored before persistence. @@ -344,6 +348,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -847,6 +852,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -1017,6 +1023,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -1425,6 +1432,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -1607,6 +1615,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 4ff8494..d45fa55 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -3933,6 +3933,7 @@ Return ONLY the summary, nothing else."#, if let Some(ctx) = overrides.num_ctx { c.set_num_ctx(Some(ctx)); } + c.set_enable_thinking(overrides.enable_thinking); Box::new(c) } else { // Pure Ollama local. 
@@ -4064,6 +4065,7 @@ Return ONLY the summary, nothing else."#, top_p: Option, top_k: Option, min_p: Option, + enable_thinking: Option, max_iterations: usize, backend: Option, fewshot_examples: Vec>, @@ -4091,6 +4093,7 @@ Return ONLY the summary, nothing else."#, top_p, top_k, min_p, + enable_thinking, }; let backend = self.resolve_backend(kind, &overrides).await?; span.set_attribute(KeyValue::new("model", backend.model().to_string())); diff --git a/src/ai/llamacpp.rs b/src/ai/llamacpp.rs index 8a7c898..77e7f63 100644 --- a/src/ai/llamacpp.rs +++ b/src/ai/llamacpp.rs @@ -64,6 +64,12 @@ pub struct LlamaCppClient { top_p: Option, top_k: Option, min_p: Option, + /// When `Some`, forwarded to llama-server as + /// `chat_template_kwargs: {"enable_thinking": }`. The Jinja chat + /// template (e.g. Qwen3) reads this to gate its reasoning block. `None` + /// omits the key entirely, leaving the template's own default. Templates + /// that don't reference the key ignore it, so sending it is harmless. + enable_thinking: Option, } impl LlamaCppClient { @@ -89,6 +95,7 @@ impl LlamaCppClient { top_p: None, top_k: None, min_p: None, + enable_thinking: None, } } @@ -104,6 +111,12 @@ impl LlamaCppClient { self.num_ctx = num_ctx; } + /// Set the reasoning toggle forwarded as `chat_template_kwargs.enable_thinking`. + /// `None` leaves the chat template's own default in place. + pub fn set_enable_thinking(&mut self, enable_thinking: Option) { + self.enable_thinking = enable_thinking; + } + pub fn set_sampling_params( &mut self, temperature: Option, @@ -458,6 +471,12 @@ impl LlamaCppClient { // via -c, so we silently drop the override here. The config.yaml // entry is the source of truth for context size. let _ = self.num_ctx; + // Reasoning toggle for thinking-capable templates (Qwen3 et al.). + // llama-server forwards chat_template_kwargs into the Jinja render + // (requires --jinja); templates that ignore the key are unaffected. 
+ if let Some(think) = self.enable_thinking { + v.push(("chat_template_kwargs", json!({ "enable_thinking": think }))); + } v } diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs index 71f2f8a..396eddc 100644 --- a/src/bin/populate_knowledge.rs +++ b/src/bin/populate_knowledge.rs @@ -336,6 +336,7 @@ async fn main() -> anyhow::Result<()> { args.top_p, args.top_k, args.min_p, + None, // enable_thinking: leave model/template default args.max_iterations, None, Vec::new(), diff --git a/src/reels/script.rs b/src/reels/script.rs index 858efd1..38ef9cc 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -309,6 +309,7 @@ pub async fn generate_script_agentic( top_p: None, top_k: None, min_p: None, + enable_thinking: None, }, ) .await diff --git a/src/unified_search.rs b/src/unified_search.rs index 555773c..0940a92 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -193,6 +193,7 @@ pub async fn unified_search( top_p: None, top_k: None, min_p: None, + enable_thinking: None, }; let backend = match state .insight_generator -- 2.52.0