diff --git a/src/ai/tts.rs b/src/ai/tts.rs index 08d9dcd..4e7544c 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -23,6 +23,7 @@ use std::time::{Duration, Instant}; use tokio::sync::Semaphore; use uuid::Uuid; +use crate::ai::llamacpp::LlamaCppClient; use crate::data::Claims; use crate::file_types::{is_audio_file, is_video_file}; use crate::files::is_valid_full_path; @@ -473,6 +474,36 @@ pub struct TtsJobStatusResponse { pub error: Option, } +/// Synthesize speech honoring the global single-GPU serialization +/// (`TTS_PERMIT`) and the GPU write lease, exactly as the speech-job path does. +/// Queues on the permit rather than fast-failing, so callers wait their turn +/// instead of contending. Text is run through the same markdown/emoji cleanup + +/// pronunciation pipeline as the HTTP handlers. Reused by the memory-reel +/// pipeline to narrate each segment without racing a user's TTS request on the +/// Chatterbox GPU. +pub async fn synthesize_serialized( + client: &LlamaCppClient, + text: &str, + voice: Option<&str>, + format: &str, +) -> anyhow::Result> { + let prepared = prepare_for_tts(text); + if prepared.is_empty() { + anyhow::bail!("nothing to synthesize after cleanup"); + } + // Queue rather than fast-fail (mirrors create_speech_job_handler). + let _permit = TTS_PERMIT + .acquire() + .await + .map_err(|_| anyhow::anyhow!("TTS permit closed"))?; + // Wait for the LLM side to release the GPU before the request timeout + // starts (see ai::gpu). + let _gpu = crate::ai::gpu::tts_lease().await; + client + .text_to_speech(&prepared, voice, format, None, None, None) + .await +} + /// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses. /// Returns 202 + a job id immediately; the synth queues on the single GPU /// permit (instead of fast-failing 429) and the client polls the job until diff --git a/src/main.rs b/src/main.rs index 8b56efd..b059e9b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -62,6 +62,7 @@ mod knowledge; mod memories; mod otel; mod personas; +mod reels; mod service; #[cfg(test)] mod testhelpers; @@ -344,6 +345,9 @@ fn main() -> std::io::Result<()> { .service(handlers::image::clear_image_date) .service(handlers::image::get_full_exif) .service(memories::list_memories) + .service(reels::create_reel_handler) + .service(reels::reel_status_handler) + .service(reels::reel_video_handler) .service(ai::generate_insight_handler) .service(ai::generate_agentic_insight_handler) .service(ai::generation_status_handler) diff --git a/src/memories.rs b/src/memories.rs index 4b1682b..c877981 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -349,12 +349,6 @@ pub async fn list_memories( opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); let span_mode = q.span.unwrap_or(MemoriesSpan::Day); - let span_token = match span_mode { - MemoriesSpan::Day => "day", - MemoriesSpan::Week => "week", - MemoriesSpan::Month => "month", - }; - let years_back: i32 = DEFAULT_YEARS_BACK; // The SQL filter expects a signed offset in minutes from UTC; default // 0 (UTC) when the client didn't send a hint. We also keep a chrono @@ -366,18 +360,66 @@ pub async fn list_memories( .timezone_offset_minutes .and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60)); - debug!( - "list_memories: span={:?} tz_offset_min={} years_back={}", - span_mode, tz_offset_minutes, years_back - ); - - let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { - Ok(lib) => lib, + let items = match gather_memory_items( + &app_state, + &exif_dao, + &span_context, + span_mode, + tz_offset_minutes, + client_timezone, + q.library.as_deref(), + ) { + Ok(items) => items, Err(msg) => { warn!("Rejecting /memories request: {}", msg); return HttpResponse::BadRequest().body(msg); } }; + + span.add_event( + "memories_scanned", + vec![ + KeyValue::new("span", format!("{:?}", span_mode)), + KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()), + KeyValue::new("result_count", items.len().to_string()), + KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), + KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), + ], + ); + span.set_status(Status::Ok); + + HttpResponse::Ok().json(MemoriesResponse { items }) +} + +/// Resolve an "on this day/week/month across past years" window into an +/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the +/// memory-reel selector so both honour the same library resolution, per-library +/// exclusions, timezone handling, and sort order. Returns `Err(message)` only +/// when the `library` param is invalid (callers map that to 400); per-library +/// query/lock failures are logged and skipped, matching the handler's +/// best-effort behaviour. +pub fn gather_memory_items( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + span_mode: MemoriesSpan, + tz_offset_minutes: i32, + client_timezone: Option, + library_param: Option<&str>, +) -> Result, String> { + let span_token = match span_mode { + MemoriesSpan::Day => "day", + MemoriesSpan::Week => "week", + MemoriesSpan::Month => "month", + }; + let years_back: i32 = DEFAULT_YEARS_BACK; + + debug!( + "gather_memory_items: span={:?} tz_offset_min={} years_back={}", + span_mode, tz_offset_minutes, years_back + ); + + let library = crate::libraries::resolve_library_param(app_state, library_param)?; let libraries_to_scan: Vec<&crate::libraries::Library> = match library { Some(lib) => vec![lib], None => app_state.libraries.iter().collect(), @@ -394,7 +436,7 @@ pub async fn list_memories( let rows = match exif_dao.lock() { Ok(mut dao) => match dao.get_memories_in_window( - &span_context, + span_context, lib.id, span_token, years_back, @@ -469,21 +511,7 @@ pub async fn list_memories( } } - let items: Vec = memories_with_dates.into_iter().map(|(m, _)| m).collect(); - - span.add_event( - "memories_scanned", - vec![ - KeyValue::new("span", format!("{:?}", span_mode)), - KeyValue::new("years_back", years_back.to_string()), - KeyValue::new("result_count", items.len().to_string()), - KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), - KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), - ], - ); - span.set_status(Status::Ok); - - HttpResponse::Ok().json(MemoriesResponse { items }) + Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect()) } #[cfg(test)] diff --git a/src/reels/mod.rs b/src/reels/mod.rs new file mode 100644 index 0000000..fe270f8 --- /dev/null +++ b/src/reels/mod.rs @@ -0,0 +1,625 @@ +//! Memory reels: render an MP4 slideshow of a selection of photos with an +//! LLM-written, voice-cloned narration over it. +//! +//! Pipeline: a [`selector`] resolves *which* photos (and the reel metadata), +//! the [`script`] module writes per-photo narration via the LLM, each line is +//! synthesized to speech, and [`render`] assembles the stills + narration into +//! one MP4. Jobs run in the background (mirroring the TTS speech-job registry) +//! because a reel takes minutes; the finished MP4 is cached on disk keyed by +//! the selection so a repeat request is instant. +//! +//! Phase 1 is on-demand and photos-only. The segment model is media-typed so a +//! video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in +//! without reworking the pipeline. + +pub mod render; +pub mod script; +pub mod selector; + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::{LazyLock, Mutex as StdMutex}; +use std::time::{Duration, Instant}; + +use actix_files::NamedFile; +use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web}; +use chrono::DateTime; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use std::sync::Mutex; +use uuid::Uuid; + +use crate::data::Claims; +use crate::database::{ExifDao, InsightDao}; +use crate::memories::MemoriesSpan; +use crate::otel::extract_context_from_request; +use crate::state::AppState; +use selector::ReelSelector; + +/// The media behind one reel segment. Photos-only for now; a `Clip` variant +/// (a section of a source video) is the phase-2 extension point. +#[derive(Debug, Clone)] +pub enum SegmentMedia { + Photo { rel_path: String, library_id: i32 }, +} + +/// A segment before narration: which photo, when it was taken, and any cached +/// insight to feed the scripter. +#[derive(Debug, Clone)] +pub struct PlannedSegment { + pub media: SegmentMedia, + pub date: Option, + pub insight_title: Option, + pub insight_summary: Option, +} + +impl PlannedSegment { + /// Human date for the prompt, e.g. "June 12, 2019". `None` when undated. + pub fn date_label(&self) -> Option { + let ts = self.date?; + let dt = DateTime::from_timestamp(ts, 0)?; + Some(dt.format("%B %-d, %Y").to_string()) + } +} + +/// Reel-wide metadata the scripter uses for framing. +#[derive(Debug, Clone)] +pub struct ReelMeta { + pub span: MemoriesSpan, + pub years: Vec, +} + +impl ReelMeta { + /// Natural-language phrase for the span, e.g. "on this day". + pub fn span_phrase(&self) -> &'static str { + match self.span { + MemoriesSpan::Day => "on this day", + MemoriesSpan::Week => "this week", + MemoriesSpan::Month => "this month", + } + } +} + +// --- Job registry ------------------------------------------------------------ +// +// In-memory, same shape as the TTS speech-job registry: a reel takes minutes, +// too long to hold one HTTP request from a phone. POST /reels returns a job id; +// the client polls GET /reels/{id} until the video URL appears. The heavy +// artifact (the MP4) lives on disk, not in this map — jobs only carry status + +// the output path. State is intentionally not durable across restarts; the +// on-disk cache is what makes a repeat request cheap, not the registry. + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ReelJobStatus { + Queued, + Running, + Done, + Error, +} + +impl ReelJobStatus { + fn is_terminal(self) -> bool { + matches!(self, Self::Done | Self::Error) + } +} + +struct ReelJob { + status: ReelJobStatus, + /// Coarse progress label for the client ("scripting", "narrating", …). + stage: &'static str, + title: Option, + output_path: Option, + error: Option, + created_at: Instant, + finished_at: Option, + abort: Option, +} + +/// Finished jobs linger so a client that lost connectivity can still collect +/// the result; anything older than MAX_AGE is dropped (aborted first if somehow +/// still running). Swept lazily on each create. +const REEL_JOB_RESULT_TTL: Duration = Duration::from_secs(30 * 60); +const REEL_JOB_MAX_AGE: Duration = Duration::from_secs(60 * 60); + +static REEL_JOBS: LazyLock>> = + LazyLock::new(|| StdMutex::new(HashMap::new())); + +fn sweep_stale_jobs(jobs: &mut HashMap, now: Instant) { + jobs.retain(|_, job| { + let result_expired = job + .finished_at + .is_some_and(|t| now.duration_since(t) >= REEL_JOB_RESULT_TTL); + let too_old = now.duration_since(job.created_at) >= REEL_JOB_MAX_AGE; + if too_old && let Some(h) = job.abort.take() { + h.abort(); + } + !(result_expired || too_old) + }); +} + +fn with_job(id: Uuid, f: impl FnOnce(&mut ReelJob) -> R) -> Option { + REEL_JOBS.lock().unwrap().get_mut(&id).map(f) +} + +fn set_stage(id: Uuid, stage: &'static str) { + with_job(id, |job| { + if !job.status.is_terminal() { + job.status = ReelJobStatus::Running; + job.stage = stage; + } + }); +} + +/// Move a job to a terminal state (first terminal write wins). +fn finish_job( + id: Uuid, + status: ReelJobStatus, + title: Option, + output_path: Option, + error: Option, +) { + with_job(id, |job| { + if job.status.is_terminal() { + return; + } + job.status = status; + job.stage = match status { + ReelJobStatus::Done => "done", + _ => "error", + }; + job.title = title; + job.output_path = output_path; + job.error = error; + job.finished_at = Some(Instant::now()); + job.abort = None; + }); +} + +// --- On-disk cache ----------------------------------------------------------- + +/// Render version: bump to invalidate every cached reel after a rendering / +/// scripting change that should produce a fresh result. +const RENDER_VERSION: u32 = 1; + +/// Cache key over everything that determines *which* media and *how* it's +/// voiced — but not the (non-deterministic) narration text. Same inputs → same +/// MP4 served instantly. blake3 keeps it filesystem-safe and collision-free. +fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str>) -> String { + let mut buf = format!( + "v{}|{}|voice={}|", + RENDER_VERSION, + selector.descriptor(), + voice.unwrap_or("default") + ); + for m in media { + match m { + SegmentMedia::Photo { + rel_path, + library_id, + } => buf.push_str(&format!("{library_id}:{rel_path}|")), + } + } + blake3::hash(buf.as_bytes()).to_hex().to_string() +} + +fn reel_mp4_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.mp4")) +} + +fn reel_sidecar_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.json")) +} + +#[derive(Serialize, Deserialize)] +struct ReelSidecar { + title: String, +} + +// --- HTTP types -------------------------------------------------------------- + +#[derive(Debug, Deserialize)] +pub struct CreateReelRequest { + #[serde(default)] + pub span: Option, + #[serde(default)] + pub timezone_offset_minutes: Option, + #[serde(default)] + pub library: Option, + /// Cloned TTS voice for the narration; server default when omitted. + #[serde(default)] + pub voice: Option, + /// Cap on photos in the reel (clamped server-side). + #[serde(default)] + pub max_segments: Option, +} + +#[derive(Debug, Serialize)] +pub struct ReelJobCreatedResponse { + pub job_id: String, + pub status: ReelJobStatus, +} + +#[derive(Debug, Serialize)] +pub struct ReelStatusResponse { + pub job_id: String, + pub status: ReelJobStatus, + pub stage: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub video_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +// --- Handlers ---------------------------------------------------------------- + +/// POST /reels — start (or instantly serve from cache) a memory reel for the +/// requested span. Returns 202 + a job id; the client polls GET /reels/{id}. +#[post("/reels")] +pub async fn create_reel_handler( + http_request: HttpRequest, + _claims: Claims, + req: web::Json, + app_state: web::Data, + exif_dao: web::Data>>, + insight_dao: web::Data>>, +) -> impl Responder { + let span_context = extract_context_from_request(&http_request); + + if app_state.llamacpp.is_none() { + return HttpResponse::ServiceUnavailable().json(json!({ + "error": "Reel narration needs the LLM/TTS backend (set LLAMA_SWAP_URL)" + })); + } + + let span = req.span.unwrap_or(MemoriesSpan::Day); + let max_segments = req.max_segments.unwrap_or(selector::DEFAULT_MAX_SEGMENTS); + let selector = ReelSelector::Memories { + span, + tz_offset_minutes: req.timezone_offset_minutes.unwrap_or(0), + library: req.library.clone(), + max_segments, + }; + + // Cheap pass: resolve the media set for the cache key and the emptiness + // check. Insight enrichment + scripting happen in the background job. + let (planned, meta) = match selector::resolve(&app_state, &exif_dao, &span_context, &selector) { + Ok(r) => r, + Err(msg) => return HttpResponse::BadRequest().body(msg), + }; + if planned.is_empty() { + return HttpResponse::UnprocessableEntity().json(json!({ + "error": "No photo memories found for this span" + })); + } + + let media: Vec = planned.iter().map(|p| p.media.clone()).collect(); + let voice = req.voice.clone().filter(|s| !s.is_empty()); + let key = cache_key(&selector, &media, voice.as_deref()); + + let job_id = Uuid::new_v4(); + + // Cache hit: register an already-Done job pointing at the existing MP4 so + // the client's first poll returns the video URL immediately. + let mp4 = reel_mp4_path(&app_state, &key); + if mp4.exists() { + let title = std::fs::read(reel_sidecar_path(&app_state, &key)) + .ok() + .and_then(|b| serde_json::from_slice::(&b).ok()) + .map(|s| s.title); + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Done, + stage: "done", + title, + output_path: Some(mp4), + error: None, + created_at: Instant::now(), + finished_at: Some(Instant::now()), + abort: None, + }, + ); + return HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Done, + }); + } + + { + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Queued, + stage: "queued", + title: None, + output_path: None, + error: None, + created_at: Instant::now(), + finished_at: None, + abort: None, + }, + ); + } + + let state = app_state.clone(); + let insight_dao = insight_dao.clone(); + let handle = tokio::spawn(async move { + match run_reel_job(&state, &insight_dao, job_id, planned, meta, voice, &key).await { + Ok((title, path)) => { + finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None) + } + Err(e) => { + log::error!("reel job {job_id} failed: {e:?}"); + finish_job( + job_id, + ReelJobStatus::Error, + None, + None, + Some(format!("{e}")), + ) + } + } + }); + with_job(job_id, |job| job.abort = Some(handle.abort_handle())); + + HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Queued, + }) +} + +/// GET /reels/{id} — poll a reel job. Done jobs carry a `video_url`. +#[get("/reels/{id}")] +pub async fn reel_status_handler(_claims: Claims, path: web::Path) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let resp = with_job(id, |job| ReelStatusResponse { + job_id: id_str.clone(), + status: job.status, + stage: job.stage.to_string(), + title: job.title.clone(), + video_url: matches!(job.status, ReelJobStatus::Done) + .then(|| format!("/reels/{id_str}/video")), + error: job.error.clone(), + }); + match resp { + Some(r) => HttpResponse::Ok().json(r), + None => HttpResponse::NotFound().json(json!({ "error": "job not found or expired" })), + } +} + +/// GET /reels/{id}/video — stream the finished MP4 (supports range requests via +/// NamedFile, so the mobile player can seek). +#[get("/reels/{id}/video")] +pub async fn reel_video_handler( + _claims: Claims, + request: HttpRequest, + path: web::Path, +) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let output = with_job(id, |job| job.output_path.clone()).flatten(); + let Some(path) = output else { + return HttpResponse::NotFound().json(json!({ "error": "reel not ready" })); + }; + match NamedFile::open(&path) { + Ok(file) => file.into_response(&request), + Err(e) => { + log::error!("opening reel mp4 {path:?} failed: {e:?}"); + HttpResponse::NotFound().json(json!({ "error": "reel file missing" })) + } + } +} + +// --- Pipeline ---------------------------------------------------------------- + +/// Run the full reel pipeline: enrich → script → narrate → render → concat, +/// then publish the MP4 into the cache. Returns (title, mp4_path). +async fn run_reel_job( + app_state: &AppState, + insight_dao: &Mutex>, + job_id: Uuid, + mut planned: Vec, + meta: ReelMeta, + voice: Option, + key: &str, +) -> anyhow::Result<(String, PathBuf)> { + use anyhow::{Context, anyhow}; + + let client = app_state + .llamacpp + .as_ref() + .ok_or_else(|| anyhow!("TTS/LLM backend not configured"))? + .clone(); + + // 1. Enrich with cached insights, then script (one LLM call). + set_stage(job_id, "scripting"); + let span_context = opentelemetry::Context::new(); + selector::enrich(insight_dao, &span_context, &mut planned); + let script = script::generate_script(&client, &meta, &planned).await?; + + // 2. Narrate each line to speech and 3. render each photo segment. A + // segment whose audio or render fails is skipped (logged) rather than + // sinking the whole reel — handles an odd HEIC/corrupt file gracefully. + set_stage(job_id, "narrating"); + let work = tempfile::tempdir().context("creating reel work dir")?; + let nvenc = render::is_nvenc_available().await; + let opts = render::SegmentOpts { + nvenc, + ..Default::default() + }; + + let mut segment_files: Vec = Vec::new(); + for (i, (seg, line)) in planned.iter().zip(script.lines.iter()).enumerate() { + let image_path = match resolve_image_path(app_state, &seg.media) { + Some(p) => p, + None => { + log::warn!("reel {job_id}: skipping segment {i}, image path unresolved"); + continue; + } + }; + + let audio_bytes = + match crate::ai::tts::synthesize_serialized(&client, line, voice.as_deref(), "wav") + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}"); + continue; + } + }; + let audio_path = work.path().join(format!("narration_{i:03}.wav")); + if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { + log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}"); + continue; + } + + let narration_secs = + crate::video::ffmpeg::get_duration_seconds(&audio_path.to_string_lossy()) + .await + .ok() + .flatten() + .unwrap_or(render::MIN_SEGMENT_SECONDS); + let duration = render::segment_duration(narration_secs); + + set_stage(job_id, "rendering"); + let seg_out = work.path().join(format!("seg_{i:03}.mp4")); + if let Err(e) = + render::render_segment(&image_path, &audio_path, &seg_out, duration, &opts).await + { + log::warn!("reel {job_id}: skipping segment {i}, render failed: {e}"); + continue; + } + segment_files.push(seg_out.to_string_lossy().to_string()); + } + + if segment_files.is_empty() { + return Err(anyhow!("no segments rendered successfully")); + } + + // 4. Concat into the cache. Write to a temp name in the reels dir, then + // rename atomically (same filesystem) so a reader never sees a partial. + std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?; + let final_path = reel_mp4_path(app_state, key); + let tmp_path = final_path.with_extension("mp4.tmp"); + render::concat_segments(&segment_files, &tmp_path).await?; + std::fs::rename(&tmp_path, &final_path).context("publishing reel mp4")?; + + // Sidecar carries the title so a future cache hit can return it without + // re-running the pipeline. + let sidecar = serde_json::to_vec(&ReelSidecar { + title: script.title.clone(), + }) + .context("serializing reel sidecar")?; + let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar); + + Ok((script.title, final_path)) +} + +/// Resolve a photo segment's library-relative path to a validated absolute +/// path under its library root. +fn resolve_image_path(app_state: &AppState, media: &SegmentMedia) -> Option { + let SegmentMedia::Photo { + rel_path, + library_id, + } = media; + let lib = app_state.library_by_id(*library_id)?; + crate::files::is_valid_full_path(&lib.root_path, rel_path, false) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn photo(p: &str, lib: i32) -> SegmentMedia { + SegmentMedia::Photo { + rel_path: p.to_string(), + library_id: lib, + } + } + + fn day_selector() -> ReelSelector { + ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + } + } + + #[test] + fn cache_key_is_stable_for_same_inputs() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let k1 = cache_key(&day_selector(), &media, Some("grandma")); + let k2 = cache_key(&day_selector(), &media, Some("grandma")); + assert_eq!(k1, k2); + // 64-hex blake3. + assert_eq!(k1.len(), 64); + assert!(k1.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn cache_key_changes_with_media_order_voice_and_selector() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let reordered = vec![photo("b.jpg", 1), photo("a.jpg", 1)]; + let base = cache_key(&day_selector(), &media, Some("grandma")); + // Order matters (the reel sequence differs). + assert_ne!( + base, + cache_key(&day_selector(), &reordered, Some("grandma")) + ); + // Voice matters. + assert_ne!(base, cache_key(&day_selector(), &media, Some("dad"))); + assert_ne!(base, cache_key(&day_selector(), &media, None)); + // Span matters. + let week = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + }; + assert_ne!(base, cache_key(&week, &media, Some("grandma"))); + } + + #[test] + fn span_phrase_maps_each_span() { + let mk = |span| ReelMeta { + span, + years: vec![], + }; + assert_eq!(mk(MemoriesSpan::Day).span_phrase(), "on this day"); + assert_eq!(mk(MemoriesSpan::Week).span_phrase(), "this week"); + assert_eq!(mk(MemoriesSpan::Month).span_phrase(), "this month"); + } + + #[test] + fn date_label_formats_or_none() { + let seg = PlannedSegment { + media: photo("a.jpg", 1), + date: Some(1_560_384_000), // 2019-06-13 UTC + insight_title: None, + insight_summary: None, + }; + assert!(seg.date_label().unwrap().contains("2019")); + + let undated = PlannedSegment { + media: photo("a.jpg", 1), + date: None, + insight_title: None, + insight_summary: None, + }; + assert_eq!(undated.date_label(), None); + } +} diff --git a/src/reels/render.rs b/src/reels/render.rs new file mode 100644 index 0000000..ca39515 --- /dev/null +++ b/src/reels/render.rs @@ -0,0 +1,338 @@ +//! ffmpeg assembly for memory reels. +//! +//! Two-stage, per-segment design: each segment is rendered to its own +//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments +//! are joined with the concat demuxer (stream copy, no re-encode). Rendering +//! per segment — rather than one monster filtergraph — keeps each ffmpeg +//! invocation simple to reason about, parallelizes naturally, and means a +//! video-clip segment type (phase 2) slots in as just a different per-segment +//! builder without touching the concat stage. +//! +//! The arg builders are pure (`Vec` out) so the exact ffmpeg command +//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure. + +use anyhow::{Context, Result, bail}; +use std::path::Path; +use tokio::process::Command; + +/// Re-exported so the reel pipeline reaches NVENC detection through this module +/// rather than depending on `video::ffmpeg` directly. +pub use crate::video::ffmpeg::is_nvenc_available; + +/// Reel canvas. Landscape matches the majority of camera photos; portrait +/// shots are letterboxed by the `pad` in [`segment_filter`] rather than +/// cropped, so faces never get cut off. +pub const REEL_WIDTH: u32 = 1920; +pub const REEL_HEIGHT: u32 = 1080; +pub const REEL_FPS: u32 = 30; + +/// A still's screen time is its narration length plus a short breath, with a +/// floor so a terse line still lingers. No ceiling: the segment always covers +/// the full narration so speech is never truncated — the scripter is asked to +/// keep lines short instead. +pub const MIN_SEGMENT_SECONDS: f64 = 2.5; +const NARRATION_TAIL_SECONDS: f64 = 0.6; + +/// Screen time for a photo segment given its narration audio length. +pub fn segment_duration(narration_secs: f64) -> f64 { + let d = narration_secs + NARRATION_TAIL_SECONDS; + if d.is_finite() && d > MIN_SEGMENT_SECONDS { + d + } else { + MIN_SEGMENT_SECONDS + } +} + +/// Options controlling per-segment rendering. `ken_burns` adds a slow zoom for +/// motion; it's defaulted off until the effect is eyeballed on the GPU box, +/// since a wrong zoompan expression reads as jitter and can't be verified here. +#[derive(Debug, Clone, Copy)] +pub struct SegmentOpts { + pub width: u32, + pub height: u32, + pub fps: u32, + pub nvenc: bool, + pub ken_burns: bool, +} + +impl Default for SegmentOpts { + fn default() -> Self { + Self { + width: REEL_WIDTH, + height: REEL_HEIGHT, + fps: REEL_FPS, + nvenc: false, + ken_burns: false, + } + } +} + +/// Video filter for a photo segment: fit the image inside the canvas +/// (preserving aspect, padding the rest), normalize SAR/fps/pixel format, and +/// optionally apply a gentle Ken Burns zoom. +pub fn segment_filter(opts: &SegmentOpts, duration: f64) -> String { + let (w, h, fps) = (opts.width, opts.height, opts.fps); + if opts.ken_burns { + // Upscale first so zoompan samples from a larger frame (avoids + // shimmer), drift the zoom from 1.0→~1.12 across the segment, hold the + // crop centered, then settle to the canvas. + let frames = (duration * fps as f64).round().max(1.0) as u64; + format!( + "scale={w}*2:{h}*2:force_original_aspect_ratio=increase,\ + crop={w}*2:{h}*2,\ + zoompan=z='min(zoom+0.0009,1.12)':d={frames}:\ + x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={w}x{h}:fps={fps},\ + setsar=1,format=yuv420p" + ) + } else { + format!( + "scale={w}:{h}:force_original_aspect_ratio=decrease,\ + pad={w}:{h}:(ow-iw)/2:(oh-ih)/2,\ + setsar=1,fps={fps},format=yuv420p" + ) + } +} + +fn video_encoder_args(nvenc: bool) -> Vec { + if nvenc { + // p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path. + [ + "-c:v", + "h264_nvenc", + "-preset", + "p4", + "-cq", + "23", + "-pix_fmt", + "yuv420p", + ] + } else { + [ + "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p", + ] + } + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Build the ffmpeg args that render one photo segment: a still looped for +/// `duration` seconds with its narration muxed in. The narration is padded +/// with trailing silence (`apad`) so short lines don't end the segment early; +/// `-t` bounds both streams to the segment length. +pub fn build_segment_args( + image_path: &str, + audio_path: &str, + out_path: &str, + duration: f64, + opts: &SegmentOpts, +) -> Vec { + let mut args: Vec = vec!["-y".into()]; + if opts.nvenc { + args.extend(["-hwaccel".into(), "cuda".into()]); + } + args.extend([ + "-loop".into(), + "1".into(), + "-i".into(), + image_path.into(), + "-i".into(), + audio_path.into(), + "-filter_complex".into(), + format!("[0:v]{}[v];[1:a]apad[a]", segment_filter(opts, duration)), + "-map".into(), + "[v]".into(), + "-map".into(), + "[a]".into(), + "-t".into(), + format!("{duration:.3}"), + ]); + args.extend(video_encoder_args(opts.nvenc)); + args.extend( + ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"] + .iter() + .map(|s| s.to_string()), + ); + args.push(out_path.into()); + args +} + +/// Build the concat-demuxer args that join rendered segments losslessly. +/// `+faststart` moves the moov atom up front so the reel streams immediately +/// on the mobile client. +pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec { + [ + "-y", + "-f", + "concat", + "-safe", + "0", + "-i", + list_path, + "-c", + "copy", + "-movflags", + "+faststart", + out_path, + ] + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Render the concat list file body. Each line points the demuxer at one +/// segment; single quotes in paths are escaped per ffmpeg's concat syntax. +pub fn build_concat_list(segment_paths: &[String]) -> String { + let mut out = String::new(); + for p in segment_paths { + let escaped = p.replace('\'', r"'\''"); + out.push_str(&format!("file '{escaped}'\n")); + } + out +} + +async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> { + let output = Command::new("ffmpeg") + .args(args) + .output() + .await + .with_context(|| format!("spawning ffmpeg for {what}"))?; + if !output.status.success() { + bail!( + "ffmpeg {what} failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + Ok(()) +} + +/// Render one photo segment to `out_path`. +pub async fn render_segment( + image_path: &Path, + audio_path: &Path, + out_path: &Path, + duration: f64, + opts: &SegmentOpts, +) -> Result<()> { + let args = build_segment_args( + &image_path.to_string_lossy(), + &audio_path.to_string_lossy(), + &out_path.to_string_lossy(), + duration, + opts, + ); + run_ffmpeg(&args, "segment render").await +} + +/// Join rendered segments into the final reel. Writes the concat list into the +/// same directory as the output so relative paths and cleanup stay local. +pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> { + let list_path = out_path.with_extension("concat.txt"); + let body = build_concat_list(segment_paths); + tokio::fs::write(&list_path, body) + .await + .context("writing concat list")?; + let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy()); + let result = run_ffmpeg(&args, "concat").await; + let _ = tokio::fs::remove_file(&list_path).await; + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn segment_duration_floors_short_lines() { + // A one-word narration still lingers at the floor. + assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS); + assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS); + } + + #[test] + fn segment_duration_covers_full_narration_plus_tail() { + // No ceiling: a long line gets its full length so speech isn't cut. + assert!((segment_duration(5.0) - 5.6).abs() < 1e-9); + assert!((segment_duration(20.0) - 20.6).abs() < 1e-9); + } + + #[test] + fn segment_duration_rejects_nonfinite() { + assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS); + assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS); + } + + #[test] + fn static_filter_fits_and_pads_without_cropping() { + let f = segment_filter(&SegmentOpts::default(), 4.0); + assert!(f.contains("force_original_aspect_ratio=decrease")); + assert!(f.contains("pad=1920:1080")); + assert!(f.contains("format=yuv420p")); + // No zoompan when ken_burns is off. + assert!(!f.contains("zoompan")); + } + + #[test] + fn ken_burns_filter_uses_duration_scaled_frame_count() { + let opts = SegmentOpts { + ken_burns: true, + ..SegmentOpts::default() + }; + // 4s * 30fps = 120 frames in the zoompan d= term. + let f = segment_filter(&opts, 4.0); + assert!(f.contains("zoompan")); + assert!(f.contains("d=120:")); + assert!(f.contains("s=1920x1080")); + } + + #[test] + fn segment_args_loop_still_and_bound_with_t() { + let args = build_segment_args( + "/img.jpg", + "/a.wav", + "/out.mp4", + 4.0, + &SegmentOpts::default(), + ); + let joined = args.join(" "); + assert!(joined.contains("-loop 1 -i /img.jpg")); + assert!(joined.contains("-i /a.wav")); + assert!(joined.contains("apad")); + assert!(joined.contains("-t 4.000")); + assert!(joined.contains("libx264")); + assert!(joined.ends_with("/out.mp4")); + } + + #[test] + fn segment_args_use_nvenc_and_cuda_when_enabled() { + let opts = SegmentOpts { + nvenc: true, + ..SegmentOpts::default() + }; + let args = build_segment_args("/img.jpg", "/a.wav", "/out.mp4", 3.0, &opts); + let joined = args.join(" "); + assert!(joined.contains("-hwaccel cuda")); + assert!(joined.contains("h264_nvenc")); + assert!(!joined.contains("libx264")); + } + + #[test] + fn concat_args_stream_copy_with_faststart() { + let args = build_concat_args("/tmp/list.txt", "/out.mp4"); + let joined = args.join(" "); + assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt")); + assert!(joined.contains("-c copy")); + assert!(joined.contains("+faststart")); + } + + #[test] + fn concat_list_escapes_single_quotes() { + let body = build_concat_list(&[ + "/tmp/seg_000.mp4".into(), + "/tmp/own's dir/seg_001.mp4".into(), + ]); + assert!(body.contains("file '/tmp/seg_000.mp4'\n")); + // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax. + assert!(body.contains(r"own'\''s")); + } +} diff --git a/src/reels/script.rs b/src/reels/script.rs new file mode 100644 index 0000000..1cf3189 --- /dev/null +++ b/src/reels/script.rs @@ -0,0 +1,289 @@ +//! Narration scripting for memory reels. +//! +//! One LLM call turns the planned segments (each carrying its date and, where +//! available, its cached insight) into a short first-person narration line per +//! photo plus a title for the reel. We reuse the cached insight summary as the +//! richest per-photo signal rather than re-running vision at reel time — that +//! keeps reel generation off the GPU's vision slot entirely. +//! +//! The prompt builder and response parser are pure so the contract is +//! unit-testable; `generate_script` wires them to the LLM client. + +use anyhow::{Context, Result}; +use std::sync::Arc; + +use super::{PlannedSegment, ReelMeta}; +use crate::ai::llamacpp::LlamaCppClient; +use crate::ai::llm_client::LlmClient; + +/// The narration for a whole reel: a title and one line per segment, in order. +#[derive(Debug, Clone, PartialEq)] +pub struct ReelScript { + pub title: String, + pub lines: Vec, +} + +const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \ +slideshow of someone's own photos set to a spoken voiceover. Write warm, \ +specific, first-person narration as if the person is gently looking back on \ +their own memories. Be concrete and grounded in the details given; never \ +invent names, places, or events that aren't supported. Keep each line to one \ +or two short sentences that can be read aloud in a few seconds. Avoid generic \ +filler like \"what a wonderful day\" — if you have little to go on, simply \ +describe the moment plainly."; + +/// Build the (system, user) prompt pair for the scripter. The user message +/// describes each segment in order and asks for strict JSON back. +pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (String, String) { + let mut user = String::new(); + user.push_str(&format!( + "These are {} photos surfaced as memories {}.\n\n", + planned.len(), + meta.span_phrase() + )); + if !meta.years.is_empty() { + let years: Vec = meta.years.iter().map(|y| y.to_string()).collect(); + user.push_str(&format!("They span the years: {}.\n\n", years.join(", "))); + } + user.push_str("Photos, in the order they will appear:\n"); + for (i, seg) in planned.iter().enumerate() { + user.push_str(&format!("\n[{}]", i + 1)); + if let Some(date) = seg.date_label() { + user.push_str(&format!(" {date}")); + } + user.push('\n'); + match (&seg.insight_title, &seg.insight_summary) { + (Some(t), Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {t} — {s}\n")); + } + (Some(t), _) => user.push_str(&format!(" Known context: {t}\n")), + (_, Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {s}\n")); + } + _ => user.push_str(" (no extra context — narrate plainly from the date)\n"), + } + } + user.push_str(&format!( + "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\ + {{\"title\": \"\", \"segments\": [\"\", \ + \"\", ... ]}}\n\ + The \"segments\" array MUST have exactly {} items, one per photo in order.", + planned.len() + )); + (SYSTEM_PROMPT.to_string(), user) +} + +/// Parse the model's response into a script with exactly `n` lines. Tolerant of +/// code fences and surrounding prose, and of both `segments: [".."]` and +/// `segments: [{"narration": ".."}]` shapes. Missing/extra lines are padded or +/// truncated so the caller always gets `n` aligned to the segments. +pub fn parse_script_response(raw: &str, n: usize) -> ReelScript { + let fallback_line = "A moment worth remembering."; + let value = extract_json_object(raw); + + let title = value + .as_ref() + .and_then(|v| v.get("title")) + .and_then(|t| t.as_str()) + .map(clean_text) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "Memories".to_string()); + + let mut lines: Vec = value + .as_ref() + .and_then(|v| v.get("segments")) + .and_then(|s| s.as_array()) + .map(|arr| { + arr.iter() + .map(|item| { + let text = item + .as_str() + .map(|s| s.to_string()) + .or_else(|| { + item.get("narration") + .and_then(|n| n.as_str()) + .map(|s| s.to_string()) + }) + .unwrap_or_default(); + clean_text(&text) + }) + .collect() + }) + .unwrap_or_default(); + + // Align to exactly n: drop extras, pad shortfalls with a neutral line so + // every photo still gets spoken audio. + lines.truncate(n); + while lines.len() < n { + lines.push(fallback_line.to_string()); + } + for line in lines.iter_mut() { + if line.is_empty() { + *line = fallback_line.to_string(); + } + } + + ReelScript { title, lines } +} + +/// Pull the first balanced top-level JSON object out of a possibly-noisy model +/// response (code fences, leading prose). Returns None if nothing parses. +fn extract_json_object(raw: &str) -> Option { + // Fast path: the whole thing is valid JSON. + if let Ok(v) = serde_json::from_str::(raw.trim()) { + return Some(v); + } + // Otherwise scan for the first '{' ... matching '}' span, ignoring braces + // inside strings. + let bytes = raw.as_bytes(); + let start = raw.find('{')?; + let mut depth = 0i32; + let mut in_str = false; + let mut escaped = false; + for i in start..bytes.len() { + let c = bytes[i] as char; + if in_str { + if escaped { + escaped = false; + } else if c == '\\' { + escaped = true; + } else if c == '"' { + in_str = false; + } + continue; + } + match c { + '"' => in_str = true, + '{' => depth += 1, + '}' => { + depth -= 1; + if depth == 0 { + return serde_json::from_str(&raw[start..=i]).ok(); + } + } + _ => {} + } + } + None +} + +/// Collapse whitespace and strip stray markdown/quote decorations a model +/// sometimes leaves around a line. +fn clean_text(s: &str) -> String { + let trimmed = s.trim().trim_matches('"').trim(); + trimmed.split_whitespace().collect::>().join(" ") +} + +/// Generate the reel script via the LLM. Text-only (no images) — the per-photo +/// context comes from cached insights. The call takes the GPU read lease +/// internally (see `LlamaCppClient::generate`). +pub async fn generate_script( + client: &Arc, + meta: &ReelMeta, + planned: &[PlannedSegment], +) -> Result { + let (system, user) = build_script_messages(meta, planned); + let raw = client + .generate(&user, Some(&system), None) + .await + .context("LLM script generation failed")?; + Ok(parse_script_response(&raw, planned.len())) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::memories::MemoriesSpan; + + fn meta() -> ReelMeta { + ReelMeta { + span: MemoriesSpan::Day, + years: vec![2019, 2021], + } + } + + fn planned(n: usize) -> Vec { + (0..n) + .map(|i| PlannedSegment { + media: super::super::SegmentMedia::Photo { + rel_path: format!("p{i}.jpg"), + library_id: 1, + }, + date: Some(1_560_000_000 + i as i64 * 86_400), + insight_title: None, + insight_summary: None, + }) + .collect() + } + + #[test] + fn prompt_states_exact_segment_count_and_span() { + let (sys, user) = build_script_messages(&meta(), &planned(3)); + assert!(sys.contains("memory reel")); + assert!(user.contains("3 photos")); + assert!(user.contains("on this day")); + assert!(user.contains("exactly 3 items")); + // Each photo gets an indexed entry. + assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]")); + } + + #[test] + fn prompt_includes_insight_context_when_present() { + let mut p = planned(1); + p[0].insight_title = Some("Lake house weekend".into()); + p[0].insight_summary = Some("Swimming with the dogs.".into()); + let (_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("Lake house weekend — Swimming with the dogs.")); + } + + #[test] + fn parse_plain_json_object() { + let raw = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#; + let script = parse_script_response(raw, 2); + assert_eq!(script.title, "Summer Days"); + assert_eq!(script.lines, vec!["First line.", "Second line."]); + } + + #[test] + fn parse_tolerates_code_fences_and_prose() { + let raw = "Sure! Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!"; + let script = parse_script_response(raw, 2); + assert_eq!(script.title, "Trip"); + assert_eq!(script.lines, vec!["A.", "B."]); + } + + #[test] + fn parse_accepts_object_segment_shape() { + let raw = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#; + let script = parse_script_response(raw, 2); + assert_eq!(script.lines, vec!["One.", "Two."]); + } + + #[test] + fn parse_pads_short_and_truncates_long_to_n() { + // Model returned 1 line but we have 3 segments → pad with neutral lines. + let short = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3); + assert_eq!(short.lines.len(), 3); + assert_eq!(short.lines[0], "Only one."); + assert!(!short.lines[1].is_empty()); + + // Model returned 3 but we have 2 → truncate. + let long = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2); + assert_eq!(long.lines, vec!["a", "b"]); + } + + #[test] + fn parse_falls_back_on_garbage() { + let script = parse_script_response("the model said no", 2); + assert_eq!(script.title, "Memories"); + assert_eq!(script.lines.len(), 2); + assert!(script.lines.iter().all(|l| !l.is_empty())); + } + + #[test] + fn parse_blank_line_replaced_with_fallback() { + let script = parse_script_response(r#"{"title":"T","segments":[" ","Real."]}"#, 2); + assert!(!script.lines[0].is_empty()); + assert_eq!(script.lines[1], "Real."); + } +} diff --git a/src/reels/selector.rs b/src/reels/selector.rs new file mode 100644 index 0000000..0a53ee5 --- /dev/null +++ b/src/reels/selector.rs @@ -0,0 +1,252 @@ +//! Reel selectors: resolve "what goes in the reel" into an ordered media set +//! plus the metadata the scripter needs. The renderer and scripter are +//! selector-agnostic, so adding tag- or date-range-based reels later means +//! adding a variant here, not touching the pipeline. +//! +//! Resolution is split in two so the handler can compute a cache key (and +//! short-circuit on a cache hit) without the per-photo insight lookups: +//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and +//! runs in the background job. + +use std::path::Path; +use std::sync::Mutex; + +use chrono::{DateTime, Datelike, FixedOffset}; + +use super::{PlannedSegment, ReelMeta, SegmentMedia}; +use crate::database::{ExifDao, InsightDao}; +use crate::file_types::is_image_file; +use crate::memories::{self, MemoriesSpan}; +use crate::state::AppState; + +/// Default and hard caps on how many photos a reel covers. The cap bounds the +/// LLM/TTS/ffmpeg work per reel; when a span has more, [`sample_evenly`] keeps +/// a representative spread across the years rather than just the oldest. +pub const DEFAULT_MAX_SEGMENTS: usize = 24; +pub const HARD_MAX_SEGMENTS: usize = 40; + +/// What a reel is built from. v1 ships the memories (on this day/week/month) +/// selector; tag and date-range variants slot in here later. +#[derive(Debug, Clone)] +pub enum ReelSelector { + Memories { + span: MemoriesSpan, + tz_offset_minutes: i32, + library: Option, + max_segments: usize, + }, +} + +impl ReelSelector { + /// Stable string identity for the cache key. Captures everything that + /// changes *which* media is selected (but not the non-deterministic + /// narration, which can't be part of a pre-render key). + pub fn descriptor(&self) -> String { + match self { + ReelSelector::Memories { + span, + tz_offset_minutes, + library, + max_segments, + } => format!( + "memories:span={:?}:tz={}:lib={}:max={}", + span, + tz_offset_minutes, + library.as_deref().unwrap_or("all"), + max_segments + ), + } + } +} + +/// Pick at most `max` items spread evenly across the input, always keeping the +/// first and last. Returns the input unchanged when it already fits. +pub fn sample_evenly(items: &[T], max: usize) -> Vec { + if max == 0 { + return Vec::new(); + } + if items.len() <= max { + return items.to_vec(); + } + if max == 1 { + return vec![items[0].clone()]; + } + let last = items.len() - 1; + (0..max) + .map(|i| { + // Spread indices 0..=last across max picks, endpoints included. + let idx = (i * last + (max - 1) / 2) / (max - 1); + items[idx.min(last)].clone() + }) + .collect() +} + +/// Cheap pass: resolve the selector into an ordered list of media (no insight +/// lookups yet) plus reel metadata. `Err` only on an invalid library param. +pub fn resolve( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + selector: &ReelSelector, +) -> Result<(Vec, ReelMeta), String> { + match selector { + ReelSelector::Memories { + span, + tz_offset_minutes, + library, + max_segments, + } => { + let client_tz = FixedOffset::east_opt(tz_offset_minutes * 60); + let items = memories::gather_memory_items( + app_state, + exif_dao, + span_context, + *span, + *tz_offset_minutes, + client_tz, + library.as_deref(), + )?; + + // Phase 1 is photos-only: drop videos (a clip segment type lands + // in phase 2). Filter before sampling so the spread is over the + // photos that will actually appear. + let items: Vec = items + .into_iter() + .filter(|it| is_image_file(Path::new(&it.path))) + .collect(); + + let cap = (*max_segments).clamp(1, HARD_MAX_SEGMENTS); + let items = sample_evenly(&items, cap); + + let years = distinct_years(&items, client_tz); + let meta = ReelMeta { span: *span, years }; + + let planned = items + .into_iter() + .map(|it| PlannedSegment { + media: SegmentMedia::Photo { + rel_path: it.path, + library_id: it.library_id, + }, + date: it.created, + insight_title: None, + insight_summary: None, + }) + .collect(); + Ok((planned, meta)) + } + } +} + +/// Distinct calendar years represented by the selected media, in the client's +/// timezone, ascending. Used to tell the scripter how far back the reel reaches. +fn distinct_years(items: &[memories::MemoryItem], tz: Option) -> Vec { + let mut years: Vec = items + .iter() + .filter_map(|it| it.created) + .filter_map(|ts| DateTime::from_timestamp(ts, 0)) + .map(|dt| match tz { + Some(off) => dt.with_timezone(&off).year(), + None => dt.year(), + }) + .collect(); + years.sort_unstable(); + years.dedup(); + years +} + +/// Background pass: fill each segment's cached insight (title + summary) where +/// one exists. Best-effort — a missing or errored lookup leaves the fields +/// `None` and the scripter narrates from the date alone. +pub fn enrich( + insight_dao: &Mutex>, + span_context: &opentelemetry::Context, + planned: &mut [PlannedSegment], +) { + let Ok(mut dao) = insight_dao.lock() else { + return; + }; + for seg in planned.iter_mut() { + let rel_path = match &seg.media { + SegmentMedia::Photo { rel_path, .. } => rel_path, + }; + if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) { + seg.insight_title = Some(insight.title); + seg.insight_summary = Some(insight.summary); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sample_evenly_returns_all_when_under_cap() { + let v = vec![1, 2, 3]; + assert_eq!(sample_evenly(&v, 5), vec![1, 2, 3]); + assert_eq!(sample_evenly(&v, 3), vec![1, 2, 3]); + } + + #[test] + fn sample_evenly_keeps_endpoints_and_spreads() { + let v: Vec = (0..100).collect(); + let picked = sample_evenly(&v, 5); + assert_eq!(picked.len(), 5); + assert_eq!(picked[0], 0); // first kept + assert_eq!(*picked.last().unwrap(), 99); // last kept + // Strictly increasing, no dupes. + assert!(picked.windows(2).all(|w| w[0] < w[1])); + } + + #[test] + fn sample_evenly_handles_one_and_zero() { + let v: Vec = (0..10).collect(); + assert_eq!(sample_evenly(&v, 1), vec![0]); + assert!(sample_evenly(&v, 0).is_empty()); + } + + #[test] + fn descriptor_is_stable_and_distinguishes_inputs() { + let a = ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + let b = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + assert_eq!(a.descriptor(), a.clone().descriptor()); + assert_ne!(a.descriptor(), b.descriptor()); + assert!(a.descriptor().contains("lib=all")); + } + + #[test] + fn distinct_years_dedupes_and_sorts() { + let items = vec![ + memories::MemoryItem { + path: "a".into(), + created: Some(1_560_000_000), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "b".into(), + created: Some(1_560_086_400), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "c".into(), + created: Some(1_623_000_000), // 2021 + modified: None, + library_id: 1, + }, + ]; + assert_eq!(distinct_years(&items, None), vec![2019, 2021]); + } +} diff --git a/src/state.rs b/src/state.rs index e678ad1..bf894f3 100644 --- a/src/state.rs +++ b/src/state.rs @@ -53,6 +53,10 @@ pub struct AppState { pub video_path: String, pub gif_path: String, pub preview_clips_path: String, + /// Directory for cached memory-reel MP4s (+ title sidecars). Derived from + /// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips. + /// Created lazily by the reel pipeline on first render. + pub reels_path: String, pub excluded_dirs: Vec, pub ollama: OllamaClient, /// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only @@ -141,6 +145,19 @@ impl AppState { preview_dao, ); + // Reels cache dir: explicit env, else a `reels` sibling of the preview + // clips dir (a known-writable, test-safe location). Not created here — + // the reel pipeline does `create_dir_all` before its first write, so + // construction (incl. tests) never touches the filesystem. + let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| { + std::path::Path::new(&preview_clips_path) + .parent() + .map(|p| p.join("reels")) + .unwrap_or_else(|| std::path::PathBuf::from("reels")) + .to_string_lossy() + .to_string() + }); + let library_health = libraries::new_health_map(&libraries_vec); let live_libraries = Arc::new(RwLock::new(libraries_vec.clone())); Self { @@ -155,6 +172,7 @@ impl AppState { video_path, gif_path, preview_clips_path, + reels_path, excluded_dirs, ollama, openrouter, diff --git a/src/video/ffmpeg.rs b/src/video/ffmpeg.rs index d385cac..019bd86 100644 --- a/src/video/ffmpeg.rs +++ b/src/video/ffmpeg.rs @@ -231,7 +231,7 @@ impl Ffmpeg { /// a hard failure — previously the `parse::` on empty stdout produced /// "cannot parse float from empty string" and poisoned the preview-clip row /// with status=failed, which the watcher would re-queue every full scan. -async fn get_duration_seconds(input_file: &str) -> Result> { +pub async fn get_duration_seconds(input_file: &str) -> Result> { if let Some(d) = probe_duration(input_file, "format=duration").await? { return Ok(Some(d)); }