Add memory-reel backend: on-demand narrated photo slideshow
New POST /reels + GET /reels/{id} (+ /video) build an MP4 slideshow of a
memory span (day/week/month), narrated by the LLM in a cloned voice.
Pipeline (src/reels/): a selector resolves which photos + reel metadata,
the scripter writes one narration line per photo via a single LLM call
(reusing each photo's cached insight as context — no fresh vision calls,
so reel generation stays off the GPU's vision slot), each line is
synthesized to speech, and the renderer assembles stills + narration via
ffmpeg. Jobs run in the background (mirroring the TTS speech-job
registry) since a reel takes minutes; the finished MP4 is cached on disk
keyed by the selection so a repeat request is instant.
The segment model is media-typed (Photo today) so a video-clip segment
(phase 2) and a nightly pre-render (phase 3) slot in without reworking
the pipeline. Ken Burns motion is implemented but defaulted off pending a
visual check on the GPU box.
Supporting changes:
- memories: extract gather_memory_items() so the reel selector reuses the
exact window/exclusion/tz/sort logic behind /memories.
- ai::tts: add synthesize_serialized() so reel narration honors the same
single-GPU permit + write lease as user TTS requests.
- video::ffmpeg: make get_duration_seconds() pub for narration timing.
- AppState: reels_path (REELS_DIRECTORY, defaults beside preview clips).
Pure logic (cache key, script parsing, ffmpeg arg/filter construction,
even sampling, segment timing) is unit-tested (26 tests). The runtime
path (ffmpeg render, TTS, LLM) needs a real run on the GPU host to verify
end-to-end — not exercisable in CI.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -23,6 +23,7 @@ use std::time::{Duration, Instant};
|
|||||||
use tokio::sync::Semaphore;
|
use tokio::sync::Semaphore;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::ai::llamacpp::LlamaCppClient;
|
||||||
use crate::data::Claims;
|
use crate::data::Claims;
|
||||||
use crate::file_types::{is_audio_file, is_video_file};
|
use crate::file_types::{is_audio_file, is_video_file};
|
||||||
use crate::files::is_valid_full_path;
|
use crate::files::is_valid_full_path;
|
||||||
@@ -473,6 +474,36 @@ pub struct TtsJobStatusResponse {
|
|||||||
pub error: Option<String>,
|
pub error: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Synthesize speech honoring the global single-GPU serialization
|
||||||
|
/// (`TTS_PERMIT`) and the GPU write lease, exactly as the speech-job path does.
|
||||||
|
/// Queues on the permit rather than fast-failing, so callers wait their turn
|
||||||
|
/// instead of contending. Text is run through the same markdown/emoji cleanup +
|
||||||
|
/// pronunciation pipeline as the HTTP handlers. Reused by the memory-reel
|
||||||
|
/// pipeline to narrate each segment without racing a user's TTS request on the
|
||||||
|
/// Chatterbox GPU.
|
||||||
|
pub async fn synthesize_serialized(
|
||||||
|
client: &LlamaCppClient,
|
||||||
|
text: &str,
|
||||||
|
voice: Option<&str>,
|
||||||
|
format: &str,
|
||||||
|
) -> anyhow::Result<Vec<u8>> {
|
||||||
|
let prepared = prepare_for_tts(text);
|
||||||
|
if prepared.is_empty() {
|
||||||
|
anyhow::bail!("nothing to synthesize after cleanup");
|
||||||
|
}
|
||||||
|
// Queue rather than fast-fail (mirrors create_speech_job_handler).
|
||||||
|
let _permit = TTS_PERMIT
|
||||||
|
.acquire()
|
||||||
|
.await
|
||||||
|
.map_err(|_| anyhow::anyhow!("TTS permit closed"))?;
|
||||||
|
// Wait for the LLM side to release the GPU before the request timeout
|
||||||
|
// starts (see ai::gpu).
|
||||||
|
let _gpu = crate::ai::gpu::tts_lease().await;
|
||||||
|
client
|
||||||
|
.text_to_speech(&prepared, voice, format, None, None, None)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
/// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses.
|
/// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses.
|
||||||
/// Returns 202 + a job id immediately; the synth queues on the single GPU
|
/// Returns 202 + a job id immediately; the synth queues on the single GPU
|
||||||
/// permit (instead of fast-failing 429) and the client polls the job until
|
/// permit (instead of fast-failing 429) and the client polls the job until
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ mod knowledge;
|
|||||||
mod memories;
|
mod memories;
|
||||||
mod otel;
|
mod otel;
|
||||||
mod personas;
|
mod personas;
|
||||||
|
mod reels;
|
||||||
mod service;
|
mod service;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod testhelpers;
|
mod testhelpers;
|
||||||
@@ -344,6 +345,9 @@ fn main() -> std::io::Result<()> {
|
|||||||
.service(handlers::image::clear_image_date)
|
.service(handlers::image::clear_image_date)
|
||||||
.service(handlers::image::get_full_exif)
|
.service(handlers::image::get_full_exif)
|
||||||
.service(memories::list_memories)
|
.service(memories::list_memories)
|
||||||
|
.service(reels::create_reel_handler)
|
||||||
|
.service(reels::reel_status_handler)
|
||||||
|
.service(reels::reel_video_handler)
|
||||||
.service(ai::generate_insight_handler)
|
.service(ai::generate_insight_handler)
|
||||||
.service(ai::generate_agentic_insight_handler)
|
.service(ai::generate_agentic_insight_handler)
|
||||||
.service(ai::generation_status_handler)
|
.service(ai::generation_status_handler)
|
||||||
|
|||||||
+57
-29
@@ -349,12 +349,6 @@ pub async fn list_memories(
|
|||||||
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
||||||
|
|
||||||
let span_mode = q.span.unwrap_or(MemoriesSpan::Day);
|
let span_mode = q.span.unwrap_or(MemoriesSpan::Day);
|
||||||
let span_token = match span_mode {
|
|
||||||
MemoriesSpan::Day => "day",
|
|
||||||
MemoriesSpan::Week => "week",
|
|
||||||
MemoriesSpan::Month => "month",
|
|
||||||
};
|
|
||||||
let years_back: i32 = DEFAULT_YEARS_BACK;
|
|
||||||
|
|
||||||
// The SQL filter expects a signed offset in minutes from UTC; default
|
// The SQL filter expects a signed offset in minutes from UTC; default
|
||||||
// 0 (UTC) when the client didn't send a hint. We also keep a chrono
|
// 0 (UTC) when the client didn't send a hint. We also keep a chrono
|
||||||
@@ -366,18 +360,66 @@ pub async fn list_memories(
|
|||||||
.timezone_offset_minutes
|
.timezone_offset_minutes
|
||||||
.and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60));
|
.and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60));
|
||||||
|
|
||||||
debug!(
|
let items = match gather_memory_items(
|
||||||
"list_memories: span={:?} tz_offset_min={} years_back={}",
|
&app_state,
|
||||||
span_mode, tz_offset_minutes, years_back
|
&exif_dao,
|
||||||
);
|
&span_context,
|
||||||
|
span_mode,
|
||||||
let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) {
|
tz_offset_minutes,
|
||||||
Ok(lib) => lib,
|
client_timezone,
|
||||||
|
q.library.as_deref(),
|
||||||
|
) {
|
||||||
|
Ok(items) => items,
|
||||||
Err(msg) => {
|
Err(msg) => {
|
||||||
warn!("Rejecting /memories request: {}", msg);
|
warn!("Rejecting /memories request: {}", msg);
|
||||||
return HttpResponse::BadRequest().body(msg);
|
return HttpResponse::BadRequest().body(msg);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
span.add_event(
|
||||||
|
"memories_scanned",
|
||||||
|
vec![
|
||||||
|
KeyValue::new("span", format!("{:?}", span_mode)),
|
||||||
|
KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()),
|
||||||
|
KeyValue::new("result_count", items.len().to_string()),
|
||||||
|
KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
|
||||||
|
KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
span.set_status(Status::Ok);
|
||||||
|
|
||||||
|
HttpResponse::Ok().json(MemoriesResponse { items })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve an "on this day/week/month across past years" window into an
|
||||||
|
/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the
|
||||||
|
/// memory-reel selector so both honour the same library resolution, per-library
|
||||||
|
/// exclusions, timezone handling, and sort order. Returns `Err(message)` only
|
||||||
|
/// when the `library` param is invalid (callers map that to 400); per-library
|
||||||
|
/// query/lock failures are logged and skipped, matching the handler's
|
||||||
|
/// best-effort behaviour.
|
||||||
|
pub fn gather_memory_items(
|
||||||
|
app_state: &AppState,
|
||||||
|
exif_dao: &Mutex<Box<dyn ExifDao>>,
|
||||||
|
span_context: &opentelemetry::Context,
|
||||||
|
span_mode: MemoriesSpan,
|
||||||
|
tz_offset_minutes: i32,
|
||||||
|
client_timezone: Option<FixedOffset>,
|
||||||
|
library_param: Option<&str>,
|
||||||
|
) -> Result<Vec<MemoryItem>, String> {
|
||||||
|
let span_token = match span_mode {
|
||||||
|
MemoriesSpan::Day => "day",
|
||||||
|
MemoriesSpan::Week => "week",
|
||||||
|
MemoriesSpan::Month => "month",
|
||||||
|
};
|
||||||
|
let years_back: i32 = DEFAULT_YEARS_BACK;
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"gather_memory_items: span={:?} tz_offset_min={} years_back={}",
|
||||||
|
span_mode, tz_offset_minutes, years_back
|
||||||
|
);
|
||||||
|
|
||||||
|
let library = crate::libraries::resolve_library_param(app_state, library_param)?;
|
||||||
let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
|
let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
|
||||||
Some(lib) => vec![lib],
|
Some(lib) => vec![lib],
|
||||||
None => app_state.libraries.iter().collect(),
|
None => app_state.libraries.iter().collect(),
|
||||||
@@ -394,7 +436,7 @@ pub async fn list_memories(
|
|||||||
|
|
||||||
let rows = match exif_dao.lock() {
|
let rows = match exif_dao.lock() {
|
||||||
Ok(mut dao) => match dao.get_memories_in_window(
|
Ok(mut dao) => match dao.get_memories_in_window(
|
||||||
&span_context,
|
span_context,
|
||||||
lib.id,
|
lib.id,
|
||||||
span_token,
|
span_token,
|
||||||
years_back,
|
years_back,
|
||||||
@@ -469,21 +511,7 @@ pub async fn list_memories(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let items: Vec<MemoryItem> = memories_with_dates.into_iter().map(|(m, _)| m).collect();
|
Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect())
|
||||||
|
|
||||||
span.add_event(
|
|
||||||
"memories_scanned",
|
|
||||||
vec![
|
|
||||||
KeyValue::new("span", format!("{:?}", span_mode)),
|
|
||||||
KeyValue::new("years_back", years_back.to_string()),
|
|
||||||
KeyValue::new("result_count", items.len().to_string()),
|
|
||||||
KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
|
|
||||||
KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
|
|
||||||
],
|
|
||||||
);
|
|
||||||
span.set_status(Status::Ok);
|
|
||||||
|
|
||||||
HttpResponse::Ok().json(MemoriesResponse { items })
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@@ -0,0 +1,625 @@
|
|||||||
|
//! Memory reels: render an MP4 slideshow of a selection of photos with an
|
||||||
|
//! LLM-written, voice-cloned narration over it.
|
||||||
|
//!
|
||||||
|
//! Pipeline: a [`selector`] resolves *which* photos (and the reel metadata),
|
||||||
|
//! the [`script`] module writes per-photo narration via the LLM, each line is
|
||||||
|
//! synthesized to speech, and [`render`] assembles the stills + narration into
|
||||||
|
//! one MP4. Jobs run in the background (mirroring the TTS speech-job registry)
|
||||||
|
//! because a reel takes minutes; the finished MP4 is cached on disk keyed by
|
||||||
|
//! the selection so a repeat request is instant.
|
||||||
|
//!
|
||||||
|
//! Phase 1 is on-demand and photos-only. The segment model is media-typed so a
|
||||||
|
//! video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in
|
||||||
|
//! without reworking the pipeline.
|
||||||
|
|
||||||
|
pub mod render;
|
||||||
|
pub mod script;
|
||||||
|
pub mod selector;
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::{LazyLock, Mutex as StdMutex};
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
use actix_files::NamedFile;
|
||||||
|
use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web};
|
||||||
|
use chrono::DateTime;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::json;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::data::Claims;
|
||||||
|
use crate::database::{ExifDao, InsightDao};
|
||||||
|
use crate::memories::MemoriesSpan;
|
||||||
|
use crate::otel::extract_context_from_request;
|
||||||
|
use crate::state::AppState;
|
||||||
|
use selector::ReelSelector;
|
||||||
|
|
||||||
|
/// The media behind one reel segment. Photos-only for now; a `Clip` variant
|
||||||
|
/// (a section of a source video) is the phase-2 extension point.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum SegmentMedia {
|
||||||
|
Photo { rel_path: String, library_id: i32 },
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A segment before narration: which photo, when it was taken, and any cached
|
||||||
|
/// insight to feed the scripter.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct PlannedSegment {
|
||||||
|
pub media: SegmentMedia,
|
||||||
|
pub date: Option<i64>,
|
||||||
|
pub insight_title: Option<String>,
|
||||||
|
pub insight_summary: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PlannedSegment {
|
||||||
|
/// Human date for the prompt, e.g. "June 12, 2019". `None` when undated.
|
||||||
|
pub fn date_label(&self) -> Option<String> {
|
||||||
|
let ts = self.date?;
|
||||||
|
let dt = DateTime::from_timestamp(ts, 0)?;
|
||||||
|
Some(dt.format("%B %-d, %Y").to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reel-wide metadata the scripter uses for framing.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct ReelMeta {
|
||||||
|
pub span: MemoriesSpan,
|
||||||
|
pub years: Vec<i32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReelMeta {
|
||||||
|
/// Natural-language phrase for the span, e.g. "on this day".
|
||||||
|
pub fn span_phrase(&self) -> &'static str {
|
||||||
|
match self.span {
|
||||||
|
MemoriesSpan::Day => "on this day",
|
||||||
|
MemoriesSpan::Week => "this week",
|
||||||
|
MemoriesSpan::Month => "this month",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Job registry ------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// In-memory, same shape as the TTS speech-job registry: a reel takes minutes,
|
||||||
|
// too long to hold one HTTP request from a phone. POST /reels returns a job id;
|
||||||
|
// the client polls GET /reels/{id} until the video URL appears. The heavy
|
||||||
|
// artifact (the MP4) lives on disk, not in this map — jobs only carry status +
|
||||||
|
// the output path. State is intentionally not durable across restarts; the
|
||||||
|
// on-disk cache is what makes a repeat request cheap, not the registry.
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum ReelJobStatus {
|
||||||
|
Queued,
|
||||||
|
Running,
|
||||||
|
Done,
|
||||||
|
Error,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReelJobStatus {
|
||||||
|
fn is_terminal(self) -> bool {
|
||||||
|
matches!(self, Self::Done | Self::Error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ReelJob {
|
||||||
|
status: ReelJobStatus,
|
||||||
|
/// Coarse progress label for the client ("scripting", "narrating", …).
|
||||||
|
stage: &'static str,
|
||||||
|
title: Option<String>,
|
||||||
|
output_path: Option<PathBuf>,
|
||||||
|
error: Option<String>,
|
||||||
|
created_at: Instant,
|
||||||
|
finished_at: Option<Instant>,
|
||||||
|
abort: Option<tokio::task::AbortHandle>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finished jobs linger so a client that lost connectivity can still collect
|
||||||
|
/// the result; anything older than MAX_AGE is dropped (aborted first if somehow
|
||||||
|
/// still running). Swept lazily on each create.
|
||||||
|
const REEL_JOB_RESULT_TTL: Duration = Duration::from_secs(30 * 60);
|
||||||
|
const REEL_JOB_MAX_AGE: Duration = Duration::from_secs(60 * 60);
|
||||||
|
|
||||||
|
static REEL_JOBS: LazyLock<StdMutex<HashMap<Uuid, ReelJob>>> =
|
||||||
|
LazyLock::new(|| StdMutex::new(HashMap::new()));
|
||||||
|
|
||||||
|
fn sweep_stale_jobs(jobs: &mut HashMap<Uuid, ReelJob>, now: Instant) {
|
||||||
|
jobs.retain(|_, job| {
|
||||||
|
let result_expired = job
|
||||||
|
.finished_at
|
||||||
|
.is_some_and(|t| now.duration_since(t) >= REEL_JOB_RESULT_TTL);
|
||||||
|
let too_old = now.duration_since(job.created_at) >= REEL_JOB_MAX_AGE;
|
||||||
|
if too_old && let Some(h) = job.abort.take() {
|
||||||
|
h.abort();
|
||||||
|
}
|
||||||
|
!(result_expired || too_old)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn with_job<R>(id: Uuid, f: impl FnOnce(&mut ReelJob) -> R) -> Option<R> {
|
||||||
|
REEL_JOBS.lock().unwrap().get_mut(&id).map(f)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_stage(id: Uuid, stage: &'static str) {
|
||||||
|
with_job(id, |job| {
|
||||||
|
if !job.status.is_terminal() {
|
||||||
|
job.status = ReelJobStatus::Running;
|
||||||
|
job.stage = stage;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Move a job to a terminal state (first terminal write wins).
|
||||||
|
fn finish_job(
|
||||||
|
id: Uuid,
|
||||||
|
status: ReelJobStatus,
|
||||||
|
title: Option<String>,
|
||||||
|
output_path: Option<PathBuf>,
|
||||||
|
error: Option<String>,
|
||||||
|
) {
|
||||||
|
with_job(id, |job| {
|
||||||
|
if job.status.is_terminal() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
job.status = status;
|
||||||
|
job.stage = match status {
|
||||||
|
ReelJobStatus::Done => "done",
|
||||||
|
_ => "error",
|
||||||
|
};
|
||||||
|
job.title = title;
|
||||||
|
job.output_path = output_path;
|
||||||
|
job.error = error;
|
||||||
|
job.finished_at = Some(Instant::now());
|
||||||
|
job.abort = None;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- On-disk cache -----------------------------------------------------------
|
||||||
|
|
||||||
|
/// Render version: bump to invalidate every cached reel after a rendering /
|
||||||
|
/// scripting change that should produce a fresh result.
|
||||||
|
const RENDER_VERSION: u32 = 1;
|
||||||
|
|
||||||
|
/// Cache key over everything that determines *which* media and *how* it's
|
||||||
|
/// voiced — but not the (non-deterministic) narration text. Same inputs → same
|
||||||
|
/// MP4 served instantly. blake3 keeps it filesystem-safe and collision-free.
|
||||||
|
fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str>) -> String {
|
||||||
|
let mut buf = format!(
|
||||||
|
"v{}|{}|voice={}|",
|
||||||
|
RENDER_VERSION,
|
||||||
|
selector.descriptor(),
|
||||||
|
voice.unwrap_or("default")
|
||||||
|
);
|
||||||
|
for m in media {
|
||||||
|
match m {
|
||||||
|
SegmentMedia::Photo {
|
||||||
|
rel_path,
|
||||||
|
library_id,
|
||||||
|
} => buf.push_str(&format!("{library_id}:{rel_path}|")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
blake3::hash(buf.as_bytes()).to_hex().to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn reel_mp4_path(app_state: &AppState, key: &str) -> PathBuf {
|
||||||
|
Path::new(&app_state.reels_path).join(format!("{key}.mp4"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn reel_sidecar_path(app_state: &AppState, key: &str) -> PathBuf {
|
||||||
|
Path::new(&app_state.reels_path).join(format!("{key}.json"))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
struct ReelSidecar {
|
||||||
|
title: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- HTTP types --------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct CreateReelRequest {
|
||||||
|
#[serde(default)]
|
||||||
|
pub span: Option<MemoriesSpan>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub timezone_offset_minutes: Option<i32>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub library: Option<String>,
|
||||||
|
/// Cloned TTS voice for the narration; server default when omitted.
|
||||||
|
#[serde(default)]
|
||||||
|
pub voice: Option<String>,
|
||||||
|
/// Cap on photos in the reel (clamped server-side).
|
||||||
|
#[serde(default)]
|
||||||
|
pub max_segments: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct ReelJobCreatedResponse {
|
||||||
|
pub job_id: String,
|
||||||
|
pub status: ReelJobStatus,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct ReelStatusResponse {
|
||||||
|
pub job_id: String,
|
||||||
|
pub status: ReelJobStatus,
|
||||||
|
pub stage: String,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub title: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub video_url: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub error: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Handlers ----------------------------------------------------------------
|
||||||
|
|
||||||
|
/// POST /reels — start (or instantly serve from cache) a memory reel for the
|
||||||
|
/// requested span. Returns 202 + a job id; the client polls GET /reels/{id}.
|
||||||
|
#[post("/reels")]
|
||||||
|
pub async fn create_reel_handler(
|
||||||
|
http_request: HttpRequest,
|
||||||
|
_claims: Claims,
|
||||||
|
req: web::Json<CreateReelRequest>,
|
||||||
|
app_state: web::Data<AppState>,
|
||||||
|
exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
|
||||||
|
insight_dao: web::Data<Mutex<Box<dyn InsightDao>>>,
|
||||||
|
) -> impl Responder {
|
||||||
|
let span_context = extract_context_from_request(&http_request);
|
||||||
|
|
||||||
|
if app_state.llamacpp.is_none() {
|
||||||
|
return HttpResponse::ServiceUnavailable().json(json!({
|
||||||
|
"error": "Reel narration needs the LLM/TTS backend (set LLAMA_SWAP_URL)"
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let span = req.span.unwrap_or(MemoriesSpan::Day);
|
||||||
|
let max_segments = req.max_segments.unwrap_or(selector::DEFAULT_MAX_SEGMENTS);
|
||||||
|
let selector = ReelSelector::Memories {
|
||||||
|
span,
|
||||||
|
tz_offset_minutes: req.timezone_offset_minutes.unwrap_or(0),
|
||||||
|
library: req.library.clone(),
|
||||||
|
max_segments,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Cheap pass: resolve the media set for the cache key and the emptiness
|
||||||
|
// check. Insight enrichment + scripting happen in the background job.
|
||||||
|
let (planned, meta) = match selector::resolve(&app_state, &exif_dao, &span_context, &selector) {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(msg) => return HttpResponse::BadRequest().body(msg),
|
||||||
|
};
|
||||||
|
if planned.is_empty() {
|
||||||
|
return HttpResponse::UnprocessableEntity().json(json!({
|
||||||
|
"error": "No photo memories found for this span"
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let media: Vec<SegmentMedia> = planned.iter().map(|p| p.media.clone()).collect();
|
||||||
|
let voice = req.voice.clone().filter(|s| !s.is_empty());
|
||||||
|
let key = cache_key(&selector, &media, voice.as_deref());
|
||||||
|
|
||||||
|
let job_id = Uuid::new_v4();
|
||||||
|
|
||||||
|
// Cache hit: register an already-Done job pointing at the existing MP4 so
|
||||||
|
// the client's first poll returns the video URL immediately.
|
||||||
|
let mp4 = reel_mp4_path(&app_state, &key);
|
||||||
|
if mp4.exists() {
|
||||||
|
let title = std::fs::read(reel_sidecar_path(&app_state, &key))
|
||||||
|
.ok()
|
||||||
|
.and_then(|b| serde_json::from_slice::<ReelSidecar>(&b).ok())
|
||||||
|
.map(|s| s.title);
|
||||||
|
let mut jobs = REEL_JOBS.lock().unwrap();
|
||||||
|
sweep_stale_jobs(&mut jobs, Instant::now());
|
||||||
|
jobs.insert(
|
||||||
|
job_id,
|
||||||
|
ReelJob {
|
||||||
|
status: ReelJobStatus::Done,
|
||||||
|
stage: "done",
|
||||||
|
title,
|
||||||
|
output_path: Some(mp4),
|
||||||
|
error: None,
|
||||||
|
created_at: Instant::now(),
|
||||||
|
finished_at: Some(Instant::now()),
|
||||||
|
abort: None,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
return HttpResponse::Accepted().json(ReelJobCreatedResponse {
|
||||||
|
job_id: job_id.to_string(),
|
||||||
|
status: ReelJobStatus::Done,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
let mut jobs = REEL_JOBS.lock().unwrap();
|
||||||
|
sweep_stale_jobs(&mut jobs, Instant::now());
|
||||||
|
jobs.insert(
|
||||||
|
job_id,
|
||||||
|
ReelJob {
|
||||||
|
status: ReelJobStatus::Queued,
|
||||||
|
stage: "queued",
|
||||||
|
title: None,
|
||||||
|
output_path: None,
|
||||||
|
error: None,
|
||||||
|
created_at: Instant::now(),
|
||||||
|
finished_at: None,
|
||||||
|
abort: None,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let state = app_state.clone();
|
||||||
|
let insight_dao = insight_dao.clone();
|
||||||
|
let handle = tokio::spawn(async move {
|
||||||
|
match run_reel_job(&state, &insight_dao, job_id, planned, meta, voice, &key).await {
|
||||||
|
Ok((title, path)) => {
|
||||||
|
finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None)
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("reel job {job_id} failed: {e:?}");
|
||||||
|
finish_job(
|
||||||
|
job_id,
|
||||||
|
ReelJobStatus::Error,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
Some(format!("{e}")),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
with_job(job_id, |job| job.abort = Some(handle.abort_handle()));
|
||||||
|
|
||||||
|
HttpResponse::Accepted().json(ReelJobCreatedResponse {
|
||||||
|
job_id: job_id.to_string(),
|
||||||
|
status: ReelJobStatus::Queued,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// GET /reels/{id} — poll a reel job. Done jobs carry a `video_url`.
|
||||||
|
#[get("/reels/{id}")]
|
||||||
|
pub async fn reel_status_handler(_claims: Claims, path: web::Path<String>) -> impl Responder {
|
||||||
|
let id_str = path.into_inner();
|
||||||
|
let Ok(id) = Uuid::parse_str(&id_str) else {
|
||||||
|
return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" }));
|
||||||
|
};
|
||||||
|
let resp = with_job(id, |job| ReelStatusResponse {
|
||||||
|
job_id: id_str.clone(),
|
||||||
|
status: job.status,
|
||||||
|
stage: job.stage.to_string(),
|
||||||
|
title: job.title.clone(),
|
||||||
|
video_url: matches!(job.status, ReelJobStatus::Done)
|
||||||
|
.then(|| format!("/reels/{id_str}/video")),
|
||||||
|
error: job.error.clone(),
|
||||||
|
});
|
||||||
|
match resp {
|
||||||
|
Some(r) => HttpResponse::Ok().json(r),
|
||||||
|
None => HttpResponse::NotFound().json(json!({ "error": "job not found or expired" })),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// GET /reels/{id}/video — stream the finished MP4 (supports range requests via
|
||||||
|
/// NamedFile, so the mobile player can seek).
|
||||||
|
#[get("/reels/{id}/video")]
|
||||||
|
pub async fn reel_video_handler(
|
||||||
|
_claims: Claims,
|
||||||
|
request: HttpRequest,
|
||||||
|
path: web::Path<String>,
|
||||||
|
) -> impl Responder {
|
||||||
|
let id_str = path.into_inner();
|
||||||
|
let Ok(id) = Uuid::parse_str(&id_str) else {
|
||||||
|
return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" }));
|
||||||
|
};
|
||||||
|
let output = with_job(id, |job| job.output_path.clone()).flatten();
|
||||||
|
let Some(path) = output else {
|
||||||
|
return HttpResponse::NotFound().json(json!({ "error": "reel not ready" }));
|
||||||
|
};
|
||||||
|
match NamedFile::open(&path) {
|
||||||
|
Ok(file) => file.into_response(&request),
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("opening reel mp4 {path:?} failed: {e:?}");
|
||||||
|
HttpResponse::NotFound().json(json!({ "error": "reel file missing" }))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Pipeline ----------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Run the full reel pipeline: enrich → script → narrate → render → concat,
|
||||||
|
/// then publish the MP4 into the cache. Returns (title, mp4_path).
|
||||||
|
async fn run_reel_job(
|
||||||
|
app_state: &AppState,
|
||||||
|
insight_dao: &Mutex<Box<dyn InsightDao>>,
|
||||||
|
job_id: Uuid,
|
||||||
|
mut planned: Vec<PlannedSegment>,
|
||||||
|
meta: ReelMeta,
|
||||||
|
voice: Option<String>,
|
||||||
|
key: &str,
|
||||||
|
) -> anyhow::Result<(String, PathBuf)> {
|
||||||
|
use anyhow::{Context, anyhow};
|
||||||
|
|
||||||
|
let client = app_state
|
||||||
|
.llamacpp
|
||||||
|
.as_ref()
|
||||||
|
.ok_or_else(|| anyhow!("TTS/LLM backend not configured"))?
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
// 1. Enrich with cached insights, then script (one LLM call).
|
||||||
|
set_stage(job_id, "scripting");
|
||||||
|
let span_context = opentelemetry::Context::new();
|
||||||
|
selector::enrich(insight_dao, &span_context, &mut planned);
|
||||||
|
let script = script::generate_script(&client, &meta, &planned).await?;
|
||||||
|
|
||||||
|
// 2. Narrate each line to speech and 3. render each photo segment. A
|
||||||
|
// segment whose audio or render fails is skipped (logged) rather than
|
||||||
|
// sinking the whole reel — handles an odd HEIC/corrupt file gracefully.
|
||||||
|
set_stage(job_id, "narrating");
|
||||||
|
let work = tempfile::tempdir().context("creating reel work dir")?;
|
||||||
|
let nvenc = render::is_nvenc_available().await;
|
||||||
|
let opts = render::SegmentOpts {
|
||||||
|
nvenc,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut segment_files: Vec<String> = Vec::new();
|
||||||
|
for (i, (seg, line)) in planned.iter().zip(script.lines.iter()).enumerate() {
|
||||||
|
let image_path = match resolve_image_path(app_state, &seg.media) {
|
||||||
|
Some(p) => p,
|
||||||
|
None => {
|
||||||
|
log::warn!("reel {job_id}: skipping segment {i}, image path unresolved");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let audio_bytes =
|
||||||
|
match crate::ai::tts::synthesize_serialized(&client, line, voice.as_deref(), "wav")
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(b) => b,
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let audio_path = work.path().join(format!("narration_{i:03}.wav"));
|
||||||
|
if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await {
|
||||||
|
log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let narration_secs =
|
||||||
|
crate::video::ffmpeg::get_duration_seconds(&audio_path.to_string_lossy())
|
||||||
|
.await
|
||||||
|
.ok()
|
||||||
|
.flatten()
|
||||||
|
.unwrap_or(render::MIN_SEGMENT_SECONDS);
|
||||||
|
let duration = render::segment_duration(narration_secs);
|
||||||
|
|
||||||
|
set_stage(job_id, "rendering");
|
||||||
|
let seg_out = work.path().join(format!("seg_{i:03}.mp4"));
|
||||||
|
if let Err(e) =
|
||||||
|
render::render_segment(&image_path, &audio_path, &seg_out, duration, &opts).await
|
||||||
|
{
|
||||||
|
log::warn!("reel {job_id}: skipping segment {i}, render failed: {e}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
segment_files.push(seg_out.to_string_lossy().to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
if segment_files.is_empty() {
|
||||||
|
return Err(anyhow!("no segments rendered successfully"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Concat into the cache. Write to a temp name in the reels dir, then
|
||||||
|
// rename atomically (same filesystem) so a reader never sees a partial.
|
||||||
|
std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?;
|
||||||
|
let final_path = reel_mp4_path(app_state, key);
|
||||||
|
let tmp_path = final_path.with_extension("mp4.tmp");
|
||||||
|
render::concat_segments(&segment_files, &tmp_path).await?;
|
||||||
|
std::fs::rename(&tmp_path, &final_path).context("publishing reel mp4")?;
|
||||||
|
|
||||||
|
// Sidecar carries the title so a future cache hit can return it without
|
||||||
|
// re-running the pipeline.
|
||||||
|
let sidecar = serde_json::to_vec(&ReelSidecar {
|
||||||
|
title: script.title.clone(),
|
||||||
|
})
|
||||||
|
.context("serializing reel sidecar")?;
|
||||||
|
let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar);
|
||||||
|
|
||||||
|
Ok((script.title, final_path))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve a photo segment's library-relative path to a validated absolute
|
||||||
|
/// path under its library root.
|
||||||
|
fn resolve_image_path(app_state: &AppState, media: &SegmentMedia) -> Option<PathBuf> {
|
||||||
|
let SegmentMedia::Photo {
|
||||||
|
rel_path,
|
||||||
|
library_id,
|
||||||
|
} = media;
|
||||||
|
let lib = app_state.library_by_id(*library_id)?;
|
||||||
|
crate::files::is_valid_full_path(&lib.root_path, rel_path, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn photo(p: &str, lib: i32) -> SegmentMedia {
|
||||||
|
SegmentMedia::Photo {
|
||||||
|
rel_path: p.to_string(),
|
||||||
|
library_id: lib,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn day_selector() -> ReelSelector {
|
||||||
|
ReelSelector::Memories {
|
||||||
|
span: MemoriesSpan::Day,
|
||||||
|
tz_offset_minutes: 0,
|
||||||
|
library: None,
|
||||||
|
max_segments: 24,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn cache_key_is_stable_for_same_inputs() {
|
||||||
|
let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)];
|
||||||
|
let k1 = cache_key(&day_selector(), &media, Some("grandma"));
|
||||||
|
let k2 = cache_key(&day_selector(), &media, Some("grandma"));
|
||||||
|
assert_eq!(k1, k2);
|
||||||
|
// 64-hex blake3.
|
||||||
|
assert_eq!(k1.len(), 64);
|
||||||
|
assert!(k1.chars().all(|c| c.is_ascii_hexdigit()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn cache_key_changes_with_media_order_voice_and_selector() {
|
||||||
|
let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)];
|
||||||
|
let reordered = vec![photo("b.jpg", 1), photo("a.jpg", 1)];
|
||||||
|
let base = cache_key(&day_selector(), &media, Some("grandma"));
|
||||||
|
// Order matters (the reel sequence differs).
|
||||||
|
assert_ne!(
|
||||||
|
base,
|
||||||
|
cache_key(&day_selector(), &reordered, Some("grandma"))
|
||||||
|
);
|
||||||
|
// Voice matters.
|
||||||
|
assert_ne!(base, cache_key(&day_selector(), &media, Some("dad")));
|
||||||
|
assert_ne!(base, cache_key(&day_selector(), &media, None));
|
||||||
|
// Span matters.
|
||||||
|
let week = ReelSelector::Memories {
|
||||||
|
span: MemoriesSpan::Week,
|
||||||
|
tz_offset_minutes: 0,
|
||||||
|
library: None,
|
||||||
|
max_segments: 24,
|
||||||
|
};
|
||||||
|
assert_ne!(base, cache_key(&week, &media, Some("grandma")));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn span_phrase_maps_each_span() {
|
||||||
|
let mk = |span| ReelMeta {
|
||||||
|
span,
|
||||||
|
years: vec![],
|
||||||
|
};
|
||||||
|
assert_eq!(mk(MemoriesSpan::Day).span_phrase(), "on this day");
|
||||||
|
assert_eq!(mk(MemoriesSpan::Week).span_phrase(), "this week");
|
||||||
|
assert_eq!(mk(MemoriesSpan::Month).span_phrase(), "this month");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn date_label_formats_or_none() {
|
||||||
|
let seg = PlannedSegment {
|
||||||
|
media: photo("a.jpg", 1),
|
||||||
|
date: Some(1_560_384_000), // 2019-06-13 UTC
|
||||||
|
insight_title: None,
|
||||||
|
insight_summary: None,
|
||||||
|
};
|
||||||
|
assert!(seg.date_label().unwrap().contains("2019"));
|
||||||
|
|
||||||
|
let undated = PlannedSegment {
|
||||||
|
media: photo("a.jpg", 1),
|
||||||
|
date: None,
|
||||||
|
insight_title: None,
|
||||||
|
insight_summary: None,
|
||||||
|
};
|
||||||
|
assert_eq!(undated.date_label(), None);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,338 @@
|
|||||||
|
//! ffmpeg assembly for memory reels.
|
||||||
|
//!
|
||||||
|
//! Two-stage, per-segment design: each segment is rendered to its own
|
||||||
|
//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments
|
||||||
|
//! are joined with the concat demuxer (stream copy, no re-encode). Rendering
|
||||||
|
//! per segment — rather than one monster filtergraph — keeps each ffmpeg
|
||||||
|
//! invocation simple to reason about, parallelizes naturally, and means a
|
||||||
|
//! video-clip segment type (phase 2) slots in as just a different per-segment
|
||||||
|
//! builder without touching the concat stage.
|
||||||
|
//!
|
||||||
|
//! The arg builders are pure (`Vec<String>` out) so the exact ffmpeg command
|
||||||
|
//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure.
|
||||||
|
|
||||||
|
use anyhow::{Context, Result, bail};
|
||||||
|
use std::path::Path;
|
||||||
|
use tokio::process::Command;
|
||||||
|
|
||||||
|
/// Re-exported so the reel pipeline reaches NVENC detection through this module
|
||||||
|
/// rather than depending on `video::ffmpeg` directly.
|
||||||
|
pub use crate::video::ffmpeg::is_nvenc_available;
|
||||||
|
|
||||||
|
/// Reel canvas. Landscape matches the majority of camera photos; portrait
|
||||||
|
/// shots are letterboxed by the `pad` in [`segment_filter`] rather than
|
||||||
|
/// cropped, so faces never get cut off.
|
||||||
|
pub const REEL_WIDTH: u32 = 1920;
|
||||||
|
pub const REEL_HEIGHT: u32 = 1080;
|
||||||
|
pub const REEL_FPS: u32 = 30;
|
||||||
|
|
||||||
|
/// A still's screen time is its narration length plus a short breath, with a
|
||||||
|
/// floor so a terse line still lingers. No ceiling: the segment always covers
|
||||||
|
/// the full narration so speech is never truncated — the scripter is asked to
|
||||||
|
/// keep lines short instead.
|
||||||
|
pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
|
||||||
|
const NARRATION_TAIL_SECONDS: f64 = 0.6;
|
||||||
|
|
||||||
|
/// Screen time for a photo segment given its narration audio length.
|
||||||
|
pub fn segment_duration(narration_secs: f64) -> f64 {
|
||||||
|
let d = narration_secs + NARRATION_TAIL_SECONDS;
|
||||||
|
if d.is_finite() && d > MIN_SEGMENT_SECONDS {
|
||||||
|
d
|
||||||
|
} else {
|
||||||
|
MIN_SEGMENT_SECONDS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Options controlling per-segment rendering. `ken_burns` adds a slow zoom for
|
||||||
|
/// motion; it's defaulted off until the effect is eyeballed on the GPU box,
|
||||||
|
/// since a wrong zoompan expression reads as jitter and can't be verified here.
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub struct SegmentOpts {
|
||||||
|
pub width: u32,
|
||||||
|
pub height: u32,
|
||||||
|
pub fps: u32,
|
||||||
|
pub nvenc: bool,
|
||||||
|
pub ken_burns: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for SegmentOpts {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
width: REEL_WIDTH,
|
||||||
|
height: REEL_HEIGHT,
|
||||||
|
fps: REEL_FPS,
|
||||||
|
nvenc: false,
|
||||||
|
ken_burns: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Video filter for a photo segment: fit the image inside the canvas
|
||||||
|
/// (preserving aspect, padding the rest), normalize SAR/fps/pixel format, and
|
||||||
|
/// optionally apply a gentle Ken Burns zoom.
|
||||||
|
pub fn segment_filter(opts: &SegmentOpts, duration: f64) -> String {
|
||||||
|
let (w, h, fps) = (opts.width, opts.height, opts.fps);
|
||||||
|
if opts.ken_burns {
|
||||||
|
// Upscale first so zoompan samples from a larger frame (avoids
|
||||||
|
// shimmer), drift the zoom from 1.0→~1.12 across the segment, hold the
|
||||||
|
// crop centered, then settle to the canvas.
|
||||||
|
let frames = (duration * fps as f64).round().max(1.0) as u64;
|
||||||
|
format!(
|
||||||
|
"scale={w}*2:{h}*2:force_original_aspect_ratio=increase,\
|
||||||
|
crop={w}*2:{h}*2,\
|
||||||
|
zoompan=z='min(zoom+0.0009,1.12)':d={frames}:\
|
||||||
|
x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={w}x{h}:fps={fps},\
|
||||||
|
setsar=1,format=yuv420p"
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"scale={w}:{h}:force_original_aspect_ratio=decrease,\
|
||||||
|
pad={w}:{h}:(ow-iw)/2:(oh-ih)/2,\
|
||||||
|
setsar=1,fps={fps},format=yuv420p"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn video_encoder_args(nvenc: bool) -> Vec<String> {
|
||||||
|
if nvenc {
|
||||||
|
// p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path.
|
||||||
|
[
|
||||||
|
"-c:v",
|
||||||
|
"h264_nvenc",
|
||||||
|
"-preset",
|
||||||
|
"p4",
|
||||||
|
"-cq",
|
||||||
|
"23",
|
||||||
|
"-pix_fmt",
|
||||||
|
"yuv420p",
|
||||||
|
]
|
||||||
|
} else {
|
||||||
|
[
|
||||||
|
"-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p",
|
||||||
|
]
|
||||||
|
}
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the ffmpeg args that render one photo segment: a still looped for
|
||||||
|
/// `duration` seconds with its narration muxed in. The narration is padded
|
||||||
|
/// with trailing silence (`apad`) so short lines don't end the segment early;
|
||||||
|
/// `-t` bounds both streams to the segment length.
|
||||||
|
pub fn build_segment_args(
|
||||||
|
image_path: &str,
|
||||||
|
audio_path: &str,
|
||||||
|
out_path: &str,
|
||||||
|
duration: f64,
|
||||||
|
opts: &SegmentOpts,
|
||||||
|
) -> Vec<String> {
|
||||||
|
let mut args: Vec<String> = vec!["-y".into()];
|
||||||
|
if opts.nvenc {
|
||||||
|
args.extend(["-hwaccel".into(), "cuda".into()]);
|
||||||
|
}
|
||||||
|
args.extend([
|
||||||
|
"-loop".into(),
|
||||||
|
"1".into(),
|
||||||
|
"-i".into(),
|
||||||
|
image_path.into(),
|
||||||
|
"-i".into(),
|
||||||
|
audio_path.into(),
|
||||||
|
"-filter_complex".into(),
|
||||||
|
format!("[0:v]{}[v];[1:a]apad[a]", segment_filter(opts, duration)),
|
||||||
|
"-map".into(),
|
||||||
|
"[v]".into(),
|
||||||
|
"-map".into(),
|
||||||
|
"[a]".into(),
|
||||||
|
"-t".into(),
|
||||||
|
format!("{duration:.3}"),
|
||||||
|
]);
|
||||||
|
args.extend(video_encoder_args(opts.nvenc));
|
||||||
|
args.extend(
|
||||||
|
["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string()),
|
||||||
|
);
|
||||||
|
args.push(out_path.into());
|
||||||
|
args
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the concat-demuxer args that join rendered segments losslessly.
|
||||||
|
/// `+faststart` moves the moov atom up front so the reel streams immediately
|
||||||
|
/// on the mobile client.
|
||||||
|
pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec<String> {
|
||||||
|
[
|
||||||
|
"-y",
|
||||||
|
"-f",
|
||||||
|
"concat",
|
||||||
|
"-safe",
|
||||||
|
"0",
|
||||||
|
"-i",
|
||||||
|
list_path,
|
||||||
|
"-c",
|
||||||
|
"copy",
|
||||||
|
"-movflags",
|
||||||
|
"+faststart",
|
||||||
|
out_path,
|
||||||
|
]
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render the concat list file body. Each line points the demuxer at one
|
||||||
|
/// segment; single quotes in paths are escaped per ffmpeg's concat syntax.
|
||||||
|
pub fn build_concat_list(segment_paths: &[String]) -> String {
|
||||||
|
let mut out = String::new();
|
||||||
|
for p in segment_paths {
|
||||||
|
let escaped = p.replace('\'', r"'\''");
|
||||||
|
out.push_str(&format!("file '{escaped}'\n"));
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
|
||||||
|
let output = Command::new("ffmpeg")
|
||||||
|
.args(args)
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("spawning ffmpeg for {what}"))?;
|
||||||
|
if !output.status.success() {
|
||||||
|
bail!(
|
||||||
|
"ffmpeg {what} failed: {}",
|
||||||
|
String::from_utf8_lossy(&output.stderr)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render one photo segment to `out_path`.
|
||||||
|
pub async fn render_segment(
|
||||||
|
image_path: &Path,
|
||||||
|
audio_path: &Path,
|
||||||
|
out_path: &Path,
|
||||||
|
duration: f64,
|
||||||
|
opts: &SegmentOpts,
|
||||||
|
) -> Result<()> {
|
||||||
|
let args = build_segment_args(
|
||||||
|
&image_path.to_string_lossy(),
|
||||||
|
&audio_path.to_string_lossy(),
|
||||||
|
&out_path.to_string_lossy(),
|
||||||
|
duration,
|
||||||
|
opts,
|
||||||
|
);
|
||||||
|
run_ffmpeg(&args, "segment render").await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Join rendered segments into the final reel. Writes the concat list into the
|
||||||
|
/// same directory as the output so relative paths and cleanup stay local.
|
||||||
|
pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> {
|
||||||
|
let list_path = out_path.with_extension("concat.txt");
|
||||||
|
let body = build_concat_list(segment_paths);
|
||||||
|
tokio::fs::write(&list_path, body)
|
||||||
|
.await
|
||||||
|
.context("writing concat list")?;
|
||||||
|
let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy());
|
||||||
|
let result = run_ffmpeg(&args, "concat").await;
|
||||||
|
let _ = tokio::fs::remove_file(&list_path).await;
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn segment_duration_floors_short_lines() {
|
||||||
|
// A one-word narration still lingers at the floor.
|
||||||
|
assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS);
|
||||||
|
assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn segment_duration_covers_full_narration_plus_tail() {
|
||||||
|
// No ceiling: a long line gets its full length so speech isn't cut.
|
||||||
|
assert!((segment_duration(5.0) - 5.6).abs() < 1e-9);
|
||||||
|
assert!((segment_duration(20.0) - 20.6).abs() < 1e-9);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn segment_duration_rejects_nonfinite() {
|
||||||
|
assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS);
|
||||||
|
assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn static_filter_fits_and_pads_without_cropping() {
|
||||||
|
let f = segment_filter(&SegmentOpts::default(), 4.0);
|
||||||
|
assert!(f.contains("force_original_aspect_ratio=decrease"));
|
||||||
|
assert!(f.contains("pad=1920:1080"));
|
||||||
|
assert!(f.contains("format=yuv420p"));
|
||||||
|
// No zoompan when ken_burns is off.
|
||||||
|
assert!(!f.contains("zoompan"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn ken_burns_filter_uses_duration_scaled_frame_count() {
|
||||||
|
let opts = SegmentOpts {
|
||||||
|
ken_burns: true,
|
||||||
|
..SegmentOpts::default()
|
||||||
|
};
|
||||||
|
// 4s * 30fps = 120 frames in the zoompan d= term.
|
||||||
|
let f = segment_filter(&opts, 4.0);
|
||||||
|
assert!(f.contains("zoompan"));
|
||||||
|
assert!(f.contains("d=120:"));
|
||||||
|
assert!(f.contains("s=1920x1080"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn segment_args_loop_still_and_bound_with_t() {
|
||||||
|
let args = build_segment_args(
|
||||||
|
"/img.jpg",
|
||||||
|
"/a.wav",
|
||||||
|
"/out.mp4",
|
||||||
|
4.0,
|
||||||
|
&SegmentOpts::default(),
|
||||||
|
);
|
||||||
|
let joined = args.join(" ");
|
||||||
|
assert!(joined.contains("-loop 1 -i /img.jpg"));
|
||||||
|
assert!(joined.contains("-i /a.wav"));
|
||||||
|
assert!(joined.contains("apad"));
|
||||||
|
assert!(joined.contains("-t 4.000"));
|
||||||
|
assert!(joined.contains("libx264"));
|
||||||
|
assert!(joined.ends_with("/out.mp4"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn segment_args_use_nvenc_and_cuda_when_enabled() {
|
||||||
|
let opts = SegmentOpts {
|
||||||
|
nvenc: true,
|
||||||
|
..SegmentOpts::default()
|
||||||
|
};
|
||||||
|
let args = build_segment_args("/img.jpg", "/a.wav", "/out.mp4", 3.0, &opts);
|
||||||
|
let joined = args.join(" ");
|
||||||
|
assert!(joined.contains("-hwaccel cuda"));
|
||||||
|
assert!(joined.contains("h264_nvenc"));
|
||||||
|
assert!(!joined.contains("libx264"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn concat_args_stream_copy_with_faststart() {
|
||||||
|
let args = build_concat_args("/tmp/list.txt", "/out.mp4");
|
||||||
|
let joined = args.join(" ");
|
||||||
|
assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt"));
|
||||||
|
assert!(joined.contains("-c copy"));
|
||||||
|
assert!(joined.contains("+faststart"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn concat_list_escapes_single_quotes() {
|
||||||
|
let body = build_concat_list(&[
|
||||||
|
"/tmp/seg_000.mp4".into(),
|
||||||
|
"/tmp/own's dir/seg_001.mp4".into(),
|
||||||
|
]);
|
||||||
|
assert!(body.contains("file '/tmp/seg_000.mp4'\n"));
|
||||||
|
// The apostrophe is closed-escaped-reopened per ffmpeg concat syntax.
|
||||||
|
assert!(body.contains(r"own'\''s"));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,289 @@
|
|||||||
|
//! Narration scripting for memory reels.
|
||||||
|
//!
|
||||||
|
//! One LLM call turns the planned segments (each carrying its date and, where
|
||||||
|
//! available, its cached insight) into a short first-person narration line per
|
||||||
|
//! photo plus a title for the reel. We reuse the cached insight summary as the
|
||||||
|
//! richest per-photo signal rather than re-running vision at reel time — that
|
||||||
|
//! keeps reel generation off the GPU's vision slot entirely.
|
||||||
|
//!
|
||||||
|
//! The prompt builder and response parser are pure so the contract is
|
||||||
|
//! unit-testable; `generate_script` wires them to the LLM client.
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use super::{PlannedSegment, ReelMeta};
|
||||||
|
use crate::ai::llamacpp::LlamaCppClient;
|
||||||
|
use crate::ai::llm_client::LlmClient;
|
||||||
|
|
||||||
|
/// The narration for a whole reel: a title and one line per segment, in order.
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub struct ReelScript {
|
||||||
|
pub title: String,
|
||||||
|
pub lines: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
|
||||||
|
slideshow of someone's own photos set to a spoken voiceover. Write warm, \
|
||||||
|
specific, first-person narration as if the person is gently looking back on \
|
||||||
|
their own memories. Be concrete and grounded in the details given; never \
|
||||||
|
invent names, places, or events that aren't supported. Keep each line to one \
|
||||||
|
or two short sentences that can be read aloud in a few seconds. Avoid generic \
|
||||||
|
filler like \"what a wonderful day\" — if you have little to go on, simply \
|
||||||
|
describe the moment plainly.";
|
||||||
|
|
||||||
|
/// Build the (system, user) prompt pair for the scripter. The user message
|
||||||
|
/// describes each segment in order and asks for strict JSON back.
|
||||||
|
pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (String, String) {
|
||||||
|
let mut user = String::new();
|
||||||
|
user.push_str(&format!(
|
||||||
|
"These are {} photos surfaced as memories {}.\n\n",
|
||||||
|
planned.len(),
|
||||||
|
meta.span_phrase()
|
||||||
|
));
|
||||||
|
if !meta.years.is_empty() {
|
||||||
|
let years: Vec<String> = meta.years.iter().map(|y| y.to_string()).collect();
|
||||||
|
user.push_str(&format!("They span the years: {}.\n\n", years.join(", ")));
|
||||||
|
}
|
||||||
|
user.push_str("Photos, in the order they will appear:\n");
|
||||||
|
for (i, seg) in planned.iter().enumerate() {
|
||||||
|
user.push_str(&format!("\n[{}]", i + 1));
|
||||||
|
if let Some(date) = seg.date_label() {
|
||||||
|
user.push_str(&format!(" {date}"));
|
||||||
|
}
|
||||||
|
user.push('\n');
|
||||||
|
match (&seg.insight_title, &seg.insight_summary) {
|
||||||
|
(Some(t), Some(s)) if !s.trim().is_empty() => {
|
||||||
|
user.push_str(&format!(" Known context: {t} — {s}\n"));
|
||||||
|
}
|
||||||
|
(Some(t), _) => user.push_str(&format!(" Known context: {t}\n")),
|
||||||
|
(_, Some(s)) if !s.trim().is_empty() => {
|
||||||
|
user.push_str(&format!(" Known context: {s}\n"));
|
||||||
|
}
|
||||||
|
_ => user.push_str(" (no extra context — narrate plainly from the date)\n"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
user.push_str(&format!(
|
||||||
|
"\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
|
||||||
|
{{\"title\": \"<short reel title>\", \"segments\": [\"<line for photo 1>\", \
|
||||||
|
\"<line for photo 2>\", ... ]}}\n\
|
||||||
|
The \"segments\" array MUST have exactly {} items, one per photo in order.",
|
||||||
|
planned.len()
|
||||||
|
));
|
||||||
|
(SYSTEM_PROMPT.to_string(), user)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the model's response into a script with exactly `n` lines. Tolerant of
|
||||||
|
/// code fences and surrounding prose, and of both `segments: [".."]` and
|
||||||
|
/// `segments: [{"narration": ".."}]` shapes. Missing/extra lines are padded or
|
||||||
|
/// truncated so the caller always gets `n` aligned to the segments.
|
||||||
|
pub fn parse_script_response(raw: &str, n: usize) -> ReelScript {
|
||||||
|
let fallback_line = "A moment worth remembering.";
|
||||||
|
let value = extract_json_object(raw);
|
||||||
|
|
||||||
|
let title = value
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|v| v.get("title"))
|
||||||
|
.and_then(|t| t.as_str())
|
||||||
|
.map(clean_text)
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
.unwrap_or_else(|| "Memories".to_string());
|
||||||
|
|
||||||
|
let mut lines: Vec<String> = value
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|v| v.get("segments"))
|
||||||
|
.and_then(|s| s.as_array())
|
||||||
|
.map(|arr| {
|
||||||
|
arr.iter()
|
||||||
|
.map(|item| {
|
||||||
|
let text = item
|
||||||
|
.as_str()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.or_else(|| {
|
||||||
|
item.get("narration")
|
||||||
|
.and_then(|n| n.as_str())
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
clean_text(&text)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// Align to exactly n: drop extras, pad shortfalls with a neutral line so
|
||||||
|
// every photo still gets spoken audio.
|
||||||
|
lines.truncate(n);
|
||||||
|
while lines.len() < n {
|
||||||
|
lines.push(fallback_line.to_string());
|
||||||
|
}
|
||||||
|
for line in lines.iter_mut() {
|
||||||
|
if line.is_empty() {
|
||||||
|
*line = fallback_line.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ReelScript { title, lines }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pull the first balanced top-level JSON object out of a possibly-noisy model
|
||||||
|
/// response (code fences, leading prose). Returns None if nothing parses.
|
||||||
|
fn extract_json_object(raw: &str) -> Option<serde_json::Value> {
|
||||||
|
// Fast path: the whole thing is valid JSON.
|
||||||
|
if let Ok(v) = serde_json::from_str::<serde_json::Value>(raw.trim()) {
|
||||||
|
return Some(v);
|
||||||
|
}
|
||||||
|
// Otherwise scan for the first '{' ... matching '}' span, ignoring braces
|
||||||
|
// inside strings.
|
||||||
|
let bytes = raw.as_bytes();
|
||||||
|
let start = raw.find('{')?;
|
||||||
|
let mut depth = 0i32;
|
||||||
|
let mut in_str = false;
|
||||||
|
let mut escaped = false;
|
||||||
|
for i in start..bytes.len() {
|
||||||
|
let c = bytes[i] as char;
|
||||||
|
if in_str {
|
||||||
|
if escaped {
|
||||||
|
escaped = false;
|
||||||
|
} else if c == '\\' {
|
||||||
|
escaped = true;
|
||||||
|
} else if c == '"' {
|
||||||
|
in_str = false;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match c {
|
||||||
|
'"' => in_str = true,
|
||||||
|
'{' => depth += 1,
|
||||||
|
'}' => {
|
||||||
|
depth -= 1;
|
||||||
|
if depth == 0 {
|
||||||
|
return serde_json::from_str(&raw[start..=i]).ok();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collapse whitespace and strip stray markdown/quote decorations a model
|
||||||
|
/// sometimes leaves around a line.
|
||||||
|
fn clean_text(s: &str) -> String {
|
||||||
|
let trimmed = s.trim().trim_matches('"').trim();
|
||||||
|
trimmed.split_whitespace().collect::<Vec<_>>().join(" ")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate the reel script via the LLM. Text-only (no images) — the per-photo
|
||||||
|
/// context comes from cached insights. The call takes the GPU read lease
|
||||||
|
/// internally (see `LlamaCppClient::generate`).
|
||||||
|
pub async fn generate_script(
|
||||||
|
client: &Arc<LlamaCppClient>,
|
||||||
|
meta: &ReelMeta,
|
||||||
|
planned: &[PlannedSegment],
|
||||||
|
) -> Result<ReelScript> {
|
||||||
|
let (system, user) = build_script_messages(meta, planned);
|
||||||
|
let raw = client
|
||||||
|
.generate(&user, Some(&system), None)
|
||||||
|
.await
|
||||||
|
.context("LLM script generation failed")?;
|
||||||
|
Ok(parse_script_response(&raw, planned.len()))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::memories::MemoriesSpan;
|
||||||
|
|
||||||
|
fn meta() -> ReelMeta {
|
||||||
|
ReelMeta {
|
||||||
|
span: MemoriesSpan::Day,
|
||||||
|
years: vec![2019, 2021],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn planned(n: usize) -> Vec<PlannedSegment> {
|
||||||
|
(0..n)
|
||||||
|
.map(|i| PlannedSegment {
|
||||||
|
media: super::super::SegmentMedia::Photo {
|
||||||
|
rel_path: format!("p{i}.jpg"),
|
||||||
|
library_id: 1,
|
||||||
|
},
|
||||||
|
date: Some(1_560_000_000 + i as i64 * 86_400),
|
||||||
|
insight_title: None,
|
||||||
|
insight_summary: None,
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn prompt_states_exact_segment_count_and_span() {
|
||||||
|
let (sys, user) = build_script_messages(&meta(), &planned(3));
|
||||||
|
assert!(sys.contains("memory reel"));
|
||||||
|
assert!(user.contains("3 photos"));
|
||||||
|
assert!(user.contains("on this day"));
|
||||||
|
assert!(user.contains("exactly 3 items"));
|
||||||
|
// Each photo gets an indexed entry.
|
||||||
|
assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn prompt_includes_insight_context_when_present() {
|
||||||
|
let mut p = planned(1);
|
||||||
|
p[0].insight_title = Some("Lake house weekend".into());
|
||||||
|
p[0].insight_summary = Some("Swimming with the dogs.".into());
|
||||||
|
let (_sys, user) = build_script_messages(&meta(), &p);
|
||||||
|
assert!(user.contains("Lake house weekend — Swimming with the dogs."));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_plain_json_object() {
|
||||||
|
let raw = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#;
|
||||||
|
let script = parse_script_response(raw, 2);
|
||||||
|
assert_eq!(script.title, "Summer Days");
|
||||||
|
assert_eq!(script.lines, vec!["First line.", "Second line."]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_tolerates_code_fences_and_prose() {
|
||||||
|
let raw = "Sure! Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!";
|
||||||
|
let script = parse_script_response(raw, 2);
|
||||||
|
assert_eq!(script.title, "Trip");
|
||||||
|
assert_eq!(script.lines, vec!["A.", "B."]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_accepts_object_segment_shape() {
|
||||||
|
let raw = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#;
|
||||||
|
let script = parse_script_response(raw, 2);
|
||||||
|
assert_eq!(script.lines, vec!["One.", "Two."]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_pads_short_and_truncates_long_to_n() {
|
||||||
|
// Model returned 1 line but we have 3 segments → pad with neutral lines.
|
||||||
|
let short = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3);
|
||||||
|
assert_eq!(short.lines.len(), 3);
|
||||||
|
assert_eq!(short.lines[0], "Only one.");
|
||||||
|
assert!(!short.lines[1].is_empty());
|
||||||
|
|
||||||
|
// Model returned 3 but we have 2 → truncate.
|
||||||
|
let long = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2);
|
||||||
|
assert_eq!(long.lines, vec!["a", "b"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_falls_back_on_garbage() {
|
||||||
|
let script = parse_script_response("the model said no", 2);
|
||||||
|
assert_eq!(script.title, "Memories");
|
||||||
|
assert_eq!(script.lines.len(), 2);
|
||||||
|
assert!(script.lines.iter().all(|l| !l.is_empty()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_blank_line_replaced_with_fallback() {
|
||||||
|
let script = parse_script_response(r#"{"title":"T","segments":[" ","Real."]}"#, 2);
|
||||||
|
assert!(!script.lines[0].is_empty());
|
||||||
|
assert_eq!(script.lines[1], "Real.");
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,252 @@
|
|||||||
|
//! Reel selectors: resolve "what goes in the reel" into an ordered media set
|
||||||
|
//! plus the metadata the scripter needs. The renderer and scripter are
|
||||||
|
//! selector-agnostic, so adding tag- or date-range-based reels later means
|
||||||
|
//! adding a variant here, not touching the pipeline.
|
||||||
|
//!
|
||||||
|
//! Resolution is split in two so the handler can compute a cache key (and
|
||||||
|
//! short-circuit on a cache hit) without the per-photo insight lookups:
|
||||||
|
//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and
|
||||||
|
//! runs in the background job.
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
|
||||||
|
use chrono::{DateTime, Datelike, FixedOffset};
|
||||||
|
|
||||||
|
use super::{PlannedSegment, ReelMeta, SegmentMedia};
|
||||||
|
use crate::database::{ExifDao, InsightDao};
|
||||||
|
use crate::file_types::is_image_file;
|
||||||
|
use crate::memories::{self, MemoriesSpan};
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
/// Default and hard caps on how many photos a reel covers. The cap bounds the
|
||||||
|
/// LLM/TTS/ffmpeg work per reel; when a span has more, [`sample_evenly`] keeps
|
||||||
|
/// a representative spread across the years rather than just the oldest.
|
||||||
|
pub const DEFAULT_MAX_SEGMENTS: usize = 24;
|
||||||
|
pub const HARD_MAX_SEGMENTS: usize = 40;
|
||||||
|
|
||||||
|
/// What a reel is built from. v1 ships the memories (on this day/week/month)
|
||||||
|
/// selector; tag and date-range variants slot in here later.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum ReelSelector {
|
||||||
|
Memories {
|
||||||
|
span: MemoriesSpan,
|
||||||
|
tz_offset_minutes: i32,
|
||||||
|
library: Option<String>,
|
||||||
|
max_segments: usize,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReelSelector {
|
||||||
|
/// Stable string identity for the cache key. Captures everything that
|
||||||
|
/// changes *which* media is selected (but not the non-deterministic
|
||||||
|
/// narration, which can't be part of a pre-render key).
|
||||||
|
pub fn descriptor(&self) -> String {
|
||||||
|
match self {
|
||||||
|
ReelSelector::Memories {
|
||||||
|
span,
|
||||||
|
tz_offset_minutes,
|
||||||
|
library,
|
||||||
|
max_segments,
|
||||||
|
} => format!(
|
||||||
|
"memories:span={:?}:tz={}:lib={}:max={}",
|
||||||
|
span,
|
||||||
|
tz_offset_minutes,
|
||||||
|
library.as_deref().unwrap_or("all"),
|
||||||
|
max_segments
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pick at most `max` items spread evenly across the input, always keeping the
|
||||||
|
/// first and last. Returns the input unchanged when it already fits.
|
||||||
|
pub fn sample_evenly<T: Clone>(items: &[T], max: usize) -> Vec<T> {
|
||||||
|
if max == 0 {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
if items.len() <= max {
|
||||||
|
return items.to_vec();
|
||||||
|
}
|
||||||
|
if max == 1 {
|
||||||
|
return vec![items[0].clone()];
|
||||||
|
}
|
||||||
|
let last = items.len() - 1;
|
||||||
|
(0..max)
|
||||||
|
.map(|i| {
|
||||||
|
// Spread indices 0..=last across max picks, endpoints included.
|
||||||
|
let idx = (i * last + (max - 1) / 2) / (max - 1);
|
||||||
|
items[idx.min(last)].clone()
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cheap pass: resolve the selector into an ordered list of media (no insight
|
||||||
|
/// lookups yet) plus reel metadata. `Err` only on an invalid library param.
|
||||||
|
pub fn resolve(
|
||||||
|
app_state: &AppState,
|
||||||
|
exif_dao: &Mutex<Box<dyn ExifDao>>,
|
||||||
|
span_context: &opentelemetry::Context,
|
||||||
|
selector: &ReelSelector,
|
||||||
|
) -> Result<(Vec<PlannedSegment>, ReelMeta), String> {
|
||||||
|
match selector {
|
||||||
|
ReelSelector::Memories {
|
||||||
|
span,
|
||||||
|
tz_offset_minutes,
|
||||||
|
library,
|
||||||
|
max_segments,
|
||||||
|
} => {
|
||||||
|
let client_tz = FixedOffset::east_opt(tz_offset_minutes * 60);
|
||||||
|
let items = memories::gather_memory_items(
|
||||||
|
app_state,
|
||||||
|
exif_dao,
|
||||||
|
span_context,
|
||||||
|
*span,
|
||||||
|
*tz_offset_minutes,
|
||||||
|
client_tz,
|
||||||
|
library.as_deref(),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// Phase 1 is photos-only: drop videos (a clip segment type lands
|
||||||
|
// in phase 2). Filter before sampling so the spread is over the
|
||||||
|
// photos that will actually appear.
|
||||||
|
let items: Vec<memories::MemoryItem> = items
|
||||||
|
.into_iter()
|
||||||
|
.filter(|it| is_image_file(Path::new(&it.path)))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let cap = (*max_segments).clamp(1, HARD_MAX_SEGMENTS);
|
||||||
|
let items = sample_evenly(&items, cap);
|
||||||
|
|
||||||
|
let years = distinct_years(&items, client_tz);
|
||||||
|
let meta = ReelMeta { span: *span, years };
|
||||||
|
|
||||||
|
let planned = items
|
||||||
|
.into_iter()
|
||||||
|
.map(|it| PlannedSegment {
|
||||||
|
media: SegmentMedia::Photo {
|
||||||
|
rel_path: it.path,
|
||||||
|
library_id: it.library_id,
|
||||||
|
},
|
||||||
|
date: it.created,
|
||||||
|
insight_title: None,
|
||||||
|
insight_summary: None,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
Ok((planned, meta))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Distinct calendar years represented by the selected media, in the client's
|
||||||
|
/// timezone, ascending. Used to tell the scripter how far back the reel reaches.
|
||||||
|
fn distinct_years(items: &[memories::MemoryItem], tz: Option<FixedOffset>) -> Vec<i32> {
|
||||||
|
let mut years: Vec<i32> = items
|
||||||
|
.iter()
|
||||||
|
.filter_map(|it| it.created)
|
||||||
|
.filter_map(|ts| DateTime::from_timestamp(ts, 0))
|
||||||
|
.map(|dt| match tz {
|
||||||
|
Some(off) => dt.with_timezone(&off).year(),
|
||||||
|
None => dt.year(),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
years.sort_unstable();
|
||||||
|
years.dedup();
|
||||||
|
years
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Background pass: fill each segment's cached insight (title + summary) where
|
||||||
|
/// one exists. Best-effort — a missing or errored lookup leaves the fields
|
||||||
|
/// `None` and the scripter narrates from the date alone.
|
||||||
|
pub fn enrich(
|
||||||
|
insight_dao: &Mutex<Box<dyn InsightDao>>,
|
||||||
|
span_context: &opentelemetry::Context,
|
||||||
|
planned: &mut [PlannedSegment],
|
||||||
|
) {
|
||||||
|
let Ok(mut dao) = insight_dao.lock() else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
for seg in planned.iter_mut() {
|
||||||
|
let rel_path = match &seg.media {
|
||||||
|
SegmentMedia::Photo { rel_path, .. } => rel_path,
|
||||||
|
};
|
||||||
|
if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) {
|
||||||
|
seg.insight_title = Some(insight.title);
|
||||||
|
seg.insight_summary = Some(insight.summary);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sample_evenly_returns_all_when_under_cap() {
|
||||||
|
let v = vec![1, 2, 3];
|
||||||
|
assert_eq!(sample_evenly(&v, 5), vec![1, 2, 3]);
|
||||||
|
assert_eq!(sample_evenly(&v, 3), vec![1, 2, 3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sample_evenly_keeps_endpoints_and_spreads() {
|
||||||
|
let v: Vec<i32> = (0..100).collect();
|
||||||
|
let picked = sample_evenly(&v, 5);
|
||||||
|
assert_eq!(picked.len(), 5);
|
||||||
|
assert_eq!(picked[0], 0); // first kept
|
||||||
|
assert_eq!(*picked.last().unwrap(), 99); // last kept
|
||||||
|
// Strictly increasing, no dupes.
|
||||||
|
assert!(picked.windows(2).all(|w| w[0] < w[1]));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sample_evenly_handles_one_and_zero() {
|
||||||
|
let v: Vec<i32> = (0..10).collect();
|
||||||
|
assert_eq!(sample_evenly(&v, 1), vec![0]);
|
||||||
|
assert!(sample_evenly(&v, 0).is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn descriptor_is_stable_and_distinguishes_inputs() {
|
||||||
|
let a = ReelSelector::Memories {
|
||||||
|
span: MemoriesSpan::Day,
|
||||||
|
tz_offset_minutes: -480,
|
||||||
|
library: None,
|
||||||
|
max_segments: 24,
|
||||||
|
};
|
||||||
|
let b = ReelSelector::Memories {
|
||||||
|
span: MemoriesSpan::Week,
|
||||||
|
tz_offset_minutes: -480,
|
||||||
|
library: None,
|
||||||
|
max_segments: 24,
|
||||||
|
};
|
||||||
|
assert_eq!(a.descriptor(), a.clone().descriptor());
|
||||||
|
assert_ne!(a.descriptor(), b.descriptor());
|
||||||
|
assert!(a.descriptor().contains("lib=all"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn distinct_years_dedupes_and_sorts() {
|
||||||
|
let items = vec![
|
||||||
|
memories::MemoryItem {
|
||||||
|
path: "a".into(),
|
||||||
|
created: Some(1_560_000_000), // 2019
|
||||||
|
modified: None,
|
||||||
|
library_id: 1,
|
||||||
|
},
|
||||||
|
memories::MemoryItem {
|
||||||
|
path: "b".into(),
|
||||||
|
created: Some(1_560_086_400), // 2019
|
||||||
|
modified: None,
|
||||||
|
library_id: 1,
|
||||||
|
},
|
||||||
|
memories::MemoryItem {
|
||||||
|
path: "c".into(),
|
||||||
|
created: Some(1_623_000_000), // 2021
|
||||||
|
modified: None,
|
||||||
|
library_id: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
assert_eq!(distinct_years(&items, None), vec![2019, 2021]);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -53,6 +53,10 @@ pub struct AppState {
|
|||||||
pub video_path: String,
|
pub video_path: String,
|
||||||
pub gif_path: String,
|
pub gif_path: String,
|
||||||
pub preview_clips_path: String,
|
pub preview_clips_path: String,
|
||||||
|
/// Directory for cached memory-reel MP4s (+ title sidecars). Derived from
|
||||||
|
/// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips.
|
||||||
|
/// Created lazily by the reel pipeline on first render.
|
||||||
|
pub reels_path: String,
|
||||||
pub excluded_dirs: Vec<String>,
|
pub excluded_dirs: Vec<String>,
|
||||||
pub ollama: OllamaClient,
|
pub ollama: OllamaClient,
|
||||||
/// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
|
/// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
|
||||||
@@ -141,6 +145,19 @@ impl AppState {
|
|||||||
preview_dao,
|
preview_dao,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Reels cache dir: explicit env, else a `reels` sibling of the preview
|
||||||
|
// clips dir (a known-writable, test-safe location). Not created here —
|
||||||
|
// the reel pipeline does `create_dir_all` before its first write, so
|
||||||
|
// construction (incl. tests) never touches the filesystem.
|
||||||
|
let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| {
|
||||||
|
std::path::Path::new(&preview_clips_path)
|
||||||
|
.parent()
|
||||||
|
.map(|p| p.join("reels"))
|
||||||
|
.unwrap_or_else(|| std::path::PathBuf::from("reels"))
|
||||||
|
.to_string_lossy()
|
||||||
|
.to_string()
|
||||||
|
});
|
||||||
|
|
||||||
let library_health = libraries::new_health_map(&libraries_vec);
|
let library_health = libraries::new_health_map(&libraries_vec);
|
||||||
let live_libraries = Arc::new(RwLock::new(libraries_vec.clone()));
|
let live_libraries = Arc::new(RwLock::new(libraries_vec.clone()));
|
||||||
Self {
|
Self {
|
||||||
@@ -155,6 +172,7 @@ impl AppState {
|
|||||||
video_path,
|
video_path,
|
||||||
gif_path,
|
gif_path,
|
||||||
preview_clips_path,
|
preview_clips_path,
|
||||||
|
reels_path,
|
||||||
excluded_dirs,
|
excluded_dirs,
|
||||||
ollama,
|
ollama,
|
||||||
openrouter,
|
openrouter,
|
||||||
|
|||||||
+1
-1
@@ -231,7 +231,7 @@ impl Ffmpeg {
|
|||||||
/// a hard failure — previously the `parse::<f64>` on empty stdout produced
|
/// a hard failure — previously the `parse::<f64>` on empty stdout produced
|
||||||
/// "cannot parse float from empty string" and poisoned the preview-clip row
|
/// "cannot parse float from empty string" and poisoned the preview-clip row
|
||||||
/// with status=failed, which the watcher would re-queue every full scan.
|
/// with status=failed, which the watcher would re-queue every full scan.
|
||||||
async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
|
pub async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
|
||||||
if let Some(d) = probe_duration(input_file, "format=duration").await? {
|
if let Some(d) = probe_duration(input_file, "format=duration").await? {
|
||||||
return Ok(Some(d));
|
return Ok(Some(d));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user