Reels: burst beats + duration budget for week/month, plus step logging
Restructures a reel around beats — one narration line over one or more photos — instead of one line per photo. A single-photo beat is a held shot; a multi-photo beat is a quick burst that flashes through several moments of an event while the line is read. That way a week/month reel can show everything it spans without a narrated (and timed) segment per photo.

Selection (selector.rs):
- Duration budget: cap the number of narrated beats to fit roughly REEL_TARGET_SECONDS (default 90, env-tunable), so week/month reels don't run minutes long.
- Event clustering by time gap; when there are more events than the beat budget, adjacent events merge so the whole span stays covered. Each beat bursts up to MAX_BURST_PHOTOS (an even spread), so a 40-shot dinner contributes a handful of quick frames, not forty narrated seconds.

Render (render.rs): a beat renders its photos as a concat of per-photo fills (blurred-bg portrait, fps-before-fade) under one muxed narration; burst photos get a snappier fade. beat_durations splits the beat's screen time evenly across its photos, stretching the beat only if a burst has so many photos that they would otherwise flash too fast.

Adds high-level info logs across the steps (request → script → per-beat narrate/render → join → done with elapsed) for visibility. Bumps RENDER_VERSION to re-render cached reels.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+95
-36
@@ -36,24 +36,27 @@ use crate::otel::extract_context_from_request;
|
||||
use crate::state::AppState;
|
||||
use selector::ReelSelector;
|
||||
|
||||
/// The media behind one reel segment. Photos-only for now; a `Clip` variant
|
||||
/// (a section of a source video) is the phase-2 extension point.
|
||||
/// The media behind one shot. Photos-only for now; a `Clip` variant (a section
|
||||
/// of a source video) is the phase-2 extension point.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum SegmentMedia {
|
||||
Photo { rel_path: String, library_id: i32 },
|
||||
}
|
||||
|
||||
/// A segment before narration: which photo, when it was taken, and any cached
|
||||
/// insight to feed the scripter.
|
||||
/// A beat: one narration line over one or more photos. A single-photo beat is a
|
||||
/// held shot; a multi-photo beat is a quick burst that flashes through several
|
||||
/// moments of the same event while the line is read — so a week/month reel can
|
||||
/// *show* everything it spans without a narration line (and the seconds that
|
||||
/// come with it) per photo.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PlannedSegment {
|
||||
pub media: SegmentMedia,
|
||||
pub struct PlannedBeat {
|
||||
pub photos: Vec<SegmentMedia>,
|
||||
pub date: Option<i64>,
|
||||
pub insight_title: Option<String>,
|
||||
pub insight_summary: Option<String>,
|
||||
}
|
||||
|
||||
impl PlannedSegment {
|
||||
impl PlannedBeat {
|
||||
/// Human date for the prompt, e.g. "June 12, 2019". `None` when undated.
|
||||
pub fn date_label(&self) -> Option<String> {
|
||||
let ts = self.date?;
|
||||
@@ -180,7 +183,7 @@ fn finish_job(
|
||||
|
||||
/// Render version: bump to invalidate every cached reel after a rendering /
|
||||
/// scripting change that should produce a fresh result.
|
||||
const RENDER_VERSION: u32 = 3;
|
||||
const RENDER_VERSION: u32 = 4;
|
||||
|
||||
/// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump
|
||||
/// over the ~0.5 default warms up otherwise-flat narration without over-acting;
|
||||
@@ -306,16 +309,25 @@ pub async fn create_reel_handler(
|
||||
}));
|
||||
}
|
||||
|
||||
let media: Vec<SegmentMedia> = planned.iter().map(|p| p.media.clone()).collect();
|
||||
// Flatten every photo across beats (in order) into the cache key — the key
|
||||
// tracks exactly which photos appear and in what sequence.
|
||||
let media: Vec<SegmentMedia> = planned.iter().flat_map(|b| b.photos.clone()).collect();
|
||||
let voice = req.voice.clone().filter(|s| !s.is_empty());
|
||||
let key = cache_key(&selector, &media, voice.as_deref());
|
||||
|
||||
let job_id = Uuid::new_v4();
|
||||
log::info!(
|
||||
"reel {job_id}: request span={:?} → {} beats, {} photos",
|
||||
span,
|
||||
planned.len(),
|
||||
media.len()
|
||||
);
|
||||
|
||||
// Cache hit: register an already-Done job pointing at the existing MP4 so
|
||||
// the client's first poll returns the video URL immediately.
|
||||
let mp4 = reel_mp4_path(&app_state, &key);
|
||||
if mp4.exists() {
|
||||
log::info!("reel {job_id}: cache hit, serving existing reel");
|
||||
let title = std::fs::read(reel_sidecar_path(&app_state, &key))
|
||||
.ok()
|
||||
.and_then(|b| serde_json::from_slice::<ReelSidecar>(&b).ok())
|
||||
@@ -358,6 +370,7 @@ pub async fn create_reel_handler(
|
||||
},
|
||||
);
|
||||
}
|
||||
log::info!("reel {job_id}: queued for generation");
|
||||
|
||||
let state = app_state.clone();
|
||||
let insight_dao = insight_dao.clone();
|
||||
@@ -441,45 +454,73 @@ async fn run_reel_job(
|
||||
app_state: &AppState,
|
||||
insight_dao: &Mutex<Box<dyn InsightDao>>,
|
||||
job_id: Uuid,
|
||||
mut planned: Vec<PlannedSegment>,
|
||||
mut planned: Vec<PlannedBeat>,
|
||||
meta: ReelMeta,
|
||||
voice: Option<String>,
|
||||
key: &str,
|
||||
) -> anyhow::Result<(String, PathBuf)> {
|
||||
use anyhow::{Context, anyhow};
|
||||
|
||||
let started = Instant::now();
|
||||
let total_photos: usize = planned.iter().map(|b| b.photos.len()).sum();
|
||||
log::info!(
|
||||
"reel {job_id}: starting — span {:?}, {} beats, {} photos, voice={}",
|
||||
meta.span,
|
||||
planned.len(),
|
||||
total_photos,
|
||||
voice.as_deref().unwrap_or("default")
|
||||
);
|
||||
|
||||
let client = app_state
|
||||
.llamacpp
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("TTS/LLM backend not configured"))?
|
||||
.clone();
|
||||
|
||||
// 1. Enrich with cached insights, then script (one LLM call).
|
||||
// 1. Enrich each beat with its lead photo's cached insight, then script
|
||||
// (one LLM call → one narration line per beat).
|
||||
set_stage(job_id, "scripting");
|
||||
log::info!("reel {job_id}: scripting narration via LLM…");
|
||||
let span_context = opentelemetry::Context::new();
|
||||
selector::enrich(insight_dao, &span_context, &mut planned);
|
||||
let script = script::generate_script(&client, &meta, &planned).await?;
|
||||
log::info!(
|
||||
"reel {job_id}: scripted \"{}\" ({} lines)",
|
||||
script.title,
|
||||
script.lines.len()
|
||||
);
|
||||
|
||||
// 2. Narrate each line to speech and 3. render each photo segment. A
|
||||
// segment whose audio or render fails is skipped (logged) rather than
|
||||
// sinking the whole reel — handles an odd HEIC/corrupt file gracefully.
|
||||
// 2. Narrate each beat's line and 3. render the beat (its photos shown in
|
||||
// sequence under that one narration). A beat whose audio or render fails
|
||||
// is skipped (logged) rather than sinking the whole reel — handles an
|
||||
// odd HEIC/corrupt file gracefully.
|
||||
set_stage(job_id, "narrating");
|
||||
let work = tempfile::tempdir().context("creating reel work dir")?;
|
||||
let nvenc = render::is_nvenc_available().await;
|
||||
log::info!(
|
||||
"reel {job_id}: narrating + rendering {} beats (encoder: {})",
|
||||
planned.len(),
|
||||
if nvenc { "nvenc" } else { "cpu" }
|
||||
);
|
||||
let opts = render::SegmentOpts {
|
||||
nvenc,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut segment_files: Vec<String> = Vec::new();
|
||||
for (i, (seg, line)) in planned.iter().zip(script.lines.iter()).enumerate() {
|
||||
let image_path = match resolve_image_path(app_state, &seg.media) {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
log::warn!("reel {job_id}: skipping segment {i}, image path unresolved");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let beat_total = planned.len();
|
||||
let mut beat_files: Vec<String> = Vec::new();
|
||||
for (i, (beat, line)) in planned.iter().zip(script.lines.iter()).enumerate() {
|
||||
// Resolve all of the beat's photos to absolute paths; drop any that
|
||||
// don't resolve. An empty beat is skipped.
|
||||
let image_paths: Vec<PathBuf> = beat
|
||||
.photos
|
||||
.iter()
|
||||
.filter_map(|m| resolve_image_path(app_state, m))
|
||||
.collect();
|
||||
if image_paths.is_empty() {
|
||||
log::warn!("reel {job_id}: skipping beat {i}, no image paths resolved");
|
||||
continue;
|
||||
}
|
||||
|
||||
let audio_bytes = match crate::ai::tts::synthesize_serialized(
|
||||
&client,
|
||||
@@ -492,13 +533,13 @@ async fn run_reel_job(
|
||||
{
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}");
|
||||
log::warn!("reel {job_id}: skipping beat {i}, TTS failed: {e}");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let audio_path = work.path().join(format!("narration_{i:03}.wav"));
|
||||
if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await {
|
||||
log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}");
|
||||
log::warn!("reel {job_id}: skipping beat {i}, writing audio failed: {e}");
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -508,25 +549,37 @@ async fn run_reel_job(
|
||||
.ok()
|
||||
.flatten()
|
||||
.unwrap_or(render::MIN_SEGMENT_SECONDS);
|
||||
let duration = render::segment_duration(narration_secs);
|
||||
|
||||
set_stage(job_id, "rendering");
|
||||
let seg_out = work.path().join(format!("seg_{i:03}.mp4"));
|
||||
log::info!(
|
||||
"reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s",
|
||||
i + 1,
|
||||
beat_total,
|
||||
image_paths.len(),
|
||||
narration_secs
|
||||
);
|
||||
let beat_out = work.path().join(format!("beat_{i:03}.mp4"));
|
||||
if let Err(e) =
|
||||
render::render_segment(&image_path, &audio_path, &seg_out, duration, &opts).await
|
||||
render::render_beat(&image_paths, &audio_path, &beat_out, narration_secs, &opts).await
|
||||
{
|
||||
log::warn!("reel {job_id}: skipping segment {i}, render failed: {e}");
|
||||
log::warn!("reel {job_id}: skipping beat {i}, render failed: {e}");
|
||||
continue;
|
||||
}
|
||||
segment_files.push(seg_out.to_string_lossy().to_string());
|
||||
beat_files.push(beat_out.to_string_lossy().to_string());
|
||||
}
|
||||
|
||||
let segment_files = beat_files;
|
||||
if segment_files.is_empty() {
|
||||
return Err(anyhow!("no segments rendered successfully"));
|
||||
return Err(anyhow!("no beats rendered successfully"));
|
||||
}
|
||||
|
||||
// 4. Concat into the cache. Write to a temp name in the reels dir, then
|
||||
// rename atomically (same filesystem) so a reader never sees a partial.
|
||||
set_stage(job_id, "rendering");
|
||||
log::info!(
|
||||
"reel {job_id}: joining {} rendered beats into the final reel",
|
||||
segment_files.len()
|
||||
);
|
||||
std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?;
|
||||
let final_path = reel_mp4_path(app_state, key);
|
||||
let tmp_path = final_path.with_extension("mp4.tmp");
|
||||
@@ -541,6 +594,12 @@ async fn run_reel_job(
|
||||
.context("serializing reel sidecar")?;
|
||||
let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar);
|
||||
|
||||
log::info!(
|
||||
"reel {job_id}: done in {:.1}s — {} beats → {}",
|
||||
started.elapsed().as_secs_f64(),
|
||||
segment_files.len(),
|
||||
final_path.display()
|
||||
);
|
||||
Ok((script.title, final_path))
|
||||
}
|
||||
|
||||
@@ -622,16 +681,16 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn date_label_formats_or_none() {
|
||||
let seg = PlannedSegment {
|
||||
media: photo("a.jpg", 1),
|
||||
let beat = PlannedBeat {
|
||||
photos: vec![photo("a.jpg", 1)],
|
||||
date: Some(1_560_384_000), // 2019-06-13 UTC
|
||||
insight_title: None,
|
||||
insight_summary: None,
|
||||
};
|
||||
assert!(seg.date_label().unwrap().contains("2019"));
|
||||
assert!(beat.date_label().unwrap().contains("2019"));
|
||||
|
||||
let undated = PlannedSegment {
|
||||
media: photo("a.jpg", 1),
|
||||
let undated = PlannedBeat {
|
||||
photos: vec![photo("a.jpg", 1)],
|
||||
date: None,
|
||||
insight_title: None,
|
||||
insight_summary: None,
|
||||
|
||||
+195
-101
@@ -22,25 +22,31 @@ pub use crate::video::ffmpeg::is_nvenc_available;
|
||||
/// Reel canvas. Portrait, because reels are watched on a phone held upright —
|
||||
/// a landscape canvas letterboxes to a thin ~25%-height band there. Each photo
|
||||
/// is fitted sharp and centered over a blurred, zoomed copy of itself (see
|
||||
/// [`segment_filtergraph`]) so the frame is always filled regardless of the
|
||||
/// [`photo_filter_chain`]) so the frame is always filled regardless of the
|
||||
/// photo's orientation, without cropping the subject.
|
||||
pub const REEL_WIDTH: u32 = 1080;
|
||||
pub const REEL_HEIGHT: u32 = 1920;
|
||||
pub const REEL_FPS: u32 = 30;
|
||||
|
||||
/// A still's screen time is its narration length plus a short breath, with a
|
||||
/// floor so a terse line still lingers. No ceiling: the segment always covers
|
||||
/// the full narration so speech is never truncated — the scripter is asked to
|
||||
/// keep lines short instead.
|
||||
/// A beat's screen time is its narration length plus a short breath, with a
|
||||
/// floor so a terse line still lingers. No ceiling: the beat always covers the
|
||||
/// full narration so speech is never truncated — the scripter is asked to keep
|
||||
/// lines short instead.
|
||||
pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
|
||||
const NARRATION_TAIL_SECONDS: f64 = 0.6;
|
||||
|
||||
/// Quick fade in/out baked into each segment so concatenated photos dip
|
||||
/// smoothly instead of hard-cutting. The fade-out lands inside the narration's
|
||||
/// silent tail, so speech is never clipped.
|
||||
const FADE_SECONDS: f64 = 0.35;
|
||||
/// Fade durations baked into each photo. A held (single-photo) beat gets a
|
||||
/// gentle dip; burst photos get a snappier fade so the montage feels quick.
|
||||
const SINGLE_FADE_SECONDS: f64 = 0.35;
|
||||
const BURST_FADE_SECONDS: f64 = 0.15;
|
||||
|
||||
/// Screen time for a photo segment given its narration audio length.
|
||||
/// Floor on how long each burst photo stays up, so a short line over many photos
|
||||
/// doesn't flash them subliminally. If the narration is too short to give every
|
||||
/// photo this much, the beat is stretched to fit.
|
||||
const MIN_BURST_PHOTO_SECONDS: f64 = 0.6;
|
||||
|
||||
/// Base screen time for a beat given its narration length: narration + breath,
|
||||
/// floored. Used as the lower bound on a beat's total duration.
|
||||
pub fn segment_duration(narration_secs: f64) -> f64 {
|
||||
let d = narration_secs + NARRATION_TAIL_SECONDS;
|
||||
if d.is_finite() && d > MIN_SEGMENT_SECONDS {
|
||||
@@ -50,6 +56,29 @@ pub fn segment_duration(narration_secs: f64) -> f64 {
|
||||
}
|
||||
}
|
||||
|
||||
/// Split a beat into per-photo durations. The beat lasts at least its narration
|
||||
/// (so speech isn't cut) and at least `n × MIN_BURST_PHOTO_SECONDS` (so a fast
|
||||
/// burst stays legible); the photos share that total evenly. Returns
|
||||
/// `(total_seconds, per_photo_seconds)`.
|
||||
pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec<f64>) {
|
||||
let n = n_photos.max(1);
|
||||
let base = segment_duration(narration_secs);
|
||||
let min_total = n as f64 * MIN_BURST_PHOTO_SECONDS;
|
||||
let total = if base > min_total { base } else { min_total };
|
||||
let each = total / n as f64;
|
||||
(total, vec![each; n])
|
||||
}
|
||||
|
||||
/// Fade length to use for a beat of `n_photos` (gentle when held, snappy in a
|
||||
/// burst).
|
||||
fn fade_for(n_photos: usize) -> f64 {
|
||||
if n_photos > 1 {
|
||||
BURST_FADE_SECONDS
|
||||
} else {
|
||||
SINGLE_FADE_SECONDS
|
||||
}
|
||||
}
|
||||
|
||||
/// Options controlling per-segment rendering.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct SegmentOpts {
|
||||
@@ -70,38 +99,49 @@ impl Default for SegmentOpts {
|
||||
}
|
||||
}
|
||||
|
||||
/// Full `filter_complex` for one photo segment, producing labelled `[v]` (video)
|
||||
/// and `[a]` (audio) outputs. Input 0 is the looped still, input 1 the
|
||||
/// narration.
|
||||
/// Filter chain for one photo (input `idx`) producing the labelled output
|
||||
/// `[v{idx}]`. Splits the still into a background and foreground: the background
|
||||
/// is scaled to *cover* the canvas and heavily blurred; the foreground is
|
||||
/// scaled to *fit* and overlaid centered. This fills the portrait frame for any
|
||||
/// photo orientation — no black bars, no cropping of the subject — then a fade
|
||||
/// in/out softens the cut. Intermediate labels are suffixed with `idx` so
|
||||
/// several chains coexist in one `filter_complex`.
|
||||
///
|
||||
/// Video: split the still into a background and foreground. The background is
|
||||
/// scaled to *cover* the canvas and heavily blurred; the foreground is scaled to
|
||||
/// *fit* inside it and overlaid centered. This fills the portrait frame for any
|
||||
/// photo orientation — no black bars, no cropping of the subject — then a quick
|
||||
/// fade in/out softens the cut to the next segment.
|
||||
///
|
||||
/// Audio: pad the narration with trailing silence so a short line doesn't end
|
||||
/// the segment early; `-t` bounds it to the segment duration.
|
||||
pub fn segment_filtergraph(opts: &SegmentOpts, duration: f64) -> String {
|
||||
/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a
|
||||
/// true {fps}-frame timeline; otherwise the fade is sampled at the looped
|
||||
/// still's coarse cadence and duplicated up, which reads as a steppy dip.
|
||||
fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String {
|
||||
let (w, h, fps) = (opts.width, opts.height, opts.fps);
|
||||
// Fade-out begins one fade-length before the end; clamp so a floor-length
|
||||
// segment still gets a valid (non-negative) start time.
|
||||
let fade_out_start = (duration - FADE_SECONDS).max(0.0);
|
||||
// `fps` is normalized BEFORE the fades so the brightness ramp is computed
|
||||
// on a true {fps}-frame timeline. If fps came after, the fade would be
|
||||
// sampled at the looped still's coarse input cadence and then duplicated up
|
||||
// to {fps}, which reads as a steppy / low-frame-rate dip.
|
||||
let fade_out_start = (duration - fade).max(0.0);
|
||||
format!(
|
||||
"[0:v]split=2[bg][fg];\
|
||||
[bg]scale={w}:{h}:force_original_aspect_ratio=increase,\
|
||||
crop={w}:{h},boxblur=20:2[bgb];\
|
||||
[fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\
|
||||
[bgb][fgs]overlay=(W-w)/2:(H-h)/2,\
|
||||
"[{idx}:v]split=2[bg{idx}][fg{idx}];\
|
||||
[bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\
|
||||
crop={w}:{h},boxblur=20:2[bgb{idx}];\
|
||||
[fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\
|
||||
[bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\
|
||||
fps={fps},\
|
||||
fade=t=in:st=0:d={FADE_SECONDS},\
|
||||
fade=t=out:st={fade_out_start:.3}:d={FADE_SECONDS},\
|
||||
setsar=1,format=yuv420p[v];\
|
||||
[1:a]apad[a]"
|
||||
fade=t=in:st=0:d={fade},\
|
||||
fade=t=out:st={fade_out_start:.3}:d={fade},\
|
||||
setsar=1,format=yuv420p[v{idx}]"
|
||||
)
|
||||
}
|
||||
|
||||
/// Full `filter_complex` for a beat of `per_photo` durations: one chain per
|
||||
/// photo, concatenated into `[v]`, with the narration (the last input, index
|
||||
/// `per_photo.len()`) padded with trailing silence into `[a]`. A single-photo
|
||||
/// beat degenerates to one chain + `concat=n=1` (a passthrough).
|
||||
pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String {
|
||||
let n = per_photo.len().max(1);
|
||||
let fade = fade_for(n);
|
||||
let chains: Vec<String> = per_photo
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, &d)| photo_filter_chain(i, opts, d, fade))
|
||||
.collect();
|
||||
let concat_inputs: String = (0..n).map(|i| format!("[v{i}]")).collect();
|
||||
format!(
|
||||
"{chains};{concat_inputs}concat=n={n}:v=1:a=0[v];[{n}:a]apad[a]",
|
||||
chains = chains.join(";")
|
||||
)
|
||||
}
|
||||
|
||||
@@ -128,15 +168,16 @@ fn video_encoder_args(nvenc: bool) -> Vec<String> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Build the ffmpeg args that render one photo segment: a still looped for
|
||||
/// `duration` seconds, filled to the portrait canvas with a blurred backdrop
|
||||
/// (see [`segment_filtergraph`]) and the narration muxed in. `-t` bounds both
|
||||
/// streams to the segment length.
|
||||
pub fn build_segment_args(
|
||||
image_path: &str,
|
||||
/// Build the ffmpeg args that render one beat: each photo looped for its slice
|
||||
/// of the beat (filled to the portrait canvas with a blurred backdrop), the
|
||||
/// slices concatenated, and the single narration muxed over the whole thing.
|
||||
/// `total` bounds the output (and the apad'd audio) to the beat length.
|
||||
pub fn build_beat_args(
|
||||
image_paths: &[String],
|
||||
audio_path: &str,
|
||||
out_path: &str,
|
||||
duration: f64,
|
||||
per_photo: &[f64],
|
||||
total: f64,
|
||||
opts: &SegmentOpts,
|
||||
) -> Vec<String> {
|
||||
let fps = opts.fps.to_string();
|
||||
@@ -144,26 +185,33 @@ pub fn build_segment_args(
|
||||
if opts.nvenc {
|
||||
args.extend(["-hwaccel".into(), "cuda".into()]);
|
||||
}
|
||||
// One looped-still input per photo, each bounded to its slice by an input
|
||||
// `-t`; reading at the target `-framerate` gives the fades real frames to
|
||||
// ramp across.
|
||||
for (path, &dur) in image_paths.iter().zip(per_photo.iter()) {
|
||||
args.extend([
|
||||
"-framerate".into(),
|
||||
fps.clone(),
|
||||
"-loop".into(),
|
||||
"1".into(),
|
||||
"-t".into(),
|
||||
format!("{dur:.3}"),
|
||||
"-i".into(),
|
||||
path.clone(),
|
||||
]);
|
||||
}
|
||||
args.extend([
|
||||
// Read the looped still at the target rate so frames exist for the
|
||||
// fade to ramp across (paired with the in-graph `fps` and CFR output).
|
||||
"-framerate".into(),
|
||||
fps.clone(),
|
||||
"-loop".into(),
|
||||
"1".into(),
|
||||
"-i".into(),
|
||||
image_path.into(),
|
||||
"-i".into(),
|
||||
audio_path.into(),
|
||||
"-filter_complex".into(),
|
||||
segment_filtergraph(opts, duration),
|
||||
beat_filtergraph(opts, per_photo),
|
||||
"-map".into(),
|
||||
"[v]".into(),
|
||||
"-map".into(),
|
||||
"[a]".into(),
|
||||
"-t".into(),
|
||||
format!("{duration:.3}"),
|
||||
// Force constant frame rate so the segment (and the concatenated reel)
|
||||
format!("{total:.3}"),
|
||||
// Force constant frame rate so the beat (and the concatenated reel)
|
||||
// plays at a steady {fps} rather than a variable cadence.
|
||||
"-r".into(),
|
||||
fps,
|
||||
@@ -231,22 +279,33 @@ async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Render one photo segment to `out_path`.
|
||||
pub async fn render_segment(
|
||||
image_path: &Path,
|
||||
/// Render one beat to `out_path`: its photos shown in sequence (a held shot for
|
||||
/// one photo, a quick burst for several) under the single narration in
|
||||
/// `audio_path`, whose measured length sets the beat's pacing.
|
||||
pub async fn render_beat(
|
||||
image_paths: &[std::path::PathBuf],
|
||||
audio_path: &Path,
|
||||
out_path: &Path,
|
||||
duration: f64,
|
||||
narration_secs: f64,
|
||||
opts: &SegmentOpts,
|
||||
) -> Result<()> {
|
||||
let args = build_segment_args(
|
||||
&image_path.to_string_lossy(),
|
||||
if image_paths.is_empty() {
|
||||
bail!("render_beat called with no images");
|
||||
}
|
||||
let (total, per_photo) = beat_durations(narration_secs, image_paths.len());
|
||||
let paths: Vec<String> = image_paths
|
||||
.iter()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.collect();
|
||||
let args = build_beat_args(
|
||||
&paths,
|
||||
&audio_path.to_string_lossy(),
|
||||
&out_path.to_string_lossy(),
|
||||
duration,
|
||||
&per_photo,
|
||||
total,
|
||||
opts,
|
||||
);
|
||||
run_ffmpeg(&args, "segment render").await
|
||||
run_ffmpeg(&args, "beat render").await
|
||||
}
|
||||
|
||||
/// Join rendered segments into the final reel. Writes the concat list into the
|
||||
@@ -288,73 +347,108 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filtergraph_fills_portrait_with_blurred_bg_and_fitted_fg() {
|
||||
let g = segment_filtergraph(&SegmentOpts::default(), 4.0);
|
||||
// Background covers + blurs; foreground fits and is centered over it.
|
||||
assert!(g.contains("split=2[bg][fg]"));
|
||||
fn beat_durations_single_photo_matches_base() {
|
||||
let (total, per) = beat_durations(4.0, 1);
|
||||
assert!((total - 4.6).abs() < 1e-9); // narration + tail
|
||||
assert_eq!(per.len(), 1);
|
||||
assert!((per[0] - 4.6).abs() < 1e-9);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn beat_durations_burst_splits_evenly() {
|
||||
// 5 photos over a 4.0s narration (4.6s base) → 0.92s each (above the 0.6 floor).
|
||||
let (total, per) = beat_durations(4.0, 5);
|
||||
assert!((total - 4.6).abs() < 1e-9);
|
||||
assert_eq!(per.len(), 5);
|
||||
assert!((per.iter().sum::<f64>() - total).abs() < 1e-9);
|
||||
assert!(per.iter().all(|&d| d >= MIN_BURST_PHOTO_SECONDS));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn beat_durations_stretches_when_narration_too_short_for_burst() {
|
||||
// A floor-length beat (2.5s) over 10 photos would be 0.25s each — below the
|
||||
// legibility floor, so the beat stretches to 10 × 0.6 = 6s.
|
||||
let (total, per) = beat_durations(0.0, 10);
|
||||
assert!((total - 6.0).abs() < 1e-9);
|
||||
assert!(per.iter().all(|&d| (d - 0.6).abs() < 1e-9));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn beat_filtergraph_single_photo_fills_portrait_and_holds() {
|
||||
let (_t, per) = beat_durations(4.0, 1);
|
||||
let g = beat_filtergraph(&SegmentOpts::default(), &per);
|
||||
assert!(g.contains("[0:v]split=2[bg0][fg0]"));
|
||||
assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase"));
|
||||
assert!(g.contains("crop=1080:1920"));
|
||||
assert!(g.contains("boxblur"));
|
||||
assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease"));
|
||||
assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
|
||||
// Produces the labelled outputs build_segment_args maps.
|
||||
assert!(g.contains("[v]"));
|
||||
// Single photo → concat of one, gentle fade, audio is input 1.
|
||||
assert!(g.contains("concat=n=1:v=1:a=0[v]"));
|
||||
assert!(g.contains("d=0.35")); // SINGLE_FADE
|
||||
assert!(g.contains("[1:a]apad[a]"));
|
||||
assert!(g.contains("format=yuv420p"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filtergraph_fades_in_and_out_within_duration() {
|
||||
// 4s segment, 0.35s fade → fade-out starts at 3.65s.
|
||||
let g = segment_filtergraph(&SegmentOpts::default(), 4.0);
|
||||
assert!(g.contains("fade=t=in:st=0:d=0.35"));
|
||||
assert!(g.contains("fade=t=out:st=3.650:d=0.35"));
|
||||
fn beat_filtergraph_burst_chains_concats_and_snappy_fade() {
|
||||
let (_t, per) = beat_durations(4.0, 3);
|
||||
let g = beat_filtergraph(&SegmentOpts::default(), &per);
|
||||
// One chain per photo with index-suffixed labels.
|
||||
assert!(g.contains("[0:v]split") && g.contains("[1:v]split") && g.contains("[2:v]split"));
|
||||
// Concatenated in order, audio is the 4th input (index 3).
|
||||
assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]"));
|
||||
assert!(g.contains("[3:a]apad[a]"));
|
||||
// Burst uses the snappier fade.
|
||||
assert!(g.contains("d=0.15"));
|
||||
assert!(!g.contains("d=0.35"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filtergraph_normalizes_fps_before_fading() {
|
||||
// The fps filter must precede the fades, else the brightness ramp is
|
||||
// sampled at the still's coarse cadence and looks steppy.
|
||||
let g = segment_filtergraph(&SegmentOpts::default(), 4.0);
|
||||
fn beat_filtergraph_normalizes_fps_before_fading() {
|
||||
// fps must precede the fades on every chain (else the dip looks steppy).
|
||||
let (_t, per) = beat_durations(4.0, 1);
|
||||
let g = beat_filtergraph(&SegmentOpts::default(), &per);
|
||||
let fps_at = g.find("fps=30").expect("fps in graph");
|
||||
let fade_at = g.find("fade=t=in").expect("fade in graph");
|
||||
assert!(fps_at < fade_at);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filtergraph_fade_out_start_never_negative_at_floor() {
|
||||
// A floor-length segment shorter than a fade still yields st >= 0.
|
||||
let g = segment_filtergraph(&SegmentOpts::default(), 0.2);
|
||||
assert!(g.contains("fade=t=out:st=0.000:d=0.35"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn segment_args_loop_still_and_bound_with_t() {
|
||||
let args = build_segment_args(
|
||||
"/img.jpg",
|
||||
"/a.wav",
|
||||
fn beat_args_one_input_per_photo_plus_audio_bound_by_total() {
|
||||
let (total, per) = beat_durations(4.0, 2);
|
||||
let args = build_beat_args(
|
||||
&["/a.jpg".into(), "/b.jpg".into()],
|
||||
"/n.wav",
|
||||
"/out.mp4",
|
||||
4.0,
|
||||
&per,
|
||||
total,
|
||||
&SegmentOpts::default(),
|
||||
);
|
||||
let joined = args.join(" ");
|
||||
assert!(joined.contains("-framerate 30 -loop 1 -i /img.jpg"));
|
||||
assert!(joined.contains("-i /a.wav"));
|
||||
assert!(joined.contains("apad"));
|
||||
assert!(joined.contains("-t 4.000"));
|
||||
// Constant frame rate forced on the output.
|
||||
// A looped-still input per photo, each with its slice -t, then the audio.
|
||||
assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg"));
|
||||
assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg"));
|
||||
assert!(joined.contains("-i /n.wav"));
|
||||
// Output bounded to the beat total and forced CFR.
|
||||
assert!(joined.contains("-t 4.600"));
|
||||
assert!(joined.contains("-r 30"));
|
||||
assert!(joined.contains("libx264"));
|
||||
assert!(joined.ends_with("/out.mp4"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn segment_args_use_nvenc_and_cuda_when_enabled() {
|
||||
fn beat_args_use_nvenc_and_cuda_when_enabled() {
|
||||
let opts = SegmentOpts {
|
||||
nvenc: true,
|
||||
..SegmentOpts::default()
|
||||
};
|
||||
let args = build_segment_args("/img.jpg", "/a.wav", "/out.mp4", 3.0, &opts);
|
||||
let (total, per) = beat_durations(3.0, 1);
|
||||
let args = build_beat_args(
|
||||
&["/img.jpg".into()],
|
||||
"/a.wav",
|
||||
"/out.mp4",
|
||||
&per,
|
||||
total,
|
||||
&opts,
|
||||
);
|
||||
let joined = args.join(" ");
|
||||
assert!(joined.contains("-hwaccel cuda"));
|
||||
assert!(joined.contains("h264_nvenc"));
|
||||
|
||||
+61
-34
@@ -1,10 +1,11 @@
|
||||
//! Narration scripting for memory reels.
|
||||
//!
|
||||
//! One LLM call turns the planned segments (each carrying its date and, where
|
||||
//! One LLM call turns the planned beats (each carrying its date and, where
|
||||
//! available, its cached insight) into a short first-person narration line per
|
||||
//! photo plus a title for the reel. We reuse the cached insight summary as the
|
||||
//! richest per-photo signal rather than re-running vision at reel time — that
|
||||
//! keeps reel generation off the GPU's vision slot entirely.
|
||||
//! beat plus a title for the reel. A beat may show several photos in a quick
|
||||
//! burst, so a line narrates the *moment*, not a single frame. We reuse the
|
||||
//! cached insight summary as the richest signal rather than re-running vision
|
||||
//! at reel time — that keeps reel generation off the GPU's vision slot.
|
||||
//!
|
||||
//! The prompt builder and response parser are pure so the contract is
|
||||
//! unit-testable; `generate_script` wires them to the LLM client.
|
||||
@@ -12,11 +13,11 @@
|
||||
use anyhow::{Context, Result};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::{PlannedSegment, ReelMeta};
|
||||
use super::{PlannedBeat, ReelMeta};
|
||||
use crate::ai::llamacpp::LlamaCppClient;
|
||||
use crate::ai::llm_client::LlmClient;
|
||||
|
||||
/// The narration for a whole reel: a title and one line per segment, in order.
|
||||
/// The narration for a whole reel: a title and one line per beat, in order.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ReelScript {
|
||||
pub title: String,
|
||||
@@ -26,33 +27,38 @@ pub struct ReelScript {
|
||||
const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
|
||||
slideshow of someone's own photos set to a spoken voiceover. Write warm, \
|
||||
specific, first-person narration as if the person is gently looking back on \
|
||||
their own memories. Be concrete and grounded in the details given; never \
|
||||
invent names, places, or events that aren't supported. Keep each line to one \
|
||||
or two short sentences that can be read aloud in a few seconds. Avoid generic \
|
||||
filler like \"what a wonderful day\" — if you have little to go on, simply \
|
||||
describe the moment plainly.";
|
||||
their own memories. Each line plays over one moment, which may be a quick burst \
|
||||
of several photos, so narrate the moment as a whole rather than a single frame. \
|
||||
Be concrete and grounded in the details given; never invent names, places, or \
|
||||
events that aren't supported. Keep each line to one or two short sentences that \
|
||||
can be read aloud in a few seconds. Avoid generic filler like \"what a \
|
||||
wonderful day\" — if you have little to go on, simply describe the moment \
|
||||
plainly.";
|
||||
|
||||
/// Build the (system, user) prompt pair for the scripter. The user message
|
||||
/// describes each segment in order and asks for strict JSON back.
|
||||
pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (String, String) {
|
||||
/// describes each beat in order and asks for strict JSON back.
|
||||
pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) {
|
||||
let mut user = String::new();
|
||||
user.push_str(&format!(
|
||||
"These are {} photos surfaced as memories {}.\n\n",
|
||||
planned.len(),
|
||||
"This reel has {} moments surfaced as memories {}.\n\n",
|
||||
beats.len(),
|
||||
meta.span_phrase()
|
||||
));
|
||||
if !meta.years.is_empty() {
|
||||
let years: Vec<String> = meta.years.iter().map(|y| y.to_string()).collect();
|
||||
user.push_str(&format!("They span the years: {}.\n\n", years.join(", ")));
|
||||
}
|
||||
user.push_str("Photos, in the order they will appear:\n");
|
||||
for (i, seg) in planned.iter().enumerate() {
|
||||
user.push_str("Moments, in the order they will appear:\n");
|
||||
for (i, beat) in beats.iter().enumerate() {
|
||||
user.push_str(&format!("\n[{}]", i + 1));
|
||||
if let Some(date) = seg.date_label() {
|
||||
if let Some(date) = beat.date_label() {
|
||||
user.push_str(&format!(" {date}"));
|
||||
}
|
||||
if beat.photos.len() > 1 {
|
||||
user.push_str(&format!(" (a burst of {} photos)", beat.photos.len()));
|
||||
}
|
||||
user.push('\n');
|
||||
match (&seg.insight_title, &seg.insight_summary) {
|
||||
match (&beat.insight_title, &beat.insight_summary) {
|
||||
(Some(t), Some(s)) if !s.trim().is_empty() => {
|
||||
user.push_str(&format!(" Known context: {t} — {s}\n"));
|
||||
}
|
||||
@@ -65,10 +71,10 @@ pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (St
|
||||
}
|
||||
user.push_str(&format!(
|
||||
"\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
|
||||
{{\"title\": \"<short reel title>\", \"segments\": [\"<line for photo 1>\", \
|
||||
\"<line for photo 2>\", ... ]}}\n\
|
||||
The \"segments\" array MUST have exactly {} items, one per photo in order.",
|
||||
planned.len()
|
||||
{{\"title\": \"<short reel title>\", \"segments\": [\"<line for moment 1>\", \
|
||||
\"<line for moment 2>\", ... ]}}\n\
|
||||
The \"segments\" array MUST have exactly {} items, one per moment in order.",
|
||||
beats.len()
|
||||
));
|
||||
(SYSTEM_PROMPT.to_string(), user)
|
||||
}
|
||||
@@ -174,20 +180,20 @@ fn clean_text(s: &str) -> String {
|
||||
trimmed.split_whitespace().collect::<Vec<_>>().join(" ")
|
||||
}
|
||||
|
||||
/// Generate the reel script via the LLM. Text-only (no images) — the per-photo
|
||||
/// Generate the reel script via the LLM. Text-only (no images) — the per-beat
|
||||
/// context comes from cached insights. The call takes the GPU read lease
|
||||
/// internally (see `LlamaCppClient::generate`).
|
||||
pub async fn generate_script(
|
||||
client: &Arc<LlamaCppClient>,
|
||||
meta: &ReelMeta,
|
||||
planned: &[PlannedSegment],
|
||||
beats: &[PlannedBeat],
|
||||
) -> Result<ReelScript> {
|
||||
let (system, user) = build_script_messages(meta, planned);
|
||||
let (system, user) = build_script_messages(meta, beats);
|
||||
let raw = client
|
||||
.generate(&user, Some(&system), None)
|
||||
.await
|
||||
.context("LLM script generation failed")?;
|
||||
Ok(parse_script_response(&raw, planned.len()))
|
||||
Ok(parse_script_response(&raw, beats.len()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -202,13 +208,13 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn planned(n: usize) -> Vec<PlannedSegment> {
|
||||
fn planned(n: usize) -> Vec<PlannedBeat> {
|
||||
(0..n)
|
||||
.map(|i| PlannedSegment {
|
||||
media: super::super::SegmentMedia::Photo {
|
||||
.map(|i| PlannedBeat {
|
||||
photos: vec![super::super::SegmentMedia::Photo {
|
||||
rel_path: format!("p{i}.jpg"),
|
||||
library_id: 1,
|
||||
},
|
||||
}],
|
||||
date: Some(1_560_000_000 + i as i64 * 86_400),
|
||||
insight_title: None,
|
||||
insight_summary: None,
|
||||
@@ -217,16 +223,37 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prompt_states_exact_segment_count_and_span() {
|
||||
fn prompt_states_exact_moment_count_and_span() {
|
||||
let (sys, user) = build_script_messages(&meta(), &planned(3));
|
||||
assert!(sys.contains("memory reel"));
|
||||
assert!(user.contains("3 photos"));
|
||||
assert!(user.contains("3 moments"));
|
||||
assert!(user.contains("on this day"));
|
||||
assert!(user.contains("exactly 3 items"));
|
||||
// Each photo gets an indexed entry.
|
||||
// Each moment gets an indexed entry.
|
||||
assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]"));
|
||||
}
|
||||
|
||||
#[test]
fn prompt_notes_burst_photo_count() {
    // A single beat that carries three photos must be announced to the LLM
    // as a burst, with the photo count spelled out.
    let mut beats = planned(1);
    beats[0].photos = ["a.jpg", "b.jpg", "c.jpg"]
        .iter()
        .map(|name| super::super::SegmentMedia::Photo {
            rel_path: name.to_string(),
            library_id: 1,
        })
        .collect();
    let (_, user_msg) = build_script_messages(&meta(), &beats);
    assert!(user_msg.contains("a burst of 3 photos"));
}
|
||||
|
||||
#[test]
|
||||
fn prompt_includes_insight_context_when_present() {
|
||||
let mut p = planned(1);
|
||||
|
||||
+229
-33
@@ -13,18 +13,51 @@ use std::sync::Mutex;
|
||||
|
||||
use chrono::{DateTime, Datelike, FixedOffset};
|
||||
|
||||
use super::{PlannedSegment, ReelMeta, SegmentMedia};
|
||||
use super::{PlannedBeat, ReelMeta, SegmentMedia};
|
||||
use crate::database::{ExifDao, InsightDao};
|
||||
use crate::file_types::is_image_file;
|
||||
use crate::memories::{self, MemoriesSpan};
|
||||
use crate::state::AppState;
|
||||
|
||||
/// Default and hard caps on how many photos a reel covers. The cap bounds the
|
||||
/// LLM/TTS/ffmpeg work per reel; when a span has more, [`sample_evenly`] keeps
|
||||
/// a representative spread across the years rather than just the oldest.
|
||||
pub const DEFAULT_MAX_SEGMENTS: usize = 24;
|
||||
/// Default and hard caps on how many photos a reel covers. The default is an
|
||||
/// upper bound on the request; the effective count is usually smaller, set by
|
||||
/// the duration budget (see [`budget_segments`]). The hard cap bounds work per
|
||||
/// reel regardless.
|
||||
pub const DEFAULT_MAX_SEGMENTS: usize = 40;
|
||||
pub const HARD_MAX_SEGMENTS: usize = 40;
|
||||
|
||||
/// Target reel length. Week and especially month spans can surface hundreds of
|
||||
/// photos; at a few seconds of narration each, a naive reel runs minutes. We
|
||||
/// cap the segment count to keep the reel near this length. Tunable via
|
||||
/// `REEL_TARGET_SECONDS`.
|
||||
const DEFAULT_TARGET_REEL_SECONDS: f64 = 90.0;
|
||||
|
||||
/// Rough average wall-time per photo segment (a short narration line + the
|
||||
/// silent tail). Only used to turn the duration target into a segment count;
|
||||
/// the real per-segment time is the measured narration length.
|
||||
const EST_SECONDS_PER_SEGMENT: f64 = 5.0;
|
||||
|
||||
/// Time gap that separates one "event/moment" from the next when clustering a
|
||||
/// span's photos. Photos within a few hours are treated as the same occasion
|
||||
/// (and across years/days the gaps are far larger, so each instance clusters
|
||||
/// on its own). 4 hours splits e.g. a morning hike from an evening dinner.
|
||||
const EVENT_GAP_SECONDS: i64 = 4 * 3600;
|
||||
|
||||
fn target_reel_seconds() -> f64 {
|
||||
std::env::var("REEL_TARGET_SECONDS")
|
||||
.ok()
|
||||
.and_then(|s| s.trim().parse::<f64>().ok())
|
||||
.filter(|x| x.is_finite() && *x > 0.0)
|
||||
.unwrap_or(DEFAULT_TARGET_REEL_SECONDS)
|
||||
}
|
||||
|
||||
/// How many photo segments fit the duration budget, bounded by the request's
|
||||
/// max and the hard cap. This is what keeps week/month reels from running long.
|
||||
pub fn budget_segments(requested_max: usize) -> usize {
|
||||
let by_budget = (target_reel_seconds() / EST_SECONDS_PER_SEGMENT).floor() as usize;
|
||||
by_budget.min(requested_max).clamp(1, HARD_MAX_SEGMENTS)
|
||||
}
|
||||
|
||||
/// What a reel is built from. v1 ships the memories (on this day/week/month)
|
||||
/// selector; tag and date-range variants slot in here later.
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -81,6 +114,104 @@ pub fn sample_evenly<T: Clone>(items: &[T], max: usize) -> Vec<T> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Group time-sorted items into events by gap: a new event starts whenever the
|
||||
/// jump from the previous photo exceeds `gap_seconds`. Preserves order; items
|
||||
/// without a timestamp extend the current event.
|
||||
fn cluster_by_gap(
|
||||
items: &[memories::MemoryItem],
|
||||
gap_seconds: i64,
|
||||
) -> Vec<Vec<memories::MemoryItem>> {
|
||||
let mut clusters: Vec<Vec<memories::MemoryItem>> = Vec::new();
|
||||
let mut prev_ts: Option<i64> = None;
|
||||
for it in items {
|
||||
let starts_new = match (prev_ts, it.created) {
|
||||
(Some(p), Some(c)) => c - p > gap_seconds,
|
||||
_ => false,
|
||||
};
|
||||
if starts_new || clusters.is_empty() {
|
||||
clusters.push(Vec::new());
|
||||
}
|
||||
clusters.last_mut().unwrap().push(it.clone());
|
||||
if let Some(c) = it.created {
|
||||
prev_ts = Some(c);
|
||||
}
|
||||
}
|
||||
clusters
|
||||
}
|
||||
|
||||
/// Most photos a single beat will flash through. Bounds the burst so one huge
|
||||
/// event doesn't dominate, and keeps each photo on screen long enough to
|
||||
/// register at the per-beat narration length (see render's beat timing).
|
||||
pub const MAX_BURST_PHOTOS: usize = 10;
|
||||
|
||||
/// Merge a list of (time-ordered) event clusters into exactly `n` contiguous
|
||||
/// groups, so a span with more events than the beat budget still covers the
|
||||
/// whole timeline — adjacent events fold together into one beat rather than
|
||||
/// getting dropped. `n` must be ≥ 1 and ≤ clusters.len().
|
||||
fn partition_into_groups(
|
||||
clusters: Vec<Vec<memories::MemoryItem>>,
|
||||
n: usize,
|
||||
) -> Vec<Vec<memories::MemoryItem>> {
|
||||
let c = clusters.len();
|
||||
let mut clusters = clusters.into_iter();
|
||||
(0..n)
|
||||
.map(|j| {
|
||||
// Even contiguous split of c clusters into n groups.
|
||||
let start = j * c / n;
|
||||
let end = (j + 1) * c / n;
|
||||
let take = end.saturating_sub(start).max(1);
|
||||
(0..take)
|
||||
.flat_map(|_| clusters.next().into_iter().flatten())
|
||||
.collect()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Turn a span's photos into `n_beats` beats. Clusters photos into events by
|
||||
/// time gap; if there are more events than beats, adjacent events are merged so
|
||||
/// the whole span is still covered. Each beat then flashes up to
|
||||
/// `max_burst` photos (an even spread of its group) under one narration line —
|
||||
/// so a week/month reel *shows* all its moments without a narrated (and timed)
|
||||
/// segment per photo.
|
||||
pub fn form_beats(
|
||||
items: &[memories::MemoryItem],
|
||||
n_beats: usize,
|
||||
max_burst: usize,
|
||||
) -> Vec<PlannedBeat> {
|
||||
if n_beats == 0 || items.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
let clusters = cluster_by_gap(items, EVENT_GAP_SECONDS);
|
||||
// One beat per event when they fit; otherwise fold adjacent events together
|
||||
// into exactly n_beats groups.
|
||||
let groups = if clusters.len() <= n_beats {
|
||||
clusters
|
||||
} else {
|
||||
partition_into_groups(clusters, n_beats)
|
||||
};
|
||||
|
||||
groups
|
||||
.into_iter()
|
||||
.filter(|g| !g.is_empty())
|
||||
.map(|group| {
|
||||
let shown = sample_evenly(&group, max_burst);
|
||||
let date = shown.first().and_then(|it| it.created);
|
||||
PlannedBeat {
|
||||
photos: shown
|
||||
.into_iter()
|
||||
.map(|it| SegmentMedia::Photo {
|
||||
rel_path: it.path,
|
||||
library_id: it.library_id,
|
||||
})
|
||||
.collect(),
|
||||
date,
|
||||
insight_title: None,
|
||||
insight_summary: None,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Cheap pass: resolve the selector into an ordered list of media (no insight
|
||||
/// lookups yet) plus reel metadata. `Err` only on an invalid library param.
|
||||
pub fn resolve(
|
||||
@@ -88,7 +219,7 @@ pub fn resolve(
|
||||
exif_dao: &Mutex<Box<dyn ExifDao>>,
|
||||
span_context: &opentelemetry::Context,
|
||||
selector: &ReelSelector,
|
||||
) -> Result<(Vec<PlannedSegment>, ReelMeta), String> {
|
||||
) -> Result<(Vec<PlannedBeat>, ReelMeta), String> {
|
||||
match selector {
|
||||
ReelSelector::Memories {
|
||||
span,
|
||||
@@ -108,32 +239,23 @@ pub fn resolve(
|
||||
)?;
|
||||
|
||||
// Phase 1 is photos-only: drop videos (a clip segment type lands
|
||||
// in phase 2). Filter before sampling so the spread is over the
|
||||
// photos that will actually appear.
|
||||
// in phase 2).
|
||||
let items: Vec<memories::MemoryItem> = items
|
||||
.into_iter()
|
||||
.filter(|it| is_image_file(Path::new(&it.path)))
|
||||
.collect();
|
||||
|
||||
let cap = (*max_segments).clamp(1, HARD_MAX_SEGMENTS);
|
||||
let items = sample_evenly(&items, cap);
|
||||
|
||||
// Years are derived from the whole span (what the reel represents),
|
||||
// before the budget narrows it down to beats.
|
||||
let years = distinct_years(&items, client_tz);
|
||||
let meta = ReelMeta { span: *span, years };
|
||||
|
||||
let planned = items
|
||||
.into_iter()
|
||||
.map(|it| PlannedSegment {
|
||||
media: SegmentMedia::Photo {
|
||||
rel_path: it.path,
|
||||
library_id: it.library_id,
|
||||
},
|
||||
date: it.created,
|
||||
insight_title: None,
|
||||
insight_summary: None,
|
||||
})
|
||||
.collect();
|
||||
Ok((planned, meta))
|
||||
// The budget caps the number of narrated beats (≈ reel length);
|
||||
// each beat then bursts through several photos, so the reel covers
|
||||
// the span's moments without running minutes long.
|
||||
let n_beats = budget_segments(*max_segments);
|
||||
let beats = form_beats(&items, n_beats, MAX_BURST_PHOTOS);
|
||||
Ok((beats, meta))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -155,24 +277,24 @@ fn distinct_years(items: &[memories::MemoryItem], tz: Option<FixedOffset>) -> Ve
|
||||
years
|
||||
}
|
||||
|
||||
/// Background pass: fill each segment's cached insight (title + summary) where
|
||||
/// one exists. Best-effort — a missing or errored lookup leaves the fields
|
||||
/// `None` and the scripter narrates from the date alone.
|
||||
/// Background pass: fill each beat's cached insight (title + summary) from its
|
||||
/// lead photo, where one exists. Best-effort — a missing or errored lookup
|
||||
/// leaves the fields `None` and the scripter narrates from the date alone.
|
||||
pub fn enrich(
|
||||
insight_dao: &Mutex<Box<dyn InsightDao>>,
|
||||
span_context: &opentelemetry::Context,
|
||||
planned: &mut [PlannedSegment],
|
||||
beats: &mut [PlannedBeat],
|
||||
) {
|
||||
let Ok(mut dao) = insight_dao.lock() else {
|
||||
return;
|
||||
};
|
||||
for seg in planned.iter_mut() {
|
||||
let rel_path = match &seg.media {
|
||||
SegmentMedia::Photo { rel_path, .. } => rel_path,
|
||||
for beat in beats.iter_mut() {
|
||||
let Some(SegmentMedia::Photo { rel_path, .. }) = beat.photos.first() else {
|
||||
continue;
|
||||
};
|
||||
if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) {
|
||||
seg.insight_title = Some(insight.title);
|
||||
seg.insight_summary = Some(insight.summary);
|
||||
beat.insight_title = Some(insight.title);
|
||||
beat.insight_summary = Some(insight.summary);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -249,4 +371,78 @@ mod tests {
|
||||
];
|
||||
assert_eq!(distinct_years(&items, None), vec![2019, 2021]);
|
||||
}
|
||||
|
||||
// Build an item at a given unix timestamp (seconds).
|
||||
fn item_at(ts: i64, name: &str) -> memories::MemoryItem {
|
||||
memories::MemoryItem {
|
||||
path: format!("{name}.jpg"),
|
||||
created: Some(ts),
|
||||
modified: None,
|
||||
library_id: 1,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn budget_segments_caps_to_duration_target() {
    // The expected numbers below are derived from the default 90s target.
    // `budget_segments` honours `REEL_TARGET_SECONDS`, so an operator/CI
    // override would make them wrong for reasons unrelated to the code.
    if std::env::var_os("REEL_TARGET_SECONDS").is_some() {
        return;
    }
    // 90s / 5s = 18, bounded by the request max and hard cap.
    assert_eq!(budget_segments(40), 18);
    assert_eq!(budget_segments(5), 5); // request asked for fewer
    assert_eq!(budget_segments(1000), 18); // hard cap / budget wins
    assert_eq!(budget_segments(0), 1); // never fewer than one beat
}
|
||||
|
||||
#[test]
fn cluster_by_gap_splits_on_large_jumps() {
    // a and b sit five minutes apart (one event); c lands roughly 27h later
    // and therefore opens a second event.
    let stamps = [(1_000_000, "a"), (1_000_300, "b"), (1_100_000, "c")];
    let items: Vec<memories::MemoryItem> = stamps
        .iter()
        .map(|&(ts, name)| item_at(ts, name))
        .collect();
    let sizes: Vec<usize> = cluster_by_gap(&items, EVENT_GAP_SECONDS)
        .iter()
        .map(Vec::len)
        .collect();
    assert_eq!(sizes, vec![2, 1]);
}
|
||||
|
||||
#[test]
fn form_beats_one_beat_per_event_when_they_fit() {
    // Three well-separated events against a budget of 10: every event keeps
    // its own beat and each beat holds all of its (few) photos. a and b are
    // 50s apart, so they share the first beat as a two-photo burst.
    let stamps = [(0, "a"), (50, "b"), (1_000_000, "c"), (2_000_000, "d")];
    let items: Vec<memories::MemoryItem> = stamps
        .iter()
        .map(|&(ts, name)| item_at(ts, name))
        .collect();
    let burst_sizes: Vec<usize> = form_beats(&items, 10, MAX_BURST_PHOTOS)
        .iter()
        .map(|beat| beat.photos.len())
        .collect();
    assert_eq!(burst_sizes, vec![2, 1, 1]);
}
|
||||
|
||||
#[test]
fn form_beats_merges_events_when_over_budget() {
    // Six distinct events squeezed into two beats: neighbours fold together,
    // and every event's photo is still shown (well under the burst cap).
    let mut items: Vec<memories::MemoryItem> = Vec::new();
    for i in 0..6i64 {
        items.push(item_at(i * 1_000_000, &format!("e{i}")));
    }
    let beats = form_beats(&items, 2, MAX_BURST_PHOTOS);
    assert_eq!(beats.len(), 2);

    let mut shown = 0usize;
    for beat in &beats {
        shown += beat.photos.len();
    }
    assert_eq!(shown, 6); // all six moments still shown across two beats
}
|
||||
|
||||
#[test]
fn form_beats_caps_burst_to_max() {
    // Thirty photos one second apart form a single dense event. Even with a
    // generous beat budget it stays one beat, and that beat shows only
    // MAX_BURST_PHOTOS of the thirty.
    let mut items: Vec<memories::MemoryItem> = Vec::new();
    for i in 0..30i64 {
        items.push(item_at(i, &format!("p{i}")));
    }
    let beats = form_beats(&items, 18, MAX_BURST_PHOTOS);
    assert_eq!(beats.len(), 1);
    assert_eq!(beats[0].photos.len(), MAX_BURST_PHOTOS);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user