Reels: burst beats + duration budget for week/month, plus step logging
Restructures a reel around beats — one narration line over one or more photos — instead of one line per photo. A single-photo beat is a held shot; a multi-photo beat is a quick burst that flashes through several moments of an event while the line is read. So a week/month reel can show everything it spans without a narrated (and timed) segment per photo. Selection (selector.rs): - Duration budget: cap the number of narrated beats to ~REEL_TARGET_SECONDS (default 90, env-tunable) so week/month reels don't run minutes long. - Event clustering by time gap; when there are more events than the beat budget, adjacent events merge so the whole span stays covered. Each beat bursts up to MAX_BURST_PHOTOS (an even spread), so a 40-shot dinner contributes a handful of quick frames, not forty narrated seconds. Render (render.rs): a beat renders its photos as a concat of per-photo fills (blurred-bg portrait, fps-before-fade) under one muxed narration; burst photos get a snappier fade. beat_durations splits the narration across the photos, stretching only if a long burst would flash too fast. Adds high-level info logs across the steps (request → script → per-beat narrate/render → join → done with elapsed) for visibility. Bumps RENDER_VERSION to re-render cached reels. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+195
-101
@@ -22,25 +22,31 @@ pub use crate::video::ffmpeg::is_nvenc_available;
|
||||
/// Reel canvas. Portrait, because reels are watched on a phone held upright —
|
||||
/// a landscape canvas letterboxes to a thin ~25%-height band there. Each photo
|
||||
/// is fitted sharp and centered over a blurred, zoomed copy of itself (see
|
||||
/// [`segment_filtergraph`]) so the frame is always filled regardless of the
|
||||
/// [`photo_filter_chain`]) so the frame is always filled regardless of the
|
||||
/// photo's orientation, without cropping the subject.
|
||||
pub const REEL_WIDTH: u32 = 1080;
|
||||
pub const REEL_HEIGHT: u32 = 1920;
|
||||
pub const REEL_FPS: u32 = 30;
|
||||
|
||||
/// A still's screen time is its narration length plus a short breath, with a
|
||||
/// floor so a terse line still lingers. No ceiling: the segment always covers
|
||||
/// the full narration so speech is never truncated — the scripter is asked to
|
||||
/// keep lines short instead.
|
||||
/// A beat's screen time is its narration length plus a short breath, with a
|
||||
/// floor so a terse line still lingers. No ceiling: the beat always covers the
|
||||
/// full narration so speech is never truncated — the scripter is asked to keep
|
||||
/// lines short instead.
|
||||
pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
|
||||
const NARRATION_TAIL_SECONDS: f64 = 0.6;
|
||||
|
||||
/// Quick fade in/out baked into each segment so concatenated photos dip
|
||||
/// smoothly instead of hard-cutting. The fade-out lands inside the narration's
|
||||
/// silent tail, so speech is never clipped.
|
||||
const FADE_SECONDS: f64 = 0.35;
|
||||
/// Fade durations baked into each photo. A held (single-photo) beat gets a
|
||||
/// gentle dip; burst photos get a snappier fade so the montage feels quick.
|
||||
const SINGLE_FADE_SECONDS: f64 = 0.35;
|
||||
const BURST_FADE_SECONDS: f64 = 0.15;
|
||||
|
||||
/// Screen time for a photo segment given its narration audio length.
|
||||
/// Floor on how long each burst photo stays up, so a short line over many photos
|
||||
/// doesn't flash them subliminally. If the narration is too short to give every
|
||||
/// photo this much, the beat is stretched to fit.
|
||||
const MIN_BURST_PHOTO_SECONDS: f64 = 0.6;
|
||||
|
||||
/// Base screen time for a beat given its narration length: narration + breath,
|
||||
/// floored. Used as the lower bound on a beat's total duration.
|
||||
pub fn segment_duration(narration_secs: f64) -> f64 {
|
||||
let d = narration_secs + NARRATION_TAIL_SECONDS;
|
||||
if d.is_finite() && d > MIN_SEGMENT_SECONDS {
|
||||
@@ -50,6 +56,29 @@ pub fn segment_duration(narration_secs: f64) -> f64 {
|
||||
}
|
||||
}
|
||||
|
||||
/// Split a beat into per-photo durations. The beat lasts at least its narration
|
||||
/// (so speech isn't cut) and at least `n × MIN_BURST_PHOTO_SECONDS` (so a fast
|
||||
/// burst stays legible); the photos share that total evenly. Returns
|
||||
/// `(total_seconds, per_photo_seconds)`.
|
||||
pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec<f64>) {
|
||||
let n = n_photos.max(1);
|
||||
let base = segment_duration(narration_secs);
|
||||
let min_total = n as f64 * MIN_BURST_PHOTO_SECONDS;
|
||||
let total = if base > min_total { base } else { min_total };
|
||||
let each = total / n as f64;
|
||||
(total, vec![each; n])
|
||||
}
|
||||
|
||||
/// Fade length to use for a beat of `n_photos` (gentle when held, snappy in a
|
||||
/// burst).
|
||||
fn fade_for(n_photos: usize) -> f64 {
|
||||
if n_photos > 1 {
|
||||
BURST_FADE_SECONDS
|
||||
} else {
|
||||
SINGLE_FADE_SECONDS
|
||||
}
|
||||
}
|
||||
|
||||
/// Options controlling per-segment rendering.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct SegmentOpts {
|
||||
@@ -70,38 +99,49 @@ impl Default for SegmentOpts {
|
||||
}
|
||||
}
|
||||
|
||||
/// Full `filter_complex` for one photo segment, producing labelled `[v]` (video)
|
||||
/// and `[a]` (audio) outputs. Input 0 is the looped still, input 1 the
|
||||
/// narration.
|
||||
/// Filter chain for one photo (input `idx`) producing the labelled output
|
||||
/// `[v{idx}]`. Splits the still into a background and foreground: the background
|
||||
/// is scaled to *cover* the canvas and heavily blurred; the foreground is
|
||||
/// scaled to *fit* and overlaid centered. This fills the portrait frame for any
|
||||
/// photo orientation — no black bars, no cropping of the subject — then a fade
|
||||
/// in/out softens the cut. Intermediate labels are suffixed with `idx` so
|
||||
/// several chains coexist in one `filter_complex`.
|
||||
///
|
||||
/// Video: split the still into a background and foreground. The background is
|
||||
/// scaled to *cover* the canvas and heavily blurred; the foreground is scaled to
|
||||
/// *fit* inside it and overlaid centered. This fills the portrait frame for any
|
||||
/// photo orientation — no black bars, no cropping of the subject — then a quick
|
||||
/// fade in/out softens the cut to the next segment.
|
||||
///
|
||||
/// Audio: pad the narration with trailing silence so a short line doesn't end
|
||||
/// the segment early; `-t` bounds it to the segment duration.
|
||||
pub fn segment_filtergraph(opts: &SegmentOpts, duration: f64) -> String {
|
||||
/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a
|
||||
/// true {fps}-frame timeline; otherwise the fade is sampled at the looped
|
||||
/// still's coarse cadence and duplicated up, which reads as a steppy dip.
|
||||
fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String {
|
||||
let (w, h, fps) = (opts.width, opts.height, opts.fps);
|
||||
// Fade-out begins one fade-length before the end; clamp so a floor-length
|
||||
// segment still gets a valid (non-negative) start time.
|
||||
let fade_out_start = (duration - FADE_SECONDS).max(0.0);
|
||||
// `fps` is normalized BEFORE the fades so the brightness ramp is computed
|
||||
// on a true {fps}-frame timeline. If fps came after, the fade would be
|
||||
// sampled at the looped still's coarse input cadence and then duplicated up
|
||||
// to {fps}, which reads as a steppy / low-frame-rate dip.
|
||||
let fade_out_start = (duration - fade).max(0.0);
|
||||
format!(
|
||||
"[0:v]split=2[bg][fg];\
|
||||
[bg]scale={w}:{h}:force_original_aspect_ratio=increase,\
|
||||
crop={w}:{h},boxblur=20:2[bgb];\
|
||||
[fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\
|
||||
[bgb][fgs]overlay=(W-w)/2:(H-h)/2,\
|
||||
"[{idx}:v]split=2[bg{idx}][fg{idx}];\
|
||||
[bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\
|
||||
crop={w}:{h},boxblur=20:2[bgb{idx}];\
|
||||
[fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\
|
||||
[bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\
|
||||
fps={fps},\
|
||||
fade=t=in:st=0:d={FADE_SECONDS},\
|
||||
fade=t=out:st={fade_out_start:.3}:d={FADE_SECONDS},\
|
||||
setsar=1,format=yuv420p[v];\
|
||||
[1:a]apad[a]"
|
||||
fade=t=in:st=0:d={fade},\
|
||||
fade=t=out:st={fade_out_start:.3}:d={fade},\
|
||||
setsar=1,format=yuv420p[v{idx}]"
|
||||
)
|
||||
}
|
||||
|
||||
/// Full `filter_complex` for a beat of `per_photo` durations: one chain per
|
||||
/// photo, concatenated into `[v]`, with the narration (the last input, index
|
||||
/// `per_photo.len()`) padded with trailing silence into `[a]`. A single-photo
|
||||
/// beat degenerates to one chain + `concat=n=1` (a passthrough).
|
||||
pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String {
|
||||
let n = per_photo.len().max(1);
|
||||
let fade = fade_for(n);
|
||||
let chains: Vec<String> = per_photo
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, &d)| photo_filter_chain(i, opts, d, fade))
|
||||
.collect();
|
||||
let concat_inputs: String = (0..n).map(|i| format!("[v{i}]")).collect();
|
||||
format!(
|
||||
"{chains};{concat_inputs}concat=n={n}:v=1:a=0[v];[{n}:a]apad[a]",
|
||||
chains = chains.join(";")
|
||||
)
|
||||
}
|
||||
|
||||
@@ -128,15 +168,16 @@ fn video_encoder_args(nvenc: bool) -> Vec<String> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Build the ffmpeg args that render one photo segment: a still looped for
|
||||
/// `duration` seconds, filled to the portrait canvas with a blurred backdrop
|
||||
/// (see [`segment_filtergraph`]) and the narration muxed in. `-t` bounds both
|
||||
/// streams to the segment length.
|
||||
pub fn build_segment_args(
|
||||
image_path: &str,
|
||||
/// Build the ffmpeg args that render one beat: each photo looped for its slice
|
||||
/// of the beat (filled to the portrait canvas with a blurred backdrop), the
|
||||
/// slices concatenated, and the single narration muxed over the whole thing.
|
||||
/// `total` bounds the output (and the apad'd audio) to the beat length.
|
||||
pub fn build_beat_args(
|
||||
image_paths: &[String],
|
||||
audio_path: &str,
|
||||
out_path: &str,
|
||||
duration: f64,
|
||||
per_photo: &[f64],
|
||||
total: f64,
|
||||
opts: &SegmentOpts,
|
||||
) -> Vec<String> {
|
||||
let fps = opts.fps.to_string();
|
||||
@@ -144,26 +185,33 @@ pub fn build_segment_args(
|
||||
if opts.nvenc {
|
||||
args.extend(["-hwaccel".into(), "cuda".into()]);
|
||||
}
|
||||
// One looped-still input per photo, each bounded to its slice by an input
|
||||
// `-t`; reading at the target `-framerate` gives the fades real frames to
|
||||
// ramp across.
|
||||
for (path, &dur) in image_paths.iter().zip(per_photo.iter()) {
|
||||
args.extend([
|
||||
"-framerate".into(),
|
||||
fps.clone(),
|
||||
"-loop".into(),
|
||||
"1".into(),
|
||||
"-t".into(),
|
||||
format!("{dur:.3}"),
|
||||
"-i".into(),
|
||||
path.clone(),
|
||||
]);
|
||||
}
|
||||
args.extend([
|
||||
// Read the looped still at the target rate so frames exist for the
|
||||
// fade to ramp across (paired with the in-graph `fps` and CFR output).
|
||||
"-framerate".into(),
|
||||
fps.clone(),
|
||||
"-loop".into(),
|
||||
"1".into(),
|
||||
"-i".into(),
|
||||
image_path.into(),
|
||||
"-i".into(),
|
||||
audio_path.into(),
|
||||
"-filter_complex".into(),
|
||||
segment_filtergraph(opts, duration),
|
||||
beat_filtergraph(opts, per_photo),
|
||||
"-map".into(),
|
||||
"[v]".into(),
|
||||
"-map".into(),
|
||||
"[a]".into(),
|
||||
"-t".into(),
|
||||
format!("{duration:.3}"),
|
||||
// Force constant frame rate so the segment (and the concatenated reel)
|
||||
format!("{total:.3}"),
|
||||
// Force constant frame rate so the beat (and the concatenated reel)
|
||||
// plays at a steady {fps} rather than a variable cadence.
|
||||
"-r".into(),
|
||||
fps,
|
||||
@@ -231,22 +279,33 @@ async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Render one photo segment to `out_path`.
|
||||
pub async fn render_segment(
|
||||
image_path: &Path,
|
||||
/// Render one beat to `out_path`: its photos shown in sequence (a held shot for
|
||||
/// one photo, a quick burst for several) under the single narration in
|
||||
/// `audio_path`, whose measured length sets the beat's pacing.
|
||||
pub async fn render_beat(
|
||||
image_paths: &[std::path::PathBuf],
|
||||
audio_path: &Path,
|
||||
out_path: &Path,
|
||||
duration: f64,
|
||||
narration_secs: f64,
|
||||
opts: &SegmentOpts,
|
||||
) -> Result<()> {
|
||||
let args = build_segment_args(
|
||||
&image_path.to_string_lossy(),
|
||||
if image_paths.is_empty() {
|
||||
bail!("render_beat called with no images");
|
||||
}
|
||||
let (total, per_photo) = beat_durations(narration_secs, image_paths.len());
|
||||
let paths: Vec<String> = image_paths
|
||||
.iter()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.collect();
|
||||
let args = build_beat_args(
|
||||
&paths,
|
||||
&audio_path.to_string_lossy(),
|
||||
&out_path.to_string_lossy(),
|
||||
duration,
|
||||
&per_photo,
|
||||
total,
|
||||
opts,
|
||||
);
|
||||
run_ffmpeg(&args, "segment render").await
|
||||
run_ffmpeg(&args, "beat render").await
|
||||
}
|
||||
|
||||
/// Join rendered segments into the final reel. Writes the concat list into the
|
||||
@@ -288,73 +347,108 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filtergraph_fills_portrait_with_blurred_bg_and_fitted_fg() {
|
||||
let g = segment_filtergraph(&SegmentOpts::default(), 4.0);
|
||||
// Background covers + blurs; foreground fits and is centered over it.
|
||||
assert!(g.contains("split=2[bg][fg]"));
|
||||
fn beat_durations_single_photo_matches_base() {
|
||||
let (total, per) = beat_durations(4.0, 1);
|
||||
assert!((total - 4.6).abs() < 1e-9); // narration + tail
|
||||
assert_eq!(per.len(), 1);
|
||||
assert!((per[0] - 4.6).abs() < 1e-9);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn beat_durations_burst_splits_evenly() {
|
||||
// 5 photos, narration 4.6s base → ~0.92s each (above the 0.6 floor).
|
||||
let (total, per) = beat_durations(4.0, 5);
|
||||
assert!((total - 4.6).abs() < 1e-9);
|
||||
assert_eq!(per.len(), 5);
|
||||
assert!((per.iter().sum::<f64>() - total).abs() < 1e-9);
|
||||
assert!(per.iter().all(|&d| d >= MIN_BURST_PHOTO_SECONDS));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn beat_durations_stretches_when_narration_too_short_for_burst() {
|
||||
// Floor narration (2.5s) over 10 photos would be 0.25s each — below the
|
||||
// legibility floor, so the beat stretches to 10 × 0.6 = 6s.
|
||||
let (total, per) = beat_durations(0.0, 10);
|
||||
assert!((total - 6.0).abs() < 1e-9);
|
||||
assert!(per.iter().all(|&d| (d - 0.6).abs() < 1e-9));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn beat_filtergraph_single_photo_fills_portrait_and_holds() {
|
||||
let (_t, per) = beat_durations(4.0, 1);
|
||||
let g = beat_filtergraph(&SegmentOpts::default(), &per);
|
||||
assert!(g.contains("[0:v]split=2[bg0][fg0]"));
|
||||
assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase"));
|
||||
assert!(g.contains("crop=1080:1920"));
|
||||
assert!(g.contains("boxblur"));
|
||||
assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease"));
|
||||
assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
|
||||
// Produces the labelled outputs build_segment_args maps.
|
||||
assert!(g.contains("[v]"));
|
||||
// Single photo → concat of one, gentle fade, audio is input 1.
|
||||
assert!(g.contains("concat=n=1:v=1:a=0[v]"));
|
||||
assert!(g.contains("d=0.35")); // SINGLE_FADE
|
||||
assert!(g.contains("[1:a]apad[a]"));
|
||||
assert!(g.contains("format=yuv420p"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filtergraph_fades_in_and_out_within_duration() {
|
||||
// 4s segment, 0.35s fade → fade-out starts at 3.65s.
|
||||
let g = segment_filtergraph(&SegmentOpts::default(), 4.0);
|
||||
assert!(g.contains("fade=t=in:st=0:d=0.35"));
|
||||
assert!(g.contains("fade=t=out:st=3.650:d=0.35"));
|
||||
fn beat_filtergraph_burst_chains_concats_and_snappy_fade() {
|
||||
let (_t, per) = beat_durations(4.0, 3);
|
||||
let g = beat_filtergraph(&SegmentOpts::default(), &per);
|
||||
// One chain per photo with index-suffixed labels.
|
||||
assert!(g.contains("[0:v]split") && g.contains("[1:v]split") && g.contains("[2:v]split"));
|
||||
// Concatenated in order, audio is the 4th input (index 3).
|
||||
assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]"));
|
||||
assert!(g.contains("[3:a]apad[a]"));
|
||||
// Burst uses the snappier fade.
|
||||
assert!(g.contains("d=0.15"));
|
||||
assert!(!g.contains("d=0.35"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filtergraph_normalizes_fps_before_fading() {
|
||||
// The fps filter must precede the fades, else the brightness ramp is
|
||||
// sampled at the still's coarse cadence and looks steppy.
|
||||
let g = segment_filtergraph(&SegmentOpts::default(), 4.0);
|
||||
fn beat_filtergraph_normalizes_fps_before_fading() {
|
||||
// fps must precede the fades on every chain (else the dip looks steppy).
|
||||
let (_t, per) = beat_durations(4.0, 1);
|
||||
let g = beat_filtergraph(&SegmentOpts::default(), &per);
|
||||
let fps_at = g.find("fps=30").expect("fps in graph");
|
||||
let fade_at = g.find("fade=t=in").expect("fade in graph");
|
||||
assert!(fps_at < fade_at);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filtergraph_fade_out_start_never_negative_at_floor() {
|
||||
// A floor-length segment shorter than a fade still yields st >= 0.
|
||||
let g = segment_filtergraph(&SegmentOpts::default(), 0.2);
|
||||
assert!(g.contains("fade=t=out:st=0.000:d=0.35"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn segment_args_loop_still_and_bound_with_t() {
|
||||
let args = build_segment_args(
|
||||
"/img.jpg",
|
||||
"/a.wav",
|
||||
fn beat_args_one_input_per_photo_plus_audio_bound_by_total() {
|
||||
let (total, per) = beat_durations(4.0, 2);
|
||||
let args = build_beat_args(
|
||||
&["/a.jpg".into(), "/b.jpg".into()],
|
||||
"/n.wav",
|
||||
"/out.mp4",
|
||||
4.0,
|
||||
&per,
|
||||
total,
|
||||
&SegmentOpts::default(),
|
||||
);
|
||||
let joined = args.join(" ");
|
||||
assert!(joined.contains("-framerate 30 -loop 1 -i /img.jpg"));
|
||||
assert!(joined.contains("-i /a.wav"));
|
||||
assert!(joined.contains("apad"));
|
||||
assert!(joined.contains("-t 4.000"));
|
||||
// Constant frame rate forced on the output.
|
||||
// A looped-still input per photo, each with its slice -t, then the audio.
|
||||
assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg"));
|
||||
assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg"));
|
||||
assert!(joined.contains("-i /n.wav"));
|
||||
// Output bounded to the beat total and forced CFR.
|
||||
assert!(joined.contains("-t 4.600"));
|
||||
assert!(joined.contains("-r 30"));
|
||||
assert!(joined.contains("libx264"));
|
||||
assert!(joined.ends_with("/out.mp4"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn segment_args_use_nvenc_and_cuda_when_enabled() {
|
||||
fn beat_args_use_nvenc_and_cuda_when_enabled() {
|
||||
let opts = SegmentOpts {
|
||||
nvenc: true,
|
||||
..SegmentOpts::default()
|
||||
};
|
||||
let args = build_segment_args("/img.jpg", "/a.wav", "/out.mp4", 3.0, &opts);
|
||||
let (total, per) = beat_durations(3.0, 1);
|
||||
let args = build_beat_args(
|
||||
&["/img.jpg".into()],
|
||||
"/a.wav",
|
||||
"/out.mp4",
|
||||
&per,
|
||||
total,
|
||||
&opts,
|
||||
);
|
||||
let joined = args.join(" ");
|
||||
assert!(joined.contains("-hwaccel cuda"));
|
||||
assert!(joined.contains("h264_nvenc"));
|
||||
|
||||
Reference in New Issue
Block a user