Reels: mixed-media (video clip beats) + faster burst fade

Videos in a span now appear as clip beats: the first few seconds of the video (capped at CLIP_SECONDS=5 and at the source length), filled to the portrait canvas like photos, with the clip's live audio ducked under the narration (volume 0.35, then amix). If the narration outlasts the clip, the last frame is held (tpad); clips with no audio track just play under the narration. Selection splits the beat budget between photo beats and clip beats — clips get up to half (≥1 when present), photos the rest — then merges both back into chronological order. SegmentMedia gains a Clip variant; beats carry `media` (photos or one clip), and the cache key is tagged P/C so that the same path used as a still and as a clip gets different keys. Also drops the burst fade from 0.15s to 0.08s so a quick burst reads clearly differently from a held shot. Bumps RENDER_VERSION. The clip filtergraph (fill + duck-mix + last-frame hold) is unit-tested but, like the rest of the ffmpeg path, wants a real render check on the GPU host. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-13 00:02:51 -04:00
parent 299e32b014
commit 65793a2dda
4 changed files with 479 additions and 91 deletions
@@ -36,9 +36,15 @@ pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
 const NARRATION_TAIL_SECONDS: f64 = 0.6;

 /// Fade durations baked into each photo. A held (single-photo) beat gets a
-/// gentle dip; burst photos get a snappier fade so the montage feels quick.
+/// gentle dip; burst photos get a much snappier fade so the difference between
+/// a held shot and a quick burst is obvious.
 const SINGLE_FADE_SECONDS: f64 = 0.35;
-const BURST_FADE_SECONDS: f64 = 0.15;
+const BURST_FADE_SECONDS: f64 = 0.08;
+
+/// Video-clip framing: the cap, in seconds, on how much of a source video a clip beat
+/// reads (see `render_clip_beat`); the clip's live audio is ducked under the narration.
+pub const CLIP_SECONDS: f64 = 5.0;
+const CLIP_DUCK_VOLUME: f64 = 0.35; // value given to ffmpeg's `volume` filter on the clip audio

 /// Floor on how long each burst photo stays up, so a long line over many photos
 /// doesn't flash them subliminally. If the narration is too short to give every
@@ -308,6 +314,162 @@ pub async fn render_beat(
    run_ffmpeg(&args, "beat render").await
 }

+// --- Video-clip beats --------------------------------------------------------
+
+/// Video half of a clip beat's filtergraph: reads `[0:v]`, writes `[v]`. A blurred, cropped copy
+/// fills the `opts.width` x `opts.height` canvas behind the aspect-fitted clip; the result is set
+/// to `opts.fps`, extended by `beat_total - clip_dur` of cloned last frame (`tpad`), then faded.
+fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String {
+    let (w, h, fps) = (opts.width, opts.height, opts.fps);
+    let fade = SINGLE_FADE_SECONDS; // clip beats reuse the held-shot fade length
+    let hold = (beat_total - clip_dur).max(0.0); // seconds to freeze; 0 when the clip covers the beat
+    let fade_out_start = (beat_total - fade).max(0.0); // so the fade-out ends at `beat_total`; clamped at 0
+    // Clone the final frame for `hold` seconds; a gap of 0.05s or less gets no `tpad` stage.
+    let tpad = if hold > 0.05 {
+        format!(",tpad=stop_mode=clone:stop_duration={hold:.3}")
+    } else {
+        String::new()
+    };
+    format!(
+        "[0:v]split=2[bg][fg];\
+         [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\
+         crop={w}:{h},boxblur=20:2[bgb];\
+         [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\
+         [bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\
+         fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\
+         setsar=1,format=yuv420p[v]"
+    )
+}
+
+/// Audio half of a clip beat's filtergraph; always writes `[a]`. If `has_audio`, `[0:a]` goes
+/// through `volume=CLIP_DUCK_VOLUME` and is `amix`ed with the padded narration; else `[1:a]` alone.
+fn clip_audio_filter(has_audio: bool) -> String {
+    if has_audio {
+        format!(
+            "[0:a]volume={CLIP_DUCK_VOLUME}[duck];[1:a]apad[narr];\
+             [duck][narr]amix=inputs=2:duration=longest:normalize=0[a]"
+        )
+    } else {
+        "[1:a]apad[a]".to_string() // narration only, still passed through `apad`
+    }
+}
+
+/// Full `filter_complex` for a clip beat (input 0 = clip, input 1 = narration): video chain, `;`, audio chain.
+pub fn clip_beat_filtergraph(
+    opts: &SegmentOpts,
+    clip_dur: f64,
+    beat_total: f64,
+    has_audio: bool,
+) -> String {
+    format!(
+        "{};{}",
+        clip_video_filter(opts, clip_dur, beat_total), // ends in the `[v]` pad
+        clip_audio_filter(has_audio) // ends in the `[a]` pad
+    )
+}
+
+/// Build the ffmpeg argv for a clip beat. Input 0 is `clip_path`, read for at most `clip_dur`
+/// seconds; input 1 is `audio_path` (the narration). The filtergraph's `[v]`/`[a]` pads are mapped
+/// to `out_path`, cut at `beat_total` seconds, at `opts.fps`, with AAC audio (160k, 48000 Hz).
+pub fn build_clip_beat_args(
+    clip_path: &str,
+    audio_path: &str,
+    out_path: &str,
+    clip_dur: f64,
+    beat_total: f64,
+    has_audio: bool,
+    opts: &SegmentOpts,
+) -> Vec<String> {
+    let fps = opts.fps.to_string();
+    let mut args: Vec<String> = vec!["-y".into()];
+    if opts.nvenc {
+        args.extend(["-hwaccel".into(), "cuda".into()]); // placed ahead of the first `-i` (the clip)
+    }
+    args.extend([
+        // This `-t` comes before `-i <clip>`, so it bounds the clip read; the narration input has no `-t`.
+        "-t".into(),
+        format!("{clip_dur:.3}"),
+        "-i".into(),
+        clip_path.into(),
+        "-i".into(),
+        audio_path.into(),
+        "-filter_complex".into(),
+        clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio),
+        "-map".into(),
+        "[v]".into(),
+        "-map".into(),
+        "[a]".into(),
+        "-t".into(), // output-side `-t`: bounds the whole beat
+        format!("{beat_total:.3}"),
+        "-r".into(),
+        fps,
+    ]);
+    args.extend(video_encoder_args(opts.nvenc)); // video codec flags come from a helper outside this view
+    args.extend(
+        ["-c:a", "aac", "-b:a", "160k", "-ar", "48000"]
+            .iter()
+            .map(|s| s.to_string()),
+    );
+    args.push(out_path.into()); // the output path is always the final argument
+    args
+}
+
+/// Whether `ffprobe` lists at least one audio stream for `path` (decides if a clip beat mixes in
+/// live audio). `false` if `ffprobe` cannot be run or prints nothing; its exit status is not checked.
+pub async fn has_audio_stream(path: &str) -> bool {
+    Command::new("ffprobe")
+        .args([
+            "-v",
+            "error",
+            "-select_streams",
+            "a", // audio streams only
+            "-show_entries",
+            "stream=index",
+            "-of",
+            "csv=p=0",
+            path,
+        ])
+        .output()
+        .await
+        .map(|out| !out.stdout.is_empty()) // any stdout at all counts as "has audio"
+        .unwrap_or(false) // failure to run the command counts as "no audio"
+}
+
+/// Render one clip beat to `out_path`: the start of `clip_path` (at most [`CLIP_SECONDS`], or the
+/// probed source length if shorter) under the narration in `audio_path`. The beat runs for the
+/// longer of the clip and `segment_duration(narration_secs)`; the result is `run_ffmpeg`'s.
+pub async fn render_clip_beat(
+    clip_path: &Path,
+    audio_path: &Path,
+    out_path: &Path,
+    narration_secs: f64,
+    opts: &SegmentOpts,
+) -> Result<()> {
+    let clip_str = clip_path.to_string_lossy().to_string();
+    // Use the source length only when it is positive and under the cap; otherwise use the cap.
+    let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str)
+        .await
+        .ok() // a probe error is dropped rather than propagated
+        .flatten();
+    let clip_dur = match source_dur {
+        Some(d) if d > 0.0 && d < CLIP_SECONDS => d,
+        _ => CLIP_SECONDS, // unknown, non-positive, or long enough: fall back to the cap
+    };
+    let beat_total = clip_dur.max(segment_duration(narration_secs));
+    let has_audio = has_audio_stream(&clip_str).await; // selects the ducked-mix or narration-only audio chain
+
+    let args = build_clip_beat_args(
+        &clip_str,
+        &audio_path.to_string_lossy(),
+        &out_path.to_string_lossy(),
+        clip_dur,
+        beat_total,
+        has_audio,
+        opts,
+    );
+    run_ffmpeg(&args, "clip beat render").await
+}
+
 /// Join rendered segments into the final reel. Writes the concat list into the
 /// same directory as the output so relative paths and cleanup stay local.
 pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> {
@@ -397,8 +559,8 @@ mod tests {
        // Concatenated in order, audio is the 4th input (index 3).
        assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]"));
        assert!(g.contains("[3:a]apad[a]"));
-        // Burst uses the snappier fade.
-        assert!(g.contains("d=0.15"));
+        // Burst uses the much snappier fade (vs 0.35 for a held shot).
+        assert!(g.contains("d=0.08"));
        assert!(!g.contains("d=0.35"));
    }

@@ -455,6 +617,54 @@ mod tests {
        assert!(!joined.contains("libx264"));
    }

+    #[test]
+    fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() {
+        // 5s clip in a 7s beat: expect a 2s last-frame hold plus the ducked clip audio mixed with narration.
+        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true);
+        assert!(g.contains("tpad=stop_mode=clone:stop_duration=2.000")); // hold = 7.0 - 5.0
+        assert!(g.contains("volume=0.35")); // CLIP_DUCK_VOLUME
+        assert!(g.contains("amix=inputs=2"));
+        assert!(g.contains("[1:a]apad[narr]")); // narration is padded before the mix
+        // The blurred-backdrop fill and the centered overlay are part of the clip chain as well.
+        assert!(g.contains("boxblur"));
+        assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
+    }
+
+    #[test]
+    fn clip_filter_no_tpad_when_clip_covers_the_beat() {
+        // Clip length equals the beat length, so the hold is 0 and no `tpad` stage is emitted.
+        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true);
+        assert!(!g.contains("tpad"));
+    }
+
+    #[test]
+    fn clip_filter_narration_only_without_clip_audio() {
+        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false); // has_audio = false
+        assert!(!g.contains("amix")); // nothing to mix with
+        assert!(!g.contains("volume=")); // no ducking stage
+        assert!(g.contains("[1:a]apad[a]")); // narration feeds `[a]` directly
+    }
+
+    #[test]
+    fn clip_beat_args_bound_clip_and_output() {
+        let args = build_clip_beat_args(
+            "/v.mp4",
+            "/n.wav",
+            "/out.mp4",
+            5.0, // clip_dur
+            6.6, // beat_total
+            true, // has_audio
+            &SegmentOpts::default(),
+        );
+        let joined = args.join(" ");
+        // The input-side `-t` sits directly before the clip's `-i`; the output-side `-t` carries the beat length.
+        assert!(joined.contains("-t 5.000 -i /v.mp4"));
+        assert!(joined.contains("-i /n.wav"));
+        assert!(joined.contains("-t 6.600"));
+        assert!(joined.contains("-r 30")); // implies the default `SegmentOpts` fps is 30 (defined outside this view)
+        assert!(joined.ends_with("/out.mp4"));
+    }
+
    #[test]
    fn concat_args_stream_copy_with_faststart_and_forced_muxer() {
        // Output goes to a .tmp path, so the muxer must be forced — ffmpeg