65793a2dda
Videos in a span now appear as clip beats: the first few seconds of the video (capped at CLIP_SECONDS=5, and to the source length) filled to the portrait canvas like photos, with its live audio ducked under the narration (amix at 0.35). If the narration outlasts the clip, the last frame is held (tpad); clips with no audio track just play under narration. Selection splits the beat budget between photo beats and clip beats — clips get up to half (≥1 when present), photos the rest — then merges both back into chronological order. SegmentMedia gains a Clip variant; beats carry `media` (photos or one clip) and the cache key tags P/C so a path used as a still vs a clip differ. Also drops the burst fade from 0.15s to 0.08s so a quick burst reads clearly differently from a held shot. Bumps RENDER_VERSION. The clip filtergraph (fill + duck-mix + last-frame hold) is unit-tested but, like the rest of the ffmpeg path, wants a real render check on the GPU host. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
695 lines
24 KiB
Rust
695 lines
24 KiB
Rust
//! ffmpeg assembly for memory reels.
|
||
//!
|
||
//! Two-stage, per-segment design: each segment is rendered to its own
|
||
//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments
|
||
//! are joined with the concat demuxer (stream copy, no re-encode). Rendering
|
||
//! per segment — rather than one monster filtergraph — keeps each ffmpeg
|
||
//! invocation simple to reason about, parallelizes naturally, and means a
|
||
//! video-clip segment type (phase 2) slots in as just a different per-segment
|
||
//! builder without touching the concat stage.
|
||
//!
|
||
//! The arg builders are pure (`Vec<String>` out) so the exact ffmpeg command
|
||
//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure.
|
||
|
||
use anyhow::{Context, Result, bail};
|
||
use std::path::Path;
|
||
use tokio::process::Command;
|
||
|
||
/// Re-exported so the reel pipeline reaches NVENC detection through this module
|
||
/// rather than depending on `video::ffmpeg` directly.
|
||
pub use crate::video::ffmpeg::is_nvenc_available;
|
||
|
||
/// Width of the reel canvas in pixels.
///
/// The canvas is portrait because reels are watched on a phone held upright,
/// where a landscape frame would letterbox into a thin band (~25% of the
/// screen height). [`photo_filter_chain`] fills the frame for any photo
/// orientation by laying the sharp, fitted photo over a blurred, zoomed copy
/// of itself, so the subject is never cropped.
pub const REEL_WIDTH: u32 = 1080;
/// Height of the reel canvas in pixels.
pub const REEL_HEIGHT: u32 = 1920;
/// Default frame rate of the reel canvas, in frames per second.
pub const REEL_FPS: u32 = 30;

/// Shortest screen time a beat may have, in seconds, so a terse line still
/// lingers. There is deliberately no matching ceiling: a beat always covers
/// its whole narration so speech is never truncated — the scripter is asked to
/// keep lines short instead.
pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
/// Short breath added after the narration when sizing a beat, in seconds.
const NARRATION_TAIL_SECONDS: f64 = 0.6;

/// Fade length baked into a held (single-photo) beat: a gentle dip.
const SINGLE_FADE_SECONDS: f64 = 0.35;
/// Fade length baked into each photo of a burst. Much snappier than
/// [`SINGLE_FADE_SECONDS`] so a quick burst reads clearly differently from a
/// held shot.
const BURST_FADE_SECONDS: f64 = 0.08;

/// Longest stretch of a source video that a clip beat plays, in seconds.
pub const CLIP_SECONDS: f64 = 5.0;
/// Volume the clip's live audio is ducked to under the narration.
const CLIP_DUCK_VOLUME: f64 = 0.35;

/// Minimum time each burst photo stays on screen, in seconds, so a long line
/// over many photos doesn't flash them subliminally. When the narration is too
/// short to give every photo this much, the beat is stretched to fit.
const MIN_BURST_PHOTO_SECONDS: f64 = 0.6;
|
||
|
||
/// Base screen time for a beat given its narration length: narration + breath,
|
||
/// floored. Used as the lower bound on a beat's total duration.
|
||
pub fn segment_duration(narration_secs: f64) -> f64 {
|
||
let d = narration_secs + NARRATION_TAIL_SECONDS;
|
||
if d.is_finite() && d > MIN_SEGMENT_SECONDS {
|
||
d
|
||
} else {
|
||
MIN_SEGMENT_SECONDS
|
||
}
|
||
}
|
||
|
||
/// Split a beat into per-photo durations. The beat lasts at least its narration
|
||
/// (so speech isn't cut) and at least `n × MIN_BURST_PHOTO_SECONDS` (so a fast
|
||
/// burst stays legible); the photos share that total evenly. Returns
|
||
/// `(total_seconds, per_photo_seconds)`.
|
||
pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec<f64>) {
|
||
let n = n_photos.max(1);
|
||
let base = segment_duration(narration_secs);
|
||
let min_total = n as f64 * MIN_BURST_PHOTO_SECONDS;
|
||
let total = if base > min_total { base } else { min_total };
|
||
let each = total / n as f64;
|
||
(total, vec![each; n])
|
||
}
|
||
|
||
/// Fade length to use for a beat of `n_photos` (gentle when held, snappy in a
|
||
/// burst).
|
||
fn fade_for(n_photos: usize) -> f64 {
|
||
if n_photos > 1 {
|
||
BURST_FADE_SECONDS
|
||
} else {
|
||
SINGLE_FADE_SECONDS
|
||
}
|
||
}
|
||
|
||
/// Options controlling per-segment rendering.
|
||
#[derive(Debug, Clone, Copy)]
|
||
pub struct SegmentOpts {
|
||
pub width: u32,
|
||
pub height: u32,
|
||
pub fps: u32,
|
||
pub nvenc: bool,
|
||
}
|
||
|
||
impl Default for SegmentOpts {
|
||
fn default() -> Self {
|
||
Self {
|
||
width: REEL_WIDTH,
|
||
height: REEL_HEIGHT,
|
||
fps: REEL_FPS,
|
||
nvenc: false,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Filter chain for one photo (input `idx`) producing the labelled output
|
||
/// `[v{idx}]`. Splits the still into a background and foreground: the background
|
||
/// is scaled to *cover* the canvas and heavily blurred; the foreground is
|
||
/// scaled to *fit* and overlaid centered. This fills the portrait frame for any
|
||
/// photo orientation — no black bars, no cropping of the subject — then a fade
|
||
/// in/out softens the cut. Intermediate labels are suffixed with `idx` so
|
||
/// several chains coexist in one `filter_complex`.
|
||
///
|
||
/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a
|
||
/// true {fps}-frame timeline; otherwise the fade is sampled at the looped
|
||
/// still's coarse cadence and duplicated up, which reads as a steppy dip.
|
||
fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String {
|
||
let (w, h, fps) = (opts.width, opts.height, opts.fps);
|
||
let fade_out_start = (duration - fade).max(0.0);
|
||
format!(
|
||
"[{idx}:v]split=2[bg{idx}][fg{idx}];\
|
||
[bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\
|
||
crop={w}:{h},boxblur=20:2[bgb{idx}];\
|
||
[fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\
|
||
[bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\
|
||
fps={fps},\
|
||
fade=t=in:st=0:d={fade},\
|
||
fade=t=out:st={fade_out_start:.3}:d={fade},\
|
||
setsar=1,format=yuv420p[v{idx}]"
|
||
)
|
||
}
|
||
|
||
/// Full `filter_complex` for a beat of `per_photo` durations: one chain per
|
||
/// photo, concatenated into `[v]`, with the narration (the last input, index
|
||
/// `per_photo.len()`) padded with trailing silence into `[a]`. A single-photo
|
||
/// beat degenerates to one chain + `concat=n=1` (a passthrough).
|
||
pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String {
|
||
let n = per_photo.len().max(1);
|
||
let fade = fade_for(n);
|
||
let chains: Vec<String> = per_photo
|
||
.iter()
|
||
.enumerate()
|
||
.map(|(i, &d)| photo_filter_chain(i, opts, d, fade))
|
||
.collect();
|
||
let concat_inputs: String = (0..n).map(|i| format!("[v{i}]")).collect();
|
||
format!(
|
||
"{chains};{concat_inputs}concat=n={n}:v=1:a=0[v];[{n}:a]apad[a]",
|
||
chains = chains.join(";")
|
||
)
|
||
}
|
||
|
||
/// Video encoder flags for a segment: `h264_nvenc` when `nvenc` is set,
/// `libx264` otherwise. Both force `yuv420p` output.
fn video_encoder_args(nvenc: bool) -> Vec<String> {
    // p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path.
    const NVENC_FLAGS: [&str; 8] = [
        "-c:v",
        "h264_nvenc",
        "-preset",
        "p4",
        "-cq",
        "23",
        "-pix_fmt",
        "yuv420p",
    ];
    const X264_FLAGS: [&str; 8] = [
        "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p",
    ];

    let flags = if nvenc { &NVENC_FLAGS } else { &X264_FLAGS };
    flags.iter().map(|&flag| String::from(flag)).collect()
}
|
||
|
||
/// Build the ffmpeg args that render one beat: each photo looped for its slice
|
||
/// of the beat (filled to the portrait canvas with a blurred backdrop), the
|
||
/// slices concatenated, and the single narration muxed over the whole thing.
|
||
/// `total` bounds the output (and the apad'd audio) to the beat length.
|
||
pub fn build_beat_args(
|
||
image_paths: &[String],
|
||
audio_path: &str,
|
||
out_path: &str,
|
||
per_photo: &[f64],
|
||
total: f64,
|
||
opts: &SegmentOpts,
|
||
) -> Vec<String> {
|
||
let fps = opts.fps.to_string();
|
||
let mut args: Vec<String> = vec!["-y".into()];
|
||
if opts.nvenc {
|
||
args.extend(["-hwaccel".into(), "cuda".into()]);
|
||
}
|
||
// One looped-still input per photo, each bounded to its slice by an input
|
||
// `-t`; reading at the target `-framerate` gives the fades real frames to
|
||
// ramp across.
|
||
for (path, &dur) in image_paths.iter().zip(per_photo.iter()) {
|
||
args.extend([
|
||
"-framerate".into(),
|
||
fps.clone(),
|
||
"-loop".into(),
|
||
"1".into(),
|
||
"-t".into(),
|
||
format!("{dur:.3}"),
|
||
"-i".into(),
|
||
path.clone(),
|
||
]);
|
||
}
|
||
args.extend([
|
||
"-i".into(),
|
||
audio_path.into(),
|
||
"-filter_complex".into(),
|
||
beat_filtergraph(opts, per_photo),
|
||
"-map".into(),
|
||
"[v]".into(),
|
||
"-map".into(),
|
||
"[a]".into(),
|
||
"-t".into(),
|
||
format!("{total:.3}"),
|
||
// Force constant frame rate so the beat (and the concatenated reel)
|
||
// plays at a steady {fps} rather than a variable cadence.
|
||
"-r".into(),
|
||
fps,
|
||
]);
|
||
args.extend(video_encoder_args(opts.nvenc));
|
||
args.extend(
|
||
["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"]
|
||
.iter()
|
||
.map(|s| s.to_string()),
|
||
);
|
||
args.push(out_path.into());
|
||
args
|
||
}
|
||
|
||
/// Produce the ffmpeg argument list that joins rendered segments losslessly
/// through the concat demuxer (stream copy, no re-encode).
///
/// `+faststart` moves the moov atom to the front so the reel starts streaming
/// immediately on the mobile client. The muxer is forced with `-f mp4` because
/// the output goes to a `.tmp` path (atomic publish) whose extension ffmpeg
/// cannot map to a format by itself.
pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec<String> {
    // Input side: read the list file with the concat demuxer.
    let input_side = ["-y", "-f", "concat", "-safe", "0", "-i", list_path];
    // Output side: copy streams, front-load the index, force the mp4 muxer.
    let output_side = ["-c", "copy", "-movflags", "+faststart", "-f", "mp4", out_path];

    input_side
        .into_iter()
        .chain(output_side)
        .map(String::from)
        .collect()
}
|
||
|
||
/// Render the body of the concat list file: one `file '<path>'` line per
/// segment, in order, each terminated by a newline. Single quotes inside a
/// path are escaped as `'\''` (close quote, escaped quote, reopen), per
/// ffmpeg's concat syntax.
pub fn build_concat_list(segment_paths: &[String]) -> String {
    segment_paths
        .iter()
        .map(|segment| format!("file '{}'\n", segment.replace('\'', r"'\''")))
        .collect()
}
|
||
|
||
async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
|
||
let output = Command::new("ffmpeg")
|
||
.args(args)
|
||
.output()
|
||
.await
|
||
.with_context(|| format!("spawning ffmpeg for {what}"))?;
|
||
if !output.status.success() {
|
||
bail!(
|
||
"ffmpeg {what} failed: {}",
|
||
String::from_utf8_lossy(&output.stderr)
|
||
);
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
/// Render one beat to `out_path`: its photos shown in sequence (a held shot for
|
||
/// one photo, a quick burst for several) under the single narration in
|
||
/// `audio_path`, whose measured length sets the beat's pacing.
|
||
pub async fn render_beat(
|
||
image_paths: &[std::path::PathBuf],
|
||
audio_path: &Path,
|
||
out_path: &Path,
|
||
narration_secs: f64,
|
||
opts: &SegmentOpts,
|
||
) -> Result<()> {
|
||
if image_paths.is_empty() {
|
||
bail!("render_beat called with no images");
|
||
}
|
||
let (total, per_photo) = beat_durations(narration_secs, image_paths.len());
|
||
let paths: Vec<String> = image_paths
|
||
.iter()
|
||
.map(|p| p.to_string_lossy().to_string())
|
||
.collect();
|
||
let args = build_beat_args(
|
||
&paths,
|
||
&audio_path.to_string_lossy(),
|
||
&out_path.to_string_lossy(),
|
||
&per_photo,
|
||
total,
|
||
opts,
|
||
);
|
||
run_ffmpeg(&args, "beat render").await
|
||
}
|
||
|
||
// --- Video-clip beats --------------------------------------------------------
|
||
|
||
/// Video chain for a clip beat: fill the clip to the portrait canvas (blurred
|
||
/// backdrop, same look as photos), normalize fps, hold the last frame if the
|
||
/// narration outlasts the clip (`tpad`), then fade. Produces `[v]`.
|
||
fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String {
|
||
let (w, h, fps) = (opts.width, opts.height, opts.fps);
|
||
let fade = SINGLE_FADE_SECONDS;
|
||
let hold = (beat_total - clip_dur).max(0.0);
|
||
let fade_out_start = (beat_total - fade).max(0.0);
|
||
// Freeze the final frame to cover narration that runs past the clip.
|
||
let tpad = if hold > 0.05 {
|
||
format!(",tpad=stop_mode=clone:stop_duration={hold:.3}")
|
||
} else {
|
||
String::new()
|
||
};
|
||
format!(
|
||
"[0:v]split=2[bg][fg];\
|
||
[bg]scale={w}:{h}:force_original_aspect_ratio=increase,\
|
||
crop={w}:{h},boxblur=20:2[bgb];\
|
||
[fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\
|
||
[bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\
|
||
fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\
|
||
setsar=1,format=yuv420p[v]"
|
||
)
|
||
}
|
||
|
||
/// Audio chain for a clip beat. With a clip audio track, duck it under the
|
||
/// narration and mix; without one, just the narration. Produces `[a]`.
|
||
fn clip_audio_filter(has_audio: bool) -> String {
|
||
if has_audio {
|
||
format!(
|
||
"[0:a]volume={CLIP_DUCK_VOLUME}[duck];[1:a]apad[narr];\
|
||
[duck][narr]amix=inputs=2:duration=longest:normalize=0[a]"
|
||
)
|
||
} else {
|
||
"[1:a]apad[a]".to_string()
|
||
}
|
||
}
|
||
|
||
/// Full `filter_complex` for a clip beat (input 0 = clip, input 1 = narration).
|
||
pub fn clip_beat_filtergraph(
|
||
opts: &SegmentOpts,
|
||
clip_dur: f64,
|
||
beat_total: f64,
|
||
has_audio: bool,
|
||
) -> String {
|
||
format!(
|
||
"{};{}",
|
||
clip_video_filter(opts, clip_dur, beat_total),
|
||
clip_audio_filter(has_audio)
|
||
)
|
||
}
|
||
|
||
/// Build the ffmpeg args for a clip beat: the first `clip_dur` seconds of the
|
||
/// source video, filled to the portrait canvas with its live audio ducked under
|
||
/// the narration, bounded to `beat_total`.
|
||
pub fn build_clip_beat_args(
|
||
clip_path: &str,
|
||
audio_path: &str,
|
||
out_path: &str,
|
||
clip_dur: f64,
|
||
beat_total: f64,
|
||
has_audio: bool,
|
||
opts: &SegmentOpts,
|
||
) -> Vec<String> {
|
||
let fps = opts.fps.to_string();
|
||
let mut args: Vec<String> = vec!["-y".into()];
|
||
if opts.nvenc {
|
||
args.extend(["-hwaccel".into(), "cuda".into()]);
|
||
}
|
||
args.extend([
|
||
// Input `-t` limits the clip to its window; audio has none (apad fills).
|
||
"-t".into(),
|
||
format!("{clip_dur:.3}"),
|
||
"-i".into(),
|
||
clip_path.into(),
|
||
"-i".into(),
|
||
audio_path.into(),
|
||
"-filter_complex".into(),
|
||
clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio),
|
||
"-map".into(),
|
||
"[v]".into(),
|
||
"-map".into(),
|
||
"[a]".into(),
|
||
"-t".into(),
|
||
format!("{beat_total:.3}"),
|
||
"-r".into(),
|
||
fps,
|
||
]);
|
||
args.extend(video_encoder_args(opts.nvenc));
|
||
args.extend(
|
||
["-c:a", "aac", "-b:a", "160k", "-ar", "48000"]
|
||
.iter()
|
||
.map(|s| s.to_string()),
|
||
);
|
||
args.push(out_path.into());
|
||
args
|
||
}
|
||
|
||
/// Whether a media file has at least one audio stream (so a clip beat knows
|
||
/// whether to mix in live audio). Defaults to `false` on any probe failure.
|
||
pub async fn has_audio_stream(path: &str) -> bool {
|
||
Command::new("ffprobe")
|
||
.args([
|
||
"-v",
|
||
"error",
|
||
"-select_streams",
|
||
"a",
|
||
"-show_entries",
|
||
"stream=index",
|
||
"-of",
|
||
"csv=p=0",
|
||
path,
|
||
])
|
||
.output()
|
||
.await
|
||
.map(|out| !out.stdout.is_empty())
|
||
.unwrap_or(false)
|
||
}
|
||
|
||
/// Render one clip beat: a section of `clip_path` (capped at [`CLIP_SECONDS`],
|
||
/// and to the source length) under the narration in `audio_path`. The beat
|
||
/// lasts at least the narration, freezing the clip's last frame if needed.
|
||
pub async fn render_clip_beat(
|
||
clip_path: &Path,
|
||
audio_path: &Path,
|
||
out_path: &Path,
|
||
narration_secs: f64,
|
||
opts: &SegmentOpts,
|
||
) -> Result<()> {
|
||
let clip_str = clip_path.to_string_lossy().to_string();
|
||
// Clamp the clip to its own length so a short video isn't padded to the cap.
|
||
let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str)
|
||
.await
|
||
.ok()
|
||
.flatten();
|
||
let clip_dur = match source_dur {
|
||
Some(d) if d > 0.0 && d < CLIP_SECONDS => d,
|
||
_ => CLIP_SECONDS,
|
||
};
|
||
let beat_total = clip_dur.max(segment_duration(narration_secs));
|
||
let has_audio = has_audio_stream(&clip_str).await;
|
||
|
||
let args = build_clip_beat_args(
|
||
&clip_str,
|
||
&audio_path.to_string_lossy(),
|
||
&out_path.to_string_lossy(),
|
||
clip_dur,
|
||
beat_total,
|
||
has_audio,
|
||
opts,
|
||
);
|
||
run_ffmpeg(&args, "clip beat render").await
|
||
}
|
||
|
||
/// Join rendered segments into the final reel. Writes the concat list into the
|
||
/// same directory as the output so relative paths and cleanup stay local.
|
||
pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> {
|
||
let list_path = out_path.with_extension("concat.txt");
|
||
let body = build_concat_list(segment_paths);
|
||
tokio::fs::write(&list_path, body)
|
||
.await
|
||
.context("writing concat list")?;
|
||
let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy());
|
||
let result = run_ffmpeg(&args, "concat").await;
|
||
let _ = tokio::fs::remove_file(&list_path).await;
|
||
result
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing computed durations.
    const EPS: f64 = 1e-9;

    /// Asserts that `haystack` contains every fragment in `needles`.
    fn assert_contains_all(haystack: &str, needles: &[&str]) {
        for needle in needles {
            assert!(
                haystack.contains(needle),
                "expected `{needle}` in `{haystack}`"
            );
        }
    }

    #[test]
    fn segment_duration_floors_short_lines() {
        // A one-word (or empty) narration still lingers at the floor.
        for narration in [0.5, 0.0] {
            assert_eq!(segment_duration(narration), MIN_SEGMENT_SECONDS);
        }
    }

    #[test]
    fn segment_duration_covers_full_narration_plus_tail() {
        // No ceiling: a long line gets its full length so speech isn't cut.
        for (narration, expected) in [(5.0, 5.6), (20.0, 20.6)] {
            assert!((segment_duration(narration) - expected).abs() < EPS);
        }
    }

    #[test]
    fn segment_duration_rejects_nonfinite() {
        for bad in [f64::NAN, f64::INFINITY] {
            assert_eq!(segment_duration(bad), MIN_SEGMENT_SECONDS);
        }
    }

    #[test]
    fn beat_durations_single_photo_matches_base() {
        let (total, slices) = beat_durations(4.0, 1);
        // Narration + tail, all of it given to the one photo.
        assert!((total - 4.6).abs() < EPS);
        assert_eq!(slices.len(), 1);
        assert!((slices[0] - 4.6).abs() < EPS);
    }

    #[test]
    fn beat_durations_burst_splits_evenly() {
        // 5 photos over a 4.6s base → ~0.92s each (above the 0.6 floor).
        let (total, slices) = beat_durations(4.0, 5);
        assert!((total - 4.6).abs() < EPS);
        assert_eq!(slices.len(), 5);
        let summed: f64 = slices.iter().sum();
        assert!((summed - total).abs() < EPS);
        assert!(slices.iter().all(|&secs| secs >= MIN_BURST_PHOTO_SECONDS));
    }

    #[test]
    fn beat_durations_stretches_when_narration_too_short_for_burst() {
        // The floor (2.5s) over 10 photos would be 0.25s each — below the
        // legibility floor, so the beat stretches to 10 × 0.6 = 6s.
        let (total, slices) = beat_durations(0.0, 10);
        assert!((total - 6.0).abs() < EPS);
        assert!(slices.iter().all(|&secs| (secs - 0.6).abs() < EPS));
    }

    #[test]
    fn beat_filtergraph_single_photo_fills_portrait_and_holds() {
        let (_, slices) = beat_durations(4.0, 1);
        let graph = beat_filtergraph(&SegmentOpts::default(), &slices);
        assert_contains_all(
            &graph,
            &[
                "[0:v]split=2[bg0][fg0]",
                "scale=1080:1920:force_original_aspect_ratio=increase",
                "crop=1080:1920",
                "scale=1080:1920:force_original_aspect_ratio=decrease",
                "overlay=(W-w)/2:(H-h)/2",
                // Single photo → concat of one, gentle fade, audio is input 1.
                "concat=n=1:v=1:a=0[v]",
                "d=0.35", // SINGLE_FADE
                "[1:a]apad[a]",
            ],
        );
    }

    #[test]
    fn beat_filtergraph_burst_chains_concats_and_snappy_fade() {
        let (_, slices) = beat_durations(4.0, 3);
        let graph = beat_filtergraph(&SegmentOpts::default(), &slices);
        // One chain per photo with index-suffixed labels.
        for i in 0..3 {
            assert!(graph.contains(&format!("[{i}:v]split")));
        }
        // Concatenated in order, audio is the 4th input (index 3).
        assert!(graph.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]"));
        assert!(graph.contains("[3:a]apad[a]"));
        // Burst uses the much snappier fade (vs 0.35 for a held shot).
        assert!(graph.contains("d=0.08"));
        assert!(!graph.contains("d=0.35"));
    }

    #[test]
    fn beat_filtergraph_normalizes_fps_before_fading() {
        // fps must precede the fades on every chain (else the dip looks steppy).
        let (_, slices) = beat_durations(4.0, 1);
        let graph = beat_filtergraph(&SegmentOpts::default(), &slices);
        let fps_pos = graph.find("fps=30").expect("fps in graph");
        let fade_pos = graph.find("fade=t=in").expect("fade in graph");
        assert!(fps_pos < fade_pos);
    }

    #[test]
    fn beat_args_one_input_per_photo_plus_audio_bound_by_total() {
        let (total, slices) = beat_durations(4.0, 2);
        let images = ["/a.jpg".to_string(), "/b.jpg".to_string()];
        let cmdline = build_beat_args(
            &images,
            "/n.wav",
            "/out.mp4",
            &slices,
            total,
            &SegmentOpts::default(),
        )
        .join(" ");
        assert_contains_all(
            &cmdline,
            &[
                // A looped-still input per photo, each with its slice -t, then
                // the audio.
                "-framerate 30 -loop 1 -t 2.300 -i /a.jpg",
                "-framerate 30 -loop 1 -t 2.300 -i /b.jpg",
                "-i /n.wav",
                // Output bounded to the beat total and forced CFR.
                "-t 4.600",
                "-r 30",
            ],
        );
        assert!(cmdline.ends_with("/out.mp4"));
    }

    #[test]
    fn beat_args_use_nvenc_and_cuda_when_enabled() {
        let gpu_opts = SegmentOpts {
            nvenc: true,
            ..SegmentOpts::default()
        };
        let (total, slices) = beat_durations(3.0, 1);
        let images = ["/img.jpg".to_string()];
        let cmdline =
            build_beat_args(&images, "/a.wav", "/out.mp4", &slices, total, &gpu_opts).join(" ");
        assert_contains_all(&cmdline, &["-hwaccel cuda", "h264_nvenc"]);
        assert!(!cmdline.contains("libx264"));
    }

    #[test]
    fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() {
        // 5s clip, 7s beat → 2s freeze of the last frame, ducked-audio mix.
        let graph = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true);
        assert_contains_all(
            &graph,
            &[
                "tpad=stop_mode=clone:stop_duration=2.000",
                "volume=0.35",
                "amix=inputs=2",
                "[1:a]apad[narr]",
                // Fill applied to the clip too.
                "boxblur",
                "overlay=(W-w)/2:(H-h)/2",
            ],
        );
    }

    #[test]
    fn clip_filter_no_tpad_when_clip_covers_the_beat() {
        // Clip at least as long as the beat → no freeze.
        let graph = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true);
        assert!(!graph.contains("tpad"));
    }

    #[test]
    fn clip_filter_narration_only_without_clip_audio() {
        let graph = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false);
        for absent in ["amix", "volume="] {
            assert!(!graph.contains(absent));
        }
        assert!(graph.contains("[1:a]apad[a]"));
    }

    #[test]
    fn clip_beat_args_bound_clip_and_output() {
        let cmdline = build_clip_beat_args(
            "/v.mp4",
            "/n.wav",
            "/out.mp4",
            5.0,
            6.6,
            true,
            &SegmentOpts::default(),
        )
        .join(" ");
        // Input -t bounds the clip read; output -t bounds the beat.
        assert_contains_all(
            &cmdline,
            &["-t 5.000 -i /v.mp4", "-i /n.wav", "-t 6.600", "-r 30"],
        );
        assert!(cmdline.ends_with("/out.mp4"));
    }

    #[test]
    fn concat_args_stream_copy_with_faststart_and_forced_muxer() {
        // Output goes to a .tmp path, so the muxer must be forced — ffmpeg
        // can't infer mp4 from the extension (the bug this guards against).
        let argv = build_concat_args("/tmp/list.txt", "/out.mp4.tmp");
        assert_contains_all(
            &argv.join(" "),
            &[
                "-f concat -safe 0 -i /tmp/list.txt",
                "-c copy",
                "+faststart",
                "-f mp4",
            ],
        );
        // The forced muxer must come before the output path.
        let muxer_at = argv
            .windows(2)
            .position(|pair| pair[0] == "-f" && pair[1] == "mp4")
            .unwrap();
        let output_at = argv.iter().position(|arg| arg == "/out.mp4.tmp").unwrap();
        assert!(muxer_at < output_at);
    }

    #[test]
    fn concat_list_escapes_single_quotes() {
        let segments = [
            "/tmp/seg_000.mp4".to_string(),
            "/tmp/own's dir/seg_001.mp4".to_string(),
        ];
        let body = build_concat_list(&segments);
        assert!(body.contains("file '/tmp/seg_000.mp4'\n"));
        // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax.
        assert!(body.contains(r"own'\''s"));
    }
}
|