// ImageApi/src/reels/render.rs

//! ffmpeg assembly for memory reels.
//!
//! Two-stage, per-segment design: each segment is rendered to its own
//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments
//! are joined with the concat demuxer (stream copy, no re-encode). Rendering
//! per segment — rather than one monster filtergraph — keeps each ffmpeg
//! invocation simple to reason about, parallelizes naturally, and means a
//! video-clip segment type (phase 2) slots in as just a different per-segment
//! builder without touching the concat stage.
//!
//! The arg builders are pure (`Vec<String>` out) so the exact ffmpeg command
//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure.

use anyhow::{Context, Result, bail};
use std::path::Path;
use tokio::process::Command;

/// Re-exported so the reel pipeline reaches NVENC detection through this module
/// rather than depending on `video::ffmpeg` directly.
pub use crate::video::ffmpeg::is_nvenc_available;

/// Reel canvas. Portrait, because reels are watched on a phone held upright —
/// a landscape canvas letterboxes to a thin ~25%-height band there. Each photo
/// is fitted sharp and centered over a blurred, zoomed copy of itself (see
/// [`photo_filter_chain`]) so the frame is always filled regardless of the
/// photo's orientation, without cropping the subject.
///
/// Canvas width in pixels; the default for [`SegmentOpts::width`].
pub const REEL_WIDTH: u32 = 1080;
/// Canvas height in pixels; the default for [`SegmentOpts::height`].
pub const REEL_HEIGHT: u32 = 1920;
/// Frame rate in frames per second; the default for [`SegmentOpts::fps`].
pub const REEL_FPS: u32 = 30;

/// A beat's screen time is its narration length plus a short breath, with a
/// floor so a terse line still lingers. No ceiling: the beat always covers the
/// full narration so speech is never truncated — the scripter is asked to keep
/// lines short instead.
///
/// This constant is that floor, in seconds (enforced by [`segment_duration`]).
pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
/// The "short breath": seconds of screen time added after the narration ends
/// (see [`segment_duration`]).
const NARRATION_TAIL_SECONDS: f64 = 0.6;

/// Fade durations baked into each photo. A held (single-photo) beat gets a
/// gentle dip; burst photos get a much snappier fade so the difference between
/// a held shot and a quick burst is obvious.
///
/// Fade length, in seconds, for a single-photo beat; clip beats use it too.
const SINGLE_FADE_SECONDS: f64 = 0.35;
/// Fade length, in seconds, for each photo of a multi-photo (burst) beat.
const BURST_FADE_SECONDS: f64 = 0.08;

/// Video-clip framing. A clip plays at most this long, with its live audio
/// ducked to `CLIP_DUCK_VOLUME` under the narration.
///
/// Upper bound, in seconds, on how much of a source clip is used.
pub const CLIP_SECONDS: f64 = 5.0;
/// Gain handed to ffmpeg's `volume` filter for the clip's own audio track when
/// it is mixed under the narration (see [`clip_audio_filter`]).
const CLIP_DUCK_VOLUME: f64 = 0.35;

/// Floor on how long each burst photo stays up, so a long line over many photos
/// doesn't flash them subliminally. If the narration is too short to give every
/// photo this much, the beat is stretched to fit.
///
/// Measured in seconds; applied by [`beat_durations`].
const MIN_BURST_PHOTO_SECONDS: f64 = 0.6;

/// Screen time for a beat whose narration runs `narration_secs` seconds.
///
/// Adds the trailing breath (`NARRATION_TAIL_SECONDS`) to the narration and
/// returns that sum when it is finite and longer than
/// [`MIN_SEGMENT_SECONDS`]. Every other input — short, negative, NaN or
/// infinite — yields the floor itself. There is no upper bound.
pub fn segment_duration(narration_secs: f64) -> f64 {
    match narration_secs + NARRATION_TAIL_SECONDS {
        padded if padded.is_finite() && padded > MIN_SEGMENT_SECONDS => padded,
        _ => MIN_SEGMENT_SECONDS,
    }
}

/// Work out how long a beat lasts and how long each of its photos is shown.
///
/// The beat's total is the larger of two floors: the narration-based length
/// from [`segment_duration`] (so speech isn't cut) and
/// `n × MIN_BURST_PHOTO_SECONDS` (so a fast burst stays legible). That total is
/// shared equally between the photos. A photo count of zero is treated as one.
///
/// Returns `(total_seconds, per_photo_seconds)`.
pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec<f64>) {
    // Treat "no photos" as one so the division below is always defined.
    let count = n_photos.max(1);
    let slots = count as f64;
    let narration_floor = segment_duration(narration_secs);
    let legibility_floor = slots * MIN_BURST_PHOTO_SECONDS;
    let total = narration_floor.max(legibility_floor);
    (total, vec![total / slots; count])
}

/// Pick the fade length for a beat showing `n_photos` photos: the gentle
/// `SINGLE_FADE_SECONDS` for a held shot (zero or one photo), the snappy
/// `BURST_FADE_SECONDS` as soon as there are two or more.
fn fade_for(n_photos: usize) -> f64 {
    match n_photos {
        0 | 1 => SINGLE_FADE_SECONDS,
        _ => BURST_FADE_SECONDS,
    }
}

/// Options controlling per-segment rendering.
///
/// The [`Default`] value is the portrait reel canvas at [`REEL_FPS`] with
/// software encoding.
#[derive(Debug, Clone, Copy)]
pub struct SegmentOpts {
    /// Output canvas width, substituted into the `scale`/`crop` filters.
    pub width: u32,
    /// Output canvas height, substituted into the `scale`/`crop` filters.
    pub height: u32,
    /// Target frame rate: used for the `fps` filter, the output `-r`, and the
    /// `-framerate` of looped still inputs.
    pub fps: u32,
    /// When true, encode with `h264_nvenc` and pass `-hwaccel cuda`; otherwise
    /// encode with `libx264`.
    pub nvenc: bool,
}

impl Default for SegmentOpts {
    fn default() -> Self {
        Self {
            width: REEL_WIDTH,
            height: REEL_HEIGHT,
            fps: REEL_FPS,
            nvenc: false,
        }
    }
}

/// Filter chain for one photo (input `idx`) producing the labelled output
/// `[v{idx}]`. Splits the still into a background and foreground: the background
/// is scaled to *cover* the canvas and heavily blurred; the foreground is
/// scaled to *fit* and overlaid centered. This fills the portrait frame for any
/// photo orientation — no black bars, no cropping of the subject — then a fade
/// in/out softens the cut. Intermediate labels are suffixed with `idx` so
/// several chains coexist in one `filter_complex`.
///
/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a
/// true {fps}-frame timeline; otherwise the fade is sampled at the looped
/// still's coarse cadence and duplicated up, which reads as a steppy dip.
fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String {
    let (w, h, fps) = (opts.width, opts.height, opts.fps);
    let fade_out_start = (duration - fade).max(0.0);
    format!(
        "[{idx}:v]split=2[bg{idx}][fg{idx}];\
         [bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\
         crop={w}:{h},boxblur=20:2[bgb{idx}];\
         [fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\
         [bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\
         fps={fps},\
         fade=t=in:st=0:d={fade},\
         fade=t=out:st={fade_out_start:.3}:d={fade},\
         setsar=1,format=yuv420p[v{idx}]"
    )
}

/// Assemble the complete `filter_complex` for a photo beat.
///
/// One [`photo_filter_chain`] is emitted per entry of `per_photo` (its value is
/// that photo's slice length in seconds). The resulting `[v0]…[v{n-1}]`
/// streams are concatenated into `[v]`, and the narration — the input that
/// follows the photos, index `per_photo.len()` — is padded with trailing
/// silence into `[a]`. A single-photo beat degenerates to one chain plus
/// `concat=n=1`, i.e. a passthrough.
///
/// The fade length is chosen by [`fade_for`] from the photo count. An empty
/// `per_photo` is counted as one photo for the concat/audio indices even
/// though no chain is emitted.
pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String {
    let count = per_photo.len().max(1);
    let fade = fade_for(count);

    let mut graph = String::new();
    for (slot, &secs) in per_photo.iter().enumerate() {
        if slot > 0 {
            graph.push(';');
        }
        graph.push_str(&photo_filter_chain(slot, opts, secs, fade));
    }
    // Separator between the per-photo chains and the concat stage.
    graph.push(';');
    for slot in 0..count {
        graph.push_str(&format!("[v{slot}]"));
    }
    graph.push_str(&format!("concat=n={count}:v=1:a=0[v];[{count}:a]apad[a]"));
    graph
}

/// Video-encoder flags for a segment render.
///
/// With `nvenc` set, selects `h264_nvenc` (preset `p4`, `-cq 23`); otherwise
/// `libx264` (`-crf 21`, preset `veryfast`). Both variants pin the pixel
/// format to `yuv420p`.
fn video_encoder_args(nvenc: bool) -> Vec<String> {
    let flags: &[&str] = if nvenc {
        // p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path.
        &[
            "-c:v",
            "h264_nvenc",
            "-preset",
            "p4",
            "-cq",
            "23",
            "-pix_fmt",
            "yuv420p",
        ]
    } else {
        &[
            "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p",
        ]
    };
    flags.iter().map(|&flag| String::from(flag)).collect()
}

/// Assemble the ffmpeg command line that renders one photo beat.
///
/// Layout of the returned arguments:
/// 1. `-y`, plus `-hwaccel cuda` when `opts.nvenc` is set;
/// 2. one looped-still input per `(image, duration)` pair, each limited to its
///    slice by an input-side `-t` and read at the target `-framerate` so the
///    fades have real frames to ramp across;
/// 3. the narration input and the [`beat_filtergraph`], with `[v]` and `[a]`
///    mapped to the output;
/// 4. an output-side `-t total` (this also bounds the `apad`'d audio) and a
///    forced constant frame rate via `-r`;
/// 5. the video encoder flags, AAC audio at 160k / 48 kHz, `-shortest`, and
///    finally `out_path`.
///
/// `image_paths` and `per_photo` are paired positionally; extra entries in the
/// longer slice produce no input. Durations are written with millisecond
/// precision.
pub fn build_beat_args(
    image_paths: &[String],
    audio_path: &str,
    out_path: &str,
    per_photo: &[f64],
    total: f64,
    opts: &SegmentOpts,
) -> Vec<String> {
    let rate = opts.fps.to_string();
    let graph = beat_filtergraph(opts, per_photo);
    let limit = format!("{total:.3}");

    let mut argv = vec![String::from("-y")];
    if opts.nvenc {
        argv.extend(["-hwaccel", "cuda"].iter().map(|s| s.to_string()));
    }

    // One looped still per photo, bounded to its own slice of the beat.
    for (still, secs) in image_paths.iter().zip(per_photo) {
        let window = format!("{secs:.3}");
        let input = [
            "-framerate",
            rate.as_str(),
            "-loop",
            "1",
            "-t",
            window.as_str(),
            "-i",
            still.as_str(),
        ];
        argv.extend(input.iter().map(|s| s.to_string()));
    }

    let mux = [
        "-i",
        audio_path,
        "-filter_complex",
        graph.as_str(),
        "-map",
        "[v]",
        "-map",
        "[a]",
        "-t",
        limit.as_str(),
        // Force constant frame rate so the beat (and the concatenated reel)
        // plays at a steady cadence rather than a variable one.
        "-r",
        rate.as_str(),
    ];
    argv.extend(mux.iter().map(|s| s.to_string()));

    argv.extend(video_encoder_args(opts.nvenc));

    let audio_codec = ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"];
    argv.extend(audio_codec.iter().map(|s| s.to_string()));

    argv.push(out_path.to_owned());
    argv
}

/// Assemble the ffmpeg command line that joins rendered segments without
/// re-encoding.
///
/// The input side reads `list_path` through the concat demuxer (`-safe 0`).
/// The output side stream-copies every track and sets `+faststart`, which
/// moves the moov atom up front so the reel streams immediately on the mobile
/// client. The muxer is forced with `-f mp4` because the output is written to
/// a `.tmp` path (atomic publish) whose extension ffmpeg can't map to a format
/// on its own.
pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec<String> {
    let input_side = ["-y", "-f", "concat", "-safe", "0", "-i", list_path];
    let output_side = [
        "-c",
        "copy",
        "-movflags",
        "+faststart",
        "-f",
        "mp4",
        out_path,
    ];
    input_side
        .iter()
        .chain(output_side.iter())
        .map(|&arg| arg.to_owned())
        .collect()
}

/// Produce the body of a concat-demuxer list file.
///
/// Emits one `file '<path>'` line per segment, in order, each terminated by a
/// newline. A single quote inside a path is written as `'\''`
/// (close-quote, escaped quote, re-open), per ffmpeg's concat syntax. An empty
/// slice yields an empty string.
pub fn build_concat_list(segment_paths: &[String]) -> String {
    segment_paths
        .iter()
        .map(|path| format!("file '{}'\n", path.replace('\'', r"'\''")))
        .collect()
}

/// Run `ffmpeg` with `args` and wait for it to exit, capturing its output.
///
/// `what` is a short label for the step; it is woven into the error context.
///
/// # Errors
/// Fails if the process cannot be spawned, or if it exits unsuccessfully — in
/// which case the error message carries ffmpeg's stderr (lossily decoded as
/// UTF-8).
async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
    let finished = Command::new("ffmpeg")
        .args(args)
        .output()
        .await
        .with_context(|| format!("spawning ffmpeg for {what}"))?;

    if finished.status.success() {
        Ok(())
    } else {
        bail!(
            "ffmpeg {what} failed: {}",
            String::from_utf8_lossy(&finished.stderr)
        )
    }
}

/// Render a photo beat to `out_path`.
///
/// The photos in `image_paths` are shown in sequence — a held shot when there
/// is one, a quick burst when there are several — under the single narration
/// in `audio_path`. `narration_secs` is the narration's measured length; it
/// drives the beat's pacing through [`beat_durations`].
///
/// Paths are handed to ffmpeg as lossily-converted UTF-8 strings.
///
/// # Errors
/// Fails when `image_paths` is empty, or when the ffmpeg run fails.
pub async fn render_beat(
    image_paths: &[std::path::PathBuf],
    audio_path: &Path,
    out_path: &Path,
    narration_secs: f64,
    opts: &SegmentOpts,
) -> Result<()> {
    if image_paths.is_empty() {
        bail!("render_beat called with no images");
    }

    let (total, per_photo) = beat_durations(narration_secs, image_paths.len());

    let stills: Vec<String> = image_paths
        .iter()
        .map(|still| still.to_string_lossy().into_owned())
        .collect();
    let narration = audio_path.to_string_lossy();
    let destination = out_path.to_string_lossy();

    let args = build_beat_args(&stills, &narration, &destination, &per_photo, total, opts);
    run_ffmpeg(&args, "beat render").await
}

// --- Video-clip beats --------------------------------------------------------

/// Video chain for a clip beat: fill the clip to the portrait canvas (blurred
/// backdrop, same look as photos), normalize fps, hold the last frame if the
/// narration outlasts the clip (`tpad`), then fade. Produces `[v]`.
fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String {
    let (w, h, fps) = (opts.width, opts.height, opts.fps);
    let fade = SINGLE_FADE_SECONDS;
    let hold = (beat_total - clip_dur).max(0.0);
    let fade_out_start = (beat_total - fade).max(0.0);
    // Freeze the final frame to cover narration that runs past the clip.
    let tpad = if hold > 0.05 {
        format!(",tpad=stop_mode=clone:stop_duration={hold:.3}")
    } else {
        String::new()
    };
    format!(
        "[0:v]split=2[bg][fg];\
         [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\
         crop={w}:{h},boxblur=20:2[bgb];\
         [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\
         [bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\
         fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\
         setsar=1,format=yuv420p[v]"
    )
}

/// Build the audio half of a clip beat's filtergraph, producing `[a]`.
///
/// When the clip has its own audio track (`has_audio`), that track (input 0)
/// is turned down to `CLIP_DUCK_VOLUME` and mixed with the silence-padded
/// narration (input 1). Otherwise the padded narration is used on its own.
fn clip_audio_filter(has_audio: bool) -> String {
    if !has_audio {
        return String::from("[1:a]apad[a]");
    }
    let ducked = format!("[0:a]volume={CLIP_DUCK_VOLUME}[duck]");
    [
        ducked.as_str(),
        "[1:a]apad[narr]",
        "[duck][narr]amix=inputs=2:duration=longest:normalize=0[a]",
    ]
    .join(";")
}

/// Assemble the complete `filter_complex` for a clip beat: the video chain
/// from [`clip_video_filter`] and the audio chain from [`clip_audio_filter`],
/// separated by `;`. Input 0 is the clip, input 1 the narration.
pub fn clip_beat_filtergraph(
    opts: &SegmentOpts,
    clip_dur: f64,
    beat_total: f64,
    has_audio: bool,
) -> String {
    let mut graph = clip_video_filter(opts, clip_dur, beat_total);
    graph.push(';');
    graph.push_str(&clip_audio_filter(has_audio));
    graph
}

/// Assemble the ffmpeg command line that renders one clip beat.
///
/// Layout of the returned arguments:
/// 1. `-y`, plus `-hwaccel cuda` when `opts.nvenc` is set;
/// 2. the clip input, limited to its first `clip_dur` seconds by an input-side
///    `-t`, followed by the narration input (unbounded — `apad` fills it out);
/// 3. the [`clip_beat_filtergraph`], with `[v]` and `[a]` mapped to the output;
/// 4. an output-side `-t beat_total` and a forced frame rate via `-r`;
/// 5. the video encoder flags, AAC audio at 160k / 48 kHz, and `out_path`.
///
/// Durations are written with millisecond precision.
pub fn build_clip_beat_args(
    clip_path: &str,
    audio_path: &str,
    out_path: &str,
    clip_dur: f64,
    beat_total: f64,
    has_audio: bool,
    opts: &SegmentOpts,
) -> Vec<String> {
    let rate = opts.fps.to_string();
    let clip_window = format!("{clip_dur:.3}");
    let beat_window = format!("{beat_total:.3}");
    let graph = clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio);

    let mut argv = vec![String::from("-y")];
    if opts.nvenc {
        argv.extend(["-hwaccel", "cuda"].iter().map(|s| s.to_string()));
    }

    let io_flags = [
        // Input `-t` limits the clip to its window; audio has none (apad fills).
        "-t",
        clip_window.as_str(),
        "-i",
        clip_path,
        "-i",
        audio_path,
        "-filter_complex",
        graph.as_str(),
        "-map",
        "[v]",
        "-map",
        "[a]",
        "-t",
        beat_window.as_str(),
        "-r",
        rate.as_str(),
    ];
    argv.extend(io_flags.iter().map(|s| s.to_string()));

    argv.extend(video_encoder_args(opts.nvenc));

    let audio_codec = ["-c:a", "aac", "-b:a", "160k", "-ar", "48000"];
    argv.extend(audio_codec.iter().map(|s| s.to_string()));

    argv.push(out_path.to_owned());
    argv
}

/// Report whether the media file at `path` has at least one audio stream, so a
/// clip beat knows whether to mix in live audio.
///
/// Runs `ffprobe` restricted to audio streams and asks it to print only each
/// stream's index; any output on stdout counts as "has audio". Returns `false`
/// when ffprobe cannot be run or prints nothing.
pub async fn has_audio_stream(path: &str) -> bool {
    const PROBE_FLAGS: [&str; 8] = [
        "-v",
        "error",
        "-select_streams",
        "a",
        "-show_entries",
        "stream=index",
        "-of",
        "csv=p=0",
    ];

    let probe = Command::new("ffprobe")
        .args(PROBE_FLAGS)
        .arg(path)
        .output()
        .await;

    match probe {
        Ok(out) => !out.stdout.is_empty(),
        Err(_) => false,
    }
}

/// Render a clip beat to `out_path`.
///
/// Plays the opening section of `clip_path` — at most [`CLIP_SECONDS`], and no
/// longer than the source itself when its duration can be probed — under the
/// narration in `audio_path`. The beat lasts at least as long as
/// [`segment_duration`] says the narration needs; if that exceeds the clip,
/// the clip's last frame is frozen to fill the gap. The clip's own audio is
/// mixed in only when [`has_audio_stream`] finds one.
///
/// # Errors
/// Fails when the ffmpeg run fails. A failed duration probe is not an error:
/// the clip length then falls back to [`CLIP_SECONDS`].
pub async fn render_clip_beat(
    clip_path: &Path,
    audio_path: &Path,
    out_path: &Path,
    narration_secs: f64,
    opts: &SegmentOpts,
) -> Result<()> {
    let clip_str = clip_path.to_string_lossy().into_owned();

    // Clamp the clip to its own length so a short video isn't padded to the cap.
    let probed = crate::video::ffmpeg::get_duration_seconds(&clip_str)
        .await
        .ok()
        .flatten();
    let clip_dur = probed
        .filter(|&secs| secs > 0.0 && secs < CLIP_SECONDS)
        .unwrap_or(CLIP_SECONDS);

    let beat_total = clip_dur.max(segment_duration(narration_secs));
    let has_audio = has_audio_stream(&clip_str).await;

    let narration = audio_path.to_string_lossy();
    let destination = out_path.to_string_lossy();
    let args = build_clip_beat_args(
        &clip_str,
        &narration,
        &destination,
        clip_dur,
        beat_total,
        has_audio,
        opts,
    );
    run_ffmpeg(&args, "clip beat render").await
}

/// Join rendered segments into the final reel. Writes the concat list into the
/// same directory as the output so relative paths and cleanup stay local.
pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> {
    let list_path = out_path.with_extension("concat.txt");
    let body = build_concat_list(segment_paths);
    tokio::fs::write(&list_path, body)
        .await
        .context("writing concat list")?;
    let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy());
    let result = run_ffmpeg(&args, "concat").await;
    let _ = tokio::fs::remove_file(&list_path).await;
    result
}

#[cfg(test)]
mod tests {
    // Unit tests for the pure pieces of this module: the duration math, the
    // filtergraph strings, and the ffmpeg argument vectors. Nothing here spawns
    // ffmpeg or ffprobe; the async runners are not exercised.
    use super::*;

    // --- Duration math -------------------------------------------------------

    #[test]
    fn segment_duration_floors_short_lines() {
        // A one-word narration still lingers at the floor.
        assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS);
        assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS);
    }

    #[test]
    fn segment_duration_covers_full_narration_plus_tail() {
        // No ceiling: a long line gets its full length so speech isn't cut.
        assert!((segment_duration(5.0) - 5.6).abs() < 1e-9);
        assert!((segment_duration(20.0) - 20.6).abs() < 1e-9);
    }

    #[test]
    fn segment_duration_rejects_nonfinite() {
        // NaN and infinity fall back to the floor rather than propagating.
        assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS);
        assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS);
    }

    #[test]
    fn beat_durations_single_photo_matches_base() {
        let (total, per) = beat_durations(4.0, 1);
        assert!((total - 4.6).abs() < 1e-9); // narration + tail
        assert_eq!(per.len(), 1);
        assert!((per[0] - 4.6).abs() < 1e-9);
    }

    #[test]
    fn beat_durations_burst_splits_evenly() {
        // 5 photos, narration 4.6s base → ~0.92s each (above the 0.6 floor).
        let (total, per) = beat_durations(4.0, 5);
        assert!((total - 4.6).abs() < 1e-9);
        assert_eq!(per.len(), 5);
        assert!((per.iter().sum::<f64>() - total).abs() < 1e-9);
        assert!(per.iter().all(|&d| d >= MIN_BURST_PHOTO_SECONDS));
    }

    #[test]
    fn beat_durations_stretches_when_narration_too_short_for_burst() {
        // Floor narration (2.5s) over 10 photos would be 0.25s each — below the
        // legibility floor, so the beat stretches to 10 × 0.6 = 6s.
        let (total, per) = beat_durations(0.0, 10);
        assert!((total - 6.0).abs() < 1e-9);
        assert!(per.iter().all(|&d| (d - 0.6).abs() < 1e-9));
    }

    // --- Photo-beat filtergraph and args -------------------------------------

    #[test]
    fn beat_filtergraph_single_photo_fills_portrait_and_holds() {
        let (_t, per) = beat_durations(4.0, 1);
        let g = beat_filtergraph(&SegmentOpts::default(), &per);
        assert!(g.contains("[0:v]split=2[bg0][fg0]"));
        assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase"));
        assert!(g.contains("crop=1080:1920"));
        assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease"));
        assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
        // Single photo → concat of one, gentle fade, audio is input 1.
        assert!(g.contains("concat=n=1:v=1:a=0[v]"));
        assert!(g.contains("d=0.35")); // SINGLE_FADE
        assert!(g.contains("[1:a]apad[a]"));
    }

    #[test]
    fn beat_filtergraph_burst_chains_concats_and_snappy_fade() {
        let (_t, per) = beat_durations(4.0, 3);
        let g = beat_filtergraph(&SegmentOpts::default(), &per);
        // One chain per photo with index-suffixed labels.
        assert!(g.contains("[0:v]split") && g.contains("[1:v]split") && g.contains("[2:v]split"));
        // Concatenated in order, audio is the 4th input (index 3).
        assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]"));
        assert!(g.contains("[3:a]apad[a]"));
        // Burst uses the much snappier fade (vs 0.35 for a held shot).
        assert!(g.contains("d=0.08"));
        assert!(!g.contains("d=0.35"));
    }

    #[test]
    fn beat_filtergraph_normalizes_fps_before_fading() {
        // fps must precede the fades on every chain (else the dip looks steppy).
        let (_t, per) = beat_durations(4.0, 1);
        let g = beat_filtergraph(&SegmentOpts::default(), &per);
        let fps_at = g.find("fps=30").expect("fps in graph");
        let fade_at = g.find("fade=t=in").expect("fade in graph");
        assert!(fps_at < fade_at);
    }

    #[test]
    fn beat_args_one_input_per_photo_plus_audio_bound_by_total() {
        let (total, per) = beat_durations(4.0, 2);
        let args = build_beat_args(
            &["/a.jpg".into(), "/b.jpg".into()],
            "/n.wav",
            "/out.mp4",
            &per,
            total,
            &SegmentOpts::default(),
        );
        let joined = args.join(" ");
        // A looped-still input per photo, each with its slice -t, then the audio.
        assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg"));
        assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg"));
        assert!(joined.contains("-i /n.wav"));
        // Output bounded to the beat total and forced CFR.
        assert!(joined.contains("-t 4.600"));
        assert!(joined.contains("-r 30"));
        assert!(joined.ends_with("/out.mp4"));
    }

    #[test]
    fn beat_args_use_nvenc_and_cuda_when_enabled() {
        // Only the nvenc flag differs from the defaults.
        let opts = SegmentOpts {
            nvenc: true,
            ..SegmentOpts::default()
        };
        let (total, per) = beat_durations(3.0, 1);
        let args = build_beat_args(
            &["/img.jpg".into()],
            "/a.wav",
            "/out.mp4",
            &per,
            total,
            &opts,
        );
        let joined = args.join(" ");
        assert!(joined.contains("-hwaccel cuda"));
        assert!(joined.contains("h264_nvenc"));
        assert!(!joined.contains("libx264"));
    }

    // --- Clip-beat filtergraph and args --------------------------------------

    #[test]
    fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() {
        // 5s clip, 7s beat → 2s freeze of the last frame, ducked-audio mix.
        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true);
        assert!(g.contains("tpad=stop_mode=clone:stop_duration=2.000"));
        assert!(g.contains("volume=0.35"));
        assert!(g.contains("amix=inputs=2"));
        assert!(g.contains("[1:a]apad[narr]"));
        // Fill applied to the clip too.
        assert!(g.contains("boxblur"));
        assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
    }

    #[test]
    fn clip_filter_no_tpad_when_clip_covers_the_beat() {
        // Clip at least as long as the beat → no freeze.
        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true);
        assert!(!g.contains("tpad"));
    }

    #[test]
    fn clip_filter_narration_only_without_clip_audio() {
        // No clip audio track → no ducking or mixing, just the padded narration.
        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false);
        assert!(!g.contains("amix"));
        assert!(!g.contains("volume="));
        assert!(g.contains("[1:a]apad[a]"));
    }

    #[test]
    fn clip_beat_args_bound_clip_and_output() {
        let args = build_clip_beat_args(
            "/v.mp4",
            "/n.wav",
            "/out.mp4",
            5.0,
            6.6,
            true,
            &SegmentOpts::default(),
        );
        let joined = args.join(" ");
        // Input -t bounds the clip read; output -t bounds the beat.
        assert!(joined.contains("-t 5.000 -i /v.mp4"));
        assert!(joined.contains("-i /n.wav"));
        assert!(joined.contains("-t 6.600"));
        assert!(joined.contains("-r 30"));
        assert!(joined.ends_with("/out.mp4"));
    }

    // --- Concat stage --------------------------------------------------------

    #[test]
    fn concat_args_stream_copy_with_faststart_and_forced_muxer() {
        // Output goes to a .tmp path, so the muxer must be forced — ffmpeg
        // can't infer mp4 from the extension (the bug this guards against).
        let args = build_concat_args("/tmp/list.txt", "/out.mp4.tmp");
        let joined = args.join(" ");
        assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt"));
        assert!(joined.contains("-c copy"));
        assert!(joined.contains("+faststart"));
        assert!(joined.contains("-f mp4"));
        // The forced muxer must come before the output path.
        let f_mp4 = args.windows(2).position(|w| w == ["-f", "mp4"]).unwrap();
        let out = args.iter().position(|a| a == "/out.mp4.tmp").unwrap();
        assert!(f_mp4 < out);
    }

    #[test]
    fn concat_list_escapes_single_quotes() {
        let body = build_concat_list(&[
            "/tmp/seg_000.mp4".into(),
            "/tmp/own's dir/seg_001.mp4".into(),
        ]);
        assert!(body.contains("file '/tmp/seg_000.mp4'\n"));
        // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax.
        assert!(body.contains(r"own'\''s"));
    }
}