e3f731b3b2
New POST /reels + GET /reels/{id} (+ /video) build an MP4 slideshow of a
memory span (day/week/month), narrated by the LLM in a cloned voice.
Pipeline (src/reels/): a selector resolves which photos + reel metadata,
the scripter writes one narration line per photo via a single LLM call
(reusing each photo's cached insight as context — no fresh vision calls,
so reel generation stays off the GPU's vision slot), each line is
synthesized to speech, and the renderer assembles stills + narration via
ffmpeg. Jobs run in the background (mirroring the TTS speech-job
registry) since a reel takes minutes; the finished MP4 is cached on disk
keyed by the selection so a repeat request is instant.
The segment model is media-typed (Photo today) so a video-clip segment
(phase 2) and a nightly pre-render (phase 3) slot in without reworking
the pipeline. Ken Burns motion is implemented but defaulted off pending a
visual check on the GPU box.
Supporting changes:
- memories: extract gather_memory_items() so the reel selector reuses the
exact window/exclusion/tz/sort logic behind /memories.
- ai::tts: add synthesize_serialized() so reel narration honors the same
single-GPU permit + write lease as user TTS requests.
- video::ffmpeg: make get_duration_seconds() pub for narration timing.
- AppState: reels_path (REELS_DIRECTORY, defaults beside preview clips).
Pure logic (cache key, script parsing, ffmpeg arg/filter construction,
even sampling, segment timing) is unit-tested (26 tests). The runtime
path (ffmpeg render, TTS, LLM) needs a real run on the GPU host to verify
end-to-end — not exercisable in CI.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
473 lines
17 KiB
Rust
473 lines
17 KiB
Rust
use futures::TryFutureExt;
|
|
use log::{debug, error, info, warn};
|
|
use std::io::Result;
|
|
use std::process::{Output, Stdio};
|
|
use std::sync::OnceLock;
|
|
use std::time::Instant;
|
|
use tokio::process::Command;
|
|
|
|
/// Process-wide cache of the NVENC probe result. It stays empty until
/// `is_nvenc_available()` stores the outcome of its first ffmpeg encoder query.
static NVENC_AVAILABLE: OnceLock<bool> = OnceLock::new();
|
|
|
|
/// Check if NVIDIA NVENC hardware encoder is available via ffmpeg.
|
|
async fn check_nvenc_available() -> bool {
|
|
Command::new("ffmpeg")
|
|
.args(["-hide_banner", "-encoders"])
|
|
.output()
|
|
.await
|
|
.map(|out| {
|
|
let stdout = String::from_utf8_lossy(&out.stdout);
|
|
stdout.contains("h264_nvenc")
|
|
})
|
|
.unwrap_or(false)
|
|
}
|
|
|
|
/// Returns whether NVENC is available, caching the result after first check.
|
|
pub async fn is_nvenc_available() -> bool {
|
|
if let Some(&available) = NVENC_AVAILABLE.get() {
|
|
return available;
|
|
}
|
|
let available = check_nvenc_available().await;
|
|
let _ = NVENC_AVAILABLE.set(available);
|
|
if available {
|
|
info!("CUDA NVENC hardware acceleration detected and enabled");
|
|
} else {
|
|
info!("NVENC not available, using CPU encoding");
|
|
}
|
|
available
|
|
}
|
|
|
|
/// Stateless handle whose methods wrap the `ffmpeg` / `ffprobe` command-line
/// tools (playlist transcoding, duration probing, GIF and overview creation).
pub struct Ffmpeg;
|
|
|
|
/// Kind of overview that `Ffmpeg::generate_video_gif` should produce.
pub enum GifType {
    /// Animated GIF assembled from frames sampled across the whole video.
    Overview,
    /// Short overview video built from one-second slices of the input.
    // Kept for callers that are not wired up yet, hence the lint exemption.
    #[allow(dead_code)]
    OverviewVideo {
        /// Target length of the overview, in seconds.
        duration: u32,
    },
}
|
|
|
|
impl Ffmpeg {
|
|
async fn _generate_playlist(&self, input_file: &str, output_file: &str) -> Result<String> {
|
|
let ffmpeg_result: Result<Output> = Command::new("ffmpeg")
|
|
.arg("-i")
|
|
.arg(input_file)
|
|
.arg("-c:v")
|
|
.arg("h264")
|
|
.arg("-crf")
|
|
.arg("21")
|
|
.arg("-preset")
|
|
.arg("veryfast")
|
|
.arg("-hls_time")
|
|
.arg("3")
|
|
.arg("-hls_list_size")
|
|
.arg("100")
|
|
.arg("-vf")
|
|
.arg("scale=1080:-2,setsar=1:1")
|
|
.arg(output_file)
|
|
.stdout(Stdio::null())
|
|
.stderr(Stdio::piped())
|
|
.output()
|
|
.inspect_err(|e| error!("Failed to run ffmpeg on child process: {}", e))
|
|
.map_err(|e| std::io::Error::other(e.to_string()))
|
|
.await;
|
|
|
|
if let Ok(ref res) = ffmpeg_result {
|
|
debug!("ffmpeg output: {:?}", res);
|
|
}
|
|
|
|
ffmpeg_result.map(|_| output_file.to_string())
|
|
}
|
|
|
|
async fn get_video_duration(&self, input_file: &str) -> Result<u32> {
|
|
Command::new("ffprobe")
|
|
.args(["-i", input_file])
|
|
.args(["-show_entries", "format=duration"])
|
|
.args(["-v", "quiet"])
|
|
.args(["-of", "csv=p=0"])
|
|
.output()
|
|
.await
|
|
.map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
|
|
.inspect(|duration| debug!("Found video duration: {:?}", duration))
|
|
.and_then(|duration| {
|
|
duration
|
|
.parse::<f32>()
|
|
.map(|duration| duration as u32)
|
|
.map_err(|e| std::io::Error::other(e.to_string()))
|
|
})
|
|
.inspect(|duration| debug!("Found video duration: {:?}", duration))
|
|
}
|
|
pub async fn generate_video_gif(
|
|
&self,
|
|
input_file: &str,
|
|
output_file: &str,
|
|
gif_type: GifType,
|
|
) -> Result<String> {
|
|
info!("Creating gif for: '{}'", input_file);
|
|
|
|
match gif_type {
|
|
GifType::Overview => {
|
|
let temp_dir = tempfile::tempdir()?;
|
|
let temp_path = temp_dir
|
|
.path()
|
|
.to_str()
|
|
.expect("Unable to make temp_dir a string");
|
|
|
|
match self
|
|
.get_video_duration(input_file)
|
|
.and_then(|duration| {
|
|
debug!("Creating gif frames for '{}'", input_file);
|
|
|
|
Command::new("ffmpeg")
|
|
.args(["-i", input_file])
|
|
.args(["-vf", &format!("fps=20/{}", duration)])
|
|
.args(["-q:v", "2"])
|
|
.stderr(Stdio::null())
|
|
.arg(format!("{}/frame_%03d.jpg", temp_path))
|
|
.status()
|
|
})
|
|
.and_then(|_| {
|
|
debug!("Generating palette");
|
|
|
|
Command::new("ffmpeg")
|
|
.args(["-i", &format!("{}/frame_%03d.jpg", temp_path)])
|
|
.args(["-vf", "palettegen"])
|
|
.arg(format!("{}/palette.png", temp_path))
|
|
.stderr(Stdio::null())
|
|
.status()
|
|
})
|
|
.and_then(|_| {
|
|
debug!("Creating gif for: '{}'", input_file);
|
|
self.create_gif_from_frames(temp_path, output_file)
|
|
})
|
|
.await
|
|
{
|
|
Ok(exit_code) => {
|
|
if exit_code == 0 {
|
|
info!("Created gif for '{}' -> '{}'", input_file, output_file);
|
|
} else {
|
|
warn!(
|
|
"Failed to create gif for '{}' with exit code: {}",
|
|
input_file, exit_code
|
|
);
|
|
}
|
|
}
|
|
Err(e) => {
|
|
error!("Error creating gif for '{}': {:?}", input_file, e);
|
|
}
|
|
}
|
|
}
|
|
GifType::OverviewVideo { duration } => {
|
|
let start = Instant::now();
|
|
|
|
match self
|
|
.get_video_duration(input_file)
|
|
.and_then(|input_duration| {
|
|
Command::new("ffmpeg")
|
|
.args(["-i", input_file])
|
|
.args([
|
|
"-vf",
|
|
// Grab 1 second of frames equally spaced to create a 'duration' second long video scaled to 720px on longest side
|
|
&format!(
|
|
"select='lt(mod(t,{}),1)',setpts=N/FRAME_RATE/TB,scale='if(gt(iw,ih),720,-2)':'if(gt(ih,iw),720,-2)",
|
|
input_duration / duration
|
|
),
|
|
])
|
|
.arg("-an")
|
|
.arg(output_file)
|
|
.status()
|
|
})
|
|
.await
|
|
{
|
|
Ok(out) => info!("Finished clip '{}' with code {:?} in {:?}", output_file, out.code(), start.elapsed()),
|
|
Err(e) => error!("Error creating video overview: {}", e),
|
|
}
|
|
}
|
|
}
|
|
Ok(output_file.to_string())
|
|
}
|
|
|
|
pub async fn create_gif_from_frames(
|
|
&self,
|
|
frame_base_dir: &str,
|
|
output_file: &str,
|
|
) -> Result<i32> {
|
|
let output = Command::new("ffmpeg")
|
|
.arg("-y")
|
|
.args(["-framerate", "4"])
|
|
.args(["-i", &format!("{}/frame_%03d.jpg", frame_base_dir)])
|
|
.args(["-i", &format!("{}/palette.png", frame_base_dir)])
|
|
.args([
|
|
"-filter_complex",
|
|
// Scale to 480x480 with a center crop
|
|
"[0:v]scale=480:-1:flags=lanczos,crop='min(in_w,in_h)':'min(in_w,in_h)':(in_w-out_w)/2:(in_h-out_h)/2, paletteuse",
|
|
])
|
|
.args(["-loop", "0"]) // loop forever
|
|
.args(["-final_delay", "75"])
|
|
.arg(output_file)
|
|
.stderr(Stdio::piped()) // Change this to capture stderr
|
|
.stdout(Stdio::piped()) // Optionally capture stdout too
|
|
.output()
|
|
.await?;
|
|
|
|
if !output.status.success() {
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
error!("FFmpeg error: {}", stderr);
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
debug!("FFmpeg stdout: {}", stdout);
|
|
} else {
|
|
debug!("FFmpeg successful with exit code: {}", output.status);
|
|
}
|
|
|
|
Ok(output.status.code().unwrap_or(-1))
|
|
}
|
|
}
|
|
|
|
/// Get video duration in seconds as f64 for precise interval calculation.
|
|
///
|
|
/// Returns `Ok(None)` when ffprobe runs successfully but the container has no
|
|
/// readable duration (notably GoPro `LRV` low-res preview files, some
|
|
/// fragmented MP4s, and short Snapchat clips with stripped headers). Callers
|
|
/// can fall back to a duration-agnostic encode rather than treating this as
|
|
/// a hard failure — previously the `parse::<f64>` on empty stdout produced
|
|
/// "cannot parse float from empty string" and poisoned the preview-clip row
|
|
/// with status=failed, which the watcher would re-queue every full scan.
|
|
pub async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
|
|
if let Some(d) = probe_duration(input_file, "format=duration").await? {
|
|
return Ok(Some(d));
|
|
}
|
|
// Fall back to the per-stream duration — populated for some MP4s where
|
|
// the format-level duration tag is missing.
|
|
probe_duration(input_file, "stream=duration").await
|
|
}
|
|
|
|
/// Synchronous cousin of `get_duration_seconds`, for callers running on
|
|
/// blocking thread pools (Rayon). Same fallback strategy: tries
|
|
/// `format=duration`, then `stream=duration`. Returns `None` for any
|
|
/// failure — ffprobe missing, container without a duration tag, parse
|
|
/// error — so callers can pick a duration-agnostic default.
|
|
pub fn get_duration_seconds_blocking(input_file: &std::path::Path) -> Option<f64> {
|
|
if let Some(d) = probe_duration_blocking(input_file, "format=duration") {
|
|
return Some(d);
|
|
}
|
|
probe_duration_blocking(input_file, "stream=duration")
|
|
}
|
|
|
|
fn probe_duration_blocking(input_file: &std::path::Path, show_entries: &str) -> Option<f64> {
|
|
let out = std::process::Command::new("ffprobe")
|
|
.args(["-v", "quiet"])
|
|
.args(["-show_entries", show_entries])
|
|
.args(["-of", "csv=p=0"])
|
|
.arg("-i")
|
|
.arg(input_file)
|
|
.output()
|
|
.ok()?;
|
|
let raw = String::from_utf8_lossy(&out.stdout);
|
|
parse_ffprobe_duration(&raw)
|
|
}
|
|
|
|
async fn probe_duration(input_file: &str, show_entries: &str) -> Result<Option<f64>> {
|
|
let out = Command::new("ffprobe")
|
|
.args(["-v", "quiet"])
|
|
.args(["-show_entries", show_entries])
|
|
.args(["-of", "csv=p=0"])
|
|
.args(["-i", input_file])
|
|
.output()
|
|
.await?;
|
|
let raw = String::from_utf8_lossy(&out.stdout);
|
|
Ok(parse_ffprobe_duration(&raw))
|
|
}
|
|
|
|
/// Extract a duration from ffprobe's `csv=p=0` output.
///
/// Yields the first line that parses as a finite, strictly positive number,
/// or `None` when no line qualifies.
///
/// Format-level queries print a single line, while stream-level queries
/// (`-show_entries stream=duration`) print one line per stream. Values that
/// carry no usable duration are skipped: `N/A` for streams of unknown length,
/// blank output for containers without the tag, and the occasional `0` or
/// negative value from fragmented MP4s. Skipping them lets the caller fall
/// back to a duration-agnostic encode.
fn parse_ffprobe_duration(stdout: &str) -> Option<f64> {
    stdout
        .lines()
        // Blank lines, `N/A` and other junk simply fail to parse.
        .filter_map(|line| line.trim().parse::<f64>().ok())
        .find(|seconds| seconds.is_finite() && *seconds > 0.0)
}
|
|
|
|
/// Generate a preview clip from a video file.
|
|
///
|
|
/// Creates a ~10 second MP4 by extracting up to 10 equally-spaced 1-second segments
|
|
/// at 480p with H.264 video and AAC audio. For short videos (<10s), uses fewer segments.
|
|
/// For very short videos (<1s), transcodes the entire video.
|
|
///
|
|
/// Returns (duration_seconds, file_size_bytes) on success.
|
|
pub async fn generate_preview_clip(input_file: &str, output_file: &str) -> Result<(f64, u64)> {
|
|
info!("Generating preview clip for: '{}'", input_file);
|
|
let start = Instant::now();
|
|
|
|
let duration = get_duration_seconds(input_file).await?;
|
|
let use_nvenc = is_nvenc_available().await;
|
|
|
|
// Create parent directories for output
|
|
if let Some(parent) = std::path::Path::new(output_file).parent() {
|
|
std::fs::create_dir_all(parent)?;
|
|
}
|
|
|
|
let mut cmd = Command::new("ffmpeg");
|
|
cmd.arg("-y");
|
|
|
|
// Use CUDA hardware-accelerated decoding when available
|
|
if use_nvenc {
|
|
cmd.args(["-hwaccel", "cuda"]);
|
|
}
|
|
|
|
cmd.arg("-i").arg(input_file);
|
|
|
|
// Branch on duration. `None` means ffprobe couldn't tell us — we treat
|
|
// it like the <1s case and just transcode the whole file. The selected
|
|
// clip-duration we report back is computed alongside, so callers don't
|
|
// need to re-probe.
|
|
let clip_duration = match duration {
|
|
None => {
|
|
warn!(
|
|
"Unknown duration for '{}', transcoding whole file as preview",
|
|
input_file
|
|
);
|
|
cmd.args(["-vf", "scale=-2:480,format=yuv420p"]);
|
|
// Cap the encode at 10s so a long video with stripped duration
|
|
// metadata doesn't spend forever generating a "preview".
|
|
cmd.args(["-t", "10"]);
|
|
10.0
|
|
}
|
|
Some(d) if d < 1.0 => {
|
|
cmd.args(["-vf", "scale=-2:480,format=yuv420p"]);
|
|
d
|
|
}
|
|
Some(d) => {
|
|
let segment_count = if d < 10.0 { d.floor() as u32 } else { 10 };
|
|
let interval = d / segment_count as f64;
|
|
let vf = format!(
|
|
"select='lt(mod(t,{:.4}),1)',setpts=N/FRAME_RATE/TB,fps=30,scale=-2:480,format=yuv420p",
|
|
interval
|
|
);
|
|
let af = format!("aselect='lt(mod(t,{:.4}),1)',asetpts=N/SR/TB", interval);
|
|
cmd.args(["-vf", &vf]);
|
|
cmd.args(["-af", &af]);
|
|
if d < 10.0 { d.floor() } else { 10.0 }
|
|
}
|
|
};
|
|
|
|
// Force 30fps output so high-framerate sources (60fps) don't play back
|
|
// at double speed due to select/setpts timestamp mismatches.
|
|
cmd.args(["-r", "30"]);
|
|
|
|
// Use NVENC for encoding when available, otherwise fall back to libx264
|
|
if use_nvenc {
|
|
cmd.args(["-c:v", "h264_nvenc", "-preset", "p4", "-cq:v", "28"]);
|
|
} else {
|
|
cmd.args(["-c:v", "libx264", "-crf", "28", "-preset", "veryfast"]);
|
|
}
|
|
cmd.args(["-c:a", "aac"]);
|
|
|
|
cmd.arg(output_file);
|
|
cmd.stdout(Stdio::null());
|
|
cmd.stderr(Stdio::piped());
|
|
|
|
let output = cmd.output().await?;
|
|
|
|
if !output.status.success() {
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
return Err(std::io::Error::other(format!(
|
|
"ffmpeg preview generation failed: {}",
|
|
stderr
|
|
)));
|
|
}
|
|
|
|
let metadata = std::fs::metadata(output_file)?;
|
|
let file_size = metadata.len();
|
|
|
|
info!(
|
|
"Generated preview clip '{}' ({:.1}s, {} bytes) in {:?}",
|
|
output_file,
|
|
clip_duration,
|
|
file_size,
|
|
start.elapsed()
|
|
);
|
|
|
|
Ok((clip_duration, file_size))
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::parse_ffprobe_duration;

    /// Assert that every entry of `inputs` parses to `expected`.
    fn assert_all(inputs: &[&str], expected: Option<f64>) {
        for input in inputs {
            assert_eq!(parse_ffprobe_duration(input), expected);
        }
    }

    #[test]
    fn empty_output_returns_none() {
        // The original bug: ffprobe -show_entries format=duration printed
        // nothing for some GoPro LRV files, and `parse::<f64>` failed with
        // "cannot parse float from empty string".
        assert_all(&["", "\n", " \n \n"], None);
    }

    #[test]
    fn na_returns_none() {
        // "N/A" is what ffprobe prints for a stream of unknown duration.
        assert_all(&["N/A", "N/A\nN/A\n"], None);
    }

    #[test]
    fn parses_simple_duration() {
        assert_all(&["12.345", "12.345\n"], Some(12.345));
        assert_all(&["0.5"], Some(0.5));
    }

    #[test]
    fn rejects_non_positive_durations() {
        // Fragmented MP4s and broken containers sometimes report 0 or a
        // negative duration. Those count as "unknown", so the caller takes
        // the whole-file transcode path instead of dividing by zero later.
        assert_all(&["0", "0.0", "-1.5"], None);
    }

    #[test]
    fn rejects_non_finite_durations() {
        assert_all(&["inf", "nan"], None);
    }

    #[test]
    fn first_valid_line_wins_for_stream_query() {
        // `-show_entries stream=duration` prints one value per stream. The
        // video stream comes first for a video file, so its value is taken
        // and the audio-stream values after it are ignored.
        assert_all(&["12.5\n8.3\n"], Some(12.5));
    }

    #[test]
    fn skips_leading_na_and_blank_lines() {
        // A stream query may list N/A first (e.g. a data stream ahead of the
        // video stream); scanning has to continue past it.
        assert_all(&["N/A\n\n7.25\n"], Some(7.25));
    }

    #[test]
    fn rejects_garbage() {
        assert_all(&["not a number", "12.5abc"], None);
    }
}
|