Reels: burst beats + duration budget for week/month, plus step logging
Restructures a reel around beats — one narration line over one or more photos — instead of one line per photo. A single-photo beat is a held shot; a multi-photo beat is a quick burst that flashes through several moments of an event while the line is read. So a week/month reel can show everything it spans without a narrated (and timed) segment per photo.

Selection (selector.rs):
- Duration budget: cap the number of narrated beats to fit ~REEL_TARGET_SECONDS (default 90, env-tunable) so week/month reels don't run minutes long.
- Event clustering by time gap; when there are more events than the beat budget, adjacent events merge so the whole span stays covered. Each beat bursts up to MAX_BURST_PHOTOS (an even spread), so a 40-shot dinner contributes a handful of quick frames, not forty narrated seconds.

Render (render.rs): a beat renders its photos as a concat of per-photo fills (blurred-bg portrait, fps-before-fade) under one muxed narration; burst photos get a snappier fade. beat_durations splits the narration across the photos, stretching only if a long burst would flash too fast.

Adds high-level info logs across the steps (request → script → per-beat narrate/render → join → done with elapsed) for visibility. Bumps RENDER_VERSION to re-render cached reels.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+95
-36
@@ -36,24 +36,27 @@ use crate::otel::extract_context_from_request;
|
||||
use crate::state::AppState;
|
||||
use selector::ReelSelector;
|
||||
|
||||
/// The media behind one reel segment. Photos-only for now; a `Clip` variant
|
||||
/// (a section of a source video) is the phase-2 extension point.
|
||||
/// The media behind one shot. Photos-only for now; a `Clip` variant (a section
|
||||
/// of a source video) is the phase-2 extension point.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum SegmentMedia {
|
||||
Photo { rel_path: String, library_id: i32 },
|
||||
}
|
||||
|
||||
/// A segment before narration: which photo, when it was taken, and any cached
|
||||
/// insight to feed the scripter.
|
||||
/// A beat: one narration line over one or more photos. A single-photo beat is a
|
||||
/// held shot; a multi-photo beat is a quick burst that flashes through several
|
||||
/// moments of the same event while the line is read — so a week/month reel can
|
||||
/// *show* everything it spans without a narration line (and the seconds that
|
||||
/// come with it) per photo.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PlannedSegment {
|
||||
pub media: SegmentMedia,
|
||||
pub struct PlannedBeat {
|
||||
pub photos: Vec<SegmentMedia>,
|
||||
pub date: Option<i64>,
|
||||
pub insight_title: Option<String>,
|
||||
pub insight_summary: Option<String>,
|
||||
}
|
||||
|
||||
impl PlannedSegment {
|
||||
impl PlannedBeat {
|
||||
/// Human date for the prompt, e.g. "June 12, 2019". `None` when undated.
|
||||
pub fn date_label(&self) -> Option<String> {
|
||||
let ts = self.date?;
|
||||
@@ -180,7 +183,7 @@ fn finish_job(
|
||||
|
||||
/// Render version: bump to invalidate every cached reel after a rendering /
|
||||
/// scripting change that should produce a fresh result.
|
||||
const RENDER_VERSION: u32 = 3;
|
||||
const RENDER_VERSION: u32 = 4;
|
||||
|
||||
/// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump
|
||||
/// over the ~0.5 default warms up otherwise-flat narration without over-acting;
|
||||
@@ -306,16 +309,25 @@ pub async fn create_reel_handler(
|
||||
}));
|
||||
}
|
||||
|
||||
let media: Vec<SegmentMedia> = planned.iter().map(|p| p.media.clone()).collect();
|
||||
// Flatten every photo across beats (in order) into the cache key — the key
|
||||
// tracks exactly which photos appear and in what sequence.
|
||||
let media: Vec<SegmentMedia> = planned.iter().flat_map(|b| b.photos.clone()).collect();
|
||||
let voice = req.voice.clone().filter(|s| !s.is_empty());
|
||||
let key = cache_key(&selector, &media, voice.as_deref());
|
||||
|
||||
let job_id = Uuid::new_v4();
|
||||
log::info!(
|
||||
"reel {job_id}: request span={:?} → {} beats, {} photos",
|
||||
span,
|
||||
planned.len(),
|
||||
media.len()
|
||||
);
|
||||
|
||||
// Cache hit: register an already-Done job pointing at the existing MP4 so
|
||||
// the client's first poll returns the video URL immediately.
|
||||
let mp4 = reel_mp4_path(&app_state, &key);
|
||||
if mp4.exists() {
|
||||
log::info!("reel {job_id}: cache hit, serving existing reel");
|
||||
let title = std::fs::read(reel_sidecar_path(&app_state, &key))
|
||||
.ok()
|
||||
.and_then(|b| serde_json::from_slice::<ReelSidecar>(&b).ok())
|
||||
@@ -358,6 +370,7 @@ pub async fn create_reel_handler(
|
||||
},
|
||||
);
|
||||
}
|
||||
log::info!("reel {job_id}: queued for generation");
|
||||
|
||||
let state = app_state.clone();
|
||||
let insight_dao = insight_dao.clone();
|
||||
@@ -441,45 +454,73 @@ async fn run_reel_job(
|
||||
app_state: &AppState,
|
||||
insight_dao: &Mutex<Box<dyn InsightDao>>,
|
||||
job_id: Uuid,
|
||||
mut planned: Vec<PlannedSegment>,
|
||||
mut planned: Vec<PlannedBeat>,
|
||||
meta: ReelMeta,
|
||||
voice: Option<String>,
|
||||
key: &str,
|
||||
) -> anyhow::Result<(String, PathBuf)> {
|
||||
use anyhow::{Context, anyhow};
|
||||
|
||||
let started = Instant::now();
|
||||
let total_photos: usize = planned.iter().map(|b| b.photos.len()).sum();
|
||||
log::info!(
|
||||
"reel {job_id}: starting — span {:?}, {} beats, {} photos, voice={}",
|
||||
meta.span,
|
||||
planned.len(),
|
||||
total_photos,
|
||||
voice.as_deref().unwrap_or("default")
|
||||
);
|
||||
|
||||
let client = app_state
|
||||
.llamacpp
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("TTS/LLM backend not configured"))?
|
||||
.clone();
|
||||
|
||||
// 1. Enrich with cached insights, then script (one LLM call).
|
||||
// 1. Enrich each beat with its lead photo's cached insight, then script
|
||||
// (one LLM call → one narration line per beat).
|
||||
set_stage(job_id, "scripting");
|
||||
log::info!("reel {job_id}: scripting narration via LLM…");
|
||||
let span_context = opentelemetry::Context::new();
|
||||
selector::enrich(insight_dao, &span_context, &mut planned);
|
||||
let script = script::generate_script(&client, &meta, &planned).await?;
|
||||
log::info!(
|
||||
"reel {job_id}: scripted \"{}\" ({} lines)",
|
||||
script.title,
|
||||
script.lines.len()
|
||||
);
|
||||
|
||||
// 2. Narrate each line to speech and 3. render each photo segment. A
|
||||
// segment whose audio or render fails is skipped (logged) rather than
|
||||
// sinking the whole reel — handles an odd HEIC/corrupt file gracefully.
|
||||
// 2. Narrate each beat's line and 3. render the beat (its photos shown in
|
||||
// sequence under that one narration). A beat whose audio or render fails
|
||||
// is skipped (logged) rather than sinking the whole reel — handles an
|
||||
// odd HEIC/corrupt file gracefully.
|
||||
set_stage(job_id, "narrating");
|
||||
let work = tempfile::tempdir().context("creating reel work dir")?;
|
||||
let nvenc = render::is_nvenc_available().await;
|
||||
log::info!(
|
||||
"reel {job_id}: narrating + rendering {} beats (encoder: {})",
|
||||
planned.len(),
|
||||
if nvenc { "nvenc" } else { "cpu" }
|
||||
);
|
||||
let opts = render::SegmentOpts {
|
||||
nvenc,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut segment_files: Vec<String> = Vec::new();
|
||||
for (i, (seg, line)) in planned.iter().zip(script.lines.iter()).enumerate() {
|
||||
let image_path = match resolve_image_path(app_state, &seg.media) {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
log::warn!("reel {job_id}: skipping segment {i}, image path unresolved");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let beat_total = planned.len();
|
||||
let mut beat_files: Vec<String> = Vec::new();
|
||||
for (i, (beat, line)) in planned.iter().zip(script.lines.iter()).enumerate() {
|
||||
// Resolve all of the beat's photos to absolute paths; drop any that
|
||||
// don't resolve. An empty beat is skipped.
|
||||
let image_paths: Vec<PathBuf> = beat
|
||||
.photos
|
||||
.iter()
|
||||
.filter_map(|m| resolve_image_path(app_state, m))
|
||||
.collect();
|
||||
if image_paths.is_empty() {
|
||||
log::warn!("reel {job_id}: skipping beat {i}, no image paths resolved");
|
||||
continue;
|
||||
}
|
||||
|
||||
let audio_bytes = match crate::ai::tts::synthesize_serialized(
|
||||
&client,
|
||||
@@ -492,13 +533,13 @@ async fn run_reel_job(
|
||||
{
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}");
|
||||
log::warn!("reel {job_id}: skipping beat {i}, TTS failed: {e}");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let audio_path = work.path().join(format!("narration_{i:03}.wav"));
|
||||
if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await {
|
||||
log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}");
|
||||
log::warn!("reel {job_id}: skipping beat {i}, writing audio failed: {e}");
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -508,25 +549,37 @@ async fn run_reel_job(
|
||||
.ok()
|
||||
.flatten()
|
||||
.unwrap_or(render::MIN_SEGMENT_SECONDS);
|
||||
let duration = render::segment_duration(narration_secs);
|
||||
|
||||
set_stage(job_id, "rendering");
|
||||
let seg_out = work.path().join(format!("seg_{i:03}.mp4"));
|
||||
log::info!(
|
||||
"reel {job_id}: beat {}/{} — {} photo(s), narration {:.1}s",
|
||||
i + 1,
|
||||
beat_total,
|
||||
image_paths.len(),
|
||||
narration_secs
|
||||
);
|
||||
let beat_out = work.path().join(format!("beat_{i:03}.mp4"));
|
||||
if let Err(e) =
|
||||
render::render_segment(&image_path, &audio_path, &seg_out, duration, &opts).await
|
||||
render::render_beat(&image_paths, &audio_path, &beat_out, narration_secs, &opts).await
|
||||
{
|
||||
log::warn!("reel {job_id}: skipping segment {i}, render failed: {e}");
|
||||
log::warn!("reel {job_id}: skipping beat {i}, render failed: {e}");
|
||||
continue;
|
||||
}
|
||||
segment_files.push(seg_out.to_string_lossy().to_string());
|
||||
beat_files.push(beat_out.to_string_lossy().to_string());
|
||||
}
|
||||
|
||||
let segment_files = beat_files;
|
||||
if segment_files.is_empty() {
|
||||
return Err(anyhow!("no segments rendered successfully"));
|
||||
return Err(anyhow!("no beats rendered successfully"));
|
||||
}
|
||||
|
||||
// 4. Concat into the cache. Write to a temp name in the reels dir, then
|
||||
// rename atomically (same filesystem) so a reader never sees a partial.
|
||||
set_stage(job_id, "rendering");
|
||||
log::info!(
|
||||
"reel {job_id}: joining {} rendered beats into the final reel",
|
||||
segment_files.len()
|
||||
);
|
||||
std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?;
|
||||
let final_path = reel_mp4_path(app_state, key);
|
||||
let tmp_path = final_path.with_extension("mp4.tmp");
|
||||
@@ -541,6 +594,12 @@ async fn run_reel_job(
|
||||
.context("serializing reel sidecar")?;
|
||||
let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar);
|
||||
|
||||
log::info!(
|
||||
"reel {job_id}: done in {:.1}s — {} beats → {}",
|
||||
started.elapsed().as_secs_f64(),
|
||||
segment_files.len(),
|
||||
final_path.display()
|
||||
);
|
||||
Ok((script.title, final_path))
|
||||
}
|
||||
|
||||
@@ -622,16 +681,16 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn date_label_formats_or_none() {
|
||||
let seg = PlannedSegment {
|
||||
media: photo("a.jpg", 1),
|
||||
let beat = PlannedBeat {
|
||||
photos: vec![photo("a.jpg", 1)],
|
||||
date: Some(1_560_384_000), // 2019-06-13 UTC
|
||||
insight_title: None,
|
||||
insight_summary: None,
|
||||
};
|
||||
assert!(seg.date_label().unwrap().contains("2019"));
|
||||
assert!(beat.date_label().unwrap().contains("2019"));
|
||||
|
||||
let undated = PlannedSegment {
|
||||
media: photo("a.jpg", 1),
|
||||
let undated = PlannedBeat {
|
||||
photos: vec![photo("a.jpg", 1)],
|
||||
date: None,
|
||||
insight_title: None,
|
||||
insight_summary: None,
|
||||
|
||||
Reference in New Issue
Block a user