Feature/unified nl search #106

Open
cameron wants to merge 26 commits from feature/unified-nl-search into master
9 changed files with 1615 additions and 30 deletions
Showing only changes of commit e3f731b3b2 - Show all commits
+31
View File
@@ -23,6 +23,7 @@ use std::time::{Duration, Instant};
use tokio::sync::Semaphore; use tokio::sync::Semaphore;
use uuid::Uuid; use uuid::Uuid;
use crate::ai::llamacpp::LlamaCppClient;
use crate::data::Claims; use crate::data::Claims;
use crate::file_types::{is_audio_file, is_video_file}; use crate::file_types::{is_audio_file, is_video_file};
use crate::files::is_valid_full_path; use crate::files::is_valid_full_path;
@@ -473,6 +474,36 @@ pub struct TtsJobStatusResponse {
pub error: Option<String>, pub error: Option<String>,
} }
/// Synthesize speech honoring the global single-GPU serialization
/// (`TTS_PERMIT`) and the GPU write lease, exactly as the speech-job path does.
/// Queues on the permit rather than fast-failing, so callers wait their turn
/// instead of contending. Text is run through the same markdown/emoji cleanup +
/// pronunciation pipeline as the HTTP handlers. Reused by the memory-reel
/// pipeline to narrate each segment without racing a user's TTS request on the
/// Chatterbox GPU.
pub async fn synthesize_serialized(
client: &LlamaCppClient,
text: &str,
voice: Option<&str>,
format: &str,
) -> anyhow::Result<Vec<u8>> {
let prepared = prepare_for_tts(text);
if prepared.is_empty() {
anyhow::bail!("nothing to synthesize after cleanup");
}
// Queue rather than fast-fail (mirrors create_speech_job_handler).
let _permit = TTS_PERMIT
.acquire()
.await
.map_err(|_| anyhow::anyhow!("TTS permit closed"))?;
// Wait for the LLM side to release the GPU before the request timeout
// starts (see ai::gpu).
let _gpu = crate::ai::gpu::tts_lease().await;
client
.text_to_speech(&prepared, voice, format, None, None, None)
.await
}
/// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses. /// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses.
/// Returns 202 + a job id immediately; the synth queues on the single GPU /// Returns 202 + a job id immediately; the synth queues on the single GPU
/// permit (instead of fast-failing 429) and the client polls the job until /// permit (instead of fast-failing 429) and the client polls the job until
+4
View File
@@ -62,6 +62,7 @@ mod knowledge;
mod memories; mod memories;
mod otel; mod otel;
mod personas; mod personas;
mod reels;
mod service; mod service;
#[cfg(test)] #[cfg(test)]
mod testhelpers; mod testhelpers;
@@ -344,6 +345,9 @@ fn main() -> std::io::Result<()> {
.service(handlers::image::clear_image_date) .service(handlers::image::clear_image_date)
.service(handlers::image::get_full_exif) .service(handlers::image::get_full_exif)
.service(memories::list_memories) .service(memories::list_memories)
.service(reels::create_reel_handler)
.service(reels::reel_status_handler)
.service(reels::reel_video_handler)
.service(ai::generate_insight_handler) .service(ai::generate_insight_handler)
.service(ai::generate_agentic_insight_handler) .service(ai::generate_agentic_insight_handler)
.service(ai::generation_status_handler) .service(ai::generation_status_handler)
+57 -29
View File
@@ -349,12 +349,6 @@ pub async fn list_memories(
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
let span_mode = q.span.unwrap_or(MemoriesSpan::Day); let span_mode = q.span.unwrap_or(MemoriesSpan::Day);
let span_token = match span_mode {
MemoriesSpan::Day => "day",
MemoriesSpan::Week => "week",
MemoriesSpan::Month => "month",
};
let years_back: i32 = DEFAULT_YEARS_BACK;
// The SQL filter expects a signed offset in minutes from UTC; default // The SQL filter expects a signed offset in minutes from UTC; default
// 0 (UTC) when the client didn't send a hint. We also keep a chrono // 0 (UTC) when the client didn't send a hint. We also keep a chrono
@@ -366,18 +360,66 @@ pub async fn list_memories(
.timezone_offset_minutes .timezone_offset_minutes
.and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60)); .and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60));
debug!( let items = match gather_memory_items(
"list_memories: span={:?} tz_offset_min={} years_back={}", &app_state,
span_mode, tz_offset_minutes, years_back &exif_dao,
); &span_context,
span_mode,
let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { tz_offset_minutes,
Ok(lib) => lib, client_timezone,
q.library.as_deref(),
) {
Ok(items) => items,
Err(msg) => { Err(msg) => {
warn!("Rejecting /memories request: {}", msg); warn!("Rejecting /memories request: {}", msg);
return HttpResponse::BadRequest().body(msg); return HttpResponse::BadRequest().body(msg);
} }
}; };
span.add_event(
"memories_scanned",
vec![
KeyValue::new("span", format!("{:?}", span_mode)),
KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()),
KeyValue::new("result_count", items.len().to_string()),
KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
],
);
span.set_status(Status::Ok);
HttpResponse::Ok().json(MemoriesResponse { items })
}
/// Resolve an "on this day/week/month across past years" window into an
/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the
/// memory-reel selector so both honour the same library resolution, per-library
/// exclusions, timezone handling, and sort order. Returns `Err(message)` only
/// when the `library` param is invalid (callers map that to 400); per-library
/// query/lock failures are logged and skipped, matching the handler's
/// best-effort behaviour.
pub fn gather_memory_items(
app_state: &AppState,
exif_dao: &Mutex<Box<dyn ExifDao>>,
span_context: &opentelemetry::Context,
span_mode: MemoriesSpan,
tz_offset_minutes: i32,
client_timezone: Option<FixedOffset>,
library_param: Option<&str>,
) -> Result<Vec<MemoryItem>, String> {
let span_token = match span_mode {
MemoriesSpan::Day => "day",
MemoriesSpan::Week => "week",
MemoriesSpan::Month => "month",
};
let years_back: i32 = DEFAULT_YEARS_BACK;
debug!(
"gather_memory_items: span={:?} tz_offset_min={} years_back={}",
span_mode, tz_offset_minutes, years_back
);
let library = crate::libraries::resolve_library_param(app_state, library_param)?;
let libraries_to_scan: Vec<&crate::libraries::Library> = match library { let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
Some(lib) => vec![lib], Some(lib) => vec![lib],
None => app_state.libraries.iter().collect(), None => app_state.libraries.iter().collect(),
@@ -394,7 +436,7 @@ pub async fn list_memories(
let rows = match exif_dao.lock() { let rows = match exif_dao.lock() {
Ok(mut dao) => match dao.get_memories_in_window( Ok(mut dao) => match dao.get_memories_in_window(
&span_context, span_context,
lib.id, lib.id,
span_token, span_token,
years_back, years_back,
@@ -469,21 +511,7 @@ pub async fn list_memories(
} }
} }
let items: Vec<MemoryItem> = memories_with_dates.into_iter().map(|(m, _)| m).collect(); Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect())
span.add_event(
"memories_scanned",
vec![
KeyValue::new("span", format!("{:?}", span_mode)),
KeyValue::new("years_back", years_back.to_string()),
KeyValue::new("result_count", items.len().to_string()),
KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
],
);
span.set_status(Status::Ok);
HttpResponse::Ok().json(MemoriesResponse { items })
} }
#[cfg(test)] #[cfg(test)]
+625
View File
@@ -0,0 +1,625 @@
//! Memory reels: render an MP4 slideshow of a selection of photos with an
//! LLM-written, voice-cloned narration over it.
//!
//! Pipeline: a [`selector`] resolves *which* photos (and the reel metadata),
//! the [`script`] module writes per-photo narration via the LLM, each line is
//! synthesized to speech, and [`render`] assembles the stills + narration into
//! one MP4. Jobs run in the background (mirroring the TTS speech-job registry)
//! because a reel takes minutes; the finished MP4 is cached on disk keyed by
//! the selection so a repeat request is instant.
//!
//! Phase 1 is on-demand and photos-only. The segment model is media-typed so a
//! video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in
//! without reworking the pipeline.
pub mod render;
pub mod script;
pub mod selector;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{LazyLock, Mutex as StdMutex};
use std::time::{Duration, Instant};
use actix_files::NamedFile;
use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web};
use chrono::DateTime;
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::sync::Mutex;
use uuid::Uuid;
use crate::data::Claims;
use crate::database::{ExifDao, InsightDao};
use crate::memories::MemoriesSpan;
use crate::otel::extract_context_from_request;
use crate::state::AppState;
use selector::ReelSelector;
/// The media behind one reel segment. Photos-only for now; a `Clip` variant
/// (a section of a source video) is the phase-2 extension point.
#[derive(Debug, Clone)]
pub enum SegmentMedia {
Photo { rel_path: String, library_id: i32 },
}
/// A segment before narration: which photo, when it was taken, and any cached
/// insight to feed the scripter.
#[derive(Debug, Clone)]
pub struct PlannedSegment {
pub media: SegmentMedia,
pub date: Option<i64>,
pub insight_title: Option<String>,
pub insight_summary: Option<String>,
}
impl PlannedSegment {
/// Human date for the prompt, e.g. "June 12, 2019". `None` when undated.
pub fn date_label(&self) -> Option<String> {
let ts = self.date?;
let dt = DateTime::from_timestamp(ts, 0)?;
Some(dt.format("%B %-d, %Y").to_string())
}
}
/// Reel-wide metadata the scripter uses for framing.
#[derive(Debug, Clone)]
pub struct ReelMeta {
pub span: MemoriesSpan,
pub years: Vec<i32>,
}
impl ReelMeta {
/// Natural-language phrase for the span, e.g. "on this day".
pub fn span_phrase(&self) -> &'static str {
match self.span {
MemoriesSpan::Day => "on this day",
MemoriesSpan::Week => "this week",
MemoriesSpan::Month => "this month",
}
}
}
// --- Job registry ------------------------------------------------------------
//
// In-memory, same shape as the TTS speech-job registry: a reel takes minutes,
// too long to hold one HTTP request from a phone. POST /reels returns a job id;
// the client polls GET /reels/{id} until the video URL appears. The heavy
// artifact (the MP4) lives on disk, not in this map — jobs only carry status +
// the output path. State is intentionally not durable across restarts; the
// on-disk cache is what makes a repeat request cheap, not the registry.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ReelJobStatus {
Queued,
Running,
Done,
Error,
}
impl ReelJobStatus {
fn is_terminal(self) -> bool {
matches!(self, Self::Done | Self::Error)
}
}
struct ReelJob {
status: ReelJobStatus,
/// Coarse progress label for the client ("scripting", "narrating", …).
stage: &'static str,
title: Option<String>,
output_path: Option<PathBuf>,
error: Option<String>,
created_at: Instant,
finished_at: Option<Instant>,
abort: Option<tokio::task::AbortHandle>,
}
/// Finished jobs linger so a client that lost connectivity can still collect
/// the result; anything older than MAX_AGE is dropped (aborted first if somehow
/// still running). Swept lazily on each create.
const REEL_JOB_RESULT_TTL: Duration = Duration::from_secs(30 * 60);
const REEL_JOB_MAX_AGE: Duration = Duration::from_secs(60 * 60);
static REEL_JOBS: LazyLock<StdMutex<HashMap<Uuid, ReelJob>>> =
LazyLock::new(|| StdMutex::new(HashMap::new()));
fn sweep_stale_jobs(jobs: &mut HashMap<Uuid, ReelJob>, now: Instant) {
jobs.retain(|_, job| {
let result_expired = job
.finished_at
.is_some_and(|t| now.duration_since(t) >= REEL_JOB_RESULT_TTL);
let too_old = now.duration_since(job.created_at) >= REEL_JOB_MAX_AGE;
if too_old && let Some(h) = job.abort.take() {
h.abort();
}
!(result_expired || too_old)
});
}
fn with_job<R>(id: Uuid, f: impl FnOnce(&mut ReelJob) -> R) -> Option<R> {
REEL_JOBS.lock().unwrap().get_mut(&id).map(f)
}
fn set_stage(id: Uuid, stage: &'static str) {
with_job(id, |job| {
if !job.status.is_terminal() {
job.status = ReelJobStatus::Running;
job.stage = stage;
}
});
}
/// Move a job to a terminal state (first terminal write wins).
fn finish_job(
id: Uuid,
status: ReelJobStatus,
title: Option<String>,
output_path: Option<PathBuf>,
error: Option<String>,
) {
with_job(id, |job| {
if job.status.is_terminal() {
return;
}
job.status = status;
job.stage = match status {
ReelJobStatus::Done => "done",
_ => "error",
};
job.title = title;
job.output_path = output_path;
job.error = error;
job.finished_at = Some(Instant::now());
job.abort = None;
});
}
// --- On-disk cache -----------------------------------------------------------
/// Render version: bump to invalidate every cached reel after a rendering /
/// scripting change that should produce a fresh result.
const RENDER_VERSION: u32 = 1;
/// Cache key over everything that determines *which* media and *how* it's
/// voiced — but not the (non-deterministic) narration text. Same inputs → same
/// MP4 served instantly. blake3 keeps it filesystem-safe and collision-free.
fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str>) -> String {
let mut buf = format!(
"v{}|{}|voice={}|",
RENDER_VERSION,
selector.descriptor(),
voice.unwrap_or("default")
);
for m in media {
match m {
SegmentMedia::Photo {
rel_path,
library_id,
} => buf.push_str(&format!("{library_id}:{rel_path}|")),
}
}
blake3::hash(buf.as_bytes()).to_hex().to_string()
}
fn reel_mp4_path(app_state: &AppState, key: &str) -> PathBuf {
Path::new(&app_state.reels_path).join(format!("{key}.mp4"))
}
fn reel_sidecar_path(app_state: &AppState, key: &str) -> PathBuf {
Path::new(&app_state.reels_path).join(format!("{key}.json"))
}
#[derive(Serialize, Deserialize)]
struct ReelSidecar {
title: String,
}
// --- HTTP types --------------------------------------------------------------
#[derive(Debug, Deserialize)]
pub struct CreateReelRequest {
#[serde(default)]
pub span: Option<MemoriesSpan>,
#[serde(default)]
pub timezone_offset_minutes: Option<i32>,
#[serde(default)]
pub library: Option<String>,
/// Cloned TTS voice for the narration; server default when omitted.
#[serde(default)]
pub voice: Option<String>,
/// Cap on photos in the reel (clamped server-side).
#[serde(default)]
pub max_segments: Option<usize>,
}
#[derive(Debug, Serialize)]
pub struct ReelJobCreatedResponse {
pub job_id: String,
pub status: ReelJobStatus,
}
#[derive(Debug, Serialize)]
pub struct ReelStatusResponse {
pub job_id: String,
pub status: ReelJobStatus,
pub stage: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub video_url: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
}
// --- Handlers ----------------------------------------------------------------
/// POST /reels — start (or instantly serve from cache) a memory reel for the
/// requested span. Returns 202 + a job id; the client polls GET /reels/{id}.
#[post("/reels")]
pub async fn create_reel_handler(
http_request: HttpRequest,
_claims: Claims,
req: web::Json<CreateReelRequest>,
app_state: web::Data<AppState>,
exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
insight_dao: web::Data<Mutex<Box<dyn InsightDao>>>,
) -> impl Responder {
let span_context = extract_context_from_request(&http_request);
if app_state.llamacpp.is_none() {
return HttpResponse::ServiceUnavailable().json(json!({
"error": "Reel narration needs the LLM/TTS backend (set LLAMA_SWAP_URL)"
}));
}
let span = req.span.unwrap_or(MemoriesSpan::Day);
let max_segments = req.max_segments.unwrap_or(selector::DEFAULT_MAX_SEGMENTS);
let selector = ReelSelector::Memories {
span,
tz_offset_minutes: req.timezone_offset_minutes.unwrap_or(0),
library: req.library.clone(),
max_segments,
};
// Cheap pass: resolve the media set for the cache key and the emptiness
// check. Insight enrichment + scripting happen in the background job.
let (planned, meta) = match selector::resolve(&app_state, &exif_dao, &span_context, &selector) {
Ok(r) => r,
Err(msg) => return HttpResponse::BadRequest().body(msg),
};
if planned.is_empty() {
return HttpResponse::UnprocessableEntity().json(json!({
"error": "No photo memories found for this span"
}));
}
let media: Vec<SegmentMedia> = planned.iter().map(|p| p.media.clone()).collect();
let voice = req.voice.clone().filter(|s| !s.is_empty());
let key = cache_key(&selector, &media, voice.as_deref());
let job_id = Uuid::new_v4();
// Cache hit: register an already-Done job pointing at the existing MP4 so
// the client's first poll returns the video URL immediately.
let mp4 = reel_mp4_path(&app_state, &key);
if mp4.exists() {
let title = std::fs::read(reel_sidecar_path(&app_state, &key))
.ok()
.and_then(|b| serde_json::from_slice::<ReelSidecar>(&b).ok())
.map(|s| s.title);
let mut jobs = REEL_JOBS.lock().unwrap();
sweep_stale_jobs(&mut jobs, Instant::now());
jobs.insert(
job_id,
ReelJob {
status: ReelJobStatus::Done,
stage: "done",
title,
output_path: Some(mp4),
error: None,
created_at: Instant::now(),
finished_at: Some(Instant::now()),
abort: None,
},
);
return HttpResponse::Accepted().json(ReelJobCreatedResponse {
job_id: job_id.to_string(),
status: ReelJobStatus::Done,
});
}
{
let mut jobs = REEL_JOBS.lock().unwrap();
sweep_stale_jobs(&mut jobs, Instant::now());
jobs.insert(
job_id,
ReelJob {
status: ReelJobStatus::Queued,
stage: "queued",
title: None,
output_path: None,
error: None,
created_at: Instant::now(),
finished_at: None,
abort: None,
},
);
}
let state = app_state.clone();
let insight_dao = insight_dao.clone();
let handle = tokio::spawn(async move {
match run_reel_job(&state, &insight_dao, job_id, planned, meta, voice, &key).await {
Ok((title, path)) => {
finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None)
}
Err(e) => {
log::error!("reel job {job_id} failed: {e:?}");
finish_job(
job_id,
ReelJobStatus::Error,
None,
None,
Some(format!("{e}")),
)
}
}
});
with_job(job_id, |job| job.abort = Some(handle.abort_handle()));
HttpResponse::Accepted().json(ReelJobCreatedResponse {
job_id: job_id.to_string(),
status: ReelJobStatus::Queued,
})
}
/// GET /reels/{id} — poll a reel job. Done jobs carry a `video_url`.
#[get("/reels/{id}")]
pub async fn reel_status_handler(_claims: Claims, path: web::Path<String>) -> impl Responder {
let id_str = path.into_inner();
let Ok(id) = Uuid::parse_str(&id_str) else {
return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" }));
};
let resp = with_job(id, |job| ReelStatusResponse {
job_id: id_str.clone(),
status: job.status,
stage: job.stage.to_string(),
title: job.title.clone(),
video_url: matches!(job.status, ReelJobStatus::Done)
.then(|| format!("/reels/{id_str}/video")),
error: job.error.clone(),
});
match resp {
Some(r) => HttpResponse::Ok().json(r),
None => HttpResponse::NotFound().json(json!({ "error": "job not found or expired" })),
}
}
/// GET /reels/{id}/video — stream the finished MP4 (supports range requests via
/// NamedFile, so the mobile player can seek).
#[get("/reels/{id}/video")]
pub async fn reel_video_handler(
_claims: Claims,
request: HttpRequest,
path: web::Path<String>,
) -> impl Responder {
let id_str = path.into_inner();
let Ok(id) = Uuid::parse_str(&id_str) else {
return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" }));
};
let output = with_job(id, |job| job.output_path.clone()).flatten();
let Some(path) = output else {
return HttpResponse::NotFound().json(json!({ "error": "reel not ready" }));
};
match NamedFile::open(&path) {
Ok(file) => file.into_response(&request),
Err(e) => {
log::error!("opening reel mp4 {path:?} failed: {e:?}");
HttpResponse::NotFound().json(json!({ "error": "reel file missing" }))
}
}
}
// --- Pipeline ----------------------------------------------------------------
/// Run the full reel pipeline: enrich → script → narrate → render → concat,
/// then publish the MP4 into the cache. Returns (title, mp4_path).
async fn run_reel_job(
app_state: &AppState,
insight_dao: &Mutex<Box<dyn InsightDao>>,
job_id: Uuid,
mut planned: Vec<PlannedSegment>,
meta: ReelMeta,
voice: Option<String>,
key: &str,
) -> anyhow::Result<(String, PathBuf)> {
use anyhow::{Context, anyhow};
let client = app_state
.llamacpp
.as_ref()
.ok_or_else(|| anyhow!("TTS/LLM backend not configured"))?
.clone();
// 1. Enrich with cached insights, then script (one LLM call).
set_stage(job_id, "scripting");
let span_context = opentelemetry::Context::new();
selector::enrich(insight_dao, &span_context, &mut planned);
let script = script::generate_script(&client, &meta, &planned).await?;
// 2. Narrate each line to speech and 3. render each photo segment. A
// segment whose audio or render fails is skipped (logged) rather than
// sinking the whole reel — handles an odd HEIC/corrupt file gracefully.
set_stage(job_id, "narrating");
let work = tempfile::tempdir().context("creating reel work dir")?;
let nvenc = render::is_nvenc_available().await;
let opts = render::SegmentOpts {
nvenc,
..Default::default()
};
let mut segment_files: Vec<String> = Vec::new();
for (i, (seg, line)) in planned.iter().zip(script.lines.iter()).enumerate() {
let image_path = match resolve_image_path(app_state, &seg.media) {
Some(p) => p,
None => {
log::warn!("reel {job_id}: skipping segment {i}, image path unresolved");
continue;
}
};
let audio_bytes =
match crate::ai::tts::synthesize_serialized(&client, line, voice.as_deref(), "wav")
.await
{
Ok(b) => b,
Err(e) => {
log::warn!("reel {job_id}: skipping segment {i}, TTS failed: {e}");
continue;
}
};
let audio_path = work.path().join(format!("narration_{i:03}.wav"));
if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await {
log::warn!("reel {job_id}: skipping segment {i}, writing audio failed: {e}");
continue;
}
let narration_secs =
crate::video::ffmpeg::get_duration_seconds(&audio_path.to_string_lossy())
.await
.ok()
.flatten()
.unwrap_or(render::MIN_SEGMENT_SECONDS);
let duration = render::segment_duration(narration_secs);
set_stage(job_id, "rendering");
let seg_out = work.path().join(format!("seg_{i:03}.mp4"));
if let Err(e) =
render::render_segment(&image_path, &audio_path, &seg_out, duration, &opts).await
{
log::warn!("reel {job_id}: skipping segment {i}, render failed: {e}");
continue;
}
segment_files.push(seg_out.to_string_lossy().to_string());
}
if segment_files.is_empty() {
return Err(anyhow!("no segments rendered successfully"));
}
// 4. Concat into the cache. Write to a temp name in the reels dir, then
// rename atomically (same filesystem) so a reader never sees a partial.
std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?;
let final_path = reel_mp4_path(app_state, key);
let tmp_path = final_path.with_extension("mp4.tmp");
render::concat_segments(&segment_files, &tmp_path).await?;
std::fs::rename(&tmp_path, &final_path).context("publishing reel mp4")?;
// Sidecar carries the title so a future cache hit can return it without
// re-running the pipeline.
let sidecar = serde_json::to_vec(&ReelSidecar {
title: script.title.clone(),
})
.context("serializing reel sidecar")?;
let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar);
Ok((script.title, final_path))
}
/// Resolve a photo segment's library-relative path to a validated absolute
/// path under its library root.
fn resolve_image_path(app_state: &AppState, media: &SegmentMedia) -> Option<PathBuf> {
let SegmentMedia::Photo {
rel_path,
library_id,
} = media;
let lib = app_state.library_by_id(*library_id)?;
crate::files::is_valid_full_path(&lib.root_path, rel_path, false)
}
#[cfg(test)]
mod tests {
use super::*;
fn photo(p: &str, lib: i32) -> SegmentMedia {
SegmentMedia::Photo {
rel_path: p.to_string(),
library_id: lib,
}
}
fn day_selector() -> ReelSelector {
ReelSelector::Memories {
span: MemoriesSpan::Day,
tz_offset_minutes: 0,
library: None,
max_segments: 24,
}
}
#[test]
fn cache_key_is_stable_for_same_inputs() {
let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)];
let k1 = cache_key(&day_selector(), &media, Some("grandma"));
let k2 = cache_key(&day_selector(), &media, Some("grandma"));
assert_eq!(k1, k2);
// 64-hex blake3.
assert_eq!(k1.len(), 64);
assert!(k1.chars().all(|c| c.is_ascii_hexdigit()));
}
#[test]
fn cache_key_changes_with_media_order_voice_and_selector() {
let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)];
let reordered = vec![photo("b.jpg", 1), photo("a.jpg", 1)];
let base = cache_key(&day_selector(), &media, Some("grandma"));
// Order matters (the reel sequence differs).
assert_ne!(
base,
cache_key(&day_selector(), &reordered, Some("grandma"))
);
// Voice matters.
assert_ne!(base, cache_key(&day_selector(), &media, Some("dad")));
assert_ne!(base, cache_key(&day_selector(), &media, None));
// Span matters.
let week = ReelSelector::Memories {
span: MemoriesSpan::Week,
tz_offset_minutes: 0,
library: None,
max_segments: 24,
};
assert_ne!(base, cache_key(&week, &media, Some("grandma")));
}
#[test]
fn span_phrase_maps_each_span() {
let mk = |span| ReelMeta {
span,
years: vec![],
};
assert_eq!(mk(MemoriesSpan::Day).span_phrase(), "on this day");
assert_eq!(mk(MemoriesSpan::Week).span_phrase(), "this week");
assert_eq!(mk(MemoriesSpan::Month).span_phrase(), "this month");
}
#[test]
fn date_label_formats_or_none() {
let seg = PlannedSegment {
media: photo("a.jpg", 1),
date: Some(1_560_384_000), // 2019-06-13 UTC
insight_title: None,
insight_summary: None,
};
assert!(seg.date_label().unwrap().contains("2019"));
let undated = PlannedSegment {
media: photo("a.jpg", 1),
date: None,
insight_title: None,
insight_summary: None,
};
assert_eq!(undated.date_label(), None);
}
}
+338
View File
@@ -0,0 +1,338 @@
//! ffmpeg assembly for memory reels.
//!
//! Two-stage, per-segment design: each segment is rendered to its own
//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments
//! are joined with the concat demuxer (stream copy, no re-encode). Rendering
//! per segment — rather than one monster filtergraph — keeps each ffmpeg
//! invocation simple to reason about, parallelizes naturally, and means a
//! video-clip segment type (phase 2) slots in as just a different per-segment
//! builder without touching the concat stage.
//!
//! The arg builders are pure (`Vec<String>` out) so the exact ffmpeg command
//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure.
use anyhow::{Context, Result, bail};
use std::path::Path;
use tokio::process::Command;
/// Re-exported so the reel pipeline reaches NVENC detection through this module
/// rather than depending on `video::ffmpeg` directly.
pub use crate::video::ffmpeg::is_nvenc_available;
/// Reel canvas. Landscape matches the majority of camera photos; portrait
/// shots are letterboxed by the `pad` in [`segment_filter`] rather than
/// cropped, so faces never get cut off.
pub const REEL_WIDTH: u32 = 1920;
pub const REEL_HEIGHT: u32 = 1080;
pub const REEL_FPS: u32 = 30;
/// A still's screen time is its narration length plus a short breath, with a
/// floor so a terse line still lingers. No ceiling: the segment always covers
/// the full narration so speech is never truncated — the scripter is asked to
/// keep lines short instead.
pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
const NARRATION_TAIL_SECONDS: f64 = 0.6;
/// Screen time for a photo segment given its narration audio length.
pub fn segment_duration(narration_secs: f64) -> f64 {
let d = narration_secs + NARRATION_TAIL_SECONDS;
if d.is_finite() && d > MIN_SEGMENT_SECONDS {
d
} else {
MIN_SEGMENT_SECONDS
}
}
/// Options controlling per-segment rendering. `ken_burns` adds a slow zoom for
/// motion; it's defaulted off until the effect is eyeballed on the GPU box,
/// since a wrong zoompan expression reads as jitter and can't be verified here.
#[derive(Debug, Clone, Copy)]
pub struct SegmentOpts {
pub width: u32,
pub height: u32,
pub fps: u32,
pub nvenc: bool,
pub ken_burns: bool,
}
impl Default for SegmentOpts {
fn default() -> Self {
Self {
width: REEL_WIDTH,
height: REEL_HEIGHT,
fps: REEL_FPS,
nvenc: false,
ken_burns: false,
}
}
}
/// Video filter for a photo segment: fit the image inside the canvas
/// (preserving aspect, padding the rest), normalize SAR/fps/pixel format, and
/// optionally apply a gentle Ken Burns zoom.
pub fn segment_filter(opts: &SegmentOpts, duration: f64) -> String {
let (w, h, fps) = (opts.width, opts.height, opts.fps);
if opts.ken_burns {
// Upscale first so zoompan samples from a larger frame (avoids
// shimmer), drift the zoom from 1.0→~1.12 across the segment, hold the
// crop centered, then settle to the canvas.
let frames = (duration * fps as f64).round().max(1.0) as u64;
format!(
"scale={w}*2:{h}*2:force_original_aspect_ratio=increase,\
crop={w}*2:{h}*2,\
zoompan=z='min(zoom+0.0009,1.12)':d={frames}:\
x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={w}x{h}:fps={fps},\
setsar=1,format=yuv420p"
)
} else {
format!(
"scale={w}:{h}:force_original_aspect_ratio=decrease,\
pad={w}:{h}:(ow-iw)/2:(oh-ih)/2,\
setsar=1,fps={fps},format=yuv420p"
)
}
}
fn video_encoder_args(nvenc: bool) -> Vec<String> {
if nvenc {
// p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path.
[
"-c:v",
"h264_nvenc",
"-preset",
"p4",
"-cq",
"23",
"-pix_fmt",
"yuv420p",
]
} else {
[
"-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p",
]
}
.iter()
.map(|s| s.to_string())
.collect()
}
/// Build the ffmpeg args that render one photo segment: a still looped for
/// `duration` seconds with its narration muxed in. The narration is padded
/// with trailing silence (`apad`) so short lines don't end the segment early;
/// `-t` bounds both streams to the segment length.
pub fn build_segment_args(
image_path: &str,
audio_path: &str,
out_path: &str,
duration: f64,
opts: &SegmentOpts,
) -> Vec<String> {
let mut args: Vec<String> = vec!["-y".into()];
if opts.nvenc {
args.extend(["-hwaccel".into(), "cuda".into()]);
}
args.extend([
"-loop".into(),
"1".into(),
"-i".into(),
image_path.into(),
"-i".into(),
audio_path.into(),
"-filter_complex".into(),
format!("[0:v]{}[v];[1:a]apad[a]", segment_filter(opts, duration)),
"-map".into(),
"[v]".into(),
"-map".into(),
"[a]".into(),
"-t".into(),
format!("{duration:.3}"),
]);
args.extend(video_encoder_args(opts.nvenc));
args.extend(
["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"]
.iter()
.map(|s| s.to_string()),
);
args.push(out_path.into());
args
}
/// Build the concat-demuxer args that join rendered segments losslessly.
/// `+faststart` moves the moov atom up front so the reel streams immediately
/// on the mobile client.
pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec<String> {
[
"-y",
"-f",
"concat",
"-safe",
"0",
"-i",
list_path,
"-c",
"copy",
"-movflags",
"+faststart",
out_path,
]
.iter()
.map(|s| s.to_string())
.collect()
}
/// Render the concat list file body. Each line points the demuxer at one
/// segment; single quotes in paths are escaped per ffmpeg's concat syntax.
pub fn build_concat_list(segment_paths: &[String]) -> String {
let mut out = String::new();
for p in segment_paths {
let escaped = p.replace('\'', r"'\''");
out.push_str(&format!("file '{escaped}'\n"));
}
out
}
async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
let output = Command::new("ffmpeg")
.args(args)
.output()
.await
.with_context(|| format!("spawning ffmpeg for {what}"))?;
if !output.status.success() {
bail!(
"ffmpeg {what} failed: {}",
String::from_utf8_lossy(&output.stderr)
);
}
Ok(())
}
/// Render one photo segment to `out_path`.
pub async fn render_segment(
image_path: &Path,
audio_path: &Path,
out_path: &Path,
duration: f64,
opts: &SegmentOpts,
) -> Result<()> {
let args = build_segment_args(
&image_path.to_string_lossy(),
&audio_path.to_string_lossy(),
&out_path.to_string_lossy(),
duration,
opts,
);
run_ffmpeg(&args, "segment render").await
}
/// Join rendered segments into the final reel. Writes the concat list into the
/// same directory as the output so relative paths and cleanup stay local.
pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> {
let list_path = out_path.with_extension("concat.txt");
let body = build_concat_list(segment_paths);
tokio::fs::write(&list_path, body)
.await
.context("writing concat list")?;
let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy());
let result = run_ffmpeg(&args, "concat").await;
let _ = tokio::fs::remove_file(&list_path).await;
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn segment_duration_floors_short_lines() {
// A one-word narration still lingers at the floor.
assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS);
assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS);
}
#[test]
fn segment_duration_covers_full_narration_plus_tail() {
// No ceiling: a long line gets its full length so speech isn't cut.
assert!((segment_duration(5.0) - 5.6).abs() < 1e-9);
assert!((segment_duration(20.0) - 20.6).abs() < 1e-9);
}
#[test]
fn segment_duration_rejects_nonfinite() {
assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS);
assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS);
}
#[test]
fn static_filter_fits_and_pads_without_cropping() {
let f = segment_filter(&SegmentOpts::default(), 4.0);
assert!(f.contains("force_original_aspect_ratio=decrease"));
assert!(f.contains("pad=1920:1080"));
assert!(f.contains("format=yuv420p"));
// No zoompan when ken_burns is off.
assert!(!f.contains("zoompan"));
}
#[test]
fn ken_burns_filter_uses_duration_scaled_frame_count() {
let opts = SegmentOpts {
ken_burns: true,
..SegmentOpts::default()
};
// 4s * 30fps = 120 frames in the zoompan d= term.
let f = segment_filter(&opts, 4.0);
assert!(f.contains("zoompan"));
assert!(f.contains("d=120:"));
assert!(f.contains("s=1920x1080"));
}
#[test]
fn segment_args_loop_still_and_bound_with_t() {
let args = build_segment_args(
"/img.jpg",
"/a.wav",
"/out.mp4",
4.0,
&SegmentOpts::default(),
);
let joined = args.join(" ");
assert!(joined.contains("-loop 1 -i /img.jpg"));
assert!(joined.contains("-i /a.wav"));
assert!(joined.contains("apad"));
assert!(joined.contains("-t 4.000"));
assert!(joined.contains("libx264"));
assert!(joined.ends_with("/out.mp4"));
}
#[test]
fn segment_args_use_nvenc_and_cuda_when_enabled() {
let opts = SegmentOpts {
nvenc: true,
..SegmentOpts::default()
};
let args = build_segment_args("/img.jpg", "/a.wav", "/out.mp4", 3.0, &opts);
let joined = args.join(" ");
assert!(joined.contains("-hwaccel cuda"));
assert!(joined.contains("h264_nvenc"));
assert!(!joined.contains("libx264"));
}
#[test]
fn concat_args_stream_copy_with_faststart() {
let args = build_concat_args("/tmp/list.txt", "/out.mp4");
let joined = args.join(" ");
assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt"));
assert!(joined.contains("-c copy"));
assert!(joined.contains("+faststart"));
}
#[test]
fn concat_list_escapes_single_quotes() {
let body = build_concat_list(&[
"/tmp/seg_000.mp4".into(),
"/tmp/own's dir/seg_001.mp4".into(),
]);
assert!(body.contains("file '/tmp/seg_000.mp4'\n"));
// The apostrophe is closed-escaped-reopened per ffmpeg concat syntax.
assert!(body.contains(r"own'\''s"));
}
}
+289
View File
@@ -0,0 +1,289 @@
//! Narration scripting for memory reels.
//!
//! One LLM call turns the planned segments (each carrying its date and, where
//! available, its cached insight) into a short first-person narration line per
//! photo plus a title for the reel. We reuse the cached insight summary as the
//! richest per-photo signal rather than re-running vision at reel time — that
//! keeps reel generation off the GPU's vision slot entirely.
//!
//! The prompt builder and response parser are pure so the contract is
//! unit-testable; `generate_script` wires them to the LLM client.
use anyhow::{Context, Result};
use std::sync::Arc;
use super::{PlannedSegment, ReelMeta};
use crate::ai::llamacpp::LlamaCppClient;
use crate::ai::llm_client::LlmClient;
/// The narration for a whole reel: a title and one line per segment, in order.
#[derive(Debug, Clone, PartialEq)]
pub struct ReelScript {
pub title: String,
pub lines: Vec<String>,
}
const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
slideshow of someone's own photos set to a spoken voiceover. Write warm, \
specific, first-person narration as if the person is gently looking back on \
their own memories. Be concrete and grounded in the details given; never \
invent names, places, or events that aren't supported. Keep each line to one \
or two short sentences that can be read aloud in a few seconds. Avoid generic \
filler like \"what a wonderful day\" — if you have little to go on, simply \
describe the moment plainly.";
/// Build the (system, user) prompt pair for the scripter. The user message
/// describes each segment in order and asks for strict JSON back.
pub fn build_script_messages(meta: &ReelMeta, planned: &[PlannedSegment]) -> (String, String) {
let mut user = String::new();
user.push_str(&format!(
"These are {} photos surfaced as memories {}.\n\n",
planned.len(),
meta.span_phrase()
));
if !meta.years.is_empty() {
let years: Vec<String> = meta.years.iter().map(|y| y.to_string()).collect();
user.push_str(&format!("They span the years: {}.\n\n", years.join(", ")));
}
user.push_str("Photos, in the order they will appear:\n");
for (i, seg) in planned.iter().enumerate() {
user.push_str(&format!("\n[{}]", i + 1));
if let Some(date) = seg.date_label() {
user.push_str(&format!(" {date}"));
}
user.push('\n');
match (&seg.insight_title, &seg.insight_summary) {
(Some(t), Some(s)) if !s.trim().is_empty() => {
user.push_str(&format!(" Known context: {t}{s}\n"));
}
(Some(t), _) => user.push_str(&format!(" Known context: {t}\n")),
(_, Some(s)) if !s.trim().is_empty() => {
user.push_str(&format!(" Known context: {s}\n"));
}
_ => user.push_str(" (no extra context — narrate plainly from the date)\n"),
}
}
user.push_str(&format!(
"\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
{{\"title\": \"<short reel title>\", \"segments\": [\"<line for photo 1>\", \
\"<line for photo 2>\", ... ]}}\n\
The \"segments\" array MUST have exactly {} items, one per photo in order.",
planned.len()
));
(SYSTEM_PROMPT.to_string(), user)
}
/// Parse the model's response into a script with exactly `n` lines. Tolerant of
/// code fences and surrounding prose, and of both `segments: [".."]` and
/// `segments: [{"narration": ".."}]` shapes. Missing/extra lines are padded or
/// truncated so the caller always gets `n` aligned to the segments.
pub fn parse_script_response(raw: &str, n: usize) -> ReelScript {
let fallback_line = "A moment worth remembering.";
let value = extract_json_object(raw);
let title = value
.as_ref()
.and_then(|v| v.get("title"))
.and_then(|t| t.as_str())
.map(clean_text)
.filter(|s| !s.is_empty())
.unwrap_or_else(|| "Memories".to_string());
let mut lines: Vec<String> = value
.as_ref()
.and_then(|v| v.get("segments"))
.and_then(|s| s.as_array())
.map(|arr| {
arr.iter()
.map(|item| {
let text = item
.as_str()
.map(|s| s.to_string())
.or_else(|| {
item.get("narration")
.and_then(|n| n.as_str())
.map(|s| s.to_string())
})
.unwrap_or_default();
clean_text(&text)
})
.collect()
})
.unwrap_or_default();
// Align to exactly n: drop extras, pad shortfalls with a neutral line so
// every photo still gets spoken audio.
lines.truncate(n);
while lines.len() < n {
lines.push(fallback_line.to_string());
}
for line in lines.iter_mut() {
if line.is_empty() {
*line = fallback_line.to_string();
}
}
ReelScript { title, lines }
}
/// Pull the first balanced top-level JSON object out of a possibly-noisy model
/// response (code fences, leading prose). Returns None if nothing parses.
fn extract_json_object(raw: &str) -> Option<serde_json::Value> {
// Fast path: the whole thing is valid JSON.
if let Ok(v) = serde_json::from_str::<serde_json::Value>(raw.trim()) {
return Some(v);
}
// Otherwise scan for the first '{' ... matching '}' span, ignoring braces
// inside strings.
let bytes = raw.as_bytes();
let start = raw.find('{')?;
let mut depth = 0i32;
let mut in_str = false;
let mut escaped = false;
for i in start..bytes.len() {
let c = bytes[i] as char;
if in_str {
if escaped {
escaped = false;
} else if c == '\\' {
escaped = true;
} else if c == '"' {
in_str = false;
}
continue;
}
match c {
'"' => in_str = true,
'{' => depth += 1,
'}' => {
depth -= 1;
if depth == 0 {
return serde_json::from_str(&raw[start..=i]).ok();
}
}
_ => {}
}
}
None
}
/// Collapse whitespace and strip stray markdown/quote decorations a model
/// sometimes leaves around a line.
fn clean_text(s: &str) -> String {
let trimmed = s.trim().trim_matches('"').trim();
trimmed.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// Generate the reel script via the LLM. Text-only (no images) — the per-photo
/// context comes from cached insights. The call takes the GPU read lease
/// internally (see `LlamaCppClient::generate`).
pub async fn generate_script(
client: &Arc<LlamaCppClient>,
meta: &ReelMeta,
planned: &[PlannedSegment],
) -> Result<ReelScript> {
let (system, user) = build_script_messages(meta, planned);
let raw = client
.generate(&user, Some(&system), None)
.await
.context("LLM script generation failed")?;
Ok(parse_script_response(&raw, planned.len()))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::memories::MemoriesSpan;
fn meta() -> ReelMeta {
ReelMeta {
span: MemoriesSpan::Day,
years: vec![2019, 2021],
}
}
fn planned(n: usize) -> Vec<PlannedSegment> {
(0..n)
.map(|i| PlannedSegment {
media: super::super::SegmentMedia::Photo {
rel_path: format!("p{i}.jpg"),
library_id: 1,
},
date: Some(1_560_000_000 + i as i64 * 86_400),
insight_title: None,
insight_summary: None,
})
.collect()
}
#[test]
fn prompt_states_exact_segment_count_and_span() {
let (sys, user) = build_script_messages(&meta(), &planned(3));
assert!(sys.contains("memory reel"));
assert!(user.contains("3 photos"));
assert!(user.contains("on this day"));
assert!(user.contains("exactly 3 items"));
// Each photo gets an indexed entry.
assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]"));
}
#[test]
fn prompt_includes_insight_context_when_present() {
let mut p = planned(1);
p[0].insight_title = Some("Lake house weekend".into());
p[0].insight_summary = Some("Swimming with the dogs.".into());
let (_sys, user) = build_script_messages(&meta(), &p);
assert!(user.contains("Lake house weekend — Swimming with the dogs."));
}
#[test]
fn parse_plain_json_object() {
let raw = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#;
let script = parse_script_response(raw, 2);
assert_eq!(script.title, "Summer Days");
assert_eq!(script.lines, vec!["First line.", "Second line."]);
}
#[test]
fn parse_tolerates_code_fences_and_prose() {
let raw = "Sure! Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!";
let script = parse_script_response(raw, 2);
assert_eq!(script.title, "Trip");
assert_eq!(script.lines, vec!["A.", "B."]);
}
#[test]
fn parse_accepts_object_segment_shape() {
let raw = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#;
let script = parse_script_response(raw, 2);
assert_eq!(script.lines, vec!["One.", "Two."]);
}
#[test]
fn parse_pads_short_and_truncates_long_to_n() {
// Model returned 1 line but we have 3 segments → pad with neutral lines.
let short = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3);
assert_eq!(short.lines.len(), 3);
assert_eq!(short.lines[0], "Only one.");
assert!(!short.lines[1].is_empty());
// Model returned 3 but we have 2 → truncate.
let long = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2);
assert_eq!(long.lines, vec!["a", "b"]);
}
#[test]
fn parse_falls_back_on_garbage() {
let script = parse_script_response("the model said no", 2);
assert_eq!(script.title, "Memories");
assert_eq!(script.lines.len(), 2);
assert!(script.lines.iter().all(|l| !l.is_empty()));
}
#[test]
fn parse_blank_line_replaced_with_fallback() {
let script = parse_script_response(r#"{"title":"T","segments":[" ","Real."]}"#, 2);
assert!(!script.lines[0].is_empty());
assert_eq!(script.lines[1], "Real.");
}
}
+252
View File
@@ -0,0 +1,252 @@
//! Reel selectors: resolve "what goes in the reel" into an ordered media set
//! plus the metadata the scripter needs. The renderer and scripter are
//! selector-agnostic, so adding tag- or date-range-based reels later means
//! adding a variant here, not touching the pipeline.
//!
//! Resolution is split in two so the handler can compute a cache key (and
//! short-circuit on a cache hit) without the per-photo insight lookups:
//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and
//! runs in the background job.
use std::path::Path;
use std::sync::Mutex;
use chrono::{DateTime, Datelike, FixedOffset};
use super::{PlannedSegment, ReelMeta, SegmentMedia};
use crate::database::{ExifDao, InsightDao};
use crate::file_types::is_image_file;
use crate::memories::{self, MemoriesSpan};
use crate::state::AppState;
/// Default and hard caps on how many photos a reel covers. The cap bounds the
/// LLM/TTS/ffmpeg work per reel; when a span has more, [`sample_evenly`] keeps
/// a representative spread across the years rather than just the oldest.
pub const DEFAULT_MAX_SEGMENTS: usize = 24;
pub const HARD_MAX_SEGMENTS: usize = 40;
/// What a reel is built from. v1 ships the memories (on this day/week/month)
/// selector; tag and date-range variants slot in here later.
#[derive(Debug, Clone)]
pub enum ReelSelector {
Memories {
span: MemoriesSpan,
tz_offset_minutes: i32,
library: Option<String>,
max_segments: usize,
},
}
impl ReelSelector {
/// Stable string identity for the cache key. Captures everything that
/// changes *which* media is selected (but not the non-deterministic
/// narration, which can't be part of a pre-render key).
pub fn descriptor(&self) -> String {
match self {
ReelSelector::Memories {
span,
tz_offset_minutes,
library,
max_segments,
} => format!(
"memories:span={:?}:tz={}:lib={}:max={}",
span,
tz_offset_minutes,
library.as_deref().unwrap_or("all"),
max_segments
),
}
}
}
/// Pick at most `max` items spread evenly across the input, always keeping the
/// first and last. Returns the input unchanged when it already fits.
pub fn sample_evenly<T: Clone>(items: &[T], max: usize) -> Vec<T> {
if max == 0 {
return Vec::new();
}
if items.len() <= max {
return items.to_vec();
}
if max == 1 {
return vec![items[0].clone()];
}
let last = items.len() - 1;
(0..max)
.map(|i| {
// Spread indices 0..=last across max picks, endpoints included.
let idx = (i * last + (max - 1) / 2) / (max - 1);
items[idx.min(last)].clone()
})
.collect()
}
/// Cheap pass: resolve the selector into an ordered list of media (no insight
/// lookups yet) plus reel metadata. `Err` only on an invalid library param.
pub fn resolve(
app_state: &AppState,
exif_dao: &Mutex<Box<dyn ExifDao>>,
span_context: &opentelemetry::Context,
selector: &ReelSelector,
) -> Result<(Vec<PlannedSegment>, ReelMeta), String> {
match selector {
ReelSelector::Memories {
span,
tz_offset_minutes,
library,
max_segments,
} => {
let client_tz = FixedOffset::east_opt(tz_offset_minutes * 60);
let items = memories::gather_memory_items(
app_state,
exif_dao,
span_context,
*span,
*tz_offset_minutes,
client_tz,
library.as_deref(),
)?;
// Phase 1 is photos-only: drop videos (a clip segment type lands
// in phase 2). Filter before sampling so the spread is over the
// photos that will actually appear.
let items: Vec<memories::MemoryItem> = items
.into_iter()
.filter(|it| is_image_file(Path::new(&it.path)))
.collect();
let cap = (*max_segments).clamp(1, HARD_MAX_SEGMENTS);
let items = sample_evenly(&items, cap);
let years = distinct_years(&items, client_tz);
let meta = ReelMeta { span: *span, years };
let planned = items
.into_iter()
.map(|it| PlannedSegment {
media: SegmentMedia::Photo {
rel_path: it.path,
library_id: it.library_id,
},
date: it.created,
insight_title: None,
insight_summary: None,
})
.collect();
Ok((planned, meta))
}
}
}
/// Distinct calendar years represented by the selected media, in the client's
/// timezone, ascending. Used to tell the scripter how far back the reel reaches.
fn distinct_years(items: &[memories::MemoryItem], tz: Option<FixedOffset>) -> Vec<i32> {
let mut years: Vec<i32> = items
.iter()
.filter_map(|it| it.created)
.filter_map(|ts| DateTime::from_timestamp(ts, 0))
.map(|dt| match tz {
Some(off) => dt.with_timezone(&off).year(),
None => dt.year(),
})
.collect();
years.sort_unstable();
years.dedup();
years
}
/// Background pass: fill each segment's cached insight (title + summary) where
/// one exists. Best-effort — a missing or errored lookup leaves the fields
/// `None` and the scripter narrates from the date alone.
pub fn enrich(
insight_dao: &Mutex<Box<dyn InsightDao>>,
span_context: &opentelemetry::Context,
planned: &mut [PlannedSegment],
) {
let Ok(mut dao) = insight_dao.lock() else {
return;
};
for seg in planned.iter_mut() {
let rel_path = match &seg.media {
SegmentMedia::Photo { rel_path, .. } => rel_path,
};
if let Ok(Some(insight)) = dao.get_insight(span_context, rel_path) {
seg.insight_title = Some(insight.title);
seg.insight_summary = Some(insight.summary);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sample_evenly_returns_all_when_under_cap() {
let v = vec![1, 2, 3];
assert_eq!(sample_evenly(&v, 5), vec![1, 2, 3]);
assert_eq!(sample_evenly(&v, 3), vec![1, 2, 3]);
}
#[test]
fn sample_evenly_keeps_endpoints_and_spreads() {
let v: Vec<i32> = (0..100).collect();
let picked = sample_evenly(&v, 5);
assert_eq!(picked.len(), 5);
assert_eq!(picked[0], 0); // first kept
assert_eq!(*picked.last().unwrap(), 99); // last kept
// Strictly increasing, no dupes.
assert!(picked.windows(2).all(|w| w[0] < w[1]));
}
#[test]
fn sample_evenly_handles_one_and_zero() {
let v: Vec<i32> = (0..10).collect();
assert_eq!(sample_evenly(&v, 1), vec![0]);
assert!(sample_evenly(&v, 0).is_empty());
}
#[test]
fn descriptor_is_stable_and_distinguishes_inputs() {
let a = ReelSelector::Memories {
span: MemoriesSpan::Day,
tz_offset_minutes: -480,
library: None,
max_segments: 24,
};
let b = ReelSelector::Memories {
span: MemoriesSpan::Week,
tz_offset_minutes: -480,
library: None,
max_segments: 24,
};
assert_eq!(a.descriptor(), a.clone().descriptor());
assert_ne!(a.descriptor(), b.descriptor());
assert!(a.descriptor().contains("lib=all"));
}
#[test]
fn distinct_years_dedupes_and_sorts() {
let items = vec![
memories::MemoryItem {
path: "a".into(),
created: Some(1_560_000_000), // 2019
modified: None,
library_id: 1,
},
memories::MemoryItem {
path: "b".into(),
created: Some(1_560_086_400), // 2019
modified: None,
library_id: 1,
},
memories::MemoryItem {
path: "c".into(),
created: Some(1_623_000_000), // 2021
modified: None,
library_id: 1,
},
];
assert_eq!(distinct_years(&items, None), vec![2019, 2021]);
}
}
+18
View File
@@ -53,6 +53,10 @@ pub struct AppState {
pub video_path: String, pub video_path: String,
pub gif_path: String, pub gif_path: String,
pub preview_clips_path: String, pub preview_clips_path: String,
/// Directory for cached memory-reel MP4s (+ title sidecars). Derived from
/// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips.
/// Created lazily by the reel pipeline on first render.
pub reels_path: String,
pub excluded_dirs: Vec<String>, pub excluded_dirs: Vec<String>,
pub ollama: OllamaClient, pub ollama: OllamaClient,
/// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only /// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
@@ -141,6 +145,19 @@ impl AppState {
preview_dao, preview_dao,
); );
// Reels cache dir: explicit env, else a `reels` sibling of the preview
// clips dir (a known-writable, test-safe location). Not created here —
// the reel pipeline does `create_dir_all` before its first write, so
// construction (incl. tests) never touches the filesystem.
let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| {
std::path::Path::new(&preview_clips_path)
.parent()
.map(|p| p.join("reels"))
.unwrap_or_else(|| std::path::PathBuf::from("reels"))
.to_string_lossy()
.to_string()
});
let library_health = libraries::new_health_map(&libraries_vec); let library_health = libraries::new_health_map(&libraries_vec);
let live_libraries = Arc::new(RwLock::new(libraries_vec.clone())); let live_libraries = Arc::new(RwLock::new(libraries_vec.clone()));
Self { Self {
@@ -155,6 +172,7 @@ impl AppState {
video_path, video_path,
gif_path, gif_path,
preview_clips_path, preview_clips_path,
reels_path,
excluded_dirs, excluded_dirs,
ollama, ollama,
openrouter, openrouter,
+1 -1
View File
@@ -231,7 +231,7 @@ impl Ffmpeg {
/// a hard failure — previously the `parse::<f64>` on empty stdout produced /// a hard failure — previously the `parse::<f64>` on empty stdout produced
/// "cannot parse float from empty string" and poisoned the preview-clip row /// "cannot parse float from empty string" and poisoned the preview-clip row
/// with status=failed, which the watcher would re-queue every full scan. /// with status=failed, which the watcher would re-queue every full scan.
async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> { pub async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
if let Some(d) = probe_duration(input_file, "format=duration").await? { if let Some(d) = probe_duration(input_file, "format=duration").await? {
return Ok(Some(d)); return Ok(Some(d));
} }