diff --git a/.env.example b/.env.example index a45fdd5..2e431bc 100644 --- a/.env.example +++ b/.env.example @@ -139,3 +139,31 @@ CLIP_REQUEST_TIMEOUT_SEC=60 # ── RAG / search ──────────────────────────────────────────────────────── # Set to `1` to enable cross-encoder reranking on /search results. SEARCH_RAG_RERANK=0 + +# ── Nightly reel pre-generation (Phase 3+) ────────────────────────────── +# Set to `1` to enable the scheduler. Disabled by default. +# REEL_PREGEN_ENABLED=1 +# Hour (0-23) when the nightly batch fires. Default 3 AM. +# REEL_PREGEN_HOUR=3 +# Day of week for weekly reels (0=Sun, 1=Mon, …). Default Monday. +# REEL_PREGEN_WEEK_DOW=1 +# Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to +# the server's local timezone. +# REEL_PREGEN_TZ_OFFSET_MINUTES= +# Fixed timezone offset — overrides auto-detect to avoid DST shifts. +# When set, both the DB fallback and env fallback use this value. +# REEL_PREGEN_TZ_FIXED_MINUTES=-480 +# Voice ID for narration (e.g., "grandma"). When unset, falls back to the +# voice stored in the user_ai_prefs DB row, if that row has one. +# REEL_PREGEN_VOICE= +# Library filter: a library id (e.g. "1") or "all" for every library. +# REEL_PREGEN_LIBRARY=all +# Max agentic tool iterations for pre-gen scripter. Default 8. +# REEL_PREGEN_MAX_TOOL_ITERS=8 +# +# On-disk reel cache sweep (runs every 24h, independent of pre-gen). Removes +# reel MP4s with no ledger row + no live job that are older than the max age — +# i.e. the on-demand cache, which otherwise grows forever. Set to 0 to disable. +# REEL_CACHE_SWEEP_ENABLED=1 +# Age (days) before an unreferenced reel MP4 is swept. Default 7. 
+# REEL_CACHE_MAX_AGE_DAYS=7 diff --git a/Cargo.lock b/Cargo.lock index a35a7d2..9455f5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2051,7 +2051,7 @@ dependencies = [ [[package]] name = "image-api" -version = "1.3.0" +version = "1.4.0" dependencies = [ "actix", "actix-cors", diff --git a/Cargo.toml b/Cargo.toml index 3b3a08a..860e6ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "image-api" -version = "1.3.0" +version = "1.4.0" authors = ["Cameron Cordes "] edition = "2024" diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/down.sql b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql new file mode 100644 index 0000000..91863c2 --- /dev/null +++ b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql @@ -0,0 +1,2 @@ +DROP INDEX IF EXISTS idx_precomputed_reels_span_library; +DROP TABLE IF EXISTS precomputed_reels; diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/up.sql b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql new file mode 100644 index 0000000..ba49b72 --- /dev/null +++ b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql @@ -0,0 +1,14 @@ +CREATE TABLE precomputed_reels ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + span TEXT NOT NULL, + library_key TEXT NOT NULL, + cache_key TEXT NOT NULL, + output_path TEXT NOT NULL, + title TEXT NOT NULL, + media_count INT NOT NULL, + render_version INT NOT NULL DEFAULT 1, + tz_offset_minutes INT NOT NULL, + voice TEXT, + generated_at BIGINT NOT NULL +); +CREATE INDEX idx_precomputed_reels_span_library ON precomputed_reels(span, library_key, generated_at DESC); diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql new file mode 100644 index 0000000..83b82a3 --- /dev/null +++ b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS user_ai_prefs; diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql 
b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql new file mode 100644 index 0000000..fd8f6f2 --- /dev/null +++ b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql @@ -0,0 +1,7 @@ +CREATE TABLE user_ai_prefs ( + id INTEGER PRIMARY KEY CHECK(id=1), + voice TEXT, + tz_offset_minutes INTEGER, + library TEXT, + updated_at BIGINT NOT NULL +); diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index cb21b14..c6bc212 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -120,7 +120,7 @@ pub async fn generation_status_handler( } if let Some(ref fp) = query.path { - let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -218,10 +218,11 @@ pub async fn cancel_generation_handler( } if let Some(ref fp) = request.file_path { - let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) - .ok() - .flatten() - .unwrap_or_else(|| app_state.primary_library()); + let library = + libraries::resolve_library_param_state(&app_state, request.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); let normalized = normalize_path(fp); // Get active job ids first, then cancel in DB, then abort tasks @@ -580,7 +581,7 @@ pub async fn get_insight_handler( // Expand to rel_paths sharing content so an insight generated under // library 1 still shows when the same photo is viewed from library 2. 
- let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -1218,15 +1219,16 @@ pub async fn chat_turn_handler( let mut span = tracer.start_with_context("http.insights.chat", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; // Service-token claims (sub: "service:apollo") fall through to // user_id=1 — the operator convention. 
Mobile/web clients have a @@ -1344,15 +1346,16 @@ pub async fn chat_rewind_handler( request: web::Json, app_state: web::Data, ) -> impl Responder { - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; match app_state .insight_chat @@ -1393,7 +1396,7 @@ pub async fn chat_history_handler( // cross-library lookup when the scoped one misses, so a photo // with no insight in this library but one in another still // surfaces (the "show this photo's primary insight" merge case). 
- let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -1444,15 +1447,16 @@ pub async fn chat_stream_handler( request: web::Json, app_state: web::Data, ) -> HttpResponse { - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; // Service-token sub falls through to user_id=1 (see chat_turn_handler). 
let user_id = claims.sub.parse::().unwrap_or(1); @@ -1589,15 +1593,16 @@ pub async fn turn_async_handler( let mut span = tracer.start_with_context("http.insights.chat_turn_async", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; let user_id = claims.sub.parse::().unwrap_or(1); diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 3673c43..4ff8494 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -217,6 +217,13 @@ impl InsightGenerator { &self.insight_dao } + /// Accessor for the EXIF DAO (used by the reel scheduler to resolve + /// GPS enrichment without creating a separate DB connection). + #[allow(dead_code)] + pub fn exif_dao(&self) -> &Arc>> { + &self.exif_dao + } + /// Whether the optional Apollo Places integration is wired up. Drives /// tool-definition gating (no point offering `get_personal_place_at` /// when Apollo is unreachable) — exposed publicly so `insight_chat` @@ -4497,6 +4504,110 @@ Return ONLY the summary, nothing else."#, )) } + /// A read-only agentic tool loop: chat with tools until the model stops + /// calling them, then return the final content. 
+ /// + /// This is the loop body extracted from + /// `generate_agentic_insight_for_photo` so it can be + /// reused by the reel-scripter without the photo-specific context + /// (image_base64, file_path, persona_id). The photo insight loop still + /// has its own copy because it threads image/file context through + /// `execute_tool`. + /// + /// Calls `execute_tool` with empty file/image context; enabled tools + /// never read those fields. + /// + /// Only used by the `reels` module (compiled in `main.rs`, not `lib.rs`), + /// so the `#[allow(dead_code)]` suppresses the lib-target warning. + #[allow(dead_code)] + pub(crate) async fn run_readonly_tool_loop( + &self, + backend: &ResolvedBackend, + mut messages: Vec, + tools: Vec, + max_iter: usize, + ) -> Result { + let mut final_content = String::new(); + + for iteration in 0..max_iter { + log::info!("Agentic iteration {}/{}", iteration + 1, max_iter); + + let (response, _prompt_tokens, _eval_tokens) = backend + .chat() + .chat_with_tools(messages.clone(), tools.clone()) + .await?; + + // Sanitize tool call arguments before pushing back into history. + // Some models occasionally return non-object arguments (bool, + // string, null) which Ollama rejects when they are re-sent in + // a subsequent request. 
+ let mut response = response; + if let Some(ref mut tool_calls) = response.tool_calls { + for tc in tool_calls.iter_mut() { + if !tc.function.arguments.is_object() { + log::warn!( + "Tool '{}' returned non-object arguments ({:?}), normalising to {{}}", + tc.function.name, + tc.function.arguments + ); + tc.function.arguments = serde_json::Value::Object(Default::default()); + } + } + } + + messages.push(response.clone()); + + if let Some(ref tool_calls) = response.tool_calls + && !tool_calls.is_empty() + { + for tool_call in tool_calls { + log::info!( + "Agentic tool call [{}]: {} {}", + iteration, + tool_call.function.name, + tool_call.function.arguments + ); + let result = self + .execute_tool( + &tool_call.function.name, + &tool_call.function.arguments, + backend, + &None, + "", + 0, + "", + &opentelemetry::Context::new(), + ) + .await; + messages.push(ChatMessage::tool_result(result)); + } + continue; + } + + // No tool calls — this is the final answer + final_content = response.content; + break; + } + + // If loop exhausted without final answer, ask for one + if final_content.is_empty() { + log::info!( + "Agentic loop exhausted after {} iterations, requesting final answer", + max_iter + ); + messages.push(ChatMessage::user( + "Based on the context gathered, please write the final answer. 
Return ONLY the JSON object, no prose or code fences.", + )); + let (final_response, _, _) = backend + .chat() + .chat_with_tools(messages.clone(), vec![]) + .await?; + final_content = final_response.content; + } + + Ok(final_content) + } + /// Reverse geocode GPS coordinates to human-readable place names async fn reverse_geocode(&self, lat: f64, lon: f64) -> Option { let url = format!( diff --git a/src/ai/tts.rs b/src/ai/tts.rs index 08d9dcd..d6ef89d 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -23,6 +23,7 @@ use std::time::{Duration, Instant}; use tokio::sync::Semaphore; use uuid::Uuid; +use crate::ai::llamacpp::LlamaCppClient; use crate::data::Claims; use crate::file_types::{is_audio_file, is_video_file}; use crate::files::is_valid_full_path; @@ -473,6 +474,40 @@ pub struct TtsJobStatusResponse { pub error: Option, } +/// Synthesize speech honoring the global single-GPU serialization +/// (`TTS_PERMIT`) and the GPU write lease, exactly as the speech-job path does. +/// Queues on the permit rather than fast-failing, so callers wait their turn +/// instead of contending. Text is run through the same markdown/emoji cleanup + +/// pronunciation pipeline as the HTTP handlers. Reused by the memory-reel +/// pipeline to narrate each segment without racing a user's TTS request on the +/// Chatterbox GPU. +pub async fn synthesize_serialized( + client: &LlamaCppClient, + text: &str, + voice: Option<&str>, + format: &str, + exaggeration: Option, +) -> anyhow::Result> { + let prepared = prepare_for_tts(text); + if prepared.is_empty() { + anyhow::bail!("nothing to synthesize after cleanup"); + } + // Clamp to Chatterbox's documented range, matching the HTTP handlers + // (which clamp before forwarding; this path bypasses them). + let exaggeration = exaggeration.map(|x| x.clamp(0.25, 2.0)); + // Queue rather than fast-fail (mirrors create_speech_job_handler). 
+ let _permit = TTS_PERMIT + .acquire() + .await + .map_err(|_| anyhow::anyhow!("TTS permit closed"))?; + // Wait for the LLM side to release the GPU before the request timeout + // starts (see ai::gpu). + let _gpu = crate::ai::gpu::tts_lease().await; + client + .text_to_speech(&prepared, voice, format, exaggeration, None, None) + .await +} + /// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses. /// Returns 202 + a job id immediately; the synth queues on the single GPU /// permit (instead of fast-failing 429) and the client polls the job until @@ -985,7 +1020,7 @@ pub async fn create_voice_from_library_handler( let voice_name = append_ref_window(&voice_name, ref_start, ref_duration.round().max(1.0) as u32); - let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(Some(l)) => l, Ok(None) => app_state.primary_library(), Err(msg) => { diff --git a/src/database/mod.rs b/src/database/mod.rs index d063bd0..981f6a4 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -51,10 +51,12 @@ pub mod knowledge_dao; pub mod location_dao; pub mod models; pub mod persona_dao; +pub mod precomputed_reel_dao; pub mod preview_dao; pub mod reconcile; pub mod schema; pub mod search_dao; +pub mod user_ai_prefs_dao; pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao}; pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao}; @@ -66,8 +68,10 @@ pub use knowledge_dao::{ }; pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao}; pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao}; +pub use precomputed_reel_dao::{PrecomputedReelDao, SqlitePrecomputedReelDao}; pub use preview_dao::{PreviewDao, SqlitePreviewDao}; pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao}; +pub use user_ai_prefs_dao::{SqliteUserAiPrefsDao, UserAiPrefsDao}; 
pub trait UserDao { fn create_user(&mut self, user: &str, password: &str) -> Option; diff --git a/src/database/models.rs b/src/database/models.rs index 62274e2..d3d5440 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -1,6 +1,7 @@ use crate::database::schema::{ entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs, - libraries, personas, photo_insights, users, video_preview_clips, + libraries, personas, photo_insights, precomputed_reels, user_ai_prefs, users, + video_preview_clips, }; use serde::Serialize; @@ -505,3 +506,56 @@ pub struct InsightGenerationJob { pub result_insight_id: Option, pub error_message: Option, } + +// --- Precomputed reels ------------------------------------------------------- + +#[derive(Insertable)] +#[diesel(table_name = precomputed_reels)] +pub struct InsertablePrecomputedReel { + pub span: String, + pub library_key: String, + pub cache_key: String, + pub output_path: String, + pub title: String, + pub media_count: i32, + pub render_version: i32, + pub tz_offset_minutes: i32, + pub voice: Option, + pub generated_at: i64, +} + +#[derive(Serialize, Queryable, Clone, Debug)] +pub struct PrecomputedReel { + pub id: i32, + pub span: String, + pub library_key: String, + pub cache_key: String, + pub output_path: String, + pub title: String, + pub media_count: i32, + pub render_version: i32, + pub tz_offset_minutes: i32, + pub voice: Option, + pub generated_at: i64, +} + +// --- User AI preferences (Section E) ---------------------------------------- + +#[derive(Queryable, Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)] +#[diesel(table_name = user_ai_prefs)] +pub struct UserAiPrefs { + pub id: i32, + pub voice: Option, + pub tz_offset_minutes: Option, + pub library: Option, + pub updated_at: i64, +} + +#[derive(Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)] +#[diesel(table_name = user_ai_prefs)] +pub struct UpsertUserAiPrefs { + pub voice: Option, + pub 
tz_offset_minutes: Option, + pub library: Option, + pub updated_at: i64, +} diff --git a/src/database/precomputed_reel_dao.rs b/src/database/precomputed_reel_dao.rs new file mode 100644 index 0000000..b66573b --- /dev/null +++ b/src/database/precomputed_reel_dao.rs @@ -0,0 +1,439 @@ +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +use crate::database::models::{InsertablePrecomputedReel, PrecomputedReel}; +use crate::database::schema; +use crate::database::{DbError, DbErrorKind, connect}; +use crate::otel::trace_db_call; + +/// Ledger for precomputed memory reels. The nightly agentic job writes a +/// row after each successful render; the `GET /reels/precomputed` handler +/// reads it to gate on freshness and serve the cached MP4. +pub trait PrecomputedReelDao: Sync + Send { + /// Insert a precomputed reel row. Returns the new row's id. + /// Written by the nightly agentic job (Section D). + #[allow(dead_code)] + fn record_reel( + &mut self, + context: &opentelemetry::Context, + row: &InsertablePrecomputedReel, + ) -> Result; + + /// Find the latest precomputed reel for the given (span, library_key). + fn latest_for( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + ) -> Result, DbError>; + + /// Return true when a fresh precomputed reel exists for the given + /// (span, library_key, render_version) that was generated at or after + /// `min_generated_at`. Used as a fast existence gate before falling + /// back to `latest_for` (avoids a second query path). + fn exists_fresh( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + render_version: i32, + min_generated_at: i64, + ) -> Result; + + /// Delete all but the newest `keep` rows for (span, library_key), returning + /// the deleted rows so the caller can unlink their output files. Used by the + /// nightly job to retire superseded reels (e.g. yesterday's daily). 
+ #[allow(dead_code)] + fn prune_superseded( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + keep: usize, + ) -> Result, DbError>; + + /// Every cache_key currently in the ledger. Used by the on-disk cache sweep + /// to protect files a ledger row still points at. + #[allow(dead_code)] + fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result, DbError>; +} + +pub struct SqlitePrecomputedReelDao { + connection: Arc>, +} + +impl Default for SqlitePrecomputedReelDao { + fn default() -> Self { + Self::new() + } +} + +impl SqlitePrecomputedReelDao { + pub fn new() -> Self { + Self { + connection: Arc::new(Mutex::new(connect())), + } + } + + #[cfg(test)] + pub fn from_connection(conn: Arc>) -> Self { + Self { connection: conn } + } +} + +impl PrecomputedReelDao for SqlitePrecomputedReelDao { + fn record_reel( + &mut self, + context: &opentelemetry::Context, + row: &InsertablePrecomputedReel, + ) -> Result { + trace_db_call(context, "insert", "record_reel", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + diesel::insert_into(dsl::precomputed_reels) + .values(row) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to insert reel: {}", e))?; + + dsl::precomputed_reels + .order(dsl::id.desc()) + .select(dsl::id) + .first::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to get reel id: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) + } + + fn latest_for( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "latest_for", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) 
+ .order(dsl::generated_at.desc()) + .first::(connection.deref_mut()) + .optional() + .map_err(|e| anyhow::anyhow!("Failed to get latest reel: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn exists_fresh( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + render_version: i32, + min_generated_at: i64, + ) -> Result { + trace_db_call(context, "query", "exists_fresh", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + let count: i64 = dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .filter(dsl::render_version.eq(render_version)) + .filter(dsl::generated_at.ge(min_generated_at)) + .count() + .get_result(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to check fresh reel: {}", e))?; + + Ok(count > 0) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn prune_superseded( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + keep: usize, + ) -> Result, DbError> { + trace_db_call(context, "delete", "prune_superseded", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + // Newest first; everything past `keep` is superseded. The table + // holds at most a handful of rows per (span, library), so loading + // and slicing in Rust is cheaper than a correlated subquery. 
+ let mut rows: Vec = dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .order(dsl::generated_at.desc()) + .load::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to load reels for prune: {}", e))?; + + let stale = rows.split_off(rows.len().min(keep)); + if !stale.is_empty() { + let ids: Vec = stale.iter().map(|r| r.id).collect(); + diesel::delete(dsl::precomputed_reels.filter(dsl::id.eq_any(ids))) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to delete superseded reels: {}", e))?; + } + Ok(stale) + }) + .map_err(|e| DbError::log(DbErrorKind::UpdateError, e)) + } + + fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result, DbError> { + trace_db_call(context, "query", "all_cache_keys", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + dsl::precomputed_reels + .select(dsl::cache_key) + .load::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to load cache keys: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use diesel::Connection; + use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations}; + + const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!(); + + fn setup_dao() -> SqlitePrecomputedReelDao { + let mut conn = SqliteConnection::establish(":memory:") + .expect("Unable to create in-memory db connection"); + conn.run_pending_migrations(DB_MIGRATIONS) + .expect("Failure running DB migrations"); + SqlitePrecomputedReelDao::from_connection(Arc::new(Mutex::new(conn))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + fn sample_row() -> InsertablePrecomputedReel { + InsertablePrecomputedReel { + span: "day".to_string(), + library_key: "1".to_string(), + cache_key: "abc123".to_string(), + output_path: 
"/tmp/reel.mp4".to_string(), + title: "Test Reel".to_string(), + media_count: 10, + render_version: 1, + tz_offset_minutes: 0, + voice: Some("default".to_string()), + generated_at: 1_000_000, + } + } + + #[test] + fn record_reel_inserts_and_returns_id() { + let mut dao = setup_dao(); + let ctx = ctx(); + let row = sample_row(); + + let id = dao.record_reel(&ctx, &row).unwrap(); + assert!(id > 0, "should return a positive id"); + } + + #[test] + fn record_reel_returns_increasing_ids() { + let mut dao = setup_dao(); + let ctx = ctx(); + let row = sample_row(); + + let id1 = dao.record_reel(&ctx, &row).unwrap(); + let id2 = dao.record_reel(&ctx, &row).unwrap(); + assert!(id2 > id1, "each insert should get a higher id"); + } + + #[test] + fn latest_for_returns_latest() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let row1 = InsertablePrecomputedReel { + generated_at: 1_000_000, + ..sample_row() + }; + let row2 = InsertablePrecomputedReel { + generated_at: 2_000_000, + ..sample_row() + }; + + dao.record_reel(&ctx, &row1).unwrap(); + dao.record_reel(&ctx, &row2).unwrap(); + + let latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap(); + assert_eq!(latest.generated_at, 2_000_000); + } + + #[test] + fn latest_for_scoped_by_span_and_library() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let day_row = InsertablePrecomputedReel { + span: "day".to_string(), + library_key: "1".to_string(), + generated_at: 1_000_000, + ..sample_row() + }; + let week_row = InsertablePrecomputedReel { + span: "week".to_string(), + library_key: "1".to_string(), + generated_at: 2_000_000, + ..sample_row() + }; + + dao.record_reel(&ctx, &day_row).unwrap(); + dao.record_reel(&ctx, &week_row).unwrap(); + + let day_latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap(); + assert_eq!(day_latest.span, "day"); + + let week_latest = dao.latest_for(&ctx, "week", "1").unwrap().unwrap(); + assert_eq!(week_latest.span, "week"); + + // Different library returns None + let missing = 
dao.latest_for(&ctx, "day", "99").unwrap(); + assert!(missing.is_none()); + } + + #[test] + fn latest_for_returns_none_when_no_rows() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let result = dao.latest_for(&ctx, "day", "1").unwrap(); + assert!(result.is_none()); + } + + #[test] + fn exists_fresh_returns_true_when_present() { + let mut dao = setup_dao(); + let ctx = ctx(); + + dao.record_reel(&ctx, &sample_row()).unwrap(); + + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap(); + assert!(exists, "should find the row we just inserted"); + } + + #[test] + fn exists_fresh_returns_false_when_missing() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap(); + assert!(!exists, "should not find anything in empty table"); + } + + #[test] + fn exists_fresh_respects_min_generated_at() { + let mut dao = setup_dao(); + let ctx = ctx(); + + dao.record_reel(&ctx, &sample_row()).unwrap(); + + // Below the threshold — should exist + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 500_000).unwrap(); + assert!(exists); + + // Above the threshold — should not exist + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 2_000_000).unwrap(); + assert!(!exists); + } + + #[test] + fn exists_fresh_respects_render_version() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let row_v1 = InsertablePrecomputedReel { + render_version: 1, + ..sample_row() + }; + dao.record_reel(&ctx, &row_v1).unwrap(); + + assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap()); + assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap()); + } + + #[test] + fn prune_superseded_keeps_newest_and_returns_deleted() { + let mut dao = setup_dao(); + let ctx = ctx(); + // Three day/lib1 reels at increasing timestamps, plus an unrelated one. 
+ for (i, key) in ["k1", "k2", "k3"].iter().enumerate() { + dao.record_reel( + &ctx, + &InsertablePrecomputedReel { + cache_key: key.to_string(), + generated_at: 1_000_000 + i as i64 * 1000, + ..sample_row() + }, + ) + .unwrap(); + } + let other = InsertablePrecomputedReel { + library_key: "2".to_string(), + cache_key: "other".to_string(), + ..sample_row() + }; + dao.record_reel(&ctx, &other).unwrap(); + + // Keep the newest 2 of (day, "1"); k1 (oldest) is superseded. + let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap(); + assert_eq!(deleted.len(), 1); + assert_eq!(deleted[0].cache_key, "k1"); + + // The newest 2 survive; the other-library row is untouched. + let keys = dao.all_cache_keys(&ctx).unwrap(); + assert_eq!(keys.len(), 3); + assert!(keys.contains(&"k2".to_string())); + assert!(keys.contains(&"k3".to_string())); + assert!(keys.contains(&"other".to_string())); + assert!(!keys.contains(&"k1".to_string())); + } + + #[test] + fn prune_superseded_noop_when_within_keep() { + let mut dao = setup_dao(); + let ctx = ctx(); + dao.record_reel(&ctx, &sample_row()).unwrap(); + let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap(); + assert!(deleted.is_empty()); + assert_eq!(dao.all_cache_keys(&ctx).unwrap().len(), 1); + } +} diff --git a/src/database/schema.rs b/src/database/schema.rs index bf5791b..846542d 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -266,6 +266,16 @@ diesel::table! { } } +diesel::table! { + user_ai_prefs (id) { + id -> Integer, + voice -> Nullable, + tz_offset_minutes -> Nullable, + library -> Nullable, + updated_at -> BigInt, + } +} + diesel::table! { video_preview_clips (id) { id -> Integer, @@ -294,6 +304,22 @@ diesel::table! { } } +diesel::table! 
{ + precomputed_reels (id) { + id -> Integer, + span -> Text, + library_key -> Text, + cache_key -> Text, + output_path -> Text, + title -> Text, + media_count -> Integer, + render_version -> Integer, + tz_offset_minutes -> Integer, + voice -> Nullable, + generated_at -> BigInt, + } +} + diesel::joinable!(entity_facts -> photo_insights (source_insight_id)); diesel::joinable!(entity_photo_links -> entities (entity_id)); diesel::joinable!(entity_photo_links -> libraries (library_id)); @@ -322,9 +348,11 @@ diesel::allow_tables_to_appear_in_same_query!( personas, persons, photo_insights, + precomputed_reels, search_history, tagged_photo, tags, + user_ai_prefs, users, video_preview_clips, ); diff --git a/src/database/user_ai_prefs_dao.rs b/src/database/user_ai_prefs_dao.rs new file mode 100644 index 0000000..129ef0c --- /dev/null +++ b/src/database/user_ai_prefs_dao.rs @@ -0,0 +1,206 @@ +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +use crate::database::models::{UpsertUserAiPrefs, UserAiPrefs}; +use crate::database::schema; +use crate::database::{DbError, DbErrorKind, connect}; +use crate::otel::trace_db_call; + +/// Generic single-row table that passively mirrors the latest client AI +/// request parameters (voice, timezone, library). Read by the nightly +/// pre-generation scheduler (Section D) to pick up user preferences. +pub trait UserAiPrefsDao: Sync + Send { + /// Read the single row; `None` when it hasn't been populated yet. + fn get_prefs( + &mut self, + context: &opentelemetry::Context, + ) -> Result, DbError>; + + /// Upsert the single row (id is always 1). 
+ #[allow(dead_code)] + fn upsert_prefs( + &mut self, + context: &opentelemetry::Context, + prefs: &UpsertUserAiPrefs, + ) -> Result<(), DbError>; +} + +pub struct SqliteUserAiPrefsDao { + connection: Arc>, +} + +impl Default for SqliteUserAiPrefsDao { + fn default() -> Self { + Self::new() + } +} + +impl SqliteUserAiPrefsDao { + pub fn new() -> Self { + Self { + connection: Arc::new(Mutex::new(connect())), + } + } + + #[cfg(test)] + pub fn from_connection(conn: Arc>) -> Self { + Self { connection: conn } + } +} + +impl UserAiPrefsDao for SqliteUserAiPrefsDao { + fn get_prefs( + &mut self, + context: &opentelemetry::Context, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_prefs", |_span| { + use schema::user_ai_prefs::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock UserAiPrefsDao"); + + dsl::user_ai_prefs + .first::(connection.deref_mut()) + .optional() + .map_err(|e| anyhow::anyhow!("Failed to get prefs: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn upsert_prefs( + &mut self, + context: &opentelemetry::Context, + prefs: &UpsertUserAiPrefs, + ) -> Result<(), DbError> { + trace_db_call(context, "upsert", "upsert_prefs", |_span| { + use schema::user_ai_prefs::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock UserAiPrefsDao"); + + // Single-row table (id=1): one atomic upsert. The explicit id=1 + // makes the conflict target deterministic so the second call + // updates in place rather than tripping the CHECK(id=1) constraint, + // and real insert errors surface instead of being swallowed into a + // separate update branch. The columns are set explicitly (rather + // than via AsChangeset) so a None field overwrites to NULL — the + // row mirrors the latest request exactly, not a merge of past ones. 
+ diesel::insert_into(dsl::user_ai_prefs) + .values((dsl::id.eq(1), prefs)) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::voice.eq(&prefs.voice), + dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes), + dsl::library.eq(&prefs.library), + dsl::updated_at.eq(&prefs.updated_at), + )) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?; + Ok(()) + }) + .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use diesel::Connection; + use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations}; + + const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!(); + + fn setup_dao() -> SqliteUserAiPrefsDao { + let mut conn = SqliteConnection::establish(":memory:") + .expect("Unable to create in-memory db connection"); + conn.run_pending_migrations(DB_MIGRATIONS) + .expect("Failure running DB migrations"); + SqliteUserAiPrefsDao::from_connection(Arc::new(Mutex::new(conn))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + #[test] + fn get_prefs_returns_none_when_empty() { + let mut dao = setup_dao(); + let result = dao.get_prefs(&ctx()).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn upsert_prefs_inserts_row() { + let mut dao = setup_dao(); + let now = 1_700_000_000i64; + let prefs = UpsertUserAiPrefs { + voice: Some("grandma".to_string()), + tz_offset_minutes: Some(-480), + library: Some("1".to_string()), + updated_at: now, + }; + dao.upsert_prefs(&ctx(), &prefs).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.id, 1); + assert_eq!(row.voice, Some("grandma".to_string())); + assert_eq!(row.tz_offset_minutes, Some(-480)); + assert_eq!(row.library, Some("1".to_string())); + assert_eq!(row.updated_at, now); + } + + #[test] + fn upsert_prefs_replaces_existing() { + let mut dao = setup_dao(); + let now1 = 1_700_000_000i64; + let now2 = 1_800_000_000i64; + + let prefs1 = 
UpsertUserAiPrefs { + voice: Some("grandma".to_string()), + tz_offset_minutes: Some(-480), + library: Some("1".to_string()), + updated_at: now1, + }; + dao.upsert_prefs(&ctx(), &prefs1).unwrap(); + + let prefs2 = UpsertUserAiPrefs { + voice: Some("dad".to_string()), + tz_offset_minutes: Some(-300), + library: None, + updated_at: now2, + }; + dao.upsert_prefs(&ctx(), &prefs2).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.voice, Some("dad".to_string())); + assert_eq!(row.tz_offset_minutes, Some(-300)); + assert!(row.library.is_none()); + assert_eq!(row.updated_at, now2); + } + + #[test] + fn upsert_partial_fields() { + let mut dao = setup_dao(); + let now = 1_700_000_000i64; + + let prefs = UpsertUserAiPrefs { + voice: None, + tz_offset_minutes: Some(-480), + library: None, + updated_at: now, + }; + dao.upsert_prefs(&ctx(), &prefs).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.tz_offset_minutes, Some(-480)); + assert!(row.voice.is_none()); + assert!(row.library.is_none()); + } +} diff --git a/src/duplicates.rs b/src/duplicates.rs index 372415b..32ed92b 100644 --- a/src/duplicates.rs +++ b/src/duplicates.rs @@ -234,7 +234,7 @@ async fn list_exact_handler( let span = global_tracer().start_with_context("duplicates.list_exact", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -265,7 +265,7 @@ async fn list_perceptual_handler( let span = global_tracer().start_with_context("duplicates.list_perceptual", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, 
query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -449,7 +449,7 @@ async fn list_folder_pairs_handler( let span = global_tracer().start_with_context("duplicates.list_folder_pairs", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); diff --git a/src/faces.rs b/src/faces.rs index 3288aa3..f619966 100644 --- a/src/faces.rs +++ b/src/faces.rs @@ -1755,7 +1755,7 @@ async fn stats_handler( let span = global_tracer().start_with_context("faces.stats", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -1782,11 +1782,12 @@ async fn list_faces_handler( let normalized_path = normalize_path(&query.path); // resolve_library_param returns Option<&Library>; clone so the result // is owned (matching the primary_library fallback's type). 
- let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref()) - .ok() - .flatten() - .cloned() - .unwrap_or_else(|| app_state.primary_library().clone()); + let library: Library = + libraries::resolve_library_param_state(&app_state, query.library.as_deref()) + .ok() + .flatten() + .cloned() + .unwrap_or_else(|| app_state.primary_library().clone()); let mut dao = face_dao.lock().expect("face dao lock"); let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) { @@ -1870,7 +1871,7 @@ async fn create_face_handler( } let normalized_path = normalize_path(&body.path); - let library: Library = match libraries::resolve_library_param( + let library: Library = match libraries::resolve_library_param_state( &app_state, body.library.as_ref().map(|i| i.to_string()).as_deref(), ) { @@ -2192,7 +2193,7 @@ async fn list_persons_handler( let span = global_tracer().start_with_context("persons.list", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -2345,7 +2346,7 @@ async fn person_faces_handler( let context = extract_context_from_request(&request); let span = global_tracer().start_with_context("persons.faces", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); diff --git a/src/files.rs b/src/files.rs index 59cd49e..920540e 100644 --- a/src/files.rs +++ b/src/files.rs @@ -275,14 +275,14 @@ pub async fn list_photos( // Resolve the optional library filter. Unknown values return 400. 
A // `None` result means "union across all libraries" and downstream // walks iterate every configured library root. - let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) - { - Ok(lib) => lib, - Err(msg) => { - log::warn!("Rejecting /photos request: {}", msg); - return HttpResponse::BadRequest().body(msg); - } - }; + let library = + match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { + Ok(lib) => lib, + Err(msg) => { + log::warn!("Rejecting /photos request: {}", msg); + return HttpResponse::BadRequest().body(msg); + } + }; let span_context = opentelemetry::Context::current_with_span(span); @@ -1238,7 +1238,7 @@ pub async fn list_exif_summary( // Resolve the library filter up front so a bad id/name 400s before we // ever take the DAO mutex. None == union across all libraries. let library_filter = - match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) { + match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(lib) => lib.map(|l| l.id), Err(msg) => { span.set_status(Status::error(msg.clone())); diff --git a/src/handlers/image.rs b/src/handlers/image.rs index f0d2310..923fff3 100644 --- a/src/handlers/image.rs +++ b/src/handlers/image.rs @@ -53,7 +53,7 @@ pub async fn get_image( // Resolve library from query param; default to primary so clients that // don't yet send `library=` continue to work. 
- let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -492,7 +492,7 @@ pub async fn get_file_metadata( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -580,7 +580,7 @@ pub async fn set_image_gps( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = libraries::resolve_library_param(&app_state, body.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, body.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -746,7 +746,7 @@ pub async fn get_full_exif( let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("get_full_exif", &context); - let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -888,7 +888,8 @@ pub async fn set_image_date( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -941,7 +942,8 @@ pub async fn clear_image_date( let span_context = 
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -1001,7 +1003,7 @@ pub async fn upload_image( // Resolve the optional library selector. Absent → primary library // (backwards-compatible with clients that don't yet send `library=`). let target_library = - match libraries::resolve_library_param(&app_state, query.library.as_deref()) { + match libraries::resolve_library_param_state(&app_state, query.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { diff --git a/src/handlers/video.rs b/src/handlers/video.rs index f9f4e64..b56a67e 100644 --- a/src/handlers/video.rs +++ b/src/handlers/video.rs @@ -67,10 +67,11 @@ pub async fn generate_video( let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("generate_video", &context); - let preferred_library = libraries::resolve_library_param(&app_state, body.library.as_deref()) - .ok() - .flatten() - .unwrap_or_else(|| app_state.primary_library()); + let preferred_library = + libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); // Try the resolved library first, then fall back to any other library // that actually contains the file — handles union-mode requests where diff --git a/src/libraries.rs b/src/libraries.rs index 55bf5c1..377b442 100644 --- a/src/libraries.rs +++ b/src/libraries.rs @@ -291,11 +291,11 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { } /// Resolve a library request parameter (accepts numeric id as string or name) -/// against the configured libraries. 
Returns `Ok(None)` when the param is +/// against a list of libraries. Returns `Ok(None)` when the param is /// absent, meaning "span all libraries". Returns `Err` when a value is /// provided but does not match any library. pub fn resolve_library_param<'a>( - state: &'a AppState, + libs: &'a [Library], param: Option<&str>, ) -> Result, String> { let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else { @@ -303,18 +303,29 @@ pub fn resolve_library_param<'a>( }; if let Ok(id) = raw.parse::() { - return state - .library_by_id(id) + return libs + .iter() + .find(|l| l.id == id) .map(Some) .ok_or_else(|| format!("unknown library id: {}", id)); } - state - .library_by_name(raw) + libs.iter() + .find(|l| l.name == raw) .map(Some) .ok_or_else(|| format!("unknown library name: {}", raw)) } +/// Resolve a library request parameter against the AppState's libraries. +/// Returns `Ok(None)` when the param is absent, meaning "span all libraries". +/// Returns `Err` when a value is provided but does not match any library. +pub fn resolve_library_param_state<'a>( + state: &'a AppState, + param: Option<&str>, +) -> Result, String> { + resolve_library_param(&state.libraries, param) +} + /// Health of a library at a point in time. Probed at the top of each /// file-watcher tick. 
The `Stale` state is the "be conservative" signal: /// destructive paths (ingest writes, future move-handoff and orphan GC in @@ -662,12 +673,6 @@ mod tests { assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg")); } - fn state_with_libraries(libs: Vec) -> AppState { - let mut state = AppState::test_state(); - state.libraries = libs; - state - } - fn sample_libraries() -> Vec { vec![ Library { @@ -687,52 +692,52 @@ mod tests { ] } - #[actix_rt::test] - async fn resolve_library_param_absent_is_union() { - let state = state_with_libraries(sample_libraries()); - assert!(matches!(resolve_library_param(&state, None), Ok(None))); + #[test] + fn resolve_library_param_absent_is_union() { + let libs = sample_libraries(); + assert!(matches!(resolve_library_param(&libs, None), Ok(None))); } - #[actix_rt::test] - async fn resolve_library_param_empty_or_whitespace_is_union() { - let state = state_with_libraries(sample_libraries()); - assert!(matches!(resolve_library_param(&state, Some("")), Ok(None))); + #[test] + fn resolve_library_param_empty_or_whitespace_is_union() { + let libs = sample_libraries(); + assert!(matches!(resolve_library_param(&libs, Some("")), Ok(None))); assert!(matches!( - resolve_library_param(&state, Some(" ")), + resolve_library_param(&libs, Some(" ")), Ok(None) )); } - #[actix_rt::test] - async fn resolve_library_param_numeric_id_matches() { - let state = state_with_libraries(sample_libraries()); - let lib = resolve_library_param(&state, Some("7")) + #[test] + fn resolve_library_param_numeric_id_matches() { + let libs = sample_libraries(); + let lib = resolve_library_param(&libs, Some("7")) .expect("valid id") .expect("some library"); assert_eq!(lib.id, 7); assert_eq!(lib.name, "archive"); } - #[actix_rt::test] - async fn resolve_library_param_name_matches() { - let state = state_with_libraries(sample_libraries()); - let lib = resolve_library_param(&state, Some("main")) + #[test] + fn resolve_library_param_name_matches() { + let libs = 
sample_libraries(); + let lib = resolve_library_param(&libs, Some("main")) .expect("valid name") .expect("some library"); assert_eq!(lib.id, 1); } - #[actix_rt::test] - async fn resolve_library_param_unknown_id_errs() { - let state = state_with_libraries(sample_libraries()); - let err = resolve_library_param(&state, Some("999")).unwrap_err(); + #[test] + fn resolve_library_param_unknown_id_errs() { + let libs = sample_libraries(); + let err = resolve_library_param(&libs, Some("999")).unwrap_err(); assert!(err.contains("unknown library id")); } - #[actix_rt::test] - async fn resolve_library_param_unknown_name_errs() { - let state = state_with_libraries(sample_libraries()); - let err = resolve_library_param(&state, Some("missing")).unwrap_err(); + #[test] + fn resolve_library_param_unknown_name_errs() { + let libs = sample_libraries(); + let err = resolve_library_param(&libs, Some("missing")).unwrap_err(); assert!(err.contains("unknown library name")); } diff --git a/src/main.rs b/src/main.rs index 8b56efd..e420d8b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -62,6 +62,7 @@ mod knowledge; mod memories; mod otel; mod personas; +mod reels; mod service; #[cfg(test)] mod testhelpers; @@ -266,6 +267,11 @@ fn main() -> std::io::Result<()> { } } + // Spawn the nightly pre-generation scheduler (Section D). + reels::spawn_pregen_scheduler(app_state.clone()).await; + // Spawn the on-disk reel-cache sweeper (bounds pre-gen + on-demand reels). 
+ reels::spawn_reel_cache_sweeper(app_state.clone()).await; + HttpServer::new(move || { let user_dao = SqliteUserDao::new(); let favorites_dao = SqliteFavoriteDao::new(); @@ -344,6 +350,11 @@ fn main() -> std::io::Result<()> { .service(handlers::image::clear_image_date) .service(handlers::image::get_full_exif) .service(memories::list_memories) + .service(reels::create_reel_handler) + .service(reels::reel_status_handler) + .service(reels::reel_video_handler) + .service(reels::precomputed_reel_handler) + .service(reels::precomputed_video_handler) .service(ai::generate_insight_handler) .service(ai::generate_agentic_insight_handler) .service(ai::generation_status_handler) diff --git a/src/memories.rs b/src/memories.rs index 4b1682b..2b1f473 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -349,12 +349,6 @@ pub async fn list_memories( opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); let span_mode = q.span.unwrap_or(MemoriesSpan::Day); - let span_token = match span_mode { - MemoriesSpan::Day => "day", - MemoriesSpan::Week => "week", - MemoriesSpan::Month => "month", - }; - let years_back: i32 = DEFAULT_YEARS_BACK; // The SQL filter expects a signed offset in minutes from UTC; default // 0 (UTC) when the client didn't send a hint. 
We also keep a chrono @@ -366,18 +360,66 @@ pub async fn list_memories( .timezone_offset_minutes .and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60)); - debug!( - "list_memories: span={:?} tz_offset_min={} years_back={}", - span_mode, tz_offset_minutes, years_back - ); - - let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { - Ok(lib) => lib, + let items = match gather_memory_items( + &app_state, + &exif_dao, + &span_context, + span_mode, + tz_offset_minutes, + client_timezone, + q.library.as_deref(), + ) { + Ok(items) => items, Err(msg) => { warn!("Rejecting /memories request: {}", msg); return HttpResponse::BadRequest().body(msg); } }; + + span.add_event( + "memories_scanned", + vec![ + KeyValue::new("span", format!("{:?}", span_mode)), + KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()), + KeyValue::new("result_count", items.len().to_string()), + KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), + KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), + ], + ); + span.set_status(Status::Ok); + + HttpResponse::Ok().json(MemoriesResponse { items }) +} + +/// Resolve an "on this day/week/month across past years" window into an +/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the +/// memory-reel selector so both honour the same library resolution, per-library +/// exclusions, timezone handling, and sort order. Returns `Err(message)` only +/// when the `library` param is invalid (callers map that to 400); per-library +/// query/lock failures are logged and skipped, matching the handler's +/// best-effort behaviour. 
+pub fn gather_memory_items( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + span_mode: MemoriesSpan, + tz_offset_minutes: i32, + client_timezone: Option, + library_param: Option<&str>, +) -> Result, String> { + let span_token = match span_mode { + MemoriesSpan::Day => "day", + MemoriesSpan::Week => "week", + MemoriesSpan::Month => "month", + }; + let years_back: i32 = DEFAULT_YEARS_BACK; + + debug!( + "gather_memory_items: span={:?} tz_offset_min={} years_back={}", + span_mode, tz_offset_minutes, years_back + ); + + let library = crate::libraries::resolve_library_param_state(app_state, library_param)?; let libraries_to_scan: Vec<&crate::libraries::Library> = match library { Some(lib) => vec![lib], None => app_state.libraries.iter().collect(), @@ -394,7 +436,7 @@ pub async fn list_memories( let rows = match exif_dao.lock() { Ok(mut dao) => match dao.get_memories_in_window( - &span_context, + span_context, lib.id, span_token, years_back, @@ -469,21 +511,7 @@ pub async fn list_memories( } } - let items: Vec = memories_with_dates.into_iter().map(|(m, _)| m).collect(); - - span.add_event( - "memories_scanned", - vec![ - KeyValue::new("span", format!("{:?}", span_mode)), - KeyValue::new("years_back", years_back.to_string()), - KeyValue::new("result_count", items.len().to_string()), - KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), - KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), - ], - ); - span.set_status(Status::Ok); - - HttpResponse::Ok().json(MemoriesResponse { items }) + Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect()) } #[cfg(test)] diff --git a/src/reels/mod.rs b/src/reels/mod.rs new file mode 100644 index 0000000..afe2ced --- /dev/null +++ b/src/reels/mod.rs @@ -0,0 +1,1568 @@ +//! Memory reels: render an MP4 slideshow of a selection of photos with an +//! LLM-written, voice-cloned narration over it. +//! +//! 
Pipeline: a [`selector`] resolves *which* photos (and the reel metadata), +//! the [`script`] module writes per-photo narration via the LLM, each line is +//! synthesized to speech, and [`render`] assembles the stills + narration into +//! one MP4. Jobs run in the background (mirroring the TTS speech-job registry) +//! because a reel takes minutes; the finished MP4 is cached on disk keyed by +//! the selection so a repeat request is instant. +//! +//! Phase 1 is on-demand and photos-only. The segment model is media-typed so a +//! video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in +//! without reworking the pipeline. + +pub mod render; +pub mod script; +pub mod selector; + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::{LazyLock, Mutex, Mutex as StdMutex}; +use std::time::{Duration, Instant}; + +use actix_files::NamedFile; +use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web}; +use anyhow::{Context, anyhow}; +use chrono::{DateTime, Datelike, Timelike}; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use uuid::Uuid; + +use crate::data::Claims; +use crate::database::{ExifDao, InsightDao}; +use crate::libraries::{Library, resolve_library_param}; +use crate::memories::MemoriesSpan; +use crate::otel::extract_context_from_request; +use crate::state::AppState; +use selector::ReelSelector; + +// --- Precomputed reel age limits (hours) ------------------------------------- + +/// Maximum age for a precomputed day reel before it's considered stale. +const REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS: u64 = 26; +/// Maximum age for a precomputed week reel. +const REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS: u64 = 192; +/// Maximum age for a precomputed month reel. +const REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS: u64 = 768; + +/// How many precomputed reels to keep per (span, library). 
The newest is the +/// one served; one extra is a grace window so a regen mid-flight (or a client +/// that started a fetch just before the swap) isn't left without a file. +const PREGEN_KEEP_PER_SCOPE: usize = 2; + +/// On-disk reel cache sweep: an unreferenced reel MP4 older than this is +/// removed. Catches the on-demand cache (which has no ledger row and otherwise +/// grows forever) and any pre-gen orphans. Tunable via `REEL_CACHE_MAX_AGE_DAYS`. +const REEL_CACHE_MAX_AGE_DAYS_DEFAULT: u64 = 7; +/// Interval between on-disk cache sweeps. +const REEL_CACHE_SWEEP_INTERVAL_SECS: u64 = 24 * 3600; +/// Transient render artifacts (`.mp4.tmp`, `.concat.txt`, orphaned sidecars) +/// older than this are leftovers from a crashed render and safe to remove. +const REEL_TMP_MAX_AGE_SECS: u64 = 3600; + +/// Resolve a library request parameter to a stable key string. +/// Returns the library's id as a string when found, or `"all"` when +/// the param is absent or the lookup fails. +pub fn normalize_library_key(libs: &[Library], param: Option<&str>) -> String { + match resolve_library_param(libs, param) { + Ok(Some(lib)) => lib.id.to_string(), + _ => "all".to_string(), + } +} + +/// Best-effort: mirror the latest client reel params into `user_ai_prefs` +/// so the nightly pre-gen scheduler can pick them up. Never fails the +/// caller regardless of DB errors. +fn capture_prefs( + app_state: &AppState, + req: &web::Json, + library_param: Option<&str>, +) -> Result<(), anyhow::Error> { + use crate::database::models::UpsertUserAiPrefs; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs(); + let library = match library_param { + Some(p) if !p.is_empty() => { + // Resolve to the actual library id for the DB row. 
+ normalize_library_key(&app_state.libraries, Some(p)) + } + _ => "all".to_string(), + }; + let mut dao = app_state.user_ai_prefs_dao.lock().expect("lock"); + let ctx = opentelemetry::Context::new(); + dao.upsert_prefs( + &ctx, + &UpsertUserAiPrefs { + voice: req.voice.clone().filter(|s| !s.is_empty()), + tz_offset_minutes: Some( + req.timezone_offset_minutes + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()), + ), + library: Some(library), + updated_at: now as i64, + }, + ) + .map_err(|e| anyhow::anyhow!("failed to upsert user_ai_prefs: {e}")) +} + +/// Which scripting strategy to use for the reel narration. +#[derive(Clone, Copy)] +pub enum ScripterMode { + /// Fast path: single LLM call via the direct client. + Fast, + /// Agentic path: resolves the backend through the InsightGenerator + /// (honouring LLM_BACKEND, model overrides, etc.). Falls back to + /// Fast on error so a scripting failure never sinks a reel. + Agentic, +} + +/// Progress callback type — receives a static-stage label. +pub type ProgressFn<'a> = dyn Fn(&'static str) + Send + Sync + 'a; + +/// The media behind one shot: a still photo, or a short section of a source +/// video (played with its live audio ducked under the narration). Both carry +/// just the library-relative path; the renderer applies fixed clip framing +/// (start/length) from constants. +#[derive(Debug, Clone)] +pub enum SegmentMedia { + Photo { rel_path: String, library_id: i32 }, + Clip { rel_path: String, library_id: i32 }, +} + +impl SegmentMedia { + fn rel_path(&self) -> &str { + match self { + SegmentMedia::Photo { rel_path, .. } | SegmentMedia::Clip { rel_path, .. } => rel_path, + } + } + fn library_id(&self) -> i32 { + match self { + SegmentMedia::Photo { library_id, .. } | SegmentMedia::Clip { library_id, .. } => { + *library_id + } + } + } +} + +/// A beat: one narration line over its media. 
A photo beat holds one still (a +/// held shot) or several (a quick burst that flashes through moments of an +/// event while the line is read). A clip beat holds a single video clip. Either +/// way one narration line covers the whole beat, so a week/month reel can +/// *show* everything it spans without a narration line — and the seconds that +/// come with it — per item. +#[derive(Debug, Clone)] +pub struct PlannedBeat { + pub media: Vec, + pub date: Option, + pub insight_title: Option, + pub insight_summary: Option, + /// GPS coordinates of the lead media item, when available. + pub gps: Option<(f64, f64)>, +} + +impl PlannedBeat { + /// Human date for the prompt, e.g. "June 12, 2019". `None` when undated. + pub fn date_label(&self) -> Option { + let ts = self.date?; + let dt = DateTime::from_timestamp(ts, 0)?; + Some(dt.format("%B %-d, %Y").to_string()) + } + + /// True when this beat is a single video clip (vs one or more photos). + pub fn is_clip(&self) -> bool { + matches!(self.media.as_slice(), [SegmentMedia::Clip { .. }]) + } +} + +/// Reel-wide metadata the scripter uses for framing. +#[derive(Debug, Clone)] +pub struct ReelMeta { + pub span: MemoriesSpan, + pub years: Vec, +} + +impl ReelMeta { + /// Natural-language phrase for the span, e.g. "on this day". + pub fn span_phrase(&self) -> &'static str { + match self.span { + MemoriesSpan::Day => "on this day", + MemoriesSpan::Week => "this week", + MemoriesSpan::Month => "this month", + } + } +} + +// --- Job registry ------------------------------------------------------------ +// +// In-memory, same shape as the TTS speech-job registry: a reel takes minutes, +// too long to hold one HTTP request from a phone. POST /reels returns a job id; +// the client polls GET /reels/{id} until the video URL appears. The heavy +// artifact (the MP4) lives on disk, not in this map — jobs only carry status + +// the output path. 
State is intentionally not durable across restarts; the +// on-disk cache is what makes a repeat request cheap, not the registry. + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ReelJobStatus { + Queued, + Running, + Done, + Error, +} + +impl ReelJobStatus { + fn is_terminal(self) -> bool { + matches!(self, Self::Done | Self::Error) + } +} + +struct ReelJob { + status: ReelJobStatus, + /// Coarse progress label for the client ("scripting", "narrating", …). + stage: &'static str, + title: Option, + output_path: Option, + error: Option, + created_at: Instant, + finished_at: Option, + abort: Option, +} + +/// Finished jobs linger so a client that lost connectivity can still collect +/// the result; anything older than MAX_AGE is dropped (aborted first if somehow +/// still running). Swept lazily on each create. +const REEL_JOB_RESULT_TTL: Duration = Duration::from_secs(30 * 60); +const REEL_JOB_MAX_AGE: Duration = Duration::from_secs(60 * 60); + +static REEL_JOBS: LazyLock>> = + LazyLock::new(|| StdMutex::new(HashMap::new())); + +fn sweep_stale_jobs(jobs: &mut HashMap, now: Instant) { + jobs.retain(|_, job| { + let result_expired = job + .finished_at + .is_some_and(|t| now.duration_since(t) >= REEL_JOB_RESULT_TTL); + let too_old = now.duration_since(job.created_at) >= REEL_JOB_MAX_AGE; + if too_old && let Some(h) = job.abort.take() { + h.abort(); + } + !(result_expired || too_old) + }); +} + +fn with_job(id: Uuid, f: impl FnOnce(&mut ReelJob) -> R) -> Option { + REEL_JOBS.lock().unwrap().get_mut(&id).map(f) +} + +fn set_stage(id: Uuid, stage: &'static str) { + with_job(id, |job| { + if !job.status.is_terminal() { + job.status = ReelJobStatus::Running; + job.stage = stage; + } + }); +} + +/// Move a job to a terminal state (first terminal write wins). 
+fn finish_job( + id: Uuid, + status: ReelJobStatus, + title: Option, + output_path: Option, + error: Option, +) { + with_job(id, |job| { + if job.status.is_terminal() { + return; + } + job.status = status; + job.stage = match status { + ReelJobStatus::Done => "done", + _ => "error", + }; + job.title = title; + job.output_path = output_path; + job.error = error; + job.finished_at = Some(Instant::now()); + job.abort = None; + }); +} + +// --- On-disk cache ----------------------------------------------------------- + +/// Render version: bump to invalidate every cached reel after a rendering / +/// scripting change that should produce a fresh result. +const RENDER_VERSION: u32 = 7; + +/// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump +/// over the ~0.5 default warms up otherwise-flat narration without over-acting; +/// tune via `REEL_TTS_EXAGGERATION` (0.25–2.0). +fn reel_tts_exaggeration() -> f32 { + std::env::var("REEL_TTS_EXAGGERATION") + .ok() + .and_then(|s| s.trim().parse::().ok()) + .filter(|x| x.is_finite()) + .unwrap_or(0.6) +} + +/// Cache key over everything that determines *which* media and *how* it's +/// voiced — but not the (non-deterministic) narration text. Same inputs → same +/// MP4 served instantly. blake3 keeps it filesystem-safe and collision-free. +fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str>) -> String { + let mut buf = format!( + "v{}|{}|voice={}|", + RENDER_VERSION, + selector.descriptor(), + voice.unwrap_or("default") + ); + for m in media { + // Tag photo vs clip so the same path used as a still and as a video + // clip produce different keys. + let tag = match m { + SegmentMedia::Photo { .. } => 'P', + SegmentMedia::Clip { .. 
} => 'C', + }; + buf.push_str(&format!("{tag}{}:{}|", m.library_id(), m.rel_path())); + } + blake3::hash(buf.as_bytes()).to_hex().to_string() +} + +fn reel_mp4_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.mp4")) +} + +fn reel_sidecar_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.json")) +} + +#[derive(Serialize, Deserialize)] +struct ReelSidecar { + title: String, +} + +// --- HTTP types -------------------------------------------------------------- + +#[derive(Debug, Deserialize)] +pub struct CreateReelRequest { + #[serde(default)] + pub span: Option, + #[serde(default)] + pub timezone_offset_minutes: Option, + #[serde(default)] + pub library: Option, + /// Cloned TTS voice for the narration; server default when omitted. + #[serde(default)] + pub voice: Option, + /// Cap on photos in the reel (clamped server-side). + #[serde(default)] + pub max_segments: Option, +} + +#[derive(Debug, Serialize)] +pub struct ReelJobCreatedResponse { + pub job_id: String, + pub status: ReelJobStatus, +} + +#[derive(Debug, Serialize)] +pub struct ReelStatusResponse { + pub job_id: String, + pub status: ReelJobStatus, + pub stage: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub video_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +/// Response shape for `GET /reels/precomputed`. +#[derive(Debug, Serialize)] +pub struct PrecomputedReelResponse { + pub video_url: String, + pub title: String, +} + +// --- Handlers ---------------------------------------------------------------- + +/// POST /reels — start (or instantly serve from cache) a memory reel for the +/// requested span. Returns 202 + a job id; the client polls GET /reels/{id}. 
+#[post("/reels")] +pub async fn create_reel_handler( + http_request: HttpRequest, + _claims: Claims, + req: web::Json, + app_state: web::Data, + exif_dao: web::Data>>, + insight_dao: web::Data>>, +) -> impl Responder { + let span_context = extract_context_from_request(&http_request); + + if app_state.llamacpp.is_none() { + return HttpResponse::ServiceUnavailable().json(json!({ + "error": "Reel narration needs the LLM/TTS backend (set LLAMA_SWAP_URL)" + })); + } + + let span = req.span.unwrap_or(MemoriesSpan::Day); + let max_segments = req.max_segments.unwrap_or(selector::DEFAULT_MAX_SEGMENTS); + let selector = ReelSelector::Memories { + span, + tz_offset_minutes: req.timezone_offset_minutes.unwrap_or(0), + library: req.library.clone(), + max_segments, + }; + + // Cheap pass: resolve the media set for the cache key and the emptiness + // check. Insight enrichment + scripting happen in the background job. + let (planned, meta) = match selector::resolve(&app_state, &exif_dao, &span_context, &selector) { + Ok(r) => r, + Err(msg) => return HttpResponse::BadRequest().body(msg), + }; + if planned.is_empty() { + return HttpResponse::UnprocessableEntity().json(json!({ + "error": "No photo memories found for this span" + })); + } + + // Flatten every media item across beats (in order) into the cache key — the + // key tracks exactly which photos/clips appear and in what sequence. + let media: Vec = planned.iter().flat_map(|b| b.media.clone()).collect(); + let voice = req.voice.clone().filter(|s| !s.is_empty()); + let key = cache_key(&selector, &media, voice.as_deref()); + + let job_id = Uuid::new_v4(); + log::info!( + "reel {job_id}: request span={:?} → {} beats, {} photos", + span, + planned.len(), + media.len() + ); + + // Cache hit: register an already-Done job pointing at the existing MP4 so + // the client's first poll returns the video URL immediately. 
+ let mp4 = reel_mp4_path(&app_state, &key); + if mp4.exists() { + log::info!("reel {job_id}: cache hit, serving existing reel"); + let title = std::fs::read(reel_sidecar_path(&app_state, &key)) + .ok() + .and_then(|b| serde_json::from_slice::(&b).ok()) + .map(|s| s.title); + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Done, + stage: "done", + title, + output_path: Some(mp4), + error: None, + created_at: Instant::now(), + finished_at: Some(Instant::now()), + abort: None, + }, + ); + // Capture params for passive prefs mirror (best-effort, never fails). + let _ = capture_prefs(&app_state, &req, req.library.as_deref()); + return HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Done, + }); + } + + { + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Queued, + stage: "queued", + title: None, + output_path: None, + error: None, + created_at: Instant::now(), + finished_at: None, + abort: None, + }, + ); + } + log::info!("reel {job_id}: queued for generation"); + + let state = app_state.clone(); + let insight_dao = insight_dao.clone(); + let exif_dao = exif_dao.clone(); + let handle = tokio::spawn(async move { + match run_reel_job( + &state, + &insight_dao, + &exif_dao, + job_id, + planned, + meta, + voice, + &key, + ) + .await + { + Ok((title, path)) => { + finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None) + } + Err(e) => { + log::error!("reel job {job_id} failed: {e:?}"); + finish_job( + job_id, + ReelJobStatus::Error, + None, + None, + Some(format!("{e}")), + ) + } + } + }); + with_job(job_id, |job| job.abort = Some(handle.abort_handle())); + + // Capture params for passive prefs mirror (best-effort, never fails). 
+ let _ = capture_prefs(&app_state, &req, req.library.as_deref()); + + HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Queued, + }) +} + +/// GET /reels/{id} — poll a reel job. Done jobs carry a `video_url`. +#[get("/reels/{id}")] +pub async fn reel_status_handler(_claims: Claims, path: web::Path) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let resp = with_job(id, |job| ReelStatusResponse { + job_id: id_str.clone(), + status: job.status, + stage: job.stage.to_string(), + title: job.title.clone(), + video_url: matches!(job.status, ReelJobStatus::Done) + .then(|| format!("/reels/{id_str}/video")), + error: job.error.clone(), + }); + match resp { + Some(r) => HttpResponse::Ok().json(r), + None => HttpResponse::NotFound().json(json!({ "error": "job not found or expired" })), + } +} + +/// GET /reels/{id}/video — stream the finished MP4 (supports range requests via +/// NamedFile, so the mobile player can seek). +#[get("/reels/{id}/video")] +pub async fn reel_video_handler( + _claims: Claims, + request: HttpRequest, + path: web::Path, +) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let output = with_job(id, |job| job.output_path.clone()).flatten(); + let Some(path) = output else { + return HttpResponse::NotFound().json(json!({ "error": "reel not ready" })); + }; + match NamedFile::open(&path) { + Ok(file) => file.into_response(&request), + Err(e) => { + log::error!("opening reel mp4 {path:?} failed: {e:?}"); + HttpResponse::NotFound().json(json!({ "error": "reel file missing" })) + } + } +} + +/// GET /reels/precomputed?span=&library= +/// +/// Look up the latest precomputed reel for the given span and library key. 
+/// Validity gate (all must hold, else 404): +/// 1. `render_version == RENDER_VERSION` +/// 2. `output_path` exists on disk +/// 3. age <= max_age(span) (Day 26h, Week 8d, Month 32d) +/// +/// Returns `{ video_url: "/reels/by-key/{cache_key}/video", title }`. +#[get("/reels/precomputed")] +pub async fn precomputed_reel_handler( + _claims: Claims, + query: web::Query>, + app_state: web::Data, +) -> impl Responder { + let span = query.get("span").map(|s| s.as_str()).unwrap_or("day"); + let library_key = normalize_library_key( + &app_state.libraries, + query.get("library").map(|s| s.as_str()), + ); + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs() as i64; + + let max_age_hours = match span { + "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS as i64, + "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS as i64, + _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS as i64, + }; + let min_generated_at = now - (max_age_hours * 3600); + + let ctx = opentelemetry::Context::new(); + let mut dao = app_state + .precomputed_reel_dao + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + // Fast existence gate: is there a fresh row at all? + if !dao + .exists_fresh( + &ctx, + span, + &library_key, + RENDER_VERSION as i32, + min_generated_at, + ) + .unwrap_or(false) + { + return HttpResponse::NotFound().json(json!({ "error": "no precomputed reel found" })); + } + + // Fetch the latest row for full validity checks. + let reel = match dao.latest_for(&ctx, span, &library_key) { + Ok(Some(r)) => r, + _ => { + return HttpResponse::NotFound().json(json!({ "error": "no precomputed reel found" })); + } + }; + + // Validity gate 1: render version must match. + if reel.render_version != RENDER_VERSION as i32 { + return HttpResponse::NotFound() + .json(json!({ "error": "precomputed reel is stale (render version mismatch)" })); + } + + // Validity gate 2: output_path must exist. 
+ let output = std::path::Path::new(&reel.output_path); + if !output.exists() { + return HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" })); + } + + // Validity gate 3: age <= max_age (re-checked via min_generated_at). + if reel.generated_at < min_generated_at { + return HttpResponse::NotFound().json(json!({ "error": "precomputed reel has expired" })); + } + + HttpResponse::Ok().json(PrecomputedReelResponse { + video_url: format!("/reels/by-key/{}/video", reel.cache_key), + title: reel.title, + }) +} + +/// GET /reels/by-key/{key}/video — stream a precomputed reel MP4 by cache key. +#[get("/reels/by-key/{key}/video")] +pub async fn precomputed_video_handler( + _claims: Claims, + request: HttpRequest, + path: web::Path, + app_state: web::Data, +) -> impl Responder { + let key = path.into_inner(); + let mp4 = reel_mp4_path(&app_state, &key); + match NamedFile::open(&mp4) { + Ok(file) => file.into_response(&request), + Err(e) => { + log::error!("opening precomputed reel {key} failed: {e:?}"); + HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" })) + } + } +} + +// --- Pipeline ---------------------------------------------------------------- + +/// Run the full reel pipeline: enrich → script → narrate → render → concat, +/// then publish the MP4 into the cache. Returns (title, mp4_path). +/// +/// The `scripter` parameter controls which narration-generation strategy is +/// used (fast single-call vs. agentic backend resolution). On scripting +/// failure in Agentic mode the pipeline falls back to the fast path so a +/// single LLM failure never sinks a reel. 
+pub(crate) async fn produce_reel( + app_state: &AppState, + insight_dao: &Mutex>, + exif_dao: &Mutex>, + mut planned: Vec, + meta: ReelMeta, + voice: Option, + key: &str, + scripter: ScripterMode, + progress: Option<&ProgressFn<'_>>, +) -> anyhow::Result<(String, PathBuf)> { + let started = Instant::now(); + let total_photos: usize = planned.iter().map(|b| b.media.len()).sum(); + log::info!( + "reel produce_reel: starting — span {:?}, {} beats, {} photos, voice={}", + meta.span, + planned.len(), + total_photos, + voice.as_deref().unwrap_or("default") + ); + + let client = app_state + .llamacpp + .as_ref() + .ok_or_else(|| anyhow::anyhow!("TTS/LLM backend not configured"))? + .clone(); + + // 1. Enrich each beat with its lead photo's cached insight, then script + // (one LLM call → one narration line per beat). + emit_progress(progress, "scripting"); + log::info!("reel produce_reel: scripting narration via LLM…"); + let span_context = opentelemetry::Context::new(); + selector::enrich(insight_dao, exif_dao, &span_context, &mut planned); + let script = match scripter { + ScripterMode::Fast => script::generate_script(&client, &meta, &planned).await?, + ScripterMode::Agentic => { + match script::generate_script_agentic(&app_state.insight_generator, &meta, &planned) + .await + { + Ok(s) => s, + Err(e) => { + log::warn!( + "reel produce_reel: agentic script failed, falling back to fast: {e}" + ); + script::generate_script(&client, &meta, &planned).await? + } + } + } + }; + log::info!( + "reel produce_reel: scripted \"{}\" ({} lines)", + script.title, + script.lines.len() + ); + + // 2. Narrate each beat's line and 3. render the beat (its photos shown in + // sequence under that one narration). A beat whose audio or render fails + // is skipped (logged) rather than sinking the whole reel — handles an + // odd HEIC/corrupt file gracefully. 
+ emit_progress(progress, "narrating"); + let work = tempfile::tempdir().context("creating reel work dir")?; + let nvenc = render::is_nvenc_available().await; + log::info!( + "reel produce_reel: narrating + rendering {} beats (encoder: {})", + planned.len(), + if nvenc { "nvenc" } else { "cpu" } + ); + let opts = render::SegmentOpts { + nvenc, + ..Default::default() + }; + + let beat_total = planned.len(); + let mut beat_files: Vec = Vec::new(); + for (i, (beat, line)) in planned.iter().zip(script.lines.iter()).enumerate() { + // Resolve the beat's media to absolute paths; drop any that don't + // resolve. An empty beat is skipped. + let paths: Vec = beat + .media + .iter() + .filter_map(|m| resolve_media_path(app_state, m)) + .collect(); + if paths.is_empty() { + log::warn!("reel produce_reel: skipping beat {i}, no media paths resolved"); + continue; + } + + let audio_bytes = match crate::ai::tts::synthesize_serialized( + &client, + line, + voice.as_deref(), + "wav", + Some(reel_tts_exaggeration()), + ) + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("reel produce_reel: skipping beat {i}, TTS failed: {e}"); + continue; + } + }; + let audio_path = work.path().join(format!("narration_{i:03}.wav")); + if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { + log::warn!("reel produce_reel: skipping beat {i}, writing audio failed: {e}"); + continue; + } + + let narration_secs = + crate::video::ffmpeg::get_duration_seconds(&audio_path.to_string_lossy()) + .await + .ok() + .flatten() + .unwrap_or(render::MIN_SEGMENT_SECONDS); + + emit_progress(progress, "rendering"); + let beat_out = work.path().join(format!("beat_{i:03}.mp4")); + let render_result = if beat.is_clip() { + log::info!( + "reel produce_reel: beat {}/{} — video clip, narration {:.1}s", + i + 1, + beat_total, + narration_secs + ); + render::render_clip_beat(&paths[0], &audio_path, &beat_out, narration_secs, &opts).await + } else { + log::info!( + "reel produce_reel: beat {}/{} — {} photo(s), 
narration {:.1}s", + i + 1, + beat_total, + paths.len(), + narration_secs + ); + render::render_beat(&paths, &audio_path, &beat_out, narration_secs, &opts).await + }; + if let Err(e) = render_result { + log::warn!("reel produce_reel: skipping beat {i}, render failed: {e}"); + continue; + } + beat_files.push(beat_out.to_string_lossy().to_string()); + } + + let segment_files = beat_files; + if segment_files.is_empty() { + return Err(anyhow!("no beats rendered successfully")); + } + + // 4. Concat into the cache. Write to a temp name in the reels dir, then + // rename atomically (same filesystem) so a reader never sees a partial. + emit_progress(progress, "rendering"); + log::info!( + "reel produce_reel: joining {} rendered beats into the final reel", + segment_files.len() + ); + std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?; + let final_path = reel_mp4_path(app_state, key); + let tmp_path = final_path.with_extension("mp4.tmp"); + render::concat_segments(&segment_files, &tmp_path).await?; + std::fs::rename(&tmp_path, &final_path).context("publishing reel mp4")?; + + // Sidecar carries the title so a future cache hit can return it without + // re-running the pipeline. + let sidecar = serde_json::to_vec(&ReelSidecar { + title: script.title.clone(), + }) + .context("serializing reel sidecar")?; + let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar); + + log::info!( + "reel produce_reel: done in {:.1}s — {} beats → {}", + started.elapsed().as_secs_f64(), + segment_files.len(), + final_path.display() + ); + Ok((script.title, final_path)) +} + +/// Emit a progress stage label via the optional callback. +fn emit_progress(progress: Option<&ProgressFn<'_>>, stage: &'static str) { + if let Some(p) = progress { + p(stage); + } +} + +/// Run the full reel pipeline and publish the MP4 into the cache. +/// Thin wrapper around [`produce_reel`] that wires up job-stage tracking. 
+async fn run_reel_job( + app_state: &AppState, + insight_dao: &Mutex>, + exif_dao: &Mutex>, + job_id: Uuid, + planned: Vec, + meta: ReelMeta, + voice: Option, + key: &str, +) -> anyhow::Result<(String, PathBuf)> { + let progress = move |stage: &'static str| { + set_stage(job_id, stage); + }; + produce_reel( + app_state, + insight_dao, + exif_dao, + planned, + meta, + voice, + key, + ScripterMode::Fast, + Some(&progress), + ) + .await +} + +/// Resolve a media item's library-relative path to a validated absolute path +/// under its library root (works for both photos and clips). +fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option { + let lib = app_state.library_by_id(media.library_id())?; + let rel = media.rel_path().to_string(); + crate::files::is_valid_full_path(&lib.root_path, &rel, false) +} + +// --- Nightly pre-generation scheduler (Section D) ---------------------------- + +/// Env: "3" (default). The hour (0-23) when the nightly pre-gen batch fires. +/// Clamped to 0-23; invalid values fall back to default. +fn pregen_run_hour() -> u32 { + std::env::var("REEL_PREGEN_HOUR") + .ok() + .and_then(|v| v.trim().parse().ok()) + .filter(|h| *h <= 23) + .unwrap_or(3) +} + +/// Env: "1" (default, Monday). Day of week for weekly pre-gen (0=Sun, 1=Mon, ...). +/// Clamped to 0-6; invalid values fall back to default. +fn pregen_week_dow() -> u32 { + std::env::var("REEL_PREGEN_WEEK_DOW") + .ok() + .and_then(|v| v.trim().parse().ok()) + .filter(|d| *d <= 6) + .unwrap_or(1) +} + +/// Pure: seconds until the next `run_hour:00:00` strictly after `now`. +/// +/// Minute/second-accurate (not just hour-granular): when `now` is already at or +/// past the target this wraps to the same hour tomorrow, so a batch that +/// finishes inside the run hour sleeps ~24h rather than busy-looping (waking, +/// re-running, and re-sleeping 0s) for the rest of that hour. The tradeoff is +/// that booting at or after `run_hour` waits until the next day. 
Recomputed each +/// loop iteration from `Local::now()` so DST shifts are absorbed. +pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime, run_hour: u32) -> u64 { + let now_secs = now.hour() * 3600 + now.minute() * 60 + now.second(); + let target_secs = run_hour * 3600; + let diff = if target_secs > now_secs { + target_secs - now_secs + } else { + 86_400 - now_secs + target_secs + }; + diff as u64 +} + +/// Load pre-gen parameters: tries the user_ai_prefs DB row first, falls back +/// to env vars, then to server-local defaults. +fn load_pregen_params(app_state: &AppState) -> (i32, Option, String) { + // Try DB row first + if let Ok(mut dao) = app_state.user_ai_prefs_dao.lock() { + let ctx = opentelemetry::Context::new(); + if let Ok(Some(prefs)) = dao.get_prefs(&ctx) { + let tz = prefs.tz_offset_minutes.unwrap_or_else(fixed_tz_offset); + let voice = prefs.voice; + let library = prefs.library.unwrap_or_else(|| "all".to_string()); + return (tz, voice, library); + } + } + // Fall back to env (explicit offset overrides auto-detect) + let tz = std::env::var("REEL_PREGEN_TZ_OFFSET_MINUTES") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or_else(fixed_tz_offset); + let voice = std::env::var("REEL_PREGEN_VOICE").ok(); + let library = std::env::var("REEL_PREGEN_LIBRARY") + .ok() + .unwrap_or_else(|| "all".to_string()); + (tz, voice, library) +} + +/// Fixed timezone offset: reads `REEL_PREGEN_TZ_FIXED_MINUTES` (e.g. "-480" +/// for US Eastern) when set, falling back to the system local offset. Using +/// a fixed offset avoids DST shifts changing the pre-gen schedule halfway +/// through the year. +fn fixed_tz_offset() -> i32 { + std::env::var("REEL_PREGEN_TZ_FIXED_MINUTES") + .ok() + .and_then(|v| v.trim().parse().ok()) + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()) +} + +/// Spawn the nightly pre-generation scheduler. Runs behind `REEL_PREGEN_ENABLED`. 
+pub(crate) async fn spawn_pregen_scheduler(app_state: web::Data) { + if std::env::var("REEL_PREGEN_ENABLED").ok() != Some("1".to_string()) { + log::info!("Reel pre-generation scheduler disabled (REEL_PREGEN_ENABLED != 1)"); + return; + } + + let run_hour = pregen_run_hour(); + log::info!( + "Reel pre-generation scheduler enabled, running at hour {} local", + run_hour + ); + + tokio::spawn(async move { + loop { + let now = chrono::Local::now(); + let sleep_secs = secs_until_next_run_hour(now, run_hour); + log::debug!("Next pre-gen run in {}s", sleep_secs); + tokio::time::sleep(std::time::Duration::from_secs(sleep_secs)).await; + + if let Err(e) = run_pregen_batch(&app_state).await { + log::error!("Reel pre-generation batch failed: {}", e); + } + } + }); +} + +/// Run the pre-generation batch for all applicable spans. +async fn run_pregen_batch(app_state: &AppState) -> anyhow::Result<()> { + let now = chrono::Local::now(); + let weekday = now.weekday().num_days_from_sunday(); // 0=Sun, 1=Mon, ... + let day_of_month = now.day(); + + let mut spans = vec!["day"]; + if weekday == pregen_week_dow() { + spans.push("week"); + } + if day_of_month == 1 { + spans.push("month"); + } + + let (tz, voice, library) = load_pregen_params(app_state); + + for span in spans { + if let Err(e) = pregen_one(app_state, span, tz, voice.clone(), &library).await { + log::error!("Pre-gen failed for span={}: {}", span, e); + } + } + + Ok(()) +} + +/// Pre-generate a single reel for the given span. 
+async fn pregen_one( + app_state: &AppState, + span: &str, + tz: i32, + voice: Option, + library: &str, +) -> anyhow::Result<()> { + let memories_span = match span { + "day" => MemoriesSpan::Day, + "week" => MemoriesSpan::Week, + "month" => MemoriesSpan::Month, + _ => MemoriesSpan::Day, + }; + + let selector = ReelSelector::Memories { + span: memories_span, + tz_offset_minutes: tz, + library: if library == "all" { + None + } else { + Some(library.to_string()) + }, + // Must match the on-demand default (create_reel_handler) so the cache + // key — which encodes the raw max_segments — lines up and the on-demand + // cache-hit path serves this pre-generated reel. The client sends no + // max_segments, so it defaults to DEFAULT_MAX_SEGMENTS there too. + max_segments: selector::DEFAULT_MAX_SEGMENTS, + }; + + let exif_dao = app_state.insight_generator.exif_dao(); + let insight_dao = app_state.insight_generator.insight_dao(); + let ctx = opentelemetry::Context::new(); + let (planned, reel_meta) = match selector::resolve(app_state, exif_dao, &ctx, &selector) { + Ok((p, m)) => (p, m), + Err(e) => { + log::warn!("Pre-gen resolve failed for span={}: {}", span, e); + return Ok(()); + } + }; + + if planned.is_empty() { + log::info!("No beats for span={}, skipping", span); + return Ok(()); + } + + // Flatten every media item across beats (in order) into the cache key. + let media: Vec = planned.iter().flat_map(|b| b.media.clone()).collect(); + let key = cache_key(&selector, &media, voice.as_deref()); + // Total media items shown (photos + clips), not beat count. 
+ let media_count = media.len() as i32; + + // Dedup: check if fresh ledger row exists + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs() as i64; + + let max_age_hours = match span { + "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS, + "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS, + _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS, + }; + let min_generated_at = now - (max_age_hours as i64 * 3600); + + // Skip only when a fresh ledger row points at THIS exact cache key (same + // media, params, render version) and its file still exists. Comparing the + // stored cache_key — not just (span, library) — means a key change from + // selection-logic/params drift that doesn't bump RENDER_VERSION still forces + // a regen within the freshness window, instead of leaving a stale row that + // points at an orphaned reel. + let already_current = { + let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); + matches!( + dao.latest_for(&ctx, span, library), + Ok(Some(row)) + if row.cache_key == key + && row.render_version == RENDER_VERSION as i32 + && row.generated_at >= min_generated_at + ) && reel_mp4_path(app_state, &key).exists() + }; + + if already_current { + log::info!( + "Fresh precomputed reel already current for span={} key={}, skipping", + span, + key + ); + return Ok(()); + } + + // Past the key-aware dedup above, any MP4 already at this key was NOT + // pre-generated by us (it has no matching ledger row) — most likely an + // on-demand fast-scripted reel that happens to share the key. Don't adopt + // it: regenerate so the precomputed reel is the agentic one. produce_reel + // publishes atomically, overwriting whatever is there. (The narrow + // render-succeeded-but-ledger-write-failed crash window just costs one + // redundant re-render next run.) 
+ log::info!("Generating precomputed reel for span={}, key={}", span, key); + let (title, mp4) = produce_reel( + app_state, + insight_dao, + exif_dao, + planned, + reel_meta, + voice.clone(), + &key, + ScripterMode::Agentic, + None, + ) + .await?; + + // Record to ledger, then retire superseded reels for this (span, library) + // — yesterday's daily, an older render-version, etc. — keeping a small + // grace window. Done under one lock so the prune sees the row we just wrote. + let superseded = { + let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); + reel_dao.record_reel( + &ctx, + &crate::database::models::InsertablePrecomputedReel { + span: span.to_string(), + library_key: library.to_string(), + cache_key: key.clone(), + output_path: mp4.to_string_lossy().to_string(), + title, + media_count, + render_version: RENDER_VERSION as i32, + tz_offset_minutes: tz, + voice: voice.clone(), + generated_at: now, + }, + )?; + reel_dao + .prune_superseded(&ctx, span, library, PREGEN_KEEP_PER_SCOPE) + .unwrap_or_default() + }; + for row in &superseded { + delete_reel_files(&row.output_path); + } + if !superseded.is_empty() { + log::info!( + "Pruned {} superseded precomputed reel(s) for span={}", + superseded.len(), + span + ); + } + + log::info!("Precomputed reel generated for span={}, key={}", span, key); + Ok(()) +} + +// --- On-disk cache sweep ----------------------------------------------------- + +/// Best-effort unlink of a reel's MP4 and its `.json` sidecar. +fn delete_reel_files(mp4_output_path: &str) { + let mp4 = Path::new(mp4_output_path); + let _ = std::fs::remove_file(mp4); + let _ = std::fs::remove_file(mp4.with_extension("json")); +} + +/// Max age (seconds) before an unreferenced reel MP4 is swept. 
+fn reel_cache_max_age_secs() -> u64 { + std::env::var("REEL_CACHE_MAX_AGE_DAYS") + .ok() + .and_then(|v| v.trim().parse::().ok()) + .filter(|d| *d > 0) + .unwrap_or(REEL_CACHE_MAX_AGE_DAYS_DEFAULT) + * 86_400 +} + +/// Spawn the periodic on-disk reel-cache sweeper. Runs independently of the +/// pre-gen scheduler because the on-demand cache grows whether or not pre-gen +/// is enabled. Disable with `REEL_CACHE_SWEEP_ENABLED=0`. +pub(crate) async fn spawn_reel_cache_sweeper(app_state: web::Data) { + if std::env::var("REEL_CACHE_SWEEP_ENABLED").ok().as_deref() == Some("0") { + log::info!("Reel cache sweeper disabled (REEL_CACHE_SWEEP_ENABLED=0)"); + return; + } + tokio::spawn(async move { + // Settle after startup, then sweep on a fixed cadence. + tokio::time::sleep(Duration::from_secs(300)).await; + loop { + let removed = sweep_reel_cache(&app_state); + if removed > 0 { + log::info!("Reel cache sweep removed {removed} stale file(s)"); + } + tokio::time::sleep(Duration::from_secs(REEL_CACHE_SWEEP_INTERVAL_SECS)).await; + } + }); +} + +/// One sweep of `reels_path`. Removes: stale render artifacts (`.mp4.tmp`, +/// `.concat.txt`, orphaned sidecars) from crashed runs; and reel MP4s that no +/// ledger row references, that no live job points at, and that are older than +/// the cache max age (the on-demand cache, which has no ledger row). Returns the +/// number of files removed. Best-effort — any IO error on one entry is skipped. +fn sweep_reel_cache(app_state: &AppState) -> usize { + let dir = Path::new(&app_state.reels_path); + let read_dir = match std::fs::read_dir(dir) { + Ok(rd) => rd, + Err(_) => return 0, // dir not created yet → nothing to sweep + }; + + // Files a ledger row still points at (current pre-gen reels). 
+ let protected: std::collections::HashSet = { + let ctx = opentelemetry::Context::new(); + let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); + dao.all_cache_keys(&ctx) + .unwrap_or_default() + .into_iter() + .collect() + }; + // Outputs of live in-memory jobs (a Done reel a client may still be fetching). + let active: std::collections::HashSet = { + let jobs = REEL_JOBS.lock().unwrap(); + jobs.values() + .filter_map(|j| j.output_path.as_ref()) + .map(|p| p.to_string_lossy().to_string()) + .collect() + }; + + let now = std::time::SystemTime::now(); + let max_age = Duration::from_secs(reel_cache_max_age_secs()); + let tmp_max_age = Duration::from_secs(REEL_TMP_MAX_AGE_SECS); + let mut removed = 0usize; + + for entry in read_dir.flatten() { + let path = entry.path(); + let Some(name) = path.file_name().and_then(|n| n.to_str()) else { + continue; + }; + let age = entry + .metadata() + .and_then(|m| m.modified()) + .ok() + .and_then(|t| now.duration_since(t).ok()) + .unwrap_or_default(); + + // Transient render artifacts from a crashed run. + if name.ends_with(".mp4.tmp") || name.ends_with(".concat.txt") { + if age > tmp_max_age && std::fs::remove_file(&path).is_ok() { + removed += 1; + } + continue; + } + + // Reel MP4: keep if referenced (ledger or live job) or still recent. + if let Some(key) = name.strip_suffix(".mp4") { + let p = path.to_string_lossy().to_string(); + if protected.contains(key) || active.contains(&p) || age < max_age { + continue; + } + if std::fs::remove_file(&path).is_ok() { + let _ = std::fs::remove_file(path.with_extension("json")); + removed += 1; + } + continue; + } + + // Orphaned sidecar (its MP4 is gone). 
+ if name.ends_with(".json") + && !path.with_extension("mp4").exists() + && age > tmp_max_age + && std::fs::remove_file(&path).is_ok() + { + removed += 1; + } + } + removed +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::libraries::Library; + use chrono::TimeZone; + + fn photo(p: &str, lib: i32) -> SegmentMedia { + SegmentMedia::Photo { + rel_path: p.to_string(), + library_id: lib, + } + } + + fn clip(p: &str, lib: i32) -> SegmentMedia { + SegmentMedia::Clip { + rel_path: p.to_string(), + library_id: lib, + } + } + + fn day_selector() -> ReelSelector { + ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + } + } + + #[test] + fn cache_key_is_stable_for_same_inputs() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let k1 = cache_key(&day_selector(), &media, Some("grandma")); + let k2 = cache_key(&day_selector(), &media, Some("grandma")); + assert_eq!(k1, k2); + // 64-hex blake3. + assert_eq!(k1.len(), 64); + assert!(k1.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn cache_key_changes_with_media_order_voice_and_selector() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let reordered = vec![photo("b.jpg", 1), photo("a.jpg", 1)]; + let base = cache_key(&day_selector(), &media, Some("grandma")); + // Order matters (the reel sequence differs). + assert_ne!( + base, + cache_key(&day_selector(), &reordered, Some("grandma")) + ); + // Voice matters. + assert_ne!(base, cache_key(&day_selector(), &media, Some("dad"))); + assert_ne!(base, cache_key(&day_selector(), &media, None)); + // Span matters. + let week = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + }; + assert_ne!(base, cache_key(&week, &media, Some("grandma"))); + } + + #[test] + fn cache_key_distinguishes_photo_from_clip() { + // Same path/library used as a still vs a video clip must differ. 
+ let as_photo = vec![photo("v.mp4", 1)]; + let as_clip = vec![clip("v.mp4", 1)]; + assert_ne!( + cache_key(&day_selector(), &as_photo, None), + cache_key(&day_selector(), &as_clip, None) + ); + } + + #[test] + fn is_clip_only_for_single_clip_beat() { + let clip_beat = PlannedBeat { + media: vec![clip("v.mp4", 1)], + date: None, + insight_title: None, + insight_summary: None, + gps: None, + }; + let photo_beat = PlannedBeat { + media: vec![photo("a.jpg", 1), photo("b.jpg", 1)], + date: None, + insight_title: None, + insight_summary: None, + gps: None, + }; + assert!(clip_beat.is_clip()); + assert!(!photo_beat.is_clip()); + } + + #[test] + fn span_phrase_maps_each_span() { + let mk = |span| ReelMeta { + span, + years: vec![], + }; + assert_eq!(mk(MemoriesSpan::Day).span_phrase(), "on this day"); + assert_eq!(mk(MemoriesSpan::Week).span_phrase(), "this week"); + assert_eq!(mk(MemoriesSpan::Month).span_phrase(), "this month"); + } + + #[test] + fn date_label_formats_or_none() { + let beat = PlannedBeat { + media: vec![photo("a.jpg", 1)], + date: Some(1_560_384_000), // 2019-06-13 UTC + insight_title: None, + insight_summary: None, + gps: None, + }; + assert!(beat.date_label().unwrap().contains("2019")); + + let undated = PlannedBeat { + media: vec![photo("a.jpg", 1)], + date: None, + insight_title: None, + insight_summary: None, + gps: None, + }; + assert_eq!(undated.date_label(), None); + } + + #[test] + fn normalize_library_key_returns_id_when_found_numeric() { + let libs = vec![ + Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }, + Library { + id: 7, + name: "archive".to_string(), + root_path: "/tmp/archive".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }, + ]; + assert_eq!(normalize_library_key(&libs, Some("1")), "1"); + } + + #[test] + fn normalize_library_key_returns_id_when_found_by_name() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + 
root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("main")), "1"); + } + + #[test] + fn normalize_library_key_returns_all_when_absent() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, None), "all"); + } + + #[test] + fn normalize_library_key_returns_all_when_empty() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("")), "all"); + } + + #[test] + fn normalize_library_key_returns_all_when_unknown() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("missing")), "all"); + } + + #[test] + fn secs_until_next_run_hour_within_run_hour_wraps_to_tomorrow() { + // 03:30, run 3 → already past today's 03:00, so wait until tomorrow + // 03:00 (23h30m). Crucially NOT 0 — that would busy-loop the scheduler + // for the rest of the hour. + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 3, 30, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 3), 23 * 3600 + 30 * 60); + } + + #[test] + fn secs_until_next_run_hour_future_today_counts_minutes() { + // 10:15 → 14:00 is 3h45m, not a whole-hour 4h (minutes count). 
+ let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 10, 15, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 14), 3 * 3600 + 45 * 60); + } + + #[test] + fn secs_until_next_run_hour_past_today_wraps() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 20, 0, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 3), (24 - 20 + 3) * 3600); + } + + #[test] + fn secs_until_next_run_hour_midnight() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 0, 0, 0) + .single() + .expect("valid datetime"); + // 0:00, run at 3 → 3 hours + assert_eq!(secs_until_next_run_hour(dt, 3), 3 * 3600); + // 0:00 exactly, run at 0 → wraps to next midnight (not 0, so no busy loop) + assert_eq!(secs_until_next_run_hour(dt, 0), 86_400); + } + + #[test] + fn secs_until_next_run_hour_just_before_target() { + // 23:30, run 0 → 30 minutes to midnight (minute-accurate, not 1h). + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 23, 30, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 0), 30 * 60); + // 23:30, run 23 → already past today's 23:00, wait until tomorrow. + assert_eq!(secs_until_next_run_hour(dt, 23), 86_400 - 30 * 60); + } +} diff --git a/src/reels/render.rs b/src/reels/render.rs new file mode 100644 index 0000000..221df5f --- /dev/null +++ b/src/reels/render.rs @@ -0,0 +1,742 @@ +//! ffmpeg assembly for memory reels. +//! +//! Two-stage, per-segment design: each segment is rendered to its own +//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments +//! are joined with the concat demuxer (stream copy, no re-encode). Rendering +//! per segment — rather than one monster filtergraph — keeps each ffmpeg +//! invocation simple to reason about, parallelizes naturally, and means a +//! video-clip segment type (phase 2) slots in as just a different per-segment +//! builder without touching the concat stage. +//! +//! 
The arg builders are pure (`Vec` out) so the exact ffmpeg command +//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure. + +use anyhow::{Context, Result, bail}; +use std::path::Path; +use tokio::process::Command; + +/// Re-exported so the reel pipeline reaches NVENC detection through this module +/// rather than depending on `video::ffmpeg` directly. +pub use crate::video::ffmpeg::is_nvenc_available; + +/// Reel canvas. Portrait, because reels are watched on a phone held upright — +/// a landscape canvas letterboxes to a thin ~25%-height band there. Each photo +/// is fitted sharp and centered over a blurred, zoomed copy of itself (see +/// [`photo_filter_chain`]) so the frame is always filled regardless of the +/// photo's orientation, without cropping the subject. +pub const REEL_WIDTH: u32 = 1080; +pub const REEL_HEIGHT: u32 = 1920; +pub const REEL_FPS: u32 = 30; + +/// A beat's screen time is its narration length plus a short breath, with a +/// floor so a terse line still lingers. No ceiling: the beat always covers the +/// full narration so speech is never truncated — the scripter is asked to keep +/// lines short instead. +pub const MIN_SEGMENT_SECONDS: f64 = 2.5; +const NARRATION_TAIL_SECONDS: f64 = 0.6; + +/// Fade durations baked into each photo. A held (single-photo) beat gets a +/// gentle dip; burst photos get a much snappier fade so the difference between +/// a held shot and a quick burst is obvious. +const SINGLE_FADE_SECONDS: f64 = 0.35; +const BURST_FADE_SECONDS: f64 = 0.12; + +/// Video-clip framing. Fallback cap on how much of a clip we read when the +/// source length can't be probed; with a known length, a clip instead plays for +/// as much of its beat as its footage allows (see [`clip_beat_plan`]). Its live +/// audio is ducked to `CLIP_DUCK_VOLUME` under the narration. 
+pub const CLIP_SECONDS: f64 = 5.0; +const CLIP_DUCK_VOLUME: f64 = 0.35; + +/// Floor on how long each burst photo stays up, so a long line over many photos +/// doesn't flash them subliminally. If the narration is too short to give every +/// photo this much, the beat is stretched to fit. +const MIN_BURST_PHOTO_SECONDS: f64 = 0.6; + +/// Base screen time for a beat given its narration length: narration + breath, +/// floored. Used as the lower bound on a beat's total duration. +pub fn segment_duration(narration_secs: f64) -> f64 { + let d = narration_secs + NARRATION_TAIL_SECONDS; + if d.is_finite() && d > MIN_SEGMENT_SECONDS { + d + } else { + MIN_SEGMENT_SECONDS + } +} + +/// Split a beat into per-photo durations. The beat lasts at least its narration +/// (so speech isn't cut) and at least `n × MIN_BURST_PHOTO_SECONDS` (so a fast +/// burst stays legible); the photos share that total evenly. Returns +/// `(total_seconds, per_photo_seconds)`. +pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec) { + let n = n_photos.max(1); + let base = segment_duration(narration_secs); + let min_total = n as f64 * MIN_BURST_PHOTO_SECONDS; + let total = if base > min_total { base } else { min_total }; + let each = total / n as f64; + (total, vec![each; n]) +} + +/// Fade length to use for a beat of `n_photos` (gentle when held, snappy in a +/// burst). +fn fade_for(n_photos: usize) -> f64 { + if n_photos > 1 { + BURST_FADE_SECONDS + } else { + SINGLE_FADE_SECONDS + } +} + +/// Options controlling per-segment rendering. +#[derive(Debug, Clone, Copy)] +pub struct SegmentOpts { + pub width: u32, + pub height: u32, + pub fps: u32, + pub nvenc: bool, +} + +impl Default for SegmentOpts { + fn default() -> Self { + Self { + width: REEL_WIDTH, + height: REEL_HEIGHT, + fps: REEL_FPS, + nvenc: false, + } + } +} + +/// Filter chain for one photo (input `idx`) producing the labelled output +/// `[v{idx}]`. 
Splits the still into a background and foreground: the background +/// is scaled to *cover* the canvas and heavily blurred; the foreground is +/// scaled to *fit* and overlaid centered. This fills the portrait frame for any +/// photo orientation — no black bars, no cropping of the subject — then a fade +/// in/out softens the cut. Intermediate labels are suffixed with `idx` so +/// several chains coexist in one `filter_complex`. +/// +/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a +/// true {fps}-frame timeline; otherwise the fade is sampled at the looped +/// still's coarse cadence and duplicated up, which reads as a steppy dip. +fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String { + let (w, h, fps) = (opts.width, opts.height, opts.fps); + let fade_out_start = (duration - fade).max(0.0); + format!( + "[{idx}:v]split=2[bg{idx}][fg{idx}];\ + [bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\ + crop={w}:{h},boxblur=20:2[bgb{idx}];\ + [fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\ + [bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\ + fps={fps},\ + fade=t=in:st=0:d={fade},\ + fade=t=out:st={fade_out_start:.3}:d={fade},\ + setsar=1,format=yuv420p[v{idx}]" + ) +} + +/// Full `filter_complex` for a beat of `per_photo` durations: one chain per +/// photo, concatenated into `[v]`, with the narration (the last input, index +/// `per_photo.len()`) padded with trailing silence into `[a]`. A single-photo +/// beat degenerates to one chain + `concat=n=1` (a passthrough). 
+pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String { + let n = per_photo.len().max(1); + let fade = fade_for(n); + let chains: Vec = per_photo + .iter() + .enumerate() + .map(|(i, &d)| photo_filter_chain(i, opts, d, fade)) + .collect(); + let concat_inputs: String = (0..n).map(|i| format!("[v{i}]")).collect(); + format!( + "{chains};{concat_inputs}concat=n={n}:v=1:a=0[v];[{n}:a]apad[a]", + chains = chains.join(";") + ) +} + +fn video_encoder_args(nvenc: bool) -> Vec { + if nvenc { + // p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path. + [ + "-c:v", + "h264_nvenc", + "-preset", + "p4", + "-cq", + "23", + "-pix_fmt", + "yuv420p", + ] + } else { + [ + "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p", + ] + } + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Build the ffmpeg args that render one beat: each photo looped for its slice +/// of the beat (filled to the portrait canvas with a blurred backdrop), the +/// slices concatenated, and the single narration muxed over the whole thing. +/// `total` bounds the output (and the apad'd audio) to the beat length. +pub fn build_beat_args( + image_paths: &[String], + audio_path: &str, + out_path: &str, + per_photo: &[f64], + total: f64, + opts: &SegmentOpts, +) -> Vec { + let fps = opts.fps.to_string(); + let mut args: Vec = vec!["-y".into()]; + if opts.nvenc { + args.extend(["-hwaccel".into(), "cuda".into()]); + } + // One looped-still input per photo, each bounded to its slice by an input + // `-t`; reading at the target `-framerate` gives the fades real frames to + // ramp across. 
+ for (path, &dur) in image_paths.iter().zip(per_photo.iter()) { + args.extend([ + "-framerate".into(), + fps.clone(), + "-loop".into(), + "1".into(), + "-t".into(), + format!("{dur:.3}"), + "-i".into(), + path.clone(), + ]); + } + args.extend([ + "-i".into(), + audio_path.into(), + "-filter_complex".into(), + beat_filtergraph(opts, per_photo), + "-map".into(), + "[v]".into(), + "-map".into(), + "[a]".into(), + "-t".into(), + format!("{total:.3}"), + // Force constant frame rate so the beat (and the concatenated reel) + // plays at a steady {fps} rather than a variable cadence. + "-r".into(), + fps, + ]); + args.extend(video_encoder_args(opts.nvenc)); + args.extend( + ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"] + .iter() + .map(|s| s.to_string()), + ); + args.push(out_path.into()); + args +} + +/// Build the concat-demuxer args that join rendered segments losslessly. +/// `+faststart` moves the moov atom up front so the reel streams immediately +/// on the mobile client. The output muxer is forced with `-f mp4` because we +/// write to a `.tmp` path (atomic publish) whose extension ffmpeg can't map to +/// a format on its own. +pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec { + [ + "-y", + "-f", + "concat", + "-safe", + "0", + "-i", + list_path, + "-c", + "copy", + "-movflags", + "+faststart", + "-f", + "mp4", + out_path, + ] + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Render the concat list file body. Each line points the demuxer at one +/// segment; single quotes in paths are escaped per ffmpeg's concat syntax. 
+pub fn build_concat_list(segment_paths: &[String]) -> String { + let mut out = String::new(); + for p in segment_paths { + let escaped = p.replace('\'', r"'\''"); + out.push_str(&format!("file '{escaped}'\n")); + } + out +} + +async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> { + let output = Command::new("ffmpeg") + .args(args) + .output() + .await + .with_context(|| format!("spawning ffmpeg for {what}"))?; + if !output.status.success() { + bail!( + "ffmpeg {what} failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + Ok(()) +} + +/// Render one beat to `out_path`: its photos shown in sequence (a held shot for +/// one photo, a quick burst for several) under the single narration in +/// `audio_path`, whose measured length sets the beat's pacing. +pub async fn render_beat( + image_paths: &[std::path::PathBuf], + audio_path: &Path, + out_path: &Path, + narration_secs: f64, + opts: &SegmentOpts, +) -> Result<()> { + if image_paths.is_empty() { + bail!("render_beat called with no images"); + } + let (total, per_photo) = beat_durations(narration_secs, image_paths.len()); + let paths: Vec = image_paths + .iter() + .map(|p| p.to_string_lossy().to_string()) + .collect(); + let args = build_beat_args( + &paths, + &audio_path.to_string_lossy(), + &out_path.to_string_lossy(), + &per_photo, + total, + opts, + ); + run_ffmpeg(&args, "beat render").await +} + +// --- Video-clip beats -------------------------------------------------------- + +/// Decide how long the clip plays and how long the whole beat lasts, from the +/// source video's length (if known) and the narration length. Returns +/// `(clip_dur, beat_total)`. +/// +/// The beat always lasts long enough for the full narration. The clip plays for +/// as much of that beat as its footage covers — so the motion fills the screen +/// time rather than stopping early. 
We only freeze the last frame (the +/// `beat_total - clip_dur` gap, handled by `tpad` in [`clip_video_filter`]) when +/// the source video is genuinely shorter than the narration. Capping clip +/// playback at a fixed length while the narration ran longer was what produced +/// the second-or-two freeze that read as a glitchy pause before the transition. +pub fn clip_beat_plan(source_dur: Option, narration_secs: f64) -> (f64, f64) { + let want = segment_duration(narration_secs); + let clip_dur = match source_dur { + // Known length: play up to the whole beat, but never past the source. + Some(d) if d > 0.0 => d.min(want), + // Unknown length: read up to the fallback cap; tpad covers any shortfall. + _ => want.min(CLIP_SECONDS), + }; + (clip_dur, want.max(clip_dur)) +} + +/// Video chain for a clip beat: fill the clip to the portrait canvas (blurred +/// backdrop, same look as photos), normalize fps, hold the last frame if the +/// narration outlasts the clip (`tpad`), then fade. Produces `[v]`. +fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String { + let (w, h, fps) = (opts.width, opts.height, opts.fps); + let fade = SINGLE_FADE_SECONDS; + let hold = (beat_total - clip_dur).max(0.0); + let fade_out_start = (beat_total - fade).max(0.0); + // Freeze the final frame to cover narration that runs past the clip. + let tpad = if hold > 0.05 { + format!(",tpad=stop_mode=clone:stop_duration={hold:.3}") + } else { + String::new() + }; + format!( + "[0:v]split=2[bg][fg];\ + [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\ + crop={w}:{h},boxblur=20:2[bgb];\ + [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\ + [bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\ + fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\ + setsar=1,format=yuv420p[v]" + ) +} + +/// Audio chain for a clip beat. With a clip audio track, duck it under the +/// narration and mix; without one, just the narration. Produces `[a]`. 
+fn clip_audio_filter(has_audio: bool) -> String { + if has_audio { + format!( + "[0:a]volume={CLIP_DUCK_VOLUME}[duck];[1:a]apad[narr];\ + [duck][narr]amix=inputs=2:duration=longest:normalize=0[a]" + ) + } else { + "[1:a]apad[a]".to_string() + } +} + +/// Full `filter_complex` for a clip beat (input 0 = clip, input 1 = narration). +pub fn clip_beat_filtergraph( + opts: &SegmentOpts, + clip_dur: f64, + beat_total: f64, + has_audio: bool, +) -> String { + format!( + "{};{}", + clip_video_filter(opts, clip_dur, beat_total), + clip_audio_filter(has_audio) + ) +} + +/// Build the ffmpeg args for a clip beat: the first `clip_dur` seconds of the +/// source video, filled to the portrait canvas with its live audio ducked under +/// the narration, bounded to `beat_total`. +pub fn build_clip_beat_args( + clip_path: &str, + audio_path: &str, + out_path: &str, + clip_dur: f64, + beat_total: f64, + has_audio: bool, + opts: &SegmentOpts, +) -> Vec { + let fps = opts.fps.to_string(); + let mut args: Vec = vec!["-y".into()]; + if opts.nvenc { + args.extend(["-hwaccel".into(), "cuda".into()]); + } + args.extend([ + // Input `-t` limits the clip to its window; audio has none (apad fills). + "-t".into(), + format!("{clip_dur:.3}"), + "-i".into(), + clip_path.into(), + "-i".into(), + audio_path.into(), + "-filter_complex".into(), + clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio), + "-map".into(), + "[v]".into(), + "-map".into(), + "[a]".into(), + "-t".into(), + format!("{beat_total:.3}"), + "-r".into(), + fps, + ]); + args.extend(video_encoder_args(opts.nvenc)); + args.extend( + ["-c:a", "aac", "-b:a", "160k", "-ar", "48000"] + .iter() + .map(|s| s.to_string()), + ); + args.push(out_path.into()); + args +} + +/// Whether a media file has at least one audio stream (so a clip beat knows +/// whether to mix in live audio). Defaults to `false` on any probe failure. 
+pub async fn has_audio_stream(path: &str) -> bool { + Command::new("ffprobe") + .args([ + "-v", + "error", + "-select_streams", + "a", + "-show_entries", + "stream=index", + "-of", + "csv=p=0", + path, + ]) + .output() + .await + .map(|out| !out.stdout.is_empty()) + .unwrap_or(false) +} + +/// Render one clip beat: a section of `clip_path` (capped at [`CLIP_SECONDS`], +/// and to the source length) under the narration in `audio_path`. The beat +/// lasts at least the narration, freezing the clip's last frame if needed. +pub async fn render_clip_beat( + clip_path: &Path, + audio_path: &Path, + out_path: &Path, + narration_secs: f64, + opts: &SegmentOpts, +) -> Result<()> { + let clip_str = clip_path.to_string_lossy().to_string(); + // Play the clip for as much of the beat as its footage covers; freeze only + // when the source is genuinely shorter than the narration (see clip_beat_plan). + let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str) + .await + .ok() + .flatten(); + let (clip_dur, beat_total) = clip_beat_plan(source_dur, narration_secs); + let has_audio = has_audio_stream(&clip_str).await; + + let args = build_clip_beat_args( + &clip_str, + &audio_path.to_string_lossy(), + &out_path.to_string_lossy(), + clip_dur, + beat_total, + has_audio, + opts, + ); + run_ffmpeg(&args, "clip beat render").await +} + +/// Join rendered segments into the final reel. Writes the concat list into the +/// same directory as the output so relative paths and cleanup stay local. 
+pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> { + let list_path = out_path.with_extension("concat.txt"); + let body = build_concat_list(segment_paths); + tokio::fs::write(&list_path, body) + .await + .context("writing concat list")?; + let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy()); + let result = run_ffmpeg(&args, "concat").await; + let _ = tokio::fs::remove_file(&list_path).await; + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn segment_duration_floors_short_lines() { + // A one-word narration still lingers at the floor. + assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS); + assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS); + } + + #[test] + fn segment_duration_covers_full_narration_plus_tail() { + // No ceiling: a long line gets its full length so speech isn't cut. + assert!((segment_duration(5.0) - 5.6).abs() < 1e-9); + assert!((segment_duration(20.0) - 20.6).abs() < 1e-9); + } + + #[test] + fn segment_duration_rejects_nonfinite() { + assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS); + assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS); + } + + #[test] + fn beat_durations_single_photo_matches_base() { + let (total, per) = beat_durations(4.0, 1); + assert!((total - 4.6).abs() < 1e-9); // narration + tail + assert_eq!(per.len(), 1); + assert!((per[0] - 4.6).abs() < 1e-9); + } + + #[test] + fn beat_durations_burst_splits_evenly() { + // 5 photos, narration 4.6s base → ~0.92s each (above the 0.6 floor). 
+ let (total, per) = beat_durations(4.0, 5); + assert!((total - 4.6).abs() < 1e-9); + assert_eq!(per.len(), 5); + assert!((per.iter().sum::() - total).abs() < 1e-9); + assert!(per.iter().all(|&d| d >= MIN_BURST_PHOTO_SECONDS)); + } + + #[test] + fn beat_durations_stretches_when_narration_too_short_for_burst() { + // Floor narration (2.5s) over 10 photos would be 0.25s each — below the + // legibility floor, so the beat stretches to 10 × 0.6 = 6s. + let (total, per) = beat_durations(0.0, 10); + assert!((total - 6.0).abs() < 1e-9); + assert!(per.iter().all(|&d| (d - 0.6).abs() < 1e-9)); + } + + #[test] + fn beat_filtergraph_single_photo_fills_portrait_and_holds() { + let (_t, per) = beat_durations(4.0, 1); + let g = beat_filtergraph(&SegmentOpts::default(), &per); + assert!(g.contains("[0:v]split=2[bg0][fg0]")); + assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase")); + assert!(g.contains("crop=1080:1920")); + assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease")); + assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); + // Single photo → concat of one, gentle fade, audio is input 1. + assert!(g.contains("concat=n=1:v=1:a=0[v]")); + assert!(g.contains("d=0.35")); // SINGLE_FADE + assert!(g.contains("[1:a]apad[a]")); + } + + #[test] + fn beat_filtergraph_burst_chains_concats_and_snappy_fade() { + let (_t, per) = beat_durations(4.0, 3); + let g = beat_filtergraph(&SegmentOpts::default(), &per); + // One chain per photo with index-suffixed labels. + assert!(g.contains("[0:v]split") && g.contains("[1:v]split") && g.contains("[2:v]split")); + // Concatenated in order, audio is the 4th input (index 3). + assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]")); + assert!(g.contains("[3:a]apad[a]")); + // Burst uses the much snappier fade (vs 0.35 for a held shot). 
+ assert!(g.contains("d=0.12")); + assert!(!g.contains("d=0.35")); + } + + #[test] + fn beat_filtergraph_normalizes_fps_before_fading() { + // fps must precede the fades on every chain (else the dip looks steppy). + let (_t, per) = beat_durations(4.0, 1); + let g = beat_filtergraph(&SegmentOpts::default(), &per); + let fps_at = g.find("fps=30").expect("fps in graph"); + let fade_at = g.find("fade=t=in").expect("fade in graph"); + assert!(fps_at < fade_at); + } + + #[test] + fn beat_args_one_input_per_photo_plus_audio_bound_by_total() { + let (total, per) = beat_durations(4.0, 2); + let args = build_beat_args( + &["/a.jpg".into(), "/b.jpg".into()], + "/n.wav", + "/out.mp4", + &per, + total, + &SegmentOpts::default(), + ); + let joined = args.join(" "); + // A looped-still input per photo, each with its slice -t, then the audio. + assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg")); + assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg")); + assert!(joined.contains("-i /n.wav")); + // Output bounded to the beat total and forced CFR. + assert!(joined.contains("-t 4.600")); + assert!(joined.contains("-r 30")); + assert!(joined.ends_with("/out.mp4")); + } + + #[test] + fn beat_args_use_nvenc_and_cuda_when_enabled() { + let opts = SegmentOpts { + nvenc: true, + ..SegmentOpts::default() + }; + let (total, per) = beat_durations(3.0, 1); + let args = build_beat_args( + &["/img.jpg".into()], + "/a.wav", + "/out.mp4", + &per, + total, + &opts, + ); + let joined = args.join(" "); + assert!(joined.contains("-hwaccel cuda")); + assert!(joined.contains("h264_nvenc")); + assert!(!joined.contains("libx264")); + } + + #[test] + fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() { + // 5s clip, 7s beat → 2s freeze of the last frame, ducked-audio mix. 
+ let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true); + assert!(g.contains("tpad=stop_mode=clone:stop_duration=2.000")); + assert!(g.contains("volume=0.35")); + assert!(g.contains("amix=inputs=2")); + assert!(g.contains("[1:a]apad[narr]")); + // Fill applied to the clip too. + assert!(g.contains("boxblur")); + assert!(g.contains("overlay=(W-w)/2:(H-h)/2")); + } + + #[test] + fn clip_beat_plan_plays_clip_through_the_whole_beat_when_source_is_long() { + // 30s source, 4s narration → beat is narration+tail (4.6), and the clip + // plays that whole 4.6s of motion: no freeze (clip_dur == beat_total). + let (clip_dur, beat_total) = clip_beat_plan(Some(30.0), 4.0); + assert!((beat_total - 4.6).abs() < 1e-9); + assert!((clip_dur - 4.6).abs() < 1e-9); + assert!((beat_total - clip_dur).abs() < 1e-9); // no hold + } + + #[test] + fn clip_beat_plan_freezes_only_when_source_shorter_than_narration() { + // 2s source under a 4s narration → play all 2s, freeze the remainder. + let (clip_dur, beat_total) = clip_beat_plan(Some(2.0), 4.0); + assert!((clip_dur - 2.0).abs() < 1e-9); + assert!((beat_total - 4.6).abs() < 1e-9); + assert!(beat_total - clip_dur > 2.0); // unavoidable freeze gap + } + + #[test] + fn clip_beat_plan_caps_read_when_source_length_unknown() { + // Probe failed: read up to the fallback cap, beat still covers narration. + let (clip_dur, beat_total) = clip_beat_plan(None, 8.0); + assert!((clip_dur - CLIP_SECONDS).abs() < 1e-9); + assert!((beat_total - 8.6).abs() < 1e-9); + } + + #[test] + fn clip_filter_no_tpad_when_clip_covers_the_beat() { + // Clip at least as long as the beat → no freeze. 
+ let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true); + assert!(!g.contains("tpad")); + } + + #[test] + fn clip_filter_narration_only_without_clip_audio() { + let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false); + assert!(!g.contains("amix")); + assert!(!g.contains("volume=")); + assert!(g.contains("[1:a]apad[a]")); + } + + #[test] + fn clip_beat_args_bound_clip_and_output() { + let args = build_clip_beat_args( + "/v.mp4", + "/n.wav", + "/out.mp4", + 5.0, + 6.6, + true, + &SegmentOpts::default(), + ); + let joined = args.join(" "); + // Input -t bounds the clip read; output -t bounds the beat. + assert!(joined.contains("-t 5.000 -i /v.mp4")); + assert!(joined.contains("-i /n.wav")); + assert!(joined.contains("-t 6.600")); + assert!(joined.contains("-r 30")); + assert!(joined.ends_with("/out.mp4")); + } + + #[test] + fn concat_args_stream_copy_with_faststart_and_forced_muxer() { + // Output goes to a .tmp path, so the muxer must be forced — ffmpeg + // can't infer mp4 from the extension (the bug this guards against). + let args = build_concat_args("/tmp/list.txt", "/out.mp4.tmp"); + let joined = args.join(" "); + assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt")); + assert!(joined.contains("-c copy")); + assert!(joined.contains("+faststart")); + assert!(joined.contains("-f mp4")); + // The forced muxer must come before the output path. + let f_mp4 = args.windows(2).position(|w| w == ["-f", "mp4"]).unwrap(); + let out = args.iter().position(|a| a == "/out.mp4.tmp").unwrap(); + assert!(f_mp4 < out); + } + + #[test] + fn concat_list_escapes_single_quotes() { + let body = build_concat_list(&[ + "/tmp/seg_000.mp4".into(), + "/tmp/own's dir/seg_001.mp4".into(), + ]); + assert!(body.contains("file '/tmp/seg_000.mp4'\n")); + // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax. 
+ assert!(body.contains(r"own'\''s")); + } +} diff --git a/src/reels/script.rs b/src/reels/script.rs new file mode 100644 index 0000000..858efd1 --- /dev/null +++ b/src/reels/script.rs @@ -0,0 +1,490 @@ +//! Narration scripting for memory reels. +//! +//! One LLM call turns the planned beats (each carrying its date and, where +//! available, its cached insight) into a short first-person narration line per +//! beat plus a title for the reel. A beat may show several photos in a quick +//! burst, so a line narrates the *moment*, not a single frame. We reuse the +//! cached insight summary as the richest signal rather than re-running vision +//! at reel time — that keeps reel generation off the GPU's vision slot. +//! +//! The prompt builder and response parser are pure so the contract is +//! unit-testable; `generate_script` wires them to the LLM client. +//! +//! The agentic scripter (pre-generation) resolves the backend through the +//! InsightGenerator, builds a read-only tool set, and runs a tool loop to +//! ground the narration in retrieved context before asking for the final JSON. + +use anyhow::{Context, Result}; +use std::sync::Arc; + +use super::{PlannedBeat, ReelMeta}; +use crate::ai::backend::{BackendKind, SamplingOverrides}; +use crate::ai::insight_generator::InsightGenerator; +use crate::ai::llamacpp::LlamaCppClient; +use crate::ai::llm_client::{LlmClient, Tool}; +use crate::ai::ollama::ChatMessage; + +/// The narration for a whole reel: a title and one line per beat, in order. +#[derive(Debug, Clone, PartialEq)] +pub struct ReelScript { + pub title: String, + pub lines: Vec, +} + +const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \ +slideshow of someone's own photos set to a spoken voiceover. Write warm, \ +specific, first-person narration as if the person is gently looking back on \ +their own memories. 
Each line plays over one moment, which may be a quick burst \ +of several photos, so narrate the moment as a whole rather than a single frame. \ +Be concrete and grounded in the details given; never invent names, places, or \ +events that aren't supported. Keep each line to one or two short sentences that \ +can be read aloud in a few seconds. Avoid generic filler like \"what a \ +wonderful day\" — if you have little to go on, simply describe the moment \ +plainly."; + +/// Agentic scripter system prompt: richer version that tells the model it may +/// call read-only tools to ground each line. +const AGENTIC_SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \ +slideshow of someone's own photos set to a spoken voiceover. Write warm, \ +specific, first-person narration as if the person is gently looking back on \ +their own memories. Each line plays over one moment, which may be a quick burst \ +of several photos, so narrate the moment as a whole rather than a single frame. \ +Be concrete and grounded in the details given; never invent names, places, or \ +events that aren't supported. Keep each line to one or two short sentences that \ +can be read aloud in a few seconds. Avoid generic filler like \"what a \ +wonderful day\" — if you have little to go on, simply describe the moment \ +plainly.\n\nYou may call read-only tools (search_rag, search_messages, \ +get_sms_messages, get_calendar_events, get_location_history, reverse_geocode, \ +get_personal_place_at, recall_entities, get_current_datetime) to ground each \ +line in real context — e.g. reverse_geocode a moment's GPS to name the place, \ +or check the calendar/messages around its date. Never invent details. Return \ +ONLY the JSON object, no prose or code fences."; + +/// Maximum agentic tool iterations for pre-generation. Tunable via +/// `REEL_PREGEN_MAX_TOOL_ITERS` (default 8). 
/// An unset, unparsable, or zero value of `REEL_PREGEN_MAX_TOOL_ITERS` falls
/// back to the default, so the tool loop is never configured with zero turns.
fn reel_pregen_max_tool_iters() -> usize {
    // Iteration cap used when the environment does not supply a usable one.
    const DEFAULT_MAX_TOOL_ITERS: usize = 8;

    std::env::var("REEL_PREGEN_MAX_TOOL_ITERS")
        .ok()
        // Tolerate stray whitespace around the configured number.
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        // Zero is treated the same as "not set".
        .filter(|&iters| iters > 0)
        .unwrap_or(DEFAULT_MAX_TOOL_ITERS)
}
The +/// system prompt tells the model it may call read-only tools to ground each +/// line. The user message uses the same per-beat enumeration as +/// `build_script_messages` plus a GPS line per beat when available. +pub fn build_agentic_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> Vec { + let mut user = String::new(); + user.push_str(&format!( + "This reel has {} moments surfaced as memories {}.\n\n", + beats.len(), + meta.span_phrase() + )); + if !meta.years.is_empty() { + let years: Vec = meta.years.iter().map(|y| y.to_string()).collect(); + user.push_str(&format!("They span the years: {}.\n\n", years.join(", "))); + } + user.push_str("Moments, in the order they will appear:\n"); + for (i, beat) in beats.iter().enumerate() { + user.push_str(&format!("\n[{}]", i + 1)); + if let Some(date) = beat.date_label() { + user.push_str(&format!(" {date}")); + } + if beat.is_clip() { + user.push_str(" (a video clip)"); + } else if beat.media.len() > 1 { + user.push_str(&format!(" (a burst of {} photos)", beat.media.len())); + } + if let Some((lat, lon)) = beat.gps { + user.push_str(&format!("\n GPS: {:.4}, {:.4}", lat, lon)); + } + user.push('\n'); + match (&beat.insight_title, &beat.insight_summary) { + (Some(t), Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {t} — {s}\n")); + } + (Some(t), _) => user.push_str(&format!(" Known context: {t}\n")), + (_, Some(s)) if !s.trim().is_empty() => { + user.push_str(&format!(" Known context: {s}\n")); + } + _ => user.push_str(" (no extra context — narrate plainly from the date)\n"), + } + } + user.push_str(&format!( + "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\ + {{\"title\": \"\", \"segments\": [\"\", \ + \"\", ... 
]}}\n\ + The \"segments\" array MUST have exactly {} items, one per moment in order.", + beats.len() + )); + + vec![ + ChatMessage::system(AGENTIC_SYSTEM_PROMPT.to_string()), + ChatMessage::user(user), + ] +} + +/// Parse the model's response into a script with exactly `n` lines. Tolerant of +/// code fences and surrounding prose, and of both `segments: [".."]` and +/// `segments: [{"narration": ".."}]` shapes. Missing/extra lines are padded or +/// truncated so the caller always gets `n` aligned to the segments. +pub fn parse_script_response(raw: &str, n: usize) -> ReelScript { + let fallback_line = "A moment worth remembering."; + let value = extract_json_object(raw); + + let title = value + .as_ref() + .and_then(|v| v.get("title")) + .and_then(|t| t.as_str()) + .map(clean_text) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "Memories".to_string()); + + let mut lines: Vec = value + .as_ref() + .and_then(|v| v.get("segments")) + .and_then(|s| s.as_array()) + .map(|arr| { + arr.iter() + .map(|item| { + let text = item + .as_str() + .map(|s| s.to_string()) + .or_else(|| { + item.get("narration") + .and_then(|n| n.as_str()) + .map(|s| s.to_string()) + }) + .unwrap_or_default(); + clean_text(&text) + }) + .collect() + }) + .unwrap_or_default(); + + // Align to exactly n: drop extras, pad shortfalls with a neutral line so + // every photo still gets spoken audio. + lines.truncate(n); + while lines.len() < n { + lines.push(fallback_line.to_string()); + } + for line in lines.iter_mut() { + if line.is_empty() { + *line = fallback_line.to_string(); + } + } + + ReelScript { title, lines } +} + +/// Pull the first balanced top-level JSON object out of a possibly-noisy model +/// response (code fences, leading prose). Returns None if nothing parses. +fn extract_json_object(raw: &str) -> Option { + // Fast path: the whole thing is valid JSON. + if let Ok(v) = serde_json::from_str::(raw.trim()) { + return Some(v); + } + // Otherwise scan for the first '{' ... 
/// Normalize one model-written line: trim it, peel off any double quotes
/// wrapped around the whole line, and collapse every internal run of
/// whitespace (spaces, tabs, newlines) into a single space.
///
/// Quotes in the middle of the text are kept. Returns an empty string when
/// nothing but whitespace and wrapping quotes was supplied.
fn clean_text(s: &str) -> String {
    let unquoted = s.trim().trim_matches('"');
    // `split_whitespace` already ignores leading/trailing whitespace, so no
    // further trim is needed after removing the quotes.
    unquoted.split_whitespace().collect::<Vec<&str>>().join(" ")
}
+ let backend = generator + .resolve_backend( + BackendKind::Local, + &SamplingOverrides { + model: None, + num_ctx: None, + temperature: None, + top_p: None, + top_k: None, + min_p: None, + }, + ) + .await + .context("resolving backend for agentic script")?; + + // 2. Build the read-only tool set. Start from the persona gate (no + // persona context, so corrections are closed), force has_vision=false, + // then filter out write tools. + let gate = generator.current_gate_opts_for_persona(false, None); + let all_tools = InsightGenerator::build_tool_definitions(gate); + // Whole-reel calls have no single photo and no authenticated user, so the + // loop runs execute_tool with empty file/image context and user_id=0. Only + // tools that work without that context are useful here — photo/user-bound + // tools (get_file_tags, get_faces_in_photo, recall_facts_for_photo, + // recall_facts_for_entity) would just no-op or error, burning iterations, + // so they're excluded. + let read_only_names: std::collections::HashSet<&str> = [ + "search_rag", + "search_messages", + "get_sms_messages", + "get_calendar_events", + "get_location_history", + "reverse_geocode", + "get_personal_place_at", + "recall_entities", + "get_current_datetime", + ] + .into_iter() + .collect(); + let tools: Vec = all_tools + .into_iter() + .filter(|t| read_only_names.contains(t.function.name.as_str())) + .collect(); + + // 3. Build the agentic prompt messages. + let messages = build_agentic_script_messages(meta, beats); + + // 4. Run the tool loop. + let max_iter = reel_pregen_max_tool_iters(); + let raw = generator + .run_readonly_tool_loop(&backend, messages, tools, max_iter) + .await + .context("agentic tool loop failed")?; + + // 5. Strip any think-blocks the model may have emitted, then parse. 
+ let raw = crate::ai::llm_client::strip_think_blocks(&raw); + Ok(parse_script_response(&raw, beats.len())) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::memories::MemoriesSpan; + + fn meta() -> ReelMeta { + ReelMeta { + span: MemoriesSpan::Day, + years: vec![2019, 2021], + } + } + + fn planned(n: usize) -> Vec { + (0..n) + .map(|i| PlannedBeat { + media: vec![super::super::SegmentMedia::Photo { + rel_path: format!("p{i}.jpg"), + library_id: 1, + }], + date: Some(1_560_000_000 + i as i64 * 86_400), + insight_title: None, + insight_summary: None, + gps: None, + }) + .collect() + } + + #[test] + fn prompt_states_exact_moment_count_and_span() { + let (sys, user) = build_script_messages(&meta(), &planned(3)); + assert!(sys.contains("memory reel")); + assert!(user.contains("3 moments")); + assert!(user.contains("on this day")); + assert!(user.contains("exactly 3 items")); + // Each moment gets an indexed entry. + assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]")); + } + + #[test] + fn prompt_notes_burst_photo_count() { + let mut p = planned(1); + p[0].media = vec![ + super::super::SegmentMedia::Photo { + rel_path: "a.jpg".into(), + library_id: 1, + }, + super::super::SegmentMedia::Photo { + rel_path: "b.jpg".into(), + library_id: 1, + }, + super::super::SegmentMedia::Photo { + rel_path: "c.jpg".into(), + library_id: 1, + }, + ]; + let (_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("a burst of 3 photos")); + } + + #[test] + fn prompt_marks_clip_beats() { + let mut p = planned(1); + p[0].media = vec![super::super::SegmentMedia::Clip { + rel_path: "v.mp4".into(), + library_id: 1, + }]; + let (_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("a video clip")); + } + + #[test] + fn prompt_includes_insight_context_when_present() { + let mut p = planned(1); + p[0].insight_title = Some("Lake house weekend".into()); + p[0].insight_summary = Some("Swimming with the dogs.".into()); + let 
(_sys, user) = build_script_messages(&meta(), &p); + assert!(user.contains("Lake house weekend — Swimming with the dogs.")); + } + + #[test] + fn parse_plain_json_object() { + let raw = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#; + let script = parse_script_response(raw, 2); + assert_eq!(script.title, "Summer Days"); + assert_eq!(script.lines, vec!["First line.", "Second line."]); + } + + #[test] + fn parse_tolerates_code_fences_and_prose() { + let raw = "Sure! Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!"; + let script = parse_script_response(raw, 2); + assert_eq!(script.title, "Trip"); + assert_eq!(script.lines, vec!["A.", "B."]); + } + + #[test] + fn parse_accepts_object_segment_shape() { + let raw = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#; + let script = parse_script_response(raw, 2); + assert_eq!(script.lines, vec!["One.", "Two."]); + } + + #[test] + fn parse_pads_short_and_truncates_long_to_n() { + // Model returned 1 line but we have 3 segments → pad with neutral lines. + let short = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3); + assert_eq!(short.lines.len(), 3); + assert_eq!(short.lines[0], "Only one."); + assert!(!short.lines[1].is_empty()); + + // Model returned 3 but we have 2 → truncate. 
+ let long = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2); + assert_eq!(long.lines, vec!["a", "b"]); + } + + #[test] + fn parse_falls_back_on_garbage() { + let script = parse_script_response("the model said no", 2); + assert_eq!(script.title, "Memories"); + assert_eq!(script.lines.len(), 2); + assert!(script.lines.iter().all(|l| !l.is_empty())); + } + + #[test] + fn parse_blank_line_replaced_with_fallback() { + let script = parse_script_response(r#"{"title":"T","segments":[" ","Real."]}"#, 2); + assert!(!script.lines[0].is_empty()); + assert_eq!(script.lines[1], "Real."); + } +} diff --git a/src/reels/selector.rs b/src/reels/selector.rs new file mode 100644 index 0000000..a02cbb8 --- /dev/null +++ b/src/reels/selector.rs @@ -0,0 +1,560 @@ +//! Reel selectors: resolve "what goes in the reel" into an ordered media set +//! plus the metadata the scripter needs. The renderer and scripter are +//! selector-agnostic, so adding tag- or date-range-based reels later means +//! adding a variant here, not touching the pipeline. +//! +//! Resolution is split in two so the handler can compute a cache key (and +//! short-circuit on a cache hit) without the per-photo insight lookups: +//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and +//! runs in the background job. + +use std::path::Path; +use std::sync::Mutex; + +use chrono::{DateTime, Datelike, FixedOffset}; + +use super::{PlannedBeat, ReelMeta, SegmentMedia}; +use crate::database::{ExifDao, InsightDao}; +use crate::file_types::{is_image_file, is_video_file}; +use crate::memories::{self, MemoriesSpan}; +use crate::state::AppState; + +/// Default and hard caps on how many photos a reel covers. The default is an +/// upper bound on the request; the effective count is usually smaller, set by +/// the duration budget (see [`budget_segments`]). The hard cap bounds work per +/// reel regardless. 
pub const DEFAULT_MAX_SEGMENTS: usize = 40; // default upper bound on a request
pub const HARD_MAX_SEGMENTS: usize = 40; // ceiling applied to every reel

/// Target reel length. Week and especially month spans can surface hundreds of
/// photos; at a few seconds of narration each, a naive reel runs minutes. We
/// cap the segment count to keep the reel near this length. Tunable via
/// `REEL_TARGET_SECONDS`.
const DEFAULT_TARGET_REEL_SECONDS: f64 = 90.0;

/// Rough average wall-time per photo segment (a short narration line + the
/// silent tail). Only used to turn the duration target into a segment count;
/// the real per-segment time is the measured narration length.
const EST_SECONDS_PER_SEGMENT: f64 = 5.0;

/// Time gap that separates one "event/moment" from the next when clustering a
/// span's photos. Photos within a few hours are treated as the same occasion
/// (and across years/days the gaps are far larger, so each instance clusters
/// on its own). 4 hours splits e.g. a morning hike from an evening dinner.
const EVENT_GAP_SECONDS: i64 = 4 * 3600;

/// Target reel length in seconds: `REEL_TARGET_SECONDS` when it parses to a
/// finite, positive number, otherwise `DEFAULT_TARGET_REEL_SECONDS`.
fn target_reel_seconds() -> f64 {
    std::env::var("REEL_TARGET_SECONDS")
        .ok()
        .and_then(|raw| raw.trim().parse::<f64>().ok())
        // "nan", "inf", zero and negatives all parse as f64 but make no sense
        // as a duration, so they fall through to the default.
        .filter(|secs| secs.is_finite() && *secs > 0.0)
        .unwrap_or(DEFAULT_TARGET_REEL_SECONDS)
}

/// How many photo segments fit the duration budget, bounded by the request's
/// max and the hard cap. This is what keeps week/month reels from running long.
///
/// The result is always in `1..=HARD_MAX_SEGMENTS`, even for a request of 0 or
/// a target shorter than one estimated segment.
pub fn budget_segments(requested_max: usize) -> usize {
    let by_budget = (target_reel_seconds() / EST_SECONDS_PER_SEGMENT).floor() as usize;
    by_budget.min(requested_max).clamp(1, HARD_MAX_SEGMENTS)
}
/// Pick at most `max` items spread evenly across the input. Returns the input
/// unchanged (cloned) when it already fits.
///
/// With `max >= 2` the first and last items are always kept; with `max == 1`
/// only the first item is returned, and with `max == 0` nothing is.
pub fn sample_evenly<T: Clone>(items: &[T], max: usize) -> Vec<T> {
    if max == 0 {
        return Vec::new();
    }
    if items.len() <= max {
        return items.to_vec();
    }
    // From here on items.len() > max >= 1, so the slice is non-empty.
    if max == 1 {
        return vec![items[0].clone()];
    }
    let last = items.len() - 1;
    let steps = max - 1;
    (0..max)
        .map(|i| {
            // Spread indices 0..=last across `max` picks, endpoints included;
            // adding steps / 2 rounds to the nearest index instead of flooring.
            let idx = (i * last + steps / 2) / steps;
            items[idx.min(last)].clone()
        })
        .collect()
}
pub const MAX_BURST_PHOTOS: usize = 10; // upper bound on photos flashed in one beat

/// Merge a list of (time-ordered) event clusters into `n` contiguous groups,
/// so a span with more events than the beat budget still covers the whole
/// timeline — adjacent events fold together into one beat rather than getting
/// dropped.
///
/// Intended for `1 <= n <= clusters.len()`. With `n == 0` the result is empty;
/// with `n` larger than the cluster count the trailing groups come back empty.
/// Generic over the item type because items are only moved, never inspected.
fn partition_into_groups<T>(clusters: Vec<Vec<T>>, n: usize) -> Vec<Vec<T>> {
    let total = clusters.len();
    let mut remaining = clusters.into_iter();
    (0..n)
        .map(|group| {
            // Even contiguous split of `total` clusters into `n` groups.
            let start = group * total / n;
            let end = (group + 1) * total / n;
            let take = end.saturating_sub(start).max(1);
            // Consume the next `take` clusters and concatenate their items.
            remaining.by_ref().take(take).flatten().collect()
        })
        .collect()
}
/// Split the beat budget between photo beats and video-clip beats, returned as
/// `(photo_beats, clip_beats)`.
///
/// Clips are individually valuable (motion + live audio) so they get up to
/// half the budget (at least one if any exist); photos take the rest. With
/// only one kind present, it gets the whole budget (clips are additionally
/// capped at the number of videos). A zero budget yields `(0, 0)`, so the two
/// parts never add up to more than `n_beats`.
fn split_beat_budget(n_photos: usize, n_videos: usize, n_beats: usize) -> (usize, usize) {
    // Without this guard the "at least one clip" rule below would hand out a
    // clip beat even when there is no budget at all.
    if n_beats == 0 {
        return (0, 0);
    }
    match (n_photos, n_videos) {
        (_, 0) => (n_beats, 0),
        (0, videos) => (0, n_beats.min(videos)),
        (_, videos) => {
            let clip_beats = videos.min((n_beats / 2).max(1));
            (n_beats.saturating_sub(clip_beats), clip_beats)
        }
    }
}
+ for v in sample_evenly(videos, clip_budget) { + beats.push(PlannedBeat { + media: vec![SegmentMedia::Clip { + rel_path: v.path, + library_id: v.library_id, + }], + date: v.created, + insight_title: None, + insight_summary: None, + gps: None, + }); + } + + // Merge photo and clip beats back into chronological order (undated last). + beats.sort_by(|a, b| match (a.date, b.date) { + (Some(x), Some(y)) => x.cmp(&y), + (Some(_), None) => std::cmp::Ordering::Less, + (None, Some(_)) => std::cmp::Ordering::Greater, + (None, None) => std::cmp::Ordering::Equal, + }); + beats +} + +/// Cheap pass: resolve the selector into an ordered list of media (no insight +/// lookups yet) plus reel metadata. `Err` only on an invalid library param. +pub fn resolve( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + selector: &ReelSelector, +) -> Result<(Vec, ReelMeta), String> { + match selector { + ReelSelector::Memories { + span, + tz_offset_minutes, + library, + max_segments, + } => { + let client_tz = FixedOffset::east_opt(tz_offset_minutes * 60); + let items = memories::gather_memory_items( + app_state, + exif_dao, + span_context, + *span, + *tz_offset_minutes, + client_tz, + library.as_deref(), + )?; + + // Split into photos and video clips; anything that's neither is + // dropped. Years span both, computed before the budget narrows it. + let years = distinct_years(&items, client_tz); + let meta = ReelMeta { span: *span, years }; + + let (photos, videos): (Vec<_>, Vec<_>) = items + .into_iter() + .filter(|it| { + is_image_file(Path::new(&it.path)) || is_video_file(Path::new(&it.path)) + }) + .partition(|it| is_image_file(Path::new(&it.path))); + + // The budget caps the number of narrated beats (≈ reel length); + // photo beats then burst through several photos and video beats + // play a short clip, so the reel covers the span without running + // minutes long. 
+ let n_beats = budget_segments(*max_segments); + let beats = form_beats(&photos, &videos, n_beats, MAX_BURST_PHOTOS); + Ok((beats, meta)) + } + } +} + +/// Distinct calendar years represented by the selected media, in the client's +/// timezone, ascending. Used to tell the scripter how far back the reel reaches. +fn distinct_years(items: &[memories::MemoryItem], tz: Option) -> Vec { + let mut years: Vec = items + .iter() + .filter_map(|it| it.created) + .filter_map(|ts| DateTime::from_timestamp(ts, 0)) + .map(|dt| match tz { + Some(off) => dt.with_timezone(&off).year(), + None => dt.year(), + }) + .collect(); + years.sort_unstable(); + years.dedup(); + years +} + +/// Background pass: fill each beat's cached insight (title + summary) and +/// GPS coordinates from its lead photo, where one exists. Best-effort — a +/// missing or errored lookup leaves the fields `None` and the scripter +/// narrates from the date alone. +pub fn enrich( + insight_dao: &Mutex>, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + beats: &mut [PlannedBeat], +) { + let Ok(mut insight_dao) = insight_dao.lock() else { + return; + }; + let Ok(mut exif_dao) = exif_dao.lock() else { + return; + }; + for beat in beats.iter_mut() { + let rel_path = match beat.media.first() { + Some(SegmentMedia::Photo { rel_path, .. } | SegmentMedia::Clip { rel_path, .. }) => { + rel_path.clone() + } + None => continue, + }; + if let Ok(Some(insight)) = insight_dao.get_insight(span_context, &rel_path) { + beat.insight_title = Some(insight.title); + beat.insight_summary = Some(insight.summary); + } + // Enrich GPS from EXIF when the lead media is a photo. + if let Some(SegmentMedia::Photo { .. 
}) = beat.media.first() + && let Ok(Some(exif)) = exif_dao.get_exif(span_context, &rel_path) + && let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude) + { + beat.gps = Some((lat as f64, lon as f64)); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sample_evenly_returns_all_when_under_cap() { + let v = vec![1, 2, 3]; + assert_eq!(sample_evenly(&v, 5), vec![1, 2, 3]); + assert_eq!(sample_evenly(&v, 3), vec![1, 2, 3]); + } + + #[test] + fn sample_evenly_keeps_endpoints_and_spreads() { + let v: Vec = (0..100).collect(); + let picked = sample_evenly(&v, 5); + assert_eq!(picked.len(), 5); + assert_eq!(picked[0], 0); // first kept + assert_eq!(*picked.last().unwrap(), 99); // last kept + // Strictly increasing, no dupes. + assert!(picked.windows(2).all(|w| w[0] < w[1])); + } + + #[test] + fn sample_evenly_handles_one_and_zero() { + let v: Vec = (0..10).collect(); + assert_eq!(sample_evenly(&v, 1), vec![0]); + assert!(sample_evenly(&v, 0).is_empty()); + } + + #[test] + fn descriptor_is_stable_and_distinguishes_inputs() { + let a = ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + let b = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + assert_eq!(a.descriptor(), a.clone().descriptor()); + assert_ne!(a.descriptor(), b.descriptor()); + assert!(a.descriptor().contains("lib=all")); + } + + #[test] + fn distinct_years_dedupes_and_sorts() { + let items = vec![ + memories::MemoryItem { + path: "a".into(), + created: Some(1_560_000_000), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "b".into(), + created: Some(1_560_086_400), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "c".into(), + created: Some(1_623_000_000), // 2021 + modified: None, + library_id: 1, + }, + ]; + assert_eq!(distinct_years(&items, None), vec![2019, 2021]); 
+ } + + // Build an item at a given unix timestamp (seconds) with a chosen extension. + fn item_ext(ts: i64, name: &str, ext: &str) -> memories::MemoryItem { + memories::MemoryItem { + path: format!("{name}.{ext}"), + created: Some(ts), + modified: None, + library_id: 1, + } + } + fn item_at(ts: i64, name: &str) -> memories::MemoryItem { + item_ext(ts, name, "jpg") + } + + #[test] + fn budget_segments_caps_to_duration_target() { + // 90s / 5s ≈ 18, bounded by the request max and hard cap. + assert_eq!(budget_segments(40), 18); + assert_eq!(budget_segments(5), 5); // request asked for fewer + assert_eq!(budget_segments(1000), 18); // hard cap / budget wins + } + + #[test] + fn cluster_by_gap_splits_on_large_jumps() { + // Two photos minutes apart, then one a day later → two events. + let items = vec![ + item_at(1_000_000, "a"), + item_at(1_000_300, "b"), // +5 min → same event + item_at(1_100_000, "c"), // +~27h → new event + ]; + let clusters = cluster_by_gap(&items, EVENT_GAP_SECONDS); + assert_eq!(clusters.len(), 2); + assert_eq!(clusters[0].len(), 2); + assert_eq!(clusters[1].len(), 1); + } + + #[test] + fn photo_beats_one_per_event_when_they_fit() { + // Three well-separated events, budget of 10 → three beats, each holding + // all of its (few) photos. + let items = vec![ + item_at(0, "a"), + item_at(50, "b"), // same event as a + item_at(1_000_000, "c"), + item_at(2_000_000, "d"), + ]; + let beats = form_photo_beats(&items, 10, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 3); + assert_eq!(beats[0].media.len(), 2); // burst of the first event + assert_eq!(beats[1].media.len(), 1); + assert_eq!(beats[2].media.len(), 1); + } + + #[test] + fn photo_beats_merge_events_when_over_budget() { + // Six distinct events but only two beats → adjacent events fold in, and + // every event's photos still appear (capped by the burst max). 
+ let items: Vec = (0..6) + .map(|i| item_at(i as i64 * 1_000_000, &format!("e{i}"))) + .collect(); + let beats = form_photo_beats(&items, 2, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 2); + let shown: usize = beats.iter().map(|b| b.media.len()).sum(); + assert_eq!(shown, 6); // all six moments still shown across two beats + } + + #[test] + fn photo_beats_cap_burst_to_max() { + // One dense event of 30 photos, generous budget → a single beat that + // bursts at most MAX_BURST_PHOTOS, not all 30. + let items: Vec = (0..30) + .map(|i| item_at(i as i64, &format!("p{i}"))) + .collect(); + let beats = form_photo_beats(&items, 18, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 1); + assert_eq!(beats[0].media.len(), MAX_BURST_PHOTOS); + } + + #[test] + fn split_beat_budget_handles_each_mix() { + // Only photos / only videos → that kind gets the whole budget. + assert_eq!(split_beat_budget(10, 0, 18), (18, 0)); + assert_eq!(split_beat_budget(0, 10, 18), (0, 10)); // capped at n_videos + assert_eq!(split_beat_budget(0, 30, 18), (0, 18)); // capped at budget + // Mixed → clips up to half (≥1), photos the rest. + assert_eq!(split_beat_budget(100, 100, 18), (9, 9)); + assert_eq!(split_beat_budget(100, 1, 18), (17, 1)); // few videos + } + + #[test] + fn form_beats_mixes_clip_and_photo_beats_in_time_order() { + let photos = vec![item_at(0, "p0"), item_at(2_000_000, "p1")]; + // A video between the two photo events (in time). + let videos = vec![item_ext(1_000_000, "v0", "mp4")]; + let beats = form_beats(&photos, &videos, 10, MAX_BURST_PHOTOS); + // Two photo events + one clip = three beats, chronological. + assert_eq!(beats.len(), 3); + assert!(!beats[0].is_clip()); // p0 @ t=0 + assert!(beats[1].is_clip()); // v0 @ t=1e6 + assert!(!beats[2].is_clip()); // p1 @ t=2e6 + assert!(matches!(beats[1].media[0], SegmentMedia::Clip { .. 
})); + } + + #[test] + fn form_beats_videos_only_become_clip_beats() { + let videos: Vec = (0..3) + .map(|i| item_ext(i as i64 * 1_000_000, &format!("v{i}"), "mov")) + .collect(); + let beats = form_beats(&[], &videos, 10, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 3); + assert!(beats.iter().all(|b| b.is_clip())); + } +} diff --git a/src/state.rs b/src/state.rs index e678ad1..33e8e3f 100644 --- a/src/state.rs +++ b/src/state.rs @@ -8,9 +8,10 @@ use crate::ai::turn_registry::TurnRegistry; use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient}; use crate::database::{ CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, InsightGenerationJobDao, KnowledgeDao, - LocationHistoryDao, SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, - SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, SqliteKnowledgeDao, - SqliteLocationHistoryDao, SqliteSearchHistoryDao, connect, + LocationHistoryDao, PrecomputedReelDao, SearchHistoryDao, SqliteCalendarEventDao, + SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, + SqliteKnowledgeDao, SqliteLocationHistoryDao, SqlitePrecomputedReelDao, SqliteSearchHistoryDao, + SqliteUserAiPrefsDao, UserAiPrefsDao, connect, }; use crate::database::{PreviewDao, SqlitePreviewDao}; use crate::faces; @@ -53,6 +54,10 @@ pub struct AppState { pub video_path: String, pub gif_path: String, pub preview_clips_path: String, + /// Directory for cached memory-reel MP4s (+ title sidecars). Derived from + /// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips. + /// Created lazily by the reel pipeline on first render. + pub reels_path: String, pub excluded_dirs: Vec, pub ollama: OllamaClient, /// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only @@ -84,6 +89,14 @@ pub struct AppState { pub clip_client: ClipClient, pub insight_job_dao: Arc>>, pub insight_job_handles: Arc>>, + /// Ledger for precomputed memory reels. 
Written by the nightly agentic + /// job (Section D); read by `GET /reels/precomputed` (Section C). + #[allow(dead_code)] + pub precomputed_reel_dao: Arc>>, + /// User AI preferences (voice, timezone, library). Mirrored by the + /// client; read by the nightly pre-generation scheduler. + #[allow(dead_code)] + pub user_ai_prefs_dao: Arc>>, } impl AppState { @@ -97,6 +110,7 @@ impl AppState { self.libraries.iter().find(|l| l.id == id) } + #[allow(dead_code)] pub fn library_by_name(&self, name: &str) -> Option<&Library> { self.libraries.iter().find(|l| l.name == name) } @@ -125,6 +139,8 @@ impl AppState { clip_client: ClipClient, insight_job_dao: Arc>>, insight_job_handles: Arc>>, + precomputed_reel_dao: Arc>>, + user_ai_prefs_dao: Arc>>, ) -> Self { assert!( !libraries_vec.is_empty(), @@ -141,6 +157,19 @@ impl AppState { preview_dao, ); + // Reels cache dir: explicit env, else a `reels` sibling of the preview + // clips dir (a known-writable, test-safe location). Not created here — + // the reel pipeline does `create_dir_all` before its first write, so + // construction (incl. tests) never touches the filesystem. 
+ let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| { + std::path::Path::new(&preview_clips_path) + .parent() + .map(|p| p.join("reels")) + .unwrap_or_else(|| std::path::PathBuf::from("reels")) + .to_string_lossy() + .to_string() + }); + let library_health = libraries::new_health_map(&libraries_vec); let live_libraries = Arc::new(RwLock::new(libraries_vec.clone())); Self { @@ -155,6 +184,7 @@ impl AppState { video_path, gif_path, preview_clips_path, + reels_path, excluded_dirs, ollama, openrouter, @@ -169,6 +199,8 @@ impl AppState { clip_client, insight_job_dao, insight_job_handles, + precomputed_reel_dao, + user_ai_prefs_dao, } } @@ -249,6 +281,14 @@ impl Default for AppState { let insight_job_handles: Arc>> = Arc::new(Mutex::new(HashMap::new())); + // Initialize precomputed reel DAO (nightly pre-generation ledger) + let precomputed_reel_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))); + + // Initialize user AI preferences DAO (Section E) + let user_ai_prefs_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))); + // Load base path and ensure the primary library row reflects it. 
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env"); let mut seed_conn = connect(); @@ -326,6 +366,8 @@ impl Default for AppState { clip_client, insight_job_dao, insight_job_handles, + precomputed_reel_dao, + user_ai_prefs_dao, ) } } @@ -535,6 +577,8 @@ impl AppState { ClipClient::new(None), // disabled in test Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test Arc::new(Mutex::new(HashMap::new())), // placeholder for test + Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))), // placeholder for test + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))), // placeholder for test ) } } diff --git a/src/tags.rs b/src/tags.rs index f3e0135..3dc0859 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -168,7 +168,7 @@ async fn get_tags( // this file, so tags added under one library show up under the // others when they hold the same file. Falls back to direct rel_path // match when the file hasn't been hashed yet. - let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, request.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); diff --git a/src/video/ffmpeg.rs b/src/video/ffmpeg.rs index d385cac..019bd86 100644 --- a/src/video/ffmpeg.rs +++ b/src/video/ffmpeg.rs @@ -231,7 +231,7 @@ impl Ffmpeg { /// a hard failure — previously the `parse::` on empty stdout produced /// "cannot parse float from empty string" and poisoned the preview-clip row /// with status=failed, which the watcher would re-queue every full scan. -async fn get_duration_seconds(input_file: &str) -> Result> { +pub async fn get_duration_seconds(input_file: &str) -> Result> { if let Some(d) = probe_duration(input_file, "format=duration").await? { return Ok(Some(d)); }