diff --git a/.env.example b/.env.example index a45fdd5..64c31d3 100644 --- a/.env.example +++ b/.env.example @@ -80,6 +80,16 @@ AGENTIC_CHAT_MAX_ITERATIONS=6 # LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed # LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180 +# ── Unified search translation model (optional) ───────────────────────── +# /photos/search/unified runs one small LLM call to translate a natural- +# language query into structured filters + a semantic term, then CLIP-ranks. +# That step needs an LLM AND CLIP available at once. On a tight VRAM budget a +# large chat model can't co-reside with CLIP, so pin a small, fast model here +# (it can stay loaded alongside CLIP and the chat model). Precedence: +# UNIFIED_SEARCH_MODEL > the client's selected model > the configured default. +# Use the configured backend (LLM_BACKEND); local only — no hybrid. +# UNIFIED_SEARCH_MODEL=qwen3-0.6b + # ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ─────────────────── # TTS routes through the same llama-swap proxy (a Chatterbox model id), so it # only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp. @@ -139,3 +149,31 @@ CLIP_REQUEST_TIMEOUT_SEC=60 # ── RAG / search ──────────────────────────────────────────────────────── # Set to `1` to enable cross-encoder reranking on /search results. SEARCH_RAG_RERANK=0 + +# ── Nightly reel pre-generation (Phase 3+) ────────────────────────────── +# Set to `1` to enable the scheduler. Disabled by default. +# REEL_PREGEN_ENABLED=1 +# Hour (0-23) when the nightly batch fires. Default 3 AM. +# REEL_PREGEN_HOUR=3 +# Day of week for weekly reels (0=Sun, 1=Mon, …). Default Monday. +# REEL_PREGEN_WEEK_DOW=1 +# Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to +# the server's local timezone. +# REEL_PREGEN_TZ_OFFSET_MINUTES= +# Fixed timezone offset — overrides auto-detect to avoid DST shifts. +# When set, both the DB fallback and env fallback use this value. 
+# REEL_PREGEN_TZ_FIXED_MINUTES=-480 +# Voice ID for narration (e.g., "grandma"). Falls back to the value +# stored in the user_ai_prefs DB row when set. +# REEL_PREGEN_VOICE= +# Library filter: a library id (e.g. "1") or "all" for every library. +# REEL_PREGEN_LIBRARY=all +# Max agentic tool iterations for pre-gen scripter. Default 8. +# REEL_PREGEN_MAX_TOOL_ITERS=8 +# +# On-disk reel cache sweep (runs every 24h, independent of pre-gen). Removes +# reel MP4s with no ledger row + no live job that are older than the max age — +# i.e. the on-demand cache, which otherwise grows forever. Set to 0 to disable. +# REEL_CACHE_SWEEP_ENABLED=1 +# Age (days) before an unreferenced reel MP4 is swept. Default 7. +# REEL_CACHE_MAX_AGE_DAYS=7 diff --git a/Cargo.lock b/Cargo.lock index a35a7d2..9455f5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2051,7 +2051,7 @@ dependencies = [ [[package]] name = "image-api" -version = "1.3.0" +version = "1.4.0" dependencies = [ "actix", "actix-cors", diff --git a/Cargo.toml b/Cargo.toml index 3b3a08a..860e6ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "image-api" -version = "1.3.0" +version = "1.4.0" authors = ["Cameron Cordes "] edition = "2024" diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/down.sql b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql new file mode 100644 index 0000000..91863c2 --- /dev/null +++ b/migrations/2026-06-13-000000_add_precomputed_reels/down.sql @@ -0,0 +1,2 @@ +DROP INDEX IF EXISTS idx_precomputed_reels_span_library; +DROP TABLE IF EXISTS precomputed_reels; diff --git a/migrations/2026-06-13-000000_add_precomputed_reels/up.sql b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql new file mode 100644 index 0000000..ba49b72 --- /dev/null +++ b/migrations/2026-06-13-000000_add_precomputed_reels/up.sql @@ -0,0 +1,14 @@ +CREATE TABLE precomputed_reels ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + span TEXT NOT NULL, + library_key TEXT NOT NULL, + cache_key 
TEXT NOT NULL, + output_path TEXT NOT NULL, + title TEXT NOT NULL, + media_count INT NOT NULL, + render_version INT NOT NULL DEFAULT 1, + tz_offset_minutes INT NOT NULL, + voice TEXT, + generated_at BIGINT NOT NULL +); +CREATE INDEX idx_precomputed_reels_span_library ON precomputed_reels(span, library_key, generated_at DESC); diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql new file mode 100644 index 0000000..83b82a3 --- /dev/null +++ b/migrations/2026-06-13-000010_add_user_ai_prefs/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS user_ai_prefs; diff --git a/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql new file mode 100644 index 0000000..fd8f6f2 --- /dev/null +++ b/migrations/2026-06-13-000010_add_user_ai_prefs/up.sql @@ -0,0 +1,7 @@ +CREATE TABLE user_ai_prefs ( + id INTEGER PRIMARY KEY CHECK(id=1), + voice TEXT, + tz_offset_minutes INTEGER, + library TEXT, + updated_at BIGINT NOT NULL +); diff --git a/src/ai/backend.rs b/src/ai/backend.rs index 0515f1c..dfcdd03 100644 --- a/src/ai/backend.rs +++ b/src/ai/backend.rs @@ -41,6 +41,10 @@ pub struct SamplingOverrides { pub top_p: Option, pub top_k: Option, pub min_p: Option, + /// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as + /// `chat_template_kwargs.enable_thinking`); other backends ignore it. + /// `None` leaves the model/template default in place. 
+ pub enable_thinking: Option, } impl SamplingOverrides { @@ -124,6 +128,7 @@ mod tests { top_p: None, top_k: None, min_p: None, + enable_thinking: None, }; assert!(!empty.has_sampling()); @@ -134,6 +139,7 @@ mod tests { top_p: None, top_k: None, min_p: None, + enable_thinking: None, }; assert!(with_temp.has_sampling()); } diff --git a/src/ai/clip_client.rs b/src/ai/clip_client.rs index 85c66a7..3519e8b 100644 --- a/src/ai/clip_client.rs +++ b/src/ai/clip_client.rs @@ -191,11 +191,13 @@ impl ClipClient { let resp = match self.client.post(&url).json(&body).send().await { Ok(r) => r, Err(e) if e.is_timeout() || e.is_connect() => { + log::warn!("clip encode_text network error to {url}: {e}"); return Err(ClipError::Transient(anyhow::anyhow!( "clip client network: {e}" ))); } Err(e) => { + log::warn!("clip encode_text request error to {url}: {e}"); return Err(ClipError::Transient(anyhow::anyhow!( "clip client request: {e}" ))); @@ -210,6 +212,7 @@ impl ClipClient { return Ok(body); } let body_text = resp.text().await.unwrap_or_default(); + log::warn!("clip encode_text HTTP {status} from {url}: {body_text}"); Err(classify_error_response(status.as_u16(), &body_text)) } diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index cb21b14..ae9f300 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -40,6 +40,12 @@ pub struct GeneratePhotoInsightRequest { pub top_k: Option, #[serde(default)] pub min_p: Option, + /// Reasoning toggle for thinking-capable models. Forwarded to the + /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored + /// by other backends and the non-agentic (Ollama) path. Only the agentic + /// endpoint routes through llama.cpp. None defers to the template default. + #[serde(default)] + pub enable_thinking: Option, /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision + /// OpenRouter chat). Only respected by the agentic endpoint. 
#[serde(default)] @@ -120,7 +126,7 @@ pub async fn generation_status_handler( } if let Some(ref fp) = query.path { - let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -218,10 +224,11 @@ pub async fn cancel_generation_handler( } if let Some(ref fp) = request.file_path { - let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) - .ok() - .flatten() - .unwrap_or_else(|| app_state.primary_library()); + let library = + libraries::resolve_library_param_state(&app_state, request.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); let normalized = normalize_path(fp); // Get active job ids first, then cancel in DB, then abort tasks @@ -580,7 +587,7 @@ pub async fn get_insight_handler( // Expand to rel_paths sharing content so an insight generated under // library 1 still shows when the same photo is viewed from library 2. - let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -867,6 +874,7 @@ pub async fn generate_agentic_insight_handler( request.top_p, request.top_k, request.min_p, + request.enable_thinking, max_iterations, request.backend.clone(), fewshot_examples, @@ -1168,6 +1176,11 @@ pub struct ChatTurnHttpRequest { pub top_k: Option, #[serde(default)] pub min_p: Option, + /// Reasoning toggle for thinking-capable models. Forwarded to the + /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored + /// by other backends. None defers to the model/template default. + #[serde(default)] + pub enable_thinking: Option, #[serde(default)] pub max_iterations: Option, /// Per-turn system-prompt override. 
Ephemeral in append mode, @@ -1218,15 +1231,16 @@ pub async fn chat_turn_handler( let mut span = tracer.start_with_context("http.insights.chat", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; // Service-token claims (sub: "service:apollo") fall through to // user_id=1 — the operator convention. Mobile/web clients have a @@ -1245,6 +1259,7 @@ pub async fn chat_turn_handler( top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, + enable_thinking: request.enable_thinking, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), @@ -1344,15 +1359,16 @@ pub async fn chat_rewind_handler( request: web::Json, app_state: web::Data, ) -> impl Responder { - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; 
match app_state .insight_chat @@ -1393,7 +1409,7 @@ pub async fn chat_history_handler( // cross-library lookup when the scoped one misses, so a photo // with no insight in this library but one in another still // surfaces (the "show this photo's primary insight" merge case). - let library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -1444,15 +1460,16 @@ pub async fn chat_stream_handler( request: web::Json, app_state: web::Data, ) -> HttpResponse { - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; // Service-token sub falls through to user_id=1 (see chat_turn_handler). 
let user_id = claims.sub.parse::().unwrap_or(1); @@ -1469,6 +1486,7 @@ pub async fn chat_stream_handler( top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, + enable_thinking: request.enable_thinking, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), @@ -1589,15 +1607,16 @@ pub async fn turn_async_handler( let mut span = tracer.start_with_context("http.insights.chat_turn_async", &parent_context); span.set_attribute(KeyValue::new("file_path", request.file_path.clone())); - let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) { - Ok(Some(lib)) => lib, - Ok(None) => app_state.primary_library(), - Err(e) => { - return HttpResponse::BadRequest().json(serde_json::json!({ - "error": format!("invalid library: {}", e) - })); - } - }; + let library = + match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) { + Ok(Some(lib)) => lib, + Ok(None) => app_state.primary_library(), + Err(e) => { + return HttpResponse::BadRequest().json(serde_json::json!({ + "error": format!("invalid library: {}", e) + })); + } + }; let user_id = claims.sub.parse::().unwrap_or(1); @@ -1613,6 +1632,7 @@ pub async fn turn_async_handler( top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, + enable_thinking: request.enable_thinking, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index 84f2b32..af00731 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -70,6 +70,10 @@ pub struct ChatTurnRequest { pub top_p: Option, pub top_k: Option, pub min_p: Option, + /// Reasoning toggle for thinking-capable models. Forwarded to the + /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored + /// by other backends. None defers to the model/template default. 
+ pub enable_thinking: Option, pub max_iterations: Option, /// Per-turn system-prompt override. In append mode (default), applied /// ephemerally — original system message restored before persistence. @@ -344,6 +348,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -847,6 +852,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -1017,6 +1023,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -1425,6 +1432,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -1607,6 +1615,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 3673c43..d45fa55 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -217,6 +217,13 @@ impl InsightGenerator { &self.insight_dao } + /// Accessor for the EXIF DAO (used by the reel scheduler to resolve + /// GPS enrichment without creating a separate DB connection). 
+ #[allow(dead_code)] + pub fn exif_dao(&self) -> &Arc>> { + &self.exif_dao + } + /// Whether the optional Apollo Places integration is wired up. Drives /// tool-definition gating (no point offering `get_personal_place_at` /// when Apollo is unreachable) — exposed publicly so `insight_chat` @@ -3926,6 +3933,7 @@ Return ONLY the summary, nothing else."#, if let Some(ctx) = overrides.num_ctx { c.set_num_ctx(Some(ctx)); } + c.set_enable_thinking(overrides.enable_thinking); Box::new(c) } else { // Pure Ollama local. @@ -4057,6 +4065,7 @@ Return ONLY the summary, nothing else."#, top_p: Option, top_k: Option, min_p: Option, + enable_thinking: Option, max_iterations: usize, backend: Option, fewshot_examples: Vec>, @@ -4084,6 +4093,7 @@ Return ONLY the summary, nothing else."#, top_p, top_k, min_p, + enable_thinking, }; let backend = self.resolve_backend(kind, &overrides).await?; span.set_attribute(KeyValue::new("model", backend.model().to_string())); @@ -4497,6 +4507,110 @@ Return ONLY the summary, nothing else."#, )) } + /// A read-only agentic tool loop: chat with tools until the model stops + /// calling them, then return the final content. + /// + /// This is the loop body extracted from + /// `generate_agentic_insight_for_photo` (lines 4316-4377) so it can be + /// reused by the reel-scripter without the photo-specific context + /// (image_base64, file_path, persona_id). The photo insight loop still + /// has its own copy because it threads image/file context through + /// `execute_tool`. + /// + /// Calls `execute_tool` with empty file/image context; enabled tools + /// never read those fields. + /// + /// Only used by the `reels` module (compiled in `main.rs`, not `lib.rs`), + /// so the `#[allow(dead_code)]` suppresses the lib-target warning. 
+ #[allow(dead_code)] + pub(crate) async fn run_readonly_tool_loop( + &self, + backend: &ResolvedBackend, + mut messages: Vec, + tools: Vec, + max_iter: usize, + ) -> Result { + let mut final_content = String::new(); + + for iteration in 0..max_iter { + log::info!("Agentic iteration {}/{}", iteration + 1, max_iter); + + let (response, _prompt_tokens, _eval_tokens) = backend + .chat() + .chat_with_tools(messages.clone(), tools.clone()) + .await?; + + // Sanitize tool call arguments before pushing back into history. + // Some models occasionally return non-object arguments (bool, + // string, null) which Ollama rejects when they are re-sent in + // a subsequent request. + let mut response = response; + if let Some(ref mut tool_calls) = response.tool_calls { + for tc in tool_calls.iter_mut() { + if !tc.function.arguments.is_object() { + log::warn!( + "Tool '{}' returned non-object arguments ({:?}), normalising to {{}}", + tc.function.name, + tc.function.arguments + ); + tc.function.arguments = serde_json::Value::Object(Default::default()); + } + } + } + + messages.push(response.clone()); + + if let Some(ref tool_calls) = response.tool_calls + && !tool_calls.is_empty() + { + for tool_call in tool_calls { + log::info!( + "Agentic tool call [{}]: {} {}", + iteration, + tool_call.function.name, + tool_call.function.arguments + ); + let result = self + .execute_tool( + &tool_call.function.name, + &tool_call.function.arguments, + backend, + &None, + "", + 0, + "", + &opentelemetry::Context::new(), + ) + .await; + messages.push(ChatMessage::tool_result(result)); + } + continue; + } + + // No tool calls — this is the final answer + final_content = response.content; + break; + } + + // If loop exhausted without final answer, ask for one + if final_content.is_empty() { + log::info!( + "Agentic loop exhausted after {} iterations, requesting final answer", + max_iter + ); + messages.push(ChatMessage::user( + "Based on the context gathered, please write the final answer. 
Return ONLY the JSON object, no prose or code fences.", + )); + let (final_response, _, _) = backend + .chat() + .chat_with_tools(messages.clone(), vec![]) + .await?; + final_content = final_response.content; + } + + Ok(final_content) + } + /// Reverse geocode GPS coordinates to human-readable place names async fn reverse_geocode(&self, lat: f64, lon: f64) -> Option { let url = format!( diff --git a/src/ai/llamacpp.rs b/src/ai/llamacpp.rs index 8a7c898..77e7f63 100644 --- a/src/ai/llamacpp.rs +++ b/src/ai/llamacpp.rs @@ -64,6 +64,12 @@ pub struct LlamaCppClient { top_p: Option, top_k: Option, min_p: Option, + /// When `Some`, forwarded to llama-server as + /// `chat_template_kwargs: {"enable_thinking": }`. The Jinja chat + /// template (e.g. Qwen3) reads this to gate its reasoning block. `None` + /// omits the key entirely, leaving the template's own default. Templates + /// that don't reference the key ignore it, so sending it is harmless. + enable_thinking: Option, } impl LlamaCppClient { @@ -89,6 +95,7 @@ impl LlamaCppClient { top_p: None, top_k: None, min_p: None, + enable_thinking: None, } } @@ -104,6 +111,12 @@ impl LlamaCppClient { self.num_ctx = num_ctx; } + /// Set the reasoning toggle forwarded as `chat_template_kwargs.enable_thinking`. + /// `None` leaves the chat template's own default in place. + pub fn set_enable_thinking(&mut self, enable_thinking: Option) { + self.enable_thinking = enable_thinking; + } + pub fn set_sampling_params( &mut self, temperature: Option, @@ -458,6 +471,12 @@ impl LlamaCppClient { // via -c, so we silently drop the override here. The config.yaml // entry is the source of truth for context size. let _ = self.num_ctx; + // Reasoning toggle for thinking-capable templates (Qwen3 et al.). + // llama-server forwards chat_template_kwargs into the Jinja render + // (requires --jinja); templates that ignore the key are unaffected. 
+ if let Some(think) = self.enable_thinking { + v.push(("chat_template_kwargs", json!({ "enable_thinking": think }))); + } v } diff --git a/src/ai/mod.rs b/src/ai/mod.rs index c5302fb..7d0802e 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -10,6 +10,7 @@ pub mod insight_generator; pub mod llamacpp; pub mod llm_client; pub mod local_llm; +pub mod nl_query; pub mod ollama; pub mod openrouter; pub mod pronunciation; diff --git a/src/ai/nl_query.rs b/src/ai/nl_query.rs new file mode 100644 index 0000000..d709322 --- /dev/null +++ b/src/ai/nl_query.rs @@ -0,0 +1,408 @@ +//! Natural-language → structured-query translation for unified photo search. +//! +//! The unified search endpoint (`/photos/search/unified`, Phase 2) needs to +//! turn a free-text query like *"sunset photos in Italy from last summer"* +//! into the structured filter the existing `/photos` engine understands plus +//! a semantic term for CLIP ranking. That translation is a single grounded +//! LLM call, isolated here so it can be unit-tested without a network or the +//! full `InsightGenerator`. +//! +//! Two-stage design: +//! 1. The LLM emits a [`RawNlQuery`] — references are by *name* (tags) and +//! dates as ISO strings, never numeric ids it could hallucinate. +//! 2. [`resolve_raw_query`] maps names against the real tag vocabulary and +//! converts ISO dates to unix seconds, producing a [`StructuredQuery`]. +//! A tag the model invents that isn't in the vocab is surfaced in +//! `unmatched_tags` (the caller folds it back into the semantic term) +//! rather than silently dropped — this is the anti-noise guard. +//! +//! Geocoding of `place` and person filtering are intentionally *not* handled +//! here: `place` stays as text for the caller to forward-geocode (async, see +//! `geo::forward_geocode`), and person filtering is deferred until a +//! person→photos resolver exists. 
+ +use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks}; +use anyhow::{Result, anyhow}; +use serde::{Deserialize, Serialize}; + +/// Raw query object as emitted by the LLM. Tag references are by name +/// (resolved against the real vocab in Rust); dates are ISO `YYYY-MM-DD`. +/// Every field is optional so a partial / minimal model response still +/// deserializes. +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +pub struct RawNlQuery { + /// Visual/scene description handed to CLIP for ranking. The descriptive + /// remainder after structured filters are peeled off. + #[serde(default)] + pub semantic: Option, + /// Tag names the photos must have. Matched case-insensitively against + /// the supplied vocabulary; non-matches land in `unmatched_tags`. + #[serde(default)] + pub tags: Vec, + /// Tag names the photos must NOT have. + #[serde(default)] + pub exclude_tags: Vec, + #[serde(default)] + pub camera_make: Option, + #[serde(default)] + pub camera_model: Option, + #[serde(default)] + pub lens_model: Option, + /// Free-text place/location name to forward-geocode (e.g. "Italy"). + #[serde(default)] + pub place: Option, + /// Inclusive start date, ISO `YYYY-MM-DD`. + #[serde(default)] + pub date_from: Option, + /// Inclusive end date, ISO `YYYY-MM-DD`. + #[serde(default)] + pub date_to: Option, + /// "photo" | "video" — normalized in [`resolve_raw_query`]. + #[serde(default)] + pub media_type: Option, +} + +/// Resolved structured query: tag names mapped to ids against the real +/// vocab, ISO dates converted to unix seconds. `place` stays as text for the +/// caller to forward-geocode into a gps circle. Serializable so the endpoint +/// can echo it back to the client as "this is how I read your query" +/// (editable filter chips). 
+#[derive(Debug, Clone, Default, PartialEq, Serialize)] +pub struct StructuredQuery { + pub semantic: Option, + pub tag_ids: Vec, + pub exclude_tag_ids: Vec, + /// Tag names the model produced that don't exist in the vocabulary. + /// The caller folds these back into the semantic term so the concept + /// isn't lost — and surfacing them keeps a hallucinated tag from + /// silently filtering the whole library to nothing. + pub unmatched_tags: Vec, + pub camera_make: Option, + pub camera_model: Option, + pub lens_model: Option, + /// Raw place name awaiting forward-geocoding by the caller. + pub place: Option, + pub date_from: Option, + pub date_to: Option, + /// Normalized to "photo" | "video"; `None` means no media-type filter. + pub media_type: Option, +} + +/// Convert an ISO `YYYY-MM-DD` date to a unix timestamp (seconds). With +/// `end_of_day`, returns 23:59:59 of that day so a `date_to` filter is +/// inclusive of the whole day; otherwise 00:00:00. Returns `None` for any +/// unparseable input (the filter is simply omitted rather than erroring). +pub fn iso_to_unix(date: &str, end_of_day: bool) -> Option { + let d = chrono::NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok()?; + let time = if end_of_day { + chrono::NaiveTime::from_hms_opt(23, 59, 59)? + } else { + chrono::NaiveTime::from_hms_opt(0, 0, 0)? + }; + Some(d.and_time(time).and_utc().timestamp()) +} + +/// Normalize a free-form media-type string to the engine's vocabulary. +/// Anything that isn't clearly photo or video (including "all") yields +/// `None` — no filter. +fn normalize_media_type(raw: &str) -> Option { + match raw.trim().to_lowercase().as_str() { + "photo" | "photos" | "image" | "images" | "picture" | "pictures" => { + Some("photo".to_string()) + } + "video" | "videos" | "movie" | "movies" | "clip" | "clips" => Some("video".to_string()), + _ => None, + } +} + +/// Resolve a raw LLM query against the real tag vocabulary, producing the +/// structured filter. 
Pure — no network, no LLM — so it carries the +/// correctness-critical mapping logic under unit test. +/// +/// `tag_vocab` is `(tag_id, tag_name)` pairs (the shape `TagDao::get_all_tags` +/// yields once the count is dropped). Matching is case-insensitive and exact +/// on the trimmed name. +pub fn resolve_raw_query(raw: RawNlQuery, tag_vocab: &[(i32, String)]) -> StructuredQuery { + // Case-insensitive name → id lookup. Built once per call. + let lookup: std::collections::HashMap = tag_vocab + .iter() + .map(|(id, name)| (name.trim().to_lowercase(), *id)) + .collect(); + + let resolve_names = |names: &[String], ids: &mut Vec, unmatched: &mut Vec| { + for name in names { + let key = name.trim().to_lowercase(); + if key.is_empty() { + continue; + } + match lookup.get(&key) { + Some(id) if !ids.contains(id) => ids.push(*id), + Some(_) => {} // duplicate, already collected + None => { + if !unmatched.iter().any(|u| u.eq_ignore_ascii_case(name)) { + unmatched.push(name.trim().to_string()); + } + } + } + } + }; + + let mut tag_ids = Vec::new(); + let mut unmatched_tags = Vec::new(); + resolve_names(&raw.tags, &mut tag_ids, &mut unmatched_tags); + + // Excluded tags that don't match a real tag are simply ignored — you + // can't exclude a tag that doesn't exist, and folding them into + // `semantic` would make no sense. 
+ let mut exclude_tag_ids = Vec::new(); + let mut exclude_unmatched = Vec::new(); + resolve_names( + &raw.exclude_tags, + &mut exclude_tag_ids, + &mut exclude_unmatched, + ); + + let clean = |s: Option| s.map(|v| v.trim().to_string()).filter(|v| !v.is_empty()); + + StructuredQuery { + semantic: clean(raw.semantic), + tag_ids, + exclude_tag_ids, + unmatched_tags, + camera_make: clean(raw.camera_make), + camera_model: clean(raw.camera_model), + lens_model: clean(raw.lens_model), + place: clean(raw.place), + date_from: raw.date_from.as_deref().and_then(|d| iso_to_unix(d, false)), + date_to: raw.date_to.as_deref().and_then(|d| iso_to_unix(d, true)), + media_type: raw.media_type.as_deref().and_then(normalize_media_type), + } +} + +/// Build the grounded system prompt. The model is told the current date (so +/// "last summer" resolves) and the exact tag vocabulary (so it uses real +/// tags or routes the concept to `semantic` instead of inventing one). +fn build_system_prompt(tag_vocab: &[(i32, String)], today: chrono::NaiveDate) -> String { + // Cap the vocab dump so a huge library doesn't blow the context window; + // the most-used tags are the ones a query is likely to reference. + const MAX_TAGS: usize = 400; + let mut names: Vec<&str> = tag_vocab.iter().map(|(_, n)| n.as_str()).collect(); + names.sort_unstable(); + names.dedup(); + let shown = names.len().min(MAX_TAGS); + let vocab = names[..shown].join(", "); + let truncation = if names.len() > MAX_TAGS { + format!(" (showing {MAX_TAGS} of {} tags)", names.len()) + } else { + String::new() + }; + + format!( + "You translate a user's natural-language photo-search request into a JSON \ +filter. Today's date is {today}. 
Respond with ONLY a JSON object, no prose, no \ +code fences.\n\n\ +Schema (all fields optional):\n\ +{{\n \ +\"semantic\": string|null, // visual scene/subject for image similarity search\n \ +\"tags\": string[], // ONLY names from the tag list below\n \ +\"exclude_tags\": string[], // ONLY names from the tag list below\n \ +\"camera_make\": string|null,\n \ +\"camera_model\": string|null,\n \ +\"lens_model\": string|null,\n \ +\"place\": string|null, // a location name to look up (city, country, landmark)\n \ +\"date_from\": \"YYYY-MM-DD\"|null, // inclusive\n \ +\"date_to\": \"YYYY-MM-DD\"|null, // inclusive\n \ +\"media_type\": \"photo\"|\"video\"|null\n\ +}}\n\n\ +Rules:\n\ +- Put descriptive/visual concepts (\"sunset\", \"crowded beach\", \"red car\") in \"semantic\".\n\ +- Only use \"tags\"/\"exclude_tags\" values that appear EXACTLY in the tag list. If a \ +concept isn't a listed tag, put it in \"semantic\" instead — never invent a tag.\n\ +- Resolve relative dates against today's date (\"last summer\", \"2023\", \"last month\").\n\ +- Put place/location names in \"place\" (not \"semantic\").\n\ +- Omit (use null / empty array) anything the request doesn't mention.\n\n\ +Available tags{truncation}: {vocab}" + ) +} + +/// Extract the JSON object from a model response that may include a leading +/// `` block, code fences, or trailing prose. Strips the think block +/// first (so reasoning that mentions braces can't fool the scan), then +/// returns the substring from the first `{` to the last `}` inclusive — or +/// the trimmed text if no braces are found (which then fails to parse with a +/// clear error). +fn extract_json(raw: &str) -> String { + let s = strip_think_blocks(raw); + let start = s.find('{'); + let end = s.rfind('}'); + match (start, end) { + (Some(a), Some(b)) if b >= a => s[a..=b].to_string(), + _ => s.trim().to_string(), + } +} + +/// Parse a model response string into a [`StructuredQuery`], resolving names +/// against the vocab. 
Separated from the LLM call so it's unit-testable. +pub fn parse_response(response: &str, tag_vocab: &[(i32, String)]) -> Result { + let json = extract_json(response); + let raw: RawNlQuery = serde_json::from_str(&json) + .map_err(|e| anyhow!("failed to parse NL query JSON: {e}; raw response: {response:?}"))?; + Ok(resolve_raw_query(raw, tag_vocab)) +} + +/// Translate a natural-language query into a [`StructuredQuery`] via one +/// grounded LLM call. The `client` is any configured backend (the unified +/// endpoint passes the resolved chat backend); `tag_vocab` grounds the tag +/// mapping; `today` anchors relative-date resolution. +pub async fn translate_nl_query( + client: &dyn LlmClient, + nl: &str, + tag_vocab: &[(i32, String)], + today: chrono::NaiveDate, +) -> Result { + let system = build_system_prompt(tag_vocab, today); + let messages = vec![ChatMessage::system(system), ChatMessage::user(nl)]; + let (msg, _, _) = client.chat_with_tools(messages, Vec::::new()).await?; + parse_response(&msg.content, tag_vocab) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn vocab() -> Vec<(i32, String)> { + vec![ + (1, "beach".to_string()), + (2, "Sunset".to_string()), // mixed case to exercise case-insensitivity + (3, "family".to_string()), + ] + } + + #[test] + fn iso_to_unix_start_and_end_of_day() { + // 2023-01-01 UTC midnight = 1672531200. + assert_eq!(iso_to_unix("2023-01-01", false), Some(1_672_531_200)); + // End of that day is 86399 seconds later. 
+ assert_eq!( + iso_to_unix("2023-01-01", true), + Some(1_672_531_200 + 86_399) + ); + } + + #[test] + fn iso_to_unix_rejects_garbage() { + assert_eq!(iso_to_unix("last summer", false), None); + assert_eq!(iso_to_unix("2023-13-99", false), None); + assert_eq!(iso_to_unix("", false), None); + } + + #[test] + fn resolve_matches_tags_case_insensitively() { + let raw = RawNlQuery { + tags: vec!["BEACH".to_string(), "sunset".to_string()], + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.tag_ids, vec![1, 2]); + assert!(q.unmatched_tags.is_empty()); + } + + #[test] + fn resolve_surfaces_unmatched_tags_not_silently_dropped() { + // A hallucinated / non-vocab tag must be surfaced so the caller can + // fold it into semantic — never silently used as a hard filter. + let raw = RawNlQuery { + tags: vec!["beach".to_string(), "golden hour".to_string()], + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.tag_ids, vec![1]); + assert_eq!(q.unmatched_tags, vec!["golden hour".to_string()]); + } + + #[test] + fn resolve_dedups_repeated_tags() { + let raw = RawNlQuery { + tags: vec![ + "beach".to_string(), + "Beach".to_string(), + "beach".to_string(), + ], + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.tag_ids, vec![1]); + } + + #[test] + fn resolve_normalizes_media_type_and_dates() { + let raw = RawNlQuery { + media_type: Some("Videos".to_string()), + date_from: Some("2023-06-01".to_string()), + date_to: Some("2023-06-30".to_string()), + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.media_type.as_deref(), Some("video")); + assert_eq!(q.date_from, iso_to_unix("2023-06-01", false)); + assert_eq!(q.date_to, iso_to_unix("2023-06-30", true)); + } + + #[test] + fn resolve_media_type_all_is_no_filter() { + let raw = RawNlQuery { + media_type: Some("all".to_string()), + ..Default::default() + }; + assert_eq!(resolve_raw_query(raw, 
&vocab()).media_type, None); + } + + #[test] + fn resolve_trims_and_empties_to_none() { + let raw = RawNlQuery { + semantic: Some(" ".to_string()), + camera_make: Some(" Fujifilm ".to_string()), + place: Some("".to_string()), + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.semantic, None); + assert_eq!(q.camera_make.as_deref(), Some("Fujifilm")); + assert_eq!(q.place, None); + } + + #[test] + fn parse_response_handles_code_fences_and_prose() { + let resp = "Here is the filter:\n```json\n{\"semantic\":\"sunset\",\"tags\":[\"beach\"]}\n```\nDone."; + let q = parse_response(resp, &vocab()).expect("parse"); + assert_eq!(q.semantic.as_deref(), Some("sunset")); + assert_eq!(q.tag_ids, vec![1]); + } + + #[test] + fn parse_response_handles_think_block_then_json() { + let resp = "user wants beach sunsets{\"tags\":[\"beach\",\"sunset\"]}"; + let q = parse_response(resp, &vocab()).expect("parse"); + assert_eq!(q.tag_ids, vec![1, 2]); + } + + #[test] + fn parse_response_errors_on_non_json() { + assert!(parse_response("I cannot help with that.", &vocab()).is_err()); + } + + #[test] + fn build_system_prompt_includes_date_and_vocab() { + let today = chrono::NaiveDate::from_ymd_opt(2026, 6, 14).unwrap(); + let prompt = build_system_prompt(&vocab(), today); + assert!( + prompt.contains("2026-06-14"), + "prompt should state today's date" + ); + assert!(prompt.contains("beach"), "prompt should list the vocab"); + assert!( + prompt.contains("never invent a tag"), + "prompt should warn against inventing tags" + ); + } +} diff --git a/src/ai/tts.rs b/src/ai/tts.rs index 08d9dcd..d6ef89d 100644 --- a/src/ai/tts.rs +++ b/src/ai/tts.rs @@ -23,6 +23,7 @@ use std::time::{Duration, Instant}; use tokio::sync::Semaphore; use uuid::Uuid; +use crate::ai::llamacpp::LlamaCppClient; use crate::data::Claims; use crate::file_types::{is_audio_file, is_video_file}; use crate::files::is_valid_full_path; @@ -473,6 +474,40 @@ pub struct TtsJobStatusResponse { pub 
error: Option, } +/// Synthesize speech honoring the global single-GPU serialization +/// (`TTS_PERMIT`) and the GPU write lease, exactly as the speech-job path does. +/// Queues on the permit rather than fast-failing, so callers wait their turn +/// instead of contending. Text is run through the same markdown/emoji cleanup + +/// pronunciation pipeline as the HTTP handlers. Reused by the memory-reel +/// pipeline to narrate each segment without racing a user's TTS request on the +/// Chatterbox GPU. +pub async fn synthesize_serialized( + client: &LlamaCppClient, + text: &str, + voice: Option<&str>, + format: &str, + exaggeration: Option, +) -> anyhow::Result> { + let prepared = prepare_for_tts(text); + if prepared.is_empty() { + anyhow::bail!("nothing to synthesize after cleanup"); + } + // Clamp to Chatterbox's documented range, matching the HTTP handlers + // (which clamp before forwarding; this path bypasses them). + let exaggeration = exaggeration.map(|x| x.clamp(0.25, 2.0)); + // Queue rather than fast-fail (mirrors create_speech_job_handler). + let _permit = TTS_PERMIT + .acquire() + .await + .map_err(|_| anyhow::anyhow!("TTS permit closed"))?; + // Wait for the LLM side to release the GPU before the request timeout + // starts (see ai::gpu). + let _gpu = crate::ai::gpu::tts_lease().await; + client + .text_to_speech(&prepared, voice, format, exaggeration, None, None) + .await +} + /// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses. 
/// Returns 202 + a job id immediately; the synth queues on the single GPU /// permit (instead of fast-failing 429) and the client polls the job until @@ -985,7 +1020,7 @@ pub async fn create_voice_from_library_handler( let voice_name = append_ref_window(&voice_name, ref_start, ref_duration.round().max(1.0) as u32); - let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(Some(l)) => l, Ok(None) => app_state.primary_library(), Err(msg) => { diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs index 71f2f8a..396eddc 100644 --- a/src/bin/populate_knowledge.rs +++ b/src/bin/populate_knowledge.rs @@ -336,6 +336,7 @@ async fn main() -> anyhow::Result<()> { args.top_p, args.top_k, args.min_p, + None, // enable_thinking: leave model/template default args.max_iterations, None, Vec::new(), diff --git a/src/clip_search.rs b/src/clip_search.rs index 98ea96e..7b4510e 100644 --- a/src/clip_search.rs +++ b/src/clip_search.rs @@ -124,65 +124,161 @@ fn dot(a: &[f32], b: &[f32]) -> f32 { a.iter().zip(b.iter()).map(|(x, y)| x * y).sum() } -pub async fn search_photos( - state: web::Data, - exif_dao: web::Data>>, - query: web::Query, -) -> ActixResult { - let q_text = query.q.trim().to_string(); - if q_text.is_empty() { - return Ok(HttpResponse::BadRequest().json(SearchError { - error: "query parameter `q` is required".into(), - })); - } +/// Failure modes of [`score_photos`]. Carries enough to let each caller pick +/// an appropriate HTTP status (the CLIP service being down is a 502, a +/// disabled feature is a 503, a rejected query is a 400, a DB failure 500). +pub enum ScoreError { + /// CLIP search isn't configured at all (no Apollo endpoint). + Disabled, + /// The query was rejected by the encoder (client error). + Rejected(String), + /// The CLIP service is transiently unavailable (upstream error). 
+ Unavailable(String), + /// The encoder returned an embedding we couldn't decode. + MalformedEmbedding, + /// A database / index load failure. + Internal(String), +} + +/// Result of scoring the whole library against a query embedding: the +/// resolved model version, how many embeddings were considered, and every +/// `(score, content_hash)` above threshold, sorted by descending score. +/// Pagination and path resolution are the caller's job (see [`resolve_hits`]) +/// so this core can be reused for both the plain search endpoint and the +/// unified endpoint (which filters by hash before paginating). +pub struct ScoredPhotos { + pub model_version: String, + pub considered: usize, + /// `(cosine_score, content_hash)` pairs, descending by score. + pub hits: Vec<(f32, String)>, +} + +/// Encode `q_text` via CLIP and score it against every stored embedding in +/// the given library scope. Returns all matches above `threshold`, sorted by +/// descending similarity. Pure of HTTP concerns so it's shared by +/// `search_photos` and the unified search endpoint. +pub async fn score_photos( + state: &AppState, + exif_dao: &Mutex>, + q_text: &str, + library_ids: &[i32], + threshold: f32, + model_version: Option<&str>, +) -> Result { if !state.clip_client.is_enabled() { - return Ok(HttpResponse::ServiceUnavailable().json(SearchError { - error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(), - })); + return Err(ScoreError::Disabled); } - let limit = query.limit.clamp(1, 200); - let offset = query.offset; - let threshold = query.threshold.clamp(-1.0, 1.0); - - // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms - // on CPU. Bail with a clear error message if Apollo's down so the - // user sees "service unavailable" rather than empty results. - let query_resp = match state.clip_client.encode_text(&q_text).await { + // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms on CPU. 
+ let query_resp = match state.clip_client.encode_text(q_text).await { Ok(r) => r, - Err(ClipError::Permanent(e)) => { - return Ok(HttpResponse::BadRequest().json(SearchError { - error: format!("query rejected: {e}"), - })); - } - Err(ClipError::Transient(e)) => { - return Ok(HttpResponse::BadGateway().json(SearchError { - error: format!("CLIP service unavailable: {e}"), - })); - } - Err(ClipError::Disabled) => { - return Ok(HttpResponse::ServiceUnavailable().json(SearchError { - error: "CLIP service disabled".into(), - })); - } + Err(ClipError::Permanent(e)) => return Err(ScoreError::Rejected(e.to_string())), + Err(ClipError::Transient(e)) => return Err(ScoreError::Unavailable(e.to_string())), + Err(ClipError::Disabled) => return Err(ScoreError::Disabled), }; // decode_embedding works on raw bytes; the wire format is b64. let query_bytes = base64::engine::general_purpose::STANDARD .decode(query_resp.embedding.as_bytes()) .unwrap_or_default(); - let query_vec = match decode_embedding(&query_bytes) { - Some(v) => v, - None => { - return Ok(HttpResponse::BadGateway().json(SearchError { - error: "CLIP service returned a malformed query embedding".into(), - })); - } - }; + let query_vec = decode_embedding(&query_bytes).ok_or(ScoreError::MalformedEmbedding)?; - // 2. Decide which library scope to search. `library_ids` (multi) - // wins over the legacy `library` (single) when both are present; - // either / both empty falls back to "every enabled library". - let library_ids: Vec = if let Some(raw) = query.library_ids.as_deref() { + // 2. Pull the (hash, embedding) matrix under the dao lock, release + // before scoring. The caller-supplied `model_version` (or the live + // engine's) forces a strict join so a mid-flight model swap can't mix + // geometries. 
+ let ctx = opentelemetry::Context::current(); + let rows: Vec<(String, Vec)> = { + let mut dao = exif_dao.lock().expect("exif dao"); + dao.list_clip_index( + &ctx, + library_ids, + model_version.or(Some(&query_resp.model_version)), + ) + .map_err(|e| { + log::warn!("clip_search: list_clip_index failed: {:?}", e); + ScoreError::Internal("failed to load search index".into()) + })? + }; + let considered = rows.len(); + + // 3. Score. Keep all matches and sort at the end (~microseconds at 14k). + let mut hits: Vec<(f32, String)> = Vec::with_capacity(considered); + for (hash, blob) in rows { + let Some(emb) = decode_embedding(&blob) else { + continue; + }; + if emb.len() != query_vec.len() { + continue; + } + let sim = dot(&emb, &query_vec); + if sim < threshold { + continue; + } + hits.push((sim, hash)); + } + hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); + + Ok(ScoredPhotos { + model_version: query_resp.model_version, + considered, + hits, + }) +} + +/// Resolve a page of `(score, content_hash)` pairs back to [`SearchHit`]s +/// (each carrying `library_id` + `rel_path`). Hashes that no longer resolve +/// to a row are skipped. Shared by both endpoints. 
+pub fn resolve_hits( + exif_dao: &Mutex>, + scored: &[(f32, String)], +) -> Vec { + if scored.is_empty() { + return Vec::new(); + } + let ctx = opentelemetry::Context::current(); + let hashes: Vec = scored.iter().map(|(_, h)| h.clone()).collect(); + let mut dao = exif_dao.lock().expect("exif dao"); + let path_map = dao + .get_rel_paths_for_hashes(&ctx, &hashes) + .unwrap_or_else(|e| { + log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e); + std::collections::HashMap::new() + }); + + let mut results = Vec::with_capacity(scored.len()); + for (score, hash) in scored { + let row = match dao.find_by_content_hash(&ctx, hash) { + Ok(Some(r)) => r, + Ok(None) => continue, + Err(e) => { + log::warn!("clip_search: find_by_content_hash failed for {hash}: {e:?}"); + continue; + } + }; + // Prefer get_rel_paths_for_hashes's first entry (shares image_exif's + // natural order), falling back to the ImageExif row. + let rel_path = path_map + .get(hash) + .and_then(|paths| paths.first().cloned()) + .unwrap_or(row.file_path); + results.push(SearchHit { + library_id: row.library_id, + rel_path, + content_hash: hash.clone(), + score: *score, + }); + } + results +} + +/// Parse the `library_ids` (multi) / `library` (single) scope params into a +/// deduped id list. Empty = "every enabled library". Shared so the unified +/// endpoint scopes CLIP identically. 
+pub fn parse_library_scope( + library_ids: Option<&str>, + library: Option, +) -> Result, String> { + if let Some(raw) = library_ids { let mut out: Vec = Vec::new(); for piece in raw.split(',') { let trimmed = piece.trim(); @@ -195,158 +291,92 @@ pub async fn search_photos( out.push(id); } } - Err(_) => { - return Ok(HttpResponse::BadRequest().json(SearchError { - error: format!("invalid library_ids entry: {trimmed:?}"), - })); - } + Err(_) => return Err(format!("invalid library_ids entry: {trimmed:?}")), } } - out - } else if let Some(id) = query.library { - vec![id] + Ok(out) + } else if let Some(id) = library { + Ok(vec![id]) } else { - Vec::new() - }; + Ok(Vec::new()) + } +} - // 3. Pull the (hash, embedding) matrix. Lock contention here is - // bounded — one big SELECT under a mutex Arc> - // and then we release before scoring. If this becomes a hotspot - // we'll cache the decoded matrix in AppState with TTL. - let ctx = opentelemetry::Context::current(); - let rows: Vec<(String, Vec)> = { - let mut dao = exif_dao.lock().expect("exif dao"); - match dao.list_clip_index( - &ctx, - &library_ids, - query - .model_version - .as_deref() - .or(Some(&query_resp.model_version)), - ) { - Ok(r) => r, - Err(e) => { - log::warn!("clip_search: list_clip_index failed: {:?}", e); - return Ok(HttpResponse::InternalServerError().json(SearchError { - error: "failed to load search index".into(), - })); - } - } - }; - let considered = rows.len(); - if considered == 0 { - return Ok(HttpResponse::Ok().json(SearchResponse { - query: q_text, - model_version: query_resp.model_version, - threshold, - considered, - total_matching: 0, - offset, - results: Vec::new(), +pub async fn search_photos( + state: web::Data, + exif_dao: web::Data>>, + query: web::Query, +) -> ActixResult { + let q_text = query.q.trim().to_string(); + if q_text.is_empty() { + return Ok(HttpResponse::BadRequest().json(SearchError { + error: "query parameter `q` is required".into(), })); } - // 4. Score. 
Cap the loop's transient allocation; we keep all scores - // and sort at the end. With ~14k entries the sort is microseconds. - let mut scored: Vec<(f32, String)> = Vec::with_capacity(considered); - for (hash, blob) in rows { - let Some(emb) = decode_embedding(&blob) else { - continue; - }; - if emb.len() != query_vec.len() { - continue; - } - let sim = dot(&emb, &query_vec); - if sim < threshold { - continue; - } - scored.push((sim, hash)); - } - scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); - let total_matching = scored.len(); - // Pagination — slice the sorted list at `[offset, offset+limit)`. - // Offsets past the end produce empty pages rather than an error so - // the client can stop fetching naturally on "load more" past the end. - let scored: Vec<(f32, String)> = if offset >= total_matching { + let limit = query.limit.clamp(1, 200); + let offset = query.offset; + let threshold = query.threshold.clamp(-1.0, 1.0); + + let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) { + Ok(ids) => ids, + Err(msg) => return Ok(HttpResponse::BadRequest().json(SearchError { error: msg })), + }; + + let scored = match score_photos( + &state, + &exif_dao, + &q_text, + &library_ids, + threshold, + query.model_version.as_deref(), + ) + .await + { + Ok(s) => s, + Err(e) => return Ok(score_error_response(e)), + }; + + let total_matching = scored.hits.len(); + // Pagination — slice the sorted list at `[offset, offset+limit)`. Offsets + // past the end produce empty pages so "load more" stops naturally. 
+ let page: Vec<(f32, String)> = if offset >= total_matching { Vec::new() } else { let end = (offset + limit).min(total_matching); - scored[offset..end].to_vec() + scored.hits[offset..end].to_vec() }; - - if scored.is_empty() { - return Ok(HttpResponse::Ok().json(SearchResponse { - query: q_text, - model_version: query_resp.model_version, - threshold, - considered, - total_matching, - offset, - results: Vec::new(), - })); - } - - // 5. Resolve each surviving hash back to a `(library_id, rel_path)`. - // `get_rel_paths_by_hash` returns every rel_path; we pick the first - // one for the result. Apollo / the UI can fetch alternatives via - // /image/metadata when needed. - let hashes: Vec = scored.iter().map(|(_, h)| h.clone()).collect(); - let path_map = { - let mut dao = exif_dao.lock().expect("exif dao"); - match dao.get_rel_paths_for_hashes(&ctx, &hashes) { - Ok(m) => m, - Err(e) => { - log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e); - return Ok(HttpResponse::InternalServerError().json(SearchError { - error: "failed to resolve photo paths".into(), - })); - } - } - }; - - // We need (library_id, rel_path) — get_rel_paths_for_hashes only - // returns rel_paths. Cross-reference via find_by_content_hash to - // pick the library too. Single call per surviving hash; cheap at - // top-20. - let mut results = Vec::with_capacity(scored.len()); - { - let mut dao = exif_dao.lock().expect("exif dao"); - for (score, hash) in scored { - let row = match dao.find_by_content_hash(&ctx, &hash) { - Ok(Some(r)) => r, - Ok(None) => continue, - Err(e) => { - log::warn!( - "clip_search: find_by_content_hash failed for {}: {:?}", - hash, - e - ); - continue; - } - }; - // Prefer get_rel_paths_for_hashes's first entry if it - // exists (it shares semantics with `image_exif`'s natural - // order), falling back to the ImageExif row. 
- let rel_path = path_map - .get(&hash) - .and_then(|paths| paths.first().cloned()) - .unwrap_or(row.file_path); - results.push(SearchHit { - library_id: row.library_id, - rel_path, - content_hash: hash, - score, - }); - } - } + let results = resolve_hits(&exif_dao, &page); Ok(HttpResponse::Ok().json(SearchResponse { query: q_text, - model_version: query_resp.model_version, + model_version: scored.model_version, threshold, - considered, + considered: scored.considered, total_matching, offset, results, })) } + +/// Map a [`ScoreError`] to the HTTP response `search_photos` historically +/// returned for each failure mode. Reused by the unified endpoint. +pub fn score_error_response(e: ScoreError) -> HttpResponse { + match e { + ScoreError::Disabled => HttpResponse::ServiceUnavailable().json(SearchError { + error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(), + }), + ScoreError::Rejected(msg) => HttpResponse::BadRequest().json(SearchError { + error: format!("query rejected: {msg}"), + }), + ScoreError::Unavailable(msg) => HttpResponse::BadGateway().json(SearchError { + error: format!("CLIP service unavailable: {msg}"), + }), + ScoreError::MalformedEmbedding => HttpResponse::BadGateway().json(SearchError { + error: "CLIP service returned a malformed query embedding".into(), + }), + ScoreError::Internal(msg) => { + HttpResponse::InternalServerError().json(SearchError { error: msg }) + } + } +} diff --git a/src/database/mod.rs b/src/database/mod.rs index d063bd0..981f6a4 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -51,10 +51,12 @@ pub mod knowledge_dao; pub mod location_dao; pub mod models; pub mod persona_dao; +pub mod precomputed_reel_dao; pub mod preview_dao; pub mod reconcile; pub mod schema; pub mod search_dao; +pub mod user_ai_prefs_dao; pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao}; pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao}; @@ -66,8 +68,10 @@ pub use 
knowledge_dao::{ }; pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao}; pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao}; +pub use precomputed_reel_dao::{PrecomputedReelDao, SqlitePrecomputedReelDao}; pub use preview_dao::{PreviewDao, SqlitePreviewDao}; pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao}; +pub use user_ai_prefs_dao::{SqliteUserAiPrefsDao, UserAiPrefsDao}; pub trait UserDao { fn create_user(&mut self, user: &str, password: &str) -> Option; diff --git a/src/database/models.rs b/src/database/models.rs index 62274e2..d3d5440 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -1,6 +1,7 @@ use crate::database::schema::{ entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs, - libraries, personas, photo_insights, users, video_preview_clips, + libraries, personas, photo_insights, precomputed_reels, user_ai_prefs, users, + video_preview_clips, }; use serde::Serialize; @@ -505,3 +506,56 @@ pub struct InsightGenerationJob { pub result_insight_id: Option, pub error_message: Option, } + +// --- Precomputed reels ------------------------------------------------------- + +#[derive(Insertable)] +#[diesel(table_name = precomputed_reels)] +pub struct InsertablePrecomputedReel { + pub span: String, + pub library_key: String, + pub cache_key: String, + pub output_path: String, + pub title: String, + pub media_count: i32, + pub render_version: i32, + pub tz_offset_minutes: i32, + pub voice: Option, + pub generated_at: i64, +} + +#[derive(Serialize, Queryable, Clone, Debug)] +pub struct PrecomputedReel { + pub id: i32, + pub span: String, + pub library_key: String, + pub cache_key: String, + pub output_path: String, + pub title: String, + pub media_count: i32, + pub render_version: i32, + pub tz_offset_minutes: i32, + pub voice: Option, + pub generated_at: i64, +} + +// --- User AI preferences (Section E) ---------------------------------------- + 
+#[derive(Queryable, Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)] +#[diesel(table_name = user_ai_prefs)] +pub struct UserAiPrefs { + pub id: i32, + pub voice: Option, + pub tz_offset_minutes: Option, + pub library: Option, + pub updated_at: i64, +} + +#[derive(Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)] +#[diesel(table_name = user_ai_prefs)] +pub struct UpsertUserAiPrefs { + pub voice: Option, + pub tz_offset_minutes: Option, + pub library: Option, + pub updated_at: i64, +} diff --git a/src/database/precomputed_reel_dao.rs b/src/database/precomputed_reel_dao.rs new file mode 100644 index 0000000..b66573b --- /dev/null +++ b/src/database/precomputed_reel_dao.rs @@ -0,0 +1,439 @@ +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +use crate::database::models::{InsertablePrecomputedReel, PrecomputedReel}; +use crate::database::schema; +use crate::database::{DbError, DbErrorKind, connect}; +use crate::otel::trace_db_call; + +/// Ledger for precomputed memory reels. The nightly agentic job writes a +/// row after each successful render; the `GET /reels/precomputed` handler +/// reads it to gate on freshness and serve the cached MP4. +pub trait PrecomputedReelDao: Sync + Send { + /// Insert a precomputed reel row. Returns the new row's id. + /// Written by the nightly agentic job (Section D). + #[allow(dead_code)] + fn record_reel( + &mut self, + context: &opentelemetry::Context, + row: &InsertablePrecomputedReel, + ) -> Result; + + /// Find the latest precomputed reel for the given (span, library_key). + fn latest_for( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + ) -> Result, DbError>; + + /// Return true when a fresh precomputed reel exists for the given + /// (span, library_key, render_version) that was generated at or after + /// `min_generated_at`. 
Used as a fast existence gate before falling + /// back to `latest_for` (avoids a second query path). + fn exists_fresh( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + render_version: i32, + min_generated_at: i64, + ) -> Result; + + /// Delete all but the newest `keep` rows for (span, library_key), returning + /// the deleted rows so the caller can unlink their output files. Used by the + /// nightly job to retire superseded reels (e.g. yesterday's daily). + #[allow(dead_code)] + fn prune_superseded( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + keep: usize, + ) -> Result, DbError>; + + /// Every cache_key currently in the ledger. Used by the on-disk cache sweep + /// to protect files a ledger row still points at. + #[allow(dead_code)] + fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result, DbError>; +} + +pub struct SqlitePrecomputedReelDao { + connection: Arc>, +} + +impl Default for SqlitePrecomputedReelDao { + fn default() -> Self { + Self::new() + } +} + +impl SqlitePrecomputedReelDao { + pub fn new() -> Self { + Self { + connection: Arc::new(Mutex::new(connect())), + } + } + + #[cfg(test)] + pub fn from_connection(conn: Arc>) -> Self { + Self { connection: conn } + } +} + +impl PrecomputedReelDao for SqlitePrecomputedReelDao { + fn record_reel( + &mut self, + context: &opentelemetry::Context, + row: &InsertablePrecomputedReel, + ) -> Result { + trace_db_call(context, "insert", "record_reel", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + diesel::insert_into(dsl::precomputed_reels) + .values(row) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to insert reel: {}", e))?; + + dsl::precomputed_reels + .order(dsl::id.desc()) + .select(dsl::id) + .first::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to get reel id: {}", 
e)) + }) + .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) + } + + fn latest_for( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + ) -> Result, DbError> { + trace_db_call(context, "query", "latest_for", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .order(dsl::generated_at.desc()) + .first::(connection.deref_mut()) + .optional() + .map_err(|e| anyhow::anyhow!("Failed to get latest reel: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn exists_fresh( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + render_version: i32, + min_generated_at: i64, + ) -> Result { + trace_db_call(context, "query", "exists_fresh", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + let count: i64 = dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .filter(dsl::render_version.eq(render_version)) + .filter(dsl::generated_at.ge(min_generated_at)) + .count() + .get_result(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to check fresh reel: {}", e))?; + + Ok(count > 0) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn prune_superseded( + &mut self, + context: &opentelemetry::Context, + span: &str, + library_key: &str, + keep: usize, + ) -> Result, DbError> { + trace_db_call(context, "delete", "prune_superseded", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + // Newest first; everything past `keep` is superseded. 
The table + // holds at most a handful of rows per (span, library), so loading + // and slicing in Rust is cheaper than a correlated subquery. + let mut rows: Vec = dsl::precomputed_reels + .filter(dsl::span.eq(span)) + .filter(dsl::library_key.eq(library_key)) + .order(dsl::generated_at.desc()) + .load::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to load reels for prune: {}", e))?; + + let stale = rows.split_off(rows.len().min(keep)); + if !stale.is_empty() { + let ids: Vec = stale.iter().map(|r| r.id).collect(); + diesel::delete(dsl::precomputed_reels.filter(dsl::id.eq_any(ids))) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to delete superseded reels: {}", e))?; + } + Ok(stale) + }) + .map_err(|e| DbError::log(DbErrorKind::UpdateError, e)) + } + + fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result, DbError> { + trace_db_call(context, "query", "all_cache_keys", |_span| { + use schema::precomputed_reels::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + dsl::precomputed_reels + .select(dsl::cache_key) + .load::(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to load cache keys: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use diesel::Connection; + use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations}; + + const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!(); + + fn setup_dao() -> SqlitePrecomputedReelDao { + let mut conn = SqliteConnection::establish(":memory:") + .expect("Unable to create in-memory db connection"); + conn.run_pending_migrations(DB_MIGRATIONS) + .expect("Failure running DB migrations"); + SqlitePrecomputedReelDao::from_connection(Arc::new(Mutex::new(conn))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + fn sample_row() -> InsertablePrecomputedReel { + 
InsertablePrecomputedReel { + span: "day".to_string(), + library_key: "1".to_string(), + cache_key: "abc123".to_string(), + output_path: "/tmp/reel.mp4".to_string(), + title: "Test Reel".to_string(), + media_count: 10, + render_version: 1, + tz_offset_minutes: 0, + voice: Some("default".to_string()), + generated_at: 1_000_000, + } + } + + #[test] + fn record_reel_inserts_and_returns_id() { + let mut dao = setup_dao(); + let ctx = ctx(); + let row = sample_row(); + + let id = dao.record_reel(&ctx, &row).unwrap(); + assert!(id > 0, "should return a positive id"); + } + + #[test] + fn record_reel_returns_increasing_ids() { + let mut dao = setup_dao(); + let ctx = ctx(); + let row = sample_row(); + + let id1 = dao.record_reel(&ctx, &row).unwrap(); + let id2 = dao.record_reel(&ctx, &row).unwrap(); + assert!(id2 > id1, "each insert should get a higher id"); + } + + #[test] + fn latest_for_returns_latest() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let row1 = InsertablePrecomputedReel { + generated_at: 1_000_000, + ..sample_row() + }; + let row2 = InsertablePrecomputedReel { + generated_at: 2_000_000, + ..sample_row() + }; + + dao.record_reel(&ctx, &row1).unwrap(); + dao.record_reel(&ctx, &row2).unwrap(); + + let latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap(); + assert_eq!(latest.generated_at, 2_000_000); + } + + #[test] + fn latest_for_scoped_by_span_and_library() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let day_row = InsertablePrecomputedReel { + span: "day".to_string(), + library_key: "1".to_string(), + generated_at: 1_000_000, + ..sample_row() + }; + let week_row = InsertablePrecomputedReel { + span: "week".to_string(), + library_key: "1".to_string(), + generated_at: 2_000_000, + ..sample_row() + }; + + dao.record_reel(&ctx, &day_row).unwrap(); + dao.record_reel(&ctx, &week_row).unwrap(); + + let day_latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap(); + assert_eq!(day_latest.span, "day"); + + let week_latest = 
dao.latest_for(&ctx, "week", "1").unwrap().unwrap(); + assert_eq!(week_latest.span, "week"); + + // Different library returns None + let missing = dao.latest_for(&ctx, "day", "99").unwrap(); + assert!(missing.is_none()); + } + + #[test] + fn latest_for_returns_none_when_no_rows() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let result = dao.latest_for(&ctx, "day", "1").unwrap(); + assert!(result.is_none()); + } + + #[test] + fn exists_fresh_returns_true_when_present() { + let mut dao = setup_dao(); + let ctx = ctx(); + + dao.record_reel(&ctx, &sample_row()).unwrap(); + + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap(); + assert!(exists, "should find the row we just inserted"); + } + + #[test] + fn exists_fresh_returns_false_when_missing() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap(); + assert!(!exists, "should not find anything in empty table"); + } + + #[test] + fn exists_fresh_respects_min_generated_at() { + let mut dao = setup_dao(); + let ctx = ctx(); + + dao.record_reel(&ctx, &sample_row()).unwrap(); + + // Threshold (500_000) is below the row's generated_at (1_000_000) — should exist + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 500_000).unwrap(); + assert!(exists); + + // Threshold (2_000_000) is above the row's generated_at — should not exist + let exists = dao.exists_fresh(&ctx, "day", "1", 1, 2_000_000).unwrap(); + assert!(!exists); + } + + #[test] + fn exists_fresh_respects_render_version() { + let mut dao = setup_dao(); + let ctx = ctx(); + + let row_v1 = InsertablePrecomputedReel { + render_version: 1, + ..sample_row() + }; + dao.record_reel(&ctx, &row_v1).unwrap(); + + assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap()); + assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap()); + } + + #[test] + fn prune_superseded_keeps_newest_and_returns_deleted() { + let mut dao = setup_dao(); + let ctx = ctx(); + // Three day/lib1 reels at increasing timestamps, plus an unrelated one. 
+ for (i, key) in ["k1", "k2", "k3"].iter().enumerate() { + dao.record_reel( + &ctx, + &InsertablePrecomputedReel { + cache_key: key.to_string(), + generated_at: 1_000_000 + i as i64 * 1000, + ..sample_row() + }, + ) + .unwrap(); + } + let other = InsertablePrecomputedReel { + library_key: "2".to_string(), + cache_key: "other".to_string(), + ..sample_row() + }; + dao.record_reel(&ctx, &other).unwrap(); + + // Keep the newest 2 of (day, "1"); k1 (oldest) is superseded. + let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap(); + assert_eq!(deleted.len(), 1); + assert_eq!(deleted[0].cache_key, "k1"); + + // The newest 2 survive; the other-library row is untouched. + let keys = dao.all_cache_keys(&ctx).unwrap(); + assert_eq!(keys.len(), 3); + assert!(keys.contains(&"k2".to_string())); + assert!(keys.contains(&"k3".to_string())); + assert!(keys.contains(&"other".to_string())); + assert!(!keys.contains(&"k1".to_string())); + } + + #[test] + fn prune_superseded_noop_when_within_keep() { + let mut dao = setup_dao(); + let ctx = ctx(); + dao.record_reel(&ctx, &sample_row()).unwrap(); + let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap(); + assert!(deleted.is_empty()); + assert_eq!(dao.all_cache_keys(&ctx).unwrap().len(), 1); + } +} diff --git a/src/database/schema.rs b/src/database/schema.rs index bf5791b..846542d 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -266,6 +266,16 @@ diesel::table! { } } +diesel::table! { + user_ai_prefs (id) { + id -> Integer, + voice -> Nullable, + tz_offset_minutes -> Nullable, + library -> Nullable, + updated_at -> BigInt, + } +} + diesel::table! { video_preview_clips (id) { id -> Integer, @@ -294,6 +304,22 @@ diesel::table! { } } +diesel::table! 
{ + precomputed_reels (id) { + id -> Integer, + span -> Text, + library_key -> Text, + cache_key -> Text, + output_path -> Text, + title -> Text, + media_count -> Integer, + render_version -> Integer, + tz_offset_minutes -> Integer, + voice -> Nullable, + generated_at -> BigInt, + } +} + diesel::joinable!(entity_facts -> photo_insights (source_insight_id)); diesel::joinable!(entity_photo_links -> entities (entity_id)); diesel::joinable!(entity_photo_links -> libraries (library_id)); @@ -322,9 +348,11 @@ diesel::allow_tables_to_appear_in_same_query!( personas, persons, photo_insights, + precomputed_reels, search_history, tagged_photo, tags, + user_ai_prefs, users, video_preview_clips, ); diff --git a/src/database/user_ai_prefs_dao.rs b/src/database/user_ai_prefs_dao.rs new file mode 100644 index 0000000..129ef0c --- /dev/null +++ b/src/database/user_ai_prefs_dao.rs @@ -0,0 +1,206 @@ +use diesel::prelude::*; +use diesel::sqlite::SqliteConnection; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +use crate::database::models::{UpsertUserAiPrefs, UserAiPrefs}; +use crate::database::schema; +use crate::database::{DbError, DbErrorKind, connect}; +use crate::otel::trace_db_call; + +/// Generic single-row table that passively mirrors the latest client AI +/// request parameters (voice, timezone, library). Read by the nightly +/// pre-generation scheduler (Section D) to pick up user preferences. +pub trait UserAiPrefsDao: Sync + Send { + /// Read the single row; `None` when it hasn't been populated yet. + fn get_prefs( + &mut self, + context: &opentelemetry::Context, + ) -> Result, DbError>; + + /// Upsert the single row (id is always 1). 
+ #[allow(dead_code)] + fn upsert_prefs( + &mut self, + context: &opentelemetry::Context, + prefs: &UpsertUserAiPrefs, + ) -> Result<(), DbError>; +} + +pub struct SqliteUserAiPrefsDao { + connection: Arc>, +} + +impl Default for SqliteUserAiPrefsDao { + fn default() -> Self { + Self::new() + } +} + +impl SqliteUserAiPrefsDao { + pub fn new() -> Self { + Self { + connection: Arc::new(Mutex::new(connect())), + } + } + + #[cfg(test)] + pub fn from_connection(conn: Arc>) -> Self { + Self { connection: conn } + } +} + +impl UserAiPrefsDao for SqliteUserAiPrefsDao { + fn get_prefs( + &mut self, + context: &opentelemetry::Context, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_prefs", |_span| { + use schema::user_ai_prefs::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock UserAiPrefsDao"); + + dsl::user_ai_prefs + .first::(connection.deref_mut()) + .optional() + .map_err(|e| anyhow::anyhow!("Failed to get prefs: {}", e)) + }) + .map_err(|e| DbError::log(DbErrorKind::QueryError, e)) + } + + fn upsert_prefs( + &mut self, + context: &opentelemetry::Context, + prefs: &UpsertUserAiPrefs, + ) -> Result<(), DbError> { + trace_db_call(context, "upsert", "upsert_prefs", |_span| { + use schema::user_ai_prefs::dsl; + + let mut connection = self + .connection + .lock() + .expect("Unable to lock UserAiPrefsDao"); + + // Single-row table (id=1): one atomic upsert. The explicit id=1 + // makes the conflict target deterministic so the second call + // updates in place rather than tripping the CHECK(id=1) constraint, + // and real insert errors surface instead of being swallowed into a + // separate update branch. The columns are set explicitly (rather + // than via AsChangeset) so a None field overwrites to NULL — the + // row mirrors the latest request exactly, not a merge of past ones. 
+ diesel::insert_into(dsl::user_ai_prefs) + .values((dsl::id.eq(1), prefs)) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::voice.eq(&prefs.voice), + dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes), + dsl::library.eq(&prefs.library), + dsl::updated_at.eq(&prefs.updated_at), + )) + .execute(connection.deref_mut()) + .map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?; + Ok(()) + }) + .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use diesel::Connection; + use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations}; + + const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!(); + + fn setup_dao() -> SqliteUserAiPrefsDao { + let mut conn = SqliteConnection::establish(":memory:") + .expect("Unable to create in-memory db connection"); + conn.run_pending_migrations(DB_MIGRATIONS) + .expect("Failure running DB migrations"); + SqliteUserAiPrefsDao::from_connection(Arc::new(Mutex::new(conn))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::new() + } + + #[test] + fn get_prefs_returns_none_when_empty() { + let mut dao = setup_dao(); + let result = dao.get_prefs(&ctx()).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn upsert_prefs_inserts_row() { + let mut dao = setup_dao(); + let now = 1_700_000_000i64; + let prefs = UpsertUserAiPrefs { + voice: Some("grandma".to_string()), + tz_offset_minutes: Some(-480), + library: Some("1".to_string()), + updated_at: now, + }; + dao.upsert_prefs(&ctx(), &prefs).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.id, 1); + assert_eq!(row.voice, Some("grandma".to_string())); + assert_eq!(row.tz_offset_minutes, Some(-480)); + assert_eq!(row.library, Some("1".to_string())); + assert_eq!(row.updated_at, now); + } + + #[test] + fn upsert_prefs_replaces_existing() { + let mut dao = setup_dao(); + let now1 = 1_700_000_000i64; + let now2 = 1_800_000_000i64; + + let prefs1 = 
UpsertUserAiPrefs { + voice: Some("grandma".to_string()), + tz_offset_minutes: Some(-480), + library: Some("1".to_string()), + updated_at: now1, + }; + dao.upsert_prefs(&ctx(), &prefs1).unwrap(); + + let prefs2 = UpsertUserAiPrefs { + voice: Some("dad".to_string()), + tz_offset_minutes: Some(-300), + library: None, + updated_at: now2, + }; + dao.upsert_prefs(&ctx(), &prefs2).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.voice, Some("dad".to_string())); + assert_eq!(row.tz_offset_minutes, Some(-300)); + assert!(row.library.is_none()); + assert_eq!(row.updated_at, now2); + } + + #[test] + fn upsert_partial_fields() { + let mut dao = setup_dao(); + let now = 1_700_000_000i64; + + let prefs = UpsertUserAiPrefs { + voice: None, + tz_offset_minutes: Some(-480), + library: None, + updated_at: now, + }; + dao.upsert_prefs(&ctx(), &prefs).unwrap(); + + let row = dao.get_prefs(&ctx()).unwrap().unwrap(); + assert_eq!(row.tz_offset_minutes, Some(-480)); + assert!(row.voice.is_none()); + assert!(row.library.is_none()); + } +} diff --git a/src/duplicates.rs b/src/duplicates.rs index 372415b..32ed92b 100644 --- a/src/duplicates.rs +++ b/src/duplicates.rs @@ -234,7 +234,7 @@ async fn list_exact_handler( let span = global_tracer().start_with_context("duplicates.list_exact", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -265,7 +265,7 @@ async fn list_perceptual_handler( let span = global_tracer().start_with_context("duplicates.list_perceptual", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, 
query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -449,7 +449,7 @@ async fn list_folder_pairs_handler( let span = global_tracer().start_with_context("duplicates.list_folder_pairs", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); diff --git a/src/faces.rs b/src/faces.rs index 3288aa3..f619966 100644 --- a/src/faces.rs +++ b/src/faces.rs @@ -1755,7 +1755,7 @@ async fn stats_handler( let span = global_tracer().start_with_context("faces.stats", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -1782,11 +1782,12 @@ async fn list_faces_handler( let normalized_path = normalize_path(&query.path); // resolve_library_param returns Option<&Library>; clone so the result // is owned (matching the primary_library fallback's type). 
- let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref()) - .ok() - .flatten() - .cloned() - .unwrap_or_else(|| app_state.primary_library().clone()); + let library: Library = + libraries::resolve_library_param_state(&app_state, query.library.as_deref()) + .ok() + .flatten() + .cloned() + .unwrap_or_else(|| app_state.primary_library().clone()); let mut dao = face_dao.lock().expect("face dao lock"); let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) { @@ -1870,7 +1871,7 @@ async fn create_face_handler( } let normalized_path = normalize_path(&body.path); - let library: Library = match libraries::resolve_library_param( + let library: Library = match libraries::resolve_library_param_state( &app_state, body.library.as_ref().map(|i| i.to_string()).as_deref(), ) { @@ -2192,7 +2193,7 @@ async fn list_persons_handler( let span = global_tracer().start_with_context("persons.list", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); @@ -2345,7 +2346,7 @@ async fn person_faces_handler( let context = extract_context_from_request(&request); let span = global_tracer().start_with_context("persons.faces", &context); let span_context = opentelemetry::Context::current_with_span(span); - let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref()) .ok() .flatten() .map(|l| l.id); diff --git a/src/files.rs b/src/files.rs index 59cd49e..920540e 100644 --- a/src/files.rs +++ b/src/files.rs @@ -275,14 +275,14 @@ pub async fn list_photos( // Resolve the optional library filter. Unknown values return 400. 
A // `None` result means "union across all libraries" and downstream // walks iterate every configured library root. - let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) - { - Ok(lib) => lib, - Err(msg) => { - log::warn!("Rejecting /photos request: {}", msg); - return HttpResponse::BadRequest().body(msg); - } - }; + let library = + match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { + Ok(lib) => lib, + Err(msg) => { + log::warn!("Rejecting /photos request: {}", msg); + return HttpResponse::BadRequest().body(msg); + } + }; let span_context = opentelemetry::Context::current_with_span(span); @@ -1238,7 +1238,7 @@ pub async fn list_exif_summary( // Resolve the library filter up front so a bad id/name 400s before we // ever take the DAO mutex. None == union across all libraries. let library_filter = - match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) { + match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(lib) => lib.map(|l| l.id), Err(msg) => { span.set_status(Status::error(msg.clone())); diff --git a/src/geo.rs b/src/geo.rs index 46cc1dc..b54f609 100644 --- a/src/geo.rs +++ b/src/geo.rs @@ -1,4 +1,5 @@ /// Geographic calculation utilities for GPS-based search +use serde::Deserialize; use std::f64; /// Calculate distance between two GPS coordinates using the Haversine formula. @@ -61,6 +62,140 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f ) } +/// A place resolved from a free-text query via forward geocoding. +/// +/// The filter pipeline searches a *circle* (`gps_lat`/`gps_lon`/ +/// `gps_radius_km`), but a place can be anything from a single address to +/// a whole country. 
We collapse Nominatim's bounding box into the smallest +/// circle that circumscribes it (see [`bbox_to_circle`]) so "Portland" and +/// "Italy" both map onto the existing circle filter without a schema change. +#[derive(Debug, Clone, PartialEq)] +pub struct GeoPlace { + /// Nominatim's canonical name for the match (e.g. "Italia"). + pub display_name: String, + /// Centroid latitude in decimal degrees. + pub lat: f64, + /// Centroid longitude in decimal degrees. + pub lon: f64, + /// Radius (km) of a circle centred on the centroid that covers the + /// matched area. Floored to [`MIN_PLACE_RADIUS_KM`] so a point result + /// (whose bounding box is microscopic) still yields a usable circle. + pub radius_km: f64, +} + +/// Floor for a geocoded place's radius. Point results (a street address) +/// come back with a near-zero bounding box; without a floor the circle +/// filter would match nothing. +pub const MIN_PLACE_RADIUS_KM: f64 = 0.5; + +/// Collapse a bounding box into the centroid + circumscribing radius. +/// +/// Input is Nominatim's `boundingbox` order: `(south_lat, north_lat, +/// west_lon, east_lon)`. The radius is the *largest* great-circle distance +/// from the centroid to any of the four corners, so the resulting circle +/// fully covers the box. (The corners aren't equidistant on a sphere — +/// longitude lines converge toward the poles, so the equator-facing edge's +/// corners are farthest; taking the max guarantees coverage in either +/// hemisphere.) +/// +/// Pure and exact (no flooring) so it can be unit-tested directly; callers +/// apply [`MIN_PLACE_RADIUS_KM`] when turning the result into a filter. 
+pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) { + let center_lat = (south + north) / 2.0; + let center_lon = (west + east) / 2.0; + let radius_km = [(south, west), (south, east), (north, west), (north, east)] + .iter() + .map(|(clat, clon)| haversine_distance(center_lat, center_lon, *clat, *clon)) + .fold(0.0_f64, f64::max); + (center_lat, center_lon, radius_km) +} + +/// Raw Nominatim `/search` result. `lat`/`lon` arrive as strings and +/// `boundingbox` as a 4-element string array `[south, north, west, east]`. +#[derive(Deserialize)] +struct NominatimSearchResult { + lat: String, + lon: String, + display_name: String, + boundingbox: Option<[String; 4]>, +} + +/// Forward-geocode a free-text place name to a [`GeoPlace`] via the public +/// OpenStreetMap Nominatim `/search` endpoint. +/// +/// Mirrors `InsightGenerator::reverse_geocode`'s error posture: any network, +/// HTTP, or parse failure returns `None` rather than propagating, so a flaky +/// geocoder degrades the query to "no location filter" instead of failing it. +/// +/// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1 +/// request/second; callers doing this interactively should cache results. 
+pub async fn forward_geocode(query: &str) -> Option { + let q = query.trim(); + if q.is_empty() { + return None; + } + + let client = reqwest::Client::new(); + let response = match client + .get("https://nominatim.openstreetmap.org/search") + .query(&[("format", "json"), ("limit", "1"), ("q", q)]) + .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent + .send() + .await + { + Ok(resp) => resp, + Err(e) => { + log::warn!("Forward geocoding network error for {q:?}: {e}"); + return None; + } + }; + + if !response.status().is_success() { + log::warn!( + "Forward geocoding HTTP error for {q:?}: {}", + response.status() + ); + return None; + } + + let results: Vec = match response.json().await { + Ok(r) => r, + Err(e) => { + log::warn!("Forward geocoding JSON parse error for {q:?}: {e}"); + return None; + } + }; + + let top = results.into_iter().next()?; + let lat: f64 = top.lat.parse().ok()?; + let lon: f64 = top.lon.parse().ok()?; + + // Prefer the bounding box (handles large places); fall back to a + // point + floor radius when Nominatim omits it. + let (center_lat, center_lon, radius_km) = match &top.boundingbox { + Some([s, n, w, e]) => match (s.parse(), n.parse(), w.parse(), e.parse()) { + (Ok(s), Ok(n), Ok(w), Ok(e)) => bbox_to_circle(s, n, w, e), + _ => (lat, lon, 0.0), + }, + None => (lat, lon, 0.0), + }; + + let place = GeoPlace { + display_name: top.display_name, + lat: center_lat, + lon: center_lon, + radius_km: radius_km.max(MIN_PLACE_RADIUS_KM), + }; + log::info!( + "Forward geocoded {q:?} -> {} ({:.4}, {:.4}, r={:.1}km)", + place.display_name, + place.lat, + place.lon, + place.radius_km + ); + Some(place) +} + #[cfg(test)] mod tests { use super::*; @@ -118,4 +253,41 @@ mod tests { distance ); } + + #[test] + fn test_bbox_to_circle_centroid() { + // Symmetric box around (10, 20): centroid should land dead centre. 
+ let (lat, lon, radius) = bbox_to_circle(9.0, 11.0, 19.0, 21.0); + assert!((lat - 10.0).abs() < 1e-9, "centroid lat, got {lat}"); + assert!((lon - 20.0).abs() < 1e-9, "centroid lon, got {lon}"); + assert!(radius > 0.0, "radius should be positive, got {radius}"); + } + + #[test] + fn test_bbox_to_circle_covers_corner() { + // The radius must reach every corner of the box. Verify that no + // corner is farther from the centroid than the returned radius (the + // corners are not equidistant on a sphere, so assert <=, not ==). + let (south, north, west, east) = (40.0, 42.0, -74.0, -72.0); + let (lat, lon, radius) = bbox_to_circle(south, north, west, east); + for (clat, clon) in [(south, west), (south, east), (north, west), (north, east)] { + let d = haversine_distance(lat, lon, clat, clon); + assert!( + d <= radius + 1e-6, + "corner ({clat},{clon}) at {d}km should be within radius {radius}km" + ); + } + } + + #[test] + fn test_bbox_to_circle_country_vs_city_scale() { + // A country-sized box yields a far larger radius than a city-sized + // one — confirming the bbox approach scales with place size. + let (_, _, country) = bbox_to_circle(35.5, 47.1, 6.6, 18.5); // ~Italy + let (_, _, city) = bbox_to_circle(45.4, 45.6, -122.8, -122.5); // ~Portland + assert!( + country > city * 10.0, + "country radius {country}km should dwarf city radius {city}km" + ); + } } diff --git a/src/handlers/image.rs b/src/handlers/image.rs index f0d2310..923fff3 100644 --- a/src/handlers/image.rs +++ b/src/handlers/image.rs @@ -53,7 +53,7 @@ pub async fn get_image( // Resolve library from query param; default to primary so clients that // don't yet send `library=` continue to work. 
- let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -492,7 +492,7 @@ pub async fn get_file_metadata( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -580,7 +580,7 @@ pub async fn set_image_gps( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = libraries::resolve_library_param(&app_state, body.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, body.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -746,7 +746,7 @@ pub async fn get_full_exif( let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("get_full_exif", &context); - let library = libraries::resolve_library_param(&app_state, path.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); @@ -888,7 +888,8 @@ pub async fn set_image_date( let span_context = opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -941,7 +942,8 @@ pub async fn clear_image_date( let span_context = 
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); - let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) { + let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { @@ -1001,7 +1003,7 @@ pub async fn upload_image( // Resolve the optional library selector. Absent → primary library // (backwards-compatible with clients that don't yet send `library=`). let target_library = - match libraries::resolve_library_param(&app_state, query.library.as_deref()) { + match libraries::resolve_library_param_state(&app_state, query.library.as_deref()) { Ok(Some(lib)) => lib, Ok(None) => app_state.primary_library(), Err(msg) => { diff --git a/src/handlers/video.rs b/src/handlers/video.rs index f9f4e64..b56a67e 100644 --- a/src/handlers/video.rs +++ b/src/handlers/video.rs @@ -67,10 +67,11 @@ pub async fn generate_video( let context = extract_context_from_request(&request); let mut span = tracer.start_with_context("generate_video", &context); - let preferred_library = libraries::resolve_library_param(&app_state, body.library.as_deref()) - .ok() - .flatten() - .unwrap_or_else(|| app_state.primary_library()); + let preferred_library = + libraries::resolve_library_param_state(&app_state, body.library.as_deref()) + .ok() + .flatten() + .unwrap_or_else(|| app_state.primary_library()); // Try the resolved library first, then fall back to any other library // that actually contains the file — handles union-mode requests where diff --git a/src/lib.rs b/src/lib.rs index 0ea7ddb..a228472 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,7 @@ pub mod tags; #[cfg(test)] pub mod testhelpers; pub mod thumbnails; +pub mod unified_search; pub mod utils; pub mod video; diff --git a/src/libraries.rs b/src/libraries.rs index 55bf5c1..377b442 100644 --- a/src/libraries.rs +++ b/src/libraries.rs @@ -291,11 
+291,11 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { } /// Resolve a library request parameter (accepts numeric id as string or name) -/// against the configured libraries. Returns `Ok(None)` when the param is +/// against a list of libraries. Returns `Ok(None)` when the param is /// absent, meaning "span all libraries". Returns `Err` when a value is /// provided but does not match any library. pub fn resolve_library_param<'a>( - state: &'a AppState, + libs: &'a [Library], param: Option<&str>, ) -> Result, String> { let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else { @@ -303,18 +303,29 @@ pub fn resolve_library_param<'a>( }; if let Ok(id) = raw.parse::() { - return state - .library_by_id(id) + return libs + .iter() + .find(|l| l.id == id) .map(Some) .ok_or_else(|| format!("unknown library id: {}", id)); } - state - .library_by_name(raw) + libs.iter() + .find(|l| l.name == raw) .map(Some) .ok_or_else(|| format!("unknown library name: {}", raw)) } +/// Resolve a library request parameter against the AppState's libraries. +/// Returns `Ok(None)` when the param is absent, meaning "span all libraries". +/// Returns `Err` when a value is provided but does not match any library. +pub fn resolve_library_param_state<'a>( + state: &'a AppState, + param: Option<&str>, +) -> Result, String> { + resolve_library_param(&state.libraries, param) +} + /// Health of a library at a point in time. Probed at the top of each /// file-watcher tick. 
The `Stale` state is the "be conservative" signal: /// destructive paths (ingest writes, future move-handoff and orphan GC in @@ -662,12 +673,6 @@ mod tests { assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg")); } - fn state_with_libraries(libs: Vec) -> AppState { - let mut state = AppState::test_state(); - state.libraries = libs; - state - } - fn sample_libraries() -> Vec { vec![ Library { @@ -687,52 +692,52 @@ mod tests { ] } - #[actix_rt::test] - async fn resolve_library_param_absent_is_union() { - let state = state_with_libraries(sample_libraries()); - assert!(matches!(resolve_library_param(&state, None), Ok(None))); + #[test] + fn resolve_library_param_absent_is_union() { + let libs = sample_libraries(); + assert!(matches!(resolve_library_param(&libs, None), Ok(None))); } - #[actix_rt::test] - async fn resolve_library_param_empty_or_whitespace_is_union() { - let state = state_with_libraries(sample_libraries()); - assert!(matches!(resolve_library_param(&state, Some("")), Ok(None))); + #[test] + fn resolve_library_param_empty_or_whitespace_is_union() { + let libs = sample_libraries(); + assert!(matches!(resolve_library_param(&libs, Some("")), Ok(None))); assert!(matches!( - resolve_library_param(&state, Some(" ")), + resolve_library_param(&libs, Some(" ")), Ok(None) )); } - #[actix_rt::test] - async fn resolve_library_param_numeric_id_matches() { - let state = state_with_libraries(sample_libraries()); - let lib = resolve_library_param(&state, Some("7")) + #[test] + fn resolve_library_param_numeric_id_matches() { + let libs = sample_libraries(); + let lib = resolve_library_param(&libs, Some("7")) .expect("valid id") .expect("some library"); assert_eq!(lib.id, 7); assert_eq!(lib.name, "archive"); } - #[actix_rt::test] - async fn resolve_library_param_name_matches() { - let state = state_with_libraries(sample_libraries()); - let lib = resolve_library_param(&state, Some("main")) + #[test] + fn resolve_library_param_name_matches() { + let libs = 
sample_libraries(); + let lib = resolve_library_param(&libs, Some("main")) .expect("valid name") .expect("some library"); assert_eq!(lib.id, 1); } - #[actix_rt::test] - async fn resolve_library_param_unknown_id_errs() { - let state = state_with_libraries(sample_libraries()); - let err = resolve_library_param(&state, Some("999")).unwrap_err(); + #[test] + fn resolve_library_param_unknown_id_errs() { + let libs = sample_libraries(); + let err = resolve_library_param(&libs, Some("999")).unwrap_err(); assert!(err.contains("unknown library id")); } - #[actix_rt::test] - async fn resolve_library_param_unknown_name_errs() { - let state = state_with_libraries(sample_libraries()); - let err = resolve_library_param(&state, Some("missing")).unwrap_err(); + #[test] + fn resolve_library_param_unknown_name_errs() { + let libs = sample_libraries(); + let err = resolve_library_param(&libs, Some("missing")).unwrap_err(); assert!(err.contains("unknown library name")); } diff --git a/src/main.rs b/src/main.rs index 8b56efd..7faa959 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,6 +54,7 @@ mod perceptual_hash; mod state; mod tags; mod thumbnails; +mod unified_search; mod utils; mod video; mod watcher; @@ -62,6 +63,7 @@ mod knowledge; mod memories; mod otel; mod personas; +mod reels; mod service; #[cfg(test)] mod testhelpers; @@ -266,6 +268,11 @@ fn main() -> std::io::Result<()> { } } + // Spawn the nightly pre-generation scheduler (Section D). + reels::spawn_pregen_scheduler(app_state.clone()).await; + // Spawn the on-disk reel-cache sweeper (bounds pre-gen + on-demand reels). 
+ reels::spawn_reel_cache_sweeper(app_state.clone()).await; + HttpServer::new(move || { let user_dao = SqliteUserDao::new(); let favorites_dao = SqliteFavoriteDao::new(); @@ -327,6 +334,13 @@ fn main() -> std::io::Result<()> { web::resource("/photos/search") .route(web::get().to(clip_search::search_photos)), ) + .service( + // Unified natural-language search: LLM translates the + // query into structured filters + a semantic term, then + // filters constrain and CLIP ranks. See src/unified_search.rs. + web::resource("/photos/search/unified") + .route(web::get().to(unified_search::unified_search::)), + ) .service(web::resource("/file/move").post(move_file::)) .service(handlers::image::get_image) .service(handlers::image::upload_image) @@ -344,6 +358,11 @@ fn main() -> std::io::Result<()> { .service(handlers::image::clear_image_date) .service(handlers::image::get_full_exif) .service(memories::list_memories) + .service(reels::create_reel_handler) + .service(reels::reel_status_handler) + .service(reels::reel_video_handler) + .service(reels::precomputed_reel_handler) + .service(reels::precomputed_video_handler) .service(ai::generate_insight_handler) .service(ai::generate_agentic_insight_handler) .service(ai::generation_status_handler) diff --git a/src/memories.rs b/src/memories.rs index 4b1682b..2b1f473 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -349,12 +349,6 @@ pub async fn list_memories( opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); let span_mode = q.span.unwrap_or(MemoriesSpan::Day); - let span_token = match span_mode { - MemoriesSpan::Day => "day", - MemoriesSpan::Week => "week", - MemoriesSpan::Month => "month", - }; - let years_back: i32 = DEFAULT_YEARS_BACK; // The SQL filter expects a signed offset in minutes from UTC; default // 0 (UTC) when the client didn't send a hint. 
We also keep a chrono @@ -366,18 +360,66 @@ pub async fn list_memories( .timezone_offset_minutes .and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60)); - debug!( - "list_memories: span={:?} tz_offset_min={} years_back={}", - span_mode, tz_offset_minutes, years_back - ); - - let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { - Ok(lib) => lib, + let items = match gather_memory_items( + &app_state, + &exif_dao, + &span_context, + span_mode, + tz_offset_minutes, + client_timezone, + q.library.as_deref(), + ) { + Ok(items) => items, Err(msg) => { warn!("Rejecting /memories request: {}", msg); return HttpResponse::BadRequest().body(msg); } }; + + span.add_event( + "memories_scanned", + vec![ + KeyValue::new("span", format!("{:?}", span_mode)), + KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()), + KeyValue::new("result_count", items.len().to_string()), + KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), + KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), + ], + ); + span.set_status(Status::Ok); + + HttpResponse::Ok().json(MemoriesResponse { items }) +} + +/// Resolve an "on this day/week/month across past years" window into an +/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the +/// memory-reel selector so both honour the same library resolution, per-library +/// exclusions, timezone handling, and sort order. Returns `Err(message)` only +/// when the `library` param is invalid (callers map that to 400); per-library +/// query/lock failures are logged and skipped, matching the handler's +/// best-effort behaviour. 
+pub fn gather_memory_items( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + span_mode: MemoriesSpan, + tz_offset_minutes: i32, + client_timezone: Option, + library_param: Option<&str>, +) -> Result, String> { + let span_token = match span_mode { + MemoriesSpan::Day => "day", + MemoriesSpan::Week => "week", + MemoriesSpan::Month => "month", + }; + let years_back: i32 = DEFAULT_YEARS_BACK; + + debug!( + "gather_memory_items: span={:?} tz_offset_min={} years_back={}", + span_mode, tz_offset_minutes, years_back + ); + + let library = crate::libraries::resolve_library_param_state(app_state, library_param)?; let libraries_to_scan: Vec<&crate::libraries::Library> = match library { Some(lib) => vec![lib], None => app_state.libraries.iter().collect(), @@ -394,7 +436,7 @@ pub async fn list_memories( let rows = match exif_dao.lock() { Ok(mut dao) => match dao.get_memories_in_window( - &span_context, + span_context, lib.id, span_token, years_back, @@ -469,21 +511,7 @@ pub async fn list_memories( } } - let items: Vec = memories_with_dates.into_iter().map(|(m, _)| m).collect(); - - span.add_event( - "memories_scanned", - vec![ - KeyValue::new("span", format!("{:?}", span_mode)), - KeyValue::new("years_back", years_back.to_string()), - KeyValue::new("result_count", items.len().to_string()), - KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), - KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), - ], - ); - span.set_status(Status::Ok); - - HttpResponse::Ok().json(MemoriesResponse { items }) + Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect()) } #[cfg(test)] diff --git a/src/reels/mod.rs b/src/reels/mod.rs new file mode 100644 index 0000000..afe2ced --- /dev/null +++ b/src/reels/mod.rs @@ -0,0 +1,1568 @@ +//! Memory reels: render an MP4 slideshow of a selection of photos with an +//! LLM-written, voice-cloned narration over it. +//! +//! 
Pipeline: a [`selector`] resolves *which* photos (and the reel metadata), +//! the [`script`] module writes per-photo narration via the LLM, each line is +//! synthesized to speech, and [`render`] assembles the stills + narration into +//! one MP4. Jobs run in the background (mirroring the TTS speech-job registry) +//! because a reel takes minutes; the finished MP4 is cached on disk keyed by +//! the selection so a repeat request is instant. +//! +//! Phase 1 is on-demand and photos-only. The segment model is media-typed so a +//! video-clip segment (phase 2) and a nightly pre-render (phase 3) slot in +//! without reworking the pipeline. + +pub mod render; +pub mod script; +pub mod selector; + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::{LazyLock, Mutex, Mutex as StdMutex}; +use std::time::{Duration, Instant}; + +use actix_files::NamedFile; +use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web}; +use anyhow::{Context, anyhow}; +use chrono::{DateTime, Datelike, Timelike}; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use uuid::Uuid; + +use crate::data::Claims; +use crate::database::{ExifDao, InsightDao}; +use crate::libraries::{Library, resolve_library_param}; +use crate::memories::MemoriesSpan; +use crate::otel::extract_context_from_request; +use crate::state::AppState; +use selector::ReelSelector; + +// --- Precomputed reel age limits (hours) ------------------------------------- + +/// Maximum age for a precomputed day reel before it's considered stale. +const REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS: u64 = 26; +/// Maximum age for a precomputed week reel. +const REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS: u64 = 192; +/// Maximum age for a precomputed month reel. +const REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS: u64 = 768; + +/// How many precomputed reels to keep per (span, library). 
The newest is the +/// one served; one extra is a grace window so a regen mid-flight (or a client +/// that started a fetch just before the swap) isn't left without a file. +const PREGEN_KEEP_PER_SCOPE: usize = 2; + +/// On-disk reel cache sweep: an unreferenced reel MP4 older than this is +/// removed. Catches the on-demand cache (which has no ledger row and otherwise +/// grows forever) and any pre-gen orphans. Tunable via `REEL_CACHE_MAX_AGE_DAYS`. +const REEL_CACHE_MAX_AGE_DAYS_DEFAULT: u64 = 7; +/// Interval between on-disk cache sweeps. +const REEL_CACHE_SWEEP_INTERVAL_SECS: u64 = 24 * 3600; +/// Transient render artifacts (`.mp4.tmp`, `.concat.txt`, orphaned sidecars) +/// older than this are leftovers from a crashed render and safe to remove. +const REEL_TMP_MAX_AGE_SECS: u64 = 3600; + +/// Resolve a library request parameter to a stable key string. +/// Returns the library's id as a string when found, or `"all"` when +/// the param is absent or the lookup fails. +pub fn normalize_library_key(libs: &[Library], param: Option<&str>) -> String { + match resolve_library_param(libs, param) { + Ok(Some(lib)) => lib.id.to_string(), + _ => "all".to_string(), + } +} + +/// Best-effort: mirror the latest client reel params into `user_ai_prefs` +/// so the nightly pre-gen scheduler can pick them up. Never fails the +/// caller regardless of DB errors. +fn capture_prefs( + app_state: &AppState, + req: &web::Json, + library_param: Option<&str>, +) -> Result<(), anyhow::Error> { + use crate::database::models::UpsertUserAiPrefs; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs(); + let library = match library_param { + Some(p) if !p.is_empty() => { + // Resolve to the actual library id for the DB row. 
+ normalize_library_key(&app_state.libraries, Some(p)) + } + _ => "all".to_string(), + }; + let mut dao = app_state.user_ai_prefs_dao.lock().expect("lock"); + let ctx = opentelemetry::Context::new(); + dao.upsert_prefs( + &ctx, + &UpsertUserAiPrefs { + voice: req.voice.clone().filter(|s| !s.is_empty()), + tz_offset_minutes: Some( + req.timezone_offset_minutes + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()), + ), + library: Some(library), + updated_at: now as i64, + }, + ) + .map_err(|e| anyhow::anyhow!("failed to upsert user_ai_prefs: {e}")) +} + +/// Which scripting strategy to use for the reel narration. +#[derive(Clone, Copy)] +pub enum ScripterMode { + /// Fast path: single LLM call via the direct client. + Fast, + /// Agentic path: resolves the backend through the InsightGenerator + /// (honouring LLM_BACKEND, model overrides, etc.). Falls back to + /// Fast on error so a scripting failure never sinks a reel. + Agentic, +} + +/// Progress callback type — receives a static-stage label. +pub type ProgressFn<'a> = dyn Fn(&'static str) + Send + Sync + 'a; + +/// The media behind one shot: a still photo, or a short section of a source +/// video (played with its live audio ducked under the narration). Both carry +/// just the library-relative path; the renderer applies fixed clip framing +/// (start/length) from constants. +#[derive(Debug, Clone)] +pub enum SegmentMedia { + Photo { rel_path: String, library_id: i32 }, + Clip { rel_path: String, library_id: i32 }, +} + +impl SegmentMedia { + fn rel_path(&self) -> &str { + match self { + SegmentMedia::Photo { rel_path, .. } | SegmentMedia::Clip { rel_path, .. } => rel_path, + } + } + fn library_id(&self) -> i32 { + match self { + SegmentMedia::Photo { library_id, .. } | SegmentMedia::Clip { library_id, .. } => { + *library_id + } + } + } +} + +/// A beat: one narration line over its media. 
A photo beat holds one still (a +/// held shot) or several (a quick burst that flashes through moments of an +/// event while the line is read). A clip beat holds a single video clip. Either +/// way one narration line covers the whole beat, so a week/month reel can +/// *show* everything it spans without a narration line — and the seconds that +/// come with it — per item. +#[derive(Debug, Clone)] +pub struct PlannedBeat { + pub media: Vec, + pub date: Option, + pub insight_title: Option, + pub insight_summary: Option, + /// GPS coordinates of the lead media item, when available. + pub gps: Option<(f64, f64)>, +} + +impl PlannedBeat { + /// Human date for the prompt, e.g. "June 12, 2019". `None` when undated. + pub fn date_label(&self) -> Option { + let ts = self.date?; + let dt = DateTime::from_timestamp(ts, 0)?; + Some(dt.format("%B %-d, %Y").to_string()) + } + + /// True when this beat is a single video clip (vs one or more photos). + pub fn is_clip(&self) -> bool { + matches!(self.media.as_slice(), [SegmentMedia::Clip { .. }]) + } +} + +/// Reel-wide metadata the scripter uses for framing. +#[derive(Debug, Clone)] +pub struct ReelMeta { + pub span: MemoriesSpan, + pub years: Vec, +} + +impl ReelMeta { + /// Natural-language phrase for the span, e.g. "on this day". + pub fn span_phrase(&self) -> &'static str { + match self.span { + MemoriesSpan::Day => "on this day", + MemoriesSpan::Week => "this week", + MemoriesSpan::Month => "this month", + } + } +} + +// --- Job registry ------------------------------------------------------------ +// +// In-memory, same shape as the TTS speech-job registry: a reel takes minutes, +// too long to hold one HTTP request from a phone. POST /reels returns a job id; +// the client polls GET /reels/{id} until the video URL appears. The heavy +// artifact (the MP4) lives on disk, not in this map — jobs only carry status + +// the output path. 
State is intentionally not durable across restarts; the +// on-disk cache is what makes a repeat request cheap, not the registry. + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ReelJobStatus { + Queued, + Running, + Done, + Error, +} + +impl ReelJobStatus { + fn is_terminal(self) -> bool { + matches!(self, Self::Done | Self::Error) + } +} + +struct ReelJob { + status: ReelJobStatus, + /// Coarse progress label for the client ("scripting", "narrating", …). + stage: &'static str, + title: Option, + output_path: Option, + error: Option, + created_at: Instant, + finished_at: Option, + abort: Option, +} + +/// Finished jobs linger so a client that lost connectivity can still collect +/// the result; anything older than MAX_AGE is dropped (aborted first if somehow +/// still running). Swept lazily on each create. +const REEL_JOB_RESULT_TTL: Duration = Duration::from_secs(30 * 60); +const REEL_JOB_MAX_AGE: Duration = Duration::from_secs(60 * 60); + +static REEL_JOBS: LazyLock>> = + LazyLock::new(|| StdMutex::new(HashMap::new())); + +fn sweep_stale_jobs(jobs: &mut HashMap, now: Instant) { + jobs.retain(|_, job| { + let result_expired = job + .finished_at + .is_some_and(|t| now.duration_since(t) >= REEL_JOB_RESULT_TTL); + let too_old = now.duration_since(job.created_at) >= REEL_JOB_MAX_AGE; + if too_old && let Some(h) = job.abort.take() { + h.abort(); + } + !(result_expired || too_old) + }); +} + +fn with_job(id: Uuid, f: impl FnOnce(&mut ReelJob) -> R) -> Option { + REEL_JOBS.lock().unwrap().get_mut(&id).map(f) +} + +fn set_stage(id: Uuid, stage: &'static str) { + with_job(id, |job| { + if !job.status.is_terminal() { + job.status = ReelJobStatus::Running; + job.stage = stage; + } + }); +} + +/// Move a job to a terminal state (first terminal write wins). 
+fn finish_job( + id: Uuid, + status: ReelJobStatus, + title: Option, + output_path: Option, + error: Option, +) { + with_job(id, |job| { + if job.status.is_terminal() { + return; + } + job.status = status; + job.stage = match status { + ReelJobStatus::Done => "done", + _ => "error", + }; + job.title = title; + job.output_path = output_path; + job.error = error; + job.finished_at = Some(Instant::now()); + job.abort = None; + }); +} + +// --- On-disk cache ----------------------------------------------------------- + +/// Render version: bump to invalidate every cached reel after a rendering / +/// scripting change that should produce a fresh result. +const RENDER_VERSION: u32 = 7; + +/// Narration expressiveness — Chatterbox's `exaggeration` knob. A slight bump +/// over the ~0.5 default warms up otherwise-flat narration without over-acting; +/// tune via `REEL_TTS_EXAGGERATION` (0.25–2.0). +fn reel_tts_exaggeration() -> f32 { + std::env::var("REEL_TTS_EXAGGERATION") + .ok() + .and_then(|s| s.trim().parse::().ok()) + .filter(|x| x.is_finite()) + .unwrap_or(0.6) +} + +/// Cache key over everything that determines *which* media and *how* it's +/// voiced — but not the (non-deterministic) narration text. Same inputs → same +/// MP4 served instantly. blake3 keeps it filesystem-safe and collision-free. +fn cache_key(selector: &ReelSelector, media: &[SegmentMedia], voice: Option<&str>) -> String { + let mut buf = format!( + "v{}|{}|voice={}|", + RENDER_VERSION, + selector.descriptor(), + voice.unwrap_or("default") + ); + for m in media { + // Tag photo vs clip so the same path used as a still and as a video + // clip produce different keys. + let tag = match m { + SegmentMedia::Photo { .. } => 'P', + SegmentMedia::Clip { .. 
} => 'C', + }; + buf.push_str(&format!("{tag}{}:{}|", m.library_id(), m.rel_path())); + } + blake3::hash(buf.as_bytes()).to_hex().to_string() +} + +fn reel_mp4_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.mp4")) +} + +fn reel_sidecar_path(app_state: &AppState, key: &str) -> PathBuf { + Path::new(&app_state.reels_path).join(format!("{key}.json")) +} + +#[derive(Serialize, Deserialize)] +struct ReelSidecar { + title: String, +} + +// --- HTTP types -------------------------------------------------------------- + +#[derive(Debug, Deserialize)] +pub struct CreateReelRequest { + #[serde(default)] + pub span: Option, + #[serde(default)] + pub timezone_offset_minutes: Option, + #[serde(default)] + pub library: Option, + /// Cloned TTS voice for the narration; server default when omitted. + #[serde(default)] + pub voice: Option, + /// Cap on photos in the reel (clamped server-side). + #[serde(default)] + pub max_segments: Option, +} + +#[derive(Debug, Serialize)] +pub struct ReelJobCreatedResponse { + pub job_id: String, + pub status: ReelJobStatus, +} + +#[derive(Debug, Serialize)] +pub struct ReelStatusResponse { + pub job_id: String, + pub status: ReelJobStatus, + pub stage: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub video_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +/// Response shape for `GET /reels/precomputed`. +#[derive(Debug, Serialize)] +pub struct PrecomputedReelResponse { + pub video_url: String, + pub title: String, +} + +// --- Handlers ---------------------------------------------------------------- + +/// POST /reels — start (or instantly serve from cache) a memory reel for the +/// requested span. Returns 202 + a job id; the client polls GET /reels/{id}. 
+#[post("/reels")] +pub async fn create_reel_handler( + http_request: HttpRequest, + _claims: Claims, + req: web::Json, + app_state: web::Data, + exif_dao: web::Data>>, + insight_dao: web::Data>>, +) -> impl Responder { + let span_context = extract_context_from_request(&http_request); + + if app_state.llamacpp.is_none() { + return HttpResponse::ServiceUnavailable().json(json!({ + "error": "Reel narration needs the LLM/TTS backend (set LLAMA_SWAP_URL)" + })); + } + + let span = req.span.unwrap_or(MemoriesSpan::Day); + let max_segments = req.max_segments.unwrap_or(selector::DEFAULT_MAX_SEGMENTS); + let selector = ReelSelector::Memories { + span, + tz_offset_minutes: req.timezone_offset_minutes.unwrap_or(0), + library: req.library.clone(), + max_segments, + }; + + // Cheap pass: resolve the media set for the cache key and the emptiness + // check. Insight enrichment + scripting happen in the background job. + let (planned, meta) = match selector::resolve(&app_state, &exif_dao, &span_context, &selector) { + Ok(r) => r, + Err(msg) => return HttpResponse::BadRequest().body(msg), + }; + if planned.is_empty() { + return HttpResponse::UnprocessableEntity().json(json!({ + "error": "No photo memories found for this span" + })); + } + + // Flatten every media item across beats (in order) into the cache key — the + // key tracks exactly which photos/clips appear and in what sequence. + let media: Vec = planned.iter().flat_map(|b| b.media.clone()).collect(); + let voice = req.voice.clone().filter(|s| !s.is_empty()); + let key = cache_key(&selector, &media, voice.as_deref()); + + let job_id = Uuid::new_v4(); + log::info!( + "reel {job_id}: request span={:?} → {} beats, {} photos", + span, + planned.len(), + media.len() + ); + + // Cache hit: register an already-Done job pointing at the existing MP4 so + // the client's first poll returns the video URL immediately. 
+ let mp4 = reel_mp4_path(&app_state, &key); + if mp4.exists() { + log::info!("reel {job_id}: cache hit, serving existing reel"); + let title = std::fs::read(reel_sidecar_path(&app_state, &key)) + .ok() + .and_then(|b| serde_json::from_slice::(&b).ok()) + .map(|s| s.title); + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Done, + stage: "done", + title, + output_path: Some(mp4), + error: None, + created_at: Instant::now(), + finished_at: Some(Instant::now()), + abort: None, + }, + ); + // Capture params for passive prefs mirror (best-effort, never fails). + let _ = capture_prefs(&app_state, &req, req.library.as_deref()); + return HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Done, + }); + } + + { + let mut jobs = REEL_JOBS.lock().unwrap(); + sweep_stale_jobs(&mut jobs, Instant::now()); + jobs.insert( + job_id, + ReelJob { + status: ReelJobStatus::Queued, + stage: "queued", + title: None, + output_path: None, + error: None, + created_at: Instant::now(), + finished_at: None, + abort: None, + }, + ); + } + log::info!("reel {job_id}: queued for generation"); + + let state = app_state.clone(); + let insight_dao = insight_dao.clone(); + let exif_dao = exif_dao.clone(); + let handle = tokio::spawn(async move { + match run_reel_job( + &state, + &insight_dao, + &exif_dao, + job_id, + planned, + meta, + voice, + &key, + ) + .await + { + Ok((title, path)) => { + finish_job(job_id, ReelJobStatus::Done, Some(title), Some(path), None) + } + Err(e) => { + log::error!("reel job {job_id} failed: {e:?}"); + finish_job( + job_id, + ReelJobStatus::Error, + None, + None, + Some(format!("{e}")), + ) + } + } + }); + with_job(job_id, |job| job.abort = Some(handle.abort_handle())); + + // Capture params for passive prefs mirror (best-effort, never fails). 
+ let _ = capture_prefs(&app_state, &req, req.library.as_deref()); + + HttpResponse::Accepted().json(ReelJobCreatedResponse { + job_id: job_id.to_string(), + status: ReelJobStatus::Queued, + }) +} + +/// GET /reels/{id} — poll a reel job. Done jobs carry a `video_url`. +#[get("/reels/{id}")] +pub async fn reel_status_handler(_claims: Claims, path: web::Path) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let resp = with_job(id, |job| ReelStatusResponse { + job_id: id_str.clone(), + status: job.status, + stage: job.stage.to_string(), + title: job.title.clone(), + video_url: matches!(job.status, ReelJobStatus::Done) + .then(|| format!("/reels/{id_str}/video")), + error: job.error.clone(), + }); + match resp { + Some(r) => HttpResponse::Ok().json(r), + None => HttpResponse::NotFound().json(json!({ "error": "job not found or expired" })), + } +} + +/// GET /reels/{id}/video — stream the finished MP4 (supports range requests via +/// NamedFile, so the mobile player can seek). +#[get("/reels/{id}/video")] +pub async fn reel_video_handler( + _claims: Claims, + request: HttpRequest, + path: web::Path, +) -> impl Responder { + let id_str = path.into_inner(); + let Ok(id) = Uuid::parse_str(&id_str) else { + return HttpResponse::BadRequest().json(json!({ "error": "invalid job id" })); + }; + let output = with_job(id, |job| job.output_path.clone()).flatten(); + let Some(path) = output else { + return HttpResponse::NotFound().json(json!({ "error": "reel not ready" })); + }; + match NamedFile::open(&path) { + Ok(file) => file.into_response(&request), + Err(e) => { + log::error!("opening reel mp4 {path:?} failed: {e:?}"); + HttpResponse::NotFound().json(json!({ "error": "reel file missing" })) + } + } +} + +/// GET /reels/precomputed?span=&library= +/// +/// Look up the latest precomputed reel for the given span and library key. 
+/// Validity gate (all must hold, else 404): +/// 1. `render_version == RENDER_VERSION` +/// 2. `output_path` exists on disk +/// 3. age <= max_age(span) (Day 26h, Week 8d, Month 32d) +/// +/// Returns `{ video_url: "/reels/by-key/{cache_key}/video", title }`. +#[get("/reels/precomputed")] +pub async fn precomputed_reel_handler( + _claims: Claims, + query: web::Query>, + app_state: web::Data, +) -> impl Responder { + let span = query.get("span").map(|s| s.as_str()).unwrap_or("day"); + let library_key = normalize_library_key( + &app_state.libraries, + query.get("library").map(|s| s.as_str()), + ); + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs() as i64; + + let max_age_hours = match span { + "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS as i64, + "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS as i64, + _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS as i64, + }; + let min_generated_at = now - (max_age_hours * 3600); + + let ctx = opentelemetry::Context::new(); + let mut dao = app_state + .precomputed_reel_dao + .lock() + .expect("Unable to lock PrecomputedReelDao"); + + // Fast existence gate: is there a fresh row at all? + if !dao + .exists_fresh( + &ctx, + span, + &library_key, + RENDER_VERSION as i32, + min_generated_at, + ) + .unwrap_or(false) + { + return HttpResponse::NotFound().json(json!({ "error": "no precomputed reel found" })); + } + + // Fetch the latest row for full validity checks. + let reel = match dao.latest_for(&ctx, span, &library_key) { + Ok(Some(r)) => r, + _ => { + return HttpResponse::NotFound().json(json!({ "error": "no precomputed reel found" })); + } + }; + + // Validity gate 1: render version must match. + if reel.render_version != RENDER_VERSION as i32 { + return HttpResponse::NotFound() + .json(json!({ "error": "precomputed reel is stale (render version mismatch)" })); + } + + // Validity gate 2: output_path must exist. 
+ let output = std::path::Path::new(&reel.output_path); + if !output.exists() { + return HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" })); + } + + // Validity gate 3: age <= max_age (re-checked via min_generated_at). + if reel.generated_at < min_generated_at { + return HttpResponse::NotFound().json(json!({ "error": "precomputed reel has expired" })); + } + + HttpResponse::Ok().json(PrecomputedReelResponse { + video_url: format!("/reels/by-key/{}/video", reel.cache_key), + title: reel.title, + }) +} + +/// GET /reels/by-key/{key}/video — stream a precomputed reel MP4 by cache key. +#[get("/reels/by-key/{key}/video")] +pub async fn precomputed_video_handler( + _claims: Claims, + request: HttpRequest, + path: web::Path, + app_state: web::Data, +) -> impl Responder { + let key = path.into_inner(); + let mp4 = reel_mp4_path(&app_state, &key); + match NamedFile::open(&mp4) { + Ok(file) => file.into_response(&request), + Err(e) => { + log::error!("opening precomputed reel {key} failed: {e:?}"); + HttpResponse::NotFound().json(json!({ "error": "precomputed reel file missing" })) + } + } +} + +// --- Pipeline ---------------------------------------------------------------- + +/// Run the full reel pipeline: enrich → script → narrate → render → concat, +/// then publish the MP4 into the cache. Returns (title, mp4_path). +/// +/// The `scripter` parameter controls which narration-generation strategy is +/// used (fast single-call vs. agentic backend resolution). On scripting +/// failure in Agentic mode the pipeline falls back to the fast path so a +/// single LLM failure never sinks a reel. 
+pub(crate) async fn produce_reel( + app_state: &AppState, + insight_dao: &Mutex>, + exif_dao: &Mutex>, + mut planned: Vec, + meta: ReelMeta, + voice: Option, + key: &str, + scripter: ScripterMode, + progress: Option<&ProgressFn<'_>>, +) -> anyhow::Result<(String, PathBuf)> { + let started = Instant::now(); + let total_photos: usize = planned.iter().map(|b| b.media.len()).sum(); + log::info!( + "reel produce_reel: starting — span {:?}, {} beats, {} photos, voice={}", + meta.span, + planned.len(), + total_photos, + voice.as_deref().unwrap_or("default") + ); + + let client = app_state + .llamacpp + .as_ref() + .ok_or_else(|| anyhow::anyhow!("TTS/LLM backend not configured"))? + .clone(); + + // 1. Enrich each beat with its lead photo's cached insight, then script + // (one LLM call → one narration line per beat). + emit_progress(progress, "scripting"); + log::info!("reel produce_reel: scripting narration via LLM…"); + let span_context = opentelemetry::Context::new(); + selector::enrich(insight_dao, exif_dao, &span_context, &mut planned); + let script = match scripter { + ScripterMode::Fast => script::generate_script(&client, &meta, &planned).await?, + ScripterMode::Agentic => { + match script::generate_script_agentic(&app_state.insight_generator, &meta, &planned) + .await + { + Ok(s) => s, + Err(e) => { + log::warn!( + "reel produce_reel: agentic script failed, falling back to fast: {e}" + ); + script::generate_script(&client, &meta, &planned).await? + } + } + } + }; + log::info!( + "reel produce_reel: scripted \"{}\" ({} lines)", + script.title, + script.lines.len() + ); + + // 2. Narrate each beat's line and 3. render the beat (its photos shown in + // sequence under that one narration). A beat whose audio or render fails + // is skipped (logged) rather than sinking the whole reel — handles an + // odd HEIC/corrupt file gracefully. 
+ emit_progress(progress, "narrating"); + let work = tempfile::tempdir().context("creating reel work dir")?; + let nvenc = render::is_nvenc_available().await; + log::info!( + "reel produce_reel: narrating + rendering {} beats (encoder: {})", + planned.len(), + if nvenc { "nvenc" } else { "cpu" } + ); + let opts = render::SegmentOpts { + nvenc, + ..Default::default() + }; + + let beat_total = planned.len(); + let mut beat_files: Vec = Vec::new(); + for (i, (beat, line)) in planned.iter().zip(script.lines.iter()).enumerate() { + // Resolve the beat's media to absolute paths; drop any that don't + // resolve. An empty beat is skipped. + let paths: Vec = beat + .media + .iter() + .filter_map(|m| resolve_media_path(app_state, m)) + .collect(); + if paths.is_empty() { + log::warn!("reel produce_reel: skipping beat {i}, no media paths resolved"); + continue; + } + + let audio_bytes = match crate::ai::tts::synthesize_serialized( + &client, + line, + voice.as_deref(), + "wav", + Some(reel_tts_exaggeration()), + ) + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("reel produce_reel: skipping beat {i}, TTS failed: {e}"); + continue; + } + }; + let audio_path = work.path().join(format!("narration_{i:03}.wav")); + if let Err(e) = tokio::fs::write(&audio_path, &audio_bytes).await { + log::warn!("reel produce_reel: skipping beat {i}, writing audio failed: {e}"); + continue; + } + + let narration_secs = + crate::video::ffmpeg::get_duration_seconds(&audio_path.to_string_lossy()) + .await + .ok() + .flatten() + .unwrap_or(render::MIN_SEGMENT_SECONDS); + + emit_progress(progress, "rendering"); + let beat_out = work.path().join(format!("beat_{i:03}.mp4")); + let render_result = if beat.is_clip() { + log::info!( + "reel produce_reel: beat {}/{} — video clip, narration {:.1}s", + i + 1, + beat_total, + narration_secs + ); + render::render_clip_beat(&paths[0], &audio_path, &beat_out, narration_secs, &opts).await + } else { + log::info!( + "reel produce_reel: beat {}/{} — {} photo(s), 
narration {:.1}s", + i + 1, + beat_total, + paths.len(), + narration_secs + ); + render::render_beat(&paths, &audio_path, &beat_out, narration_secs, &opts).await + }; + if let Err(e) = render_result { + log::warn!("reel produce_reel: skipping beat {i}, render failed: {e}"); + continue; + } + beat_files.push(beat_out.to_string_lossy().to_string()); + } + + let segment_files = beat_files; + if segment_files.is_empty() { + return Err(anyhow!("no beats rendered successfully")); + } + + // 4. Concat into the cache. Write to a temp name in the reels dir, then + // rename atomically (same filesystem) so a reader never sees a partial. + emit_progress(progress, "rendering"); + log::info!( + "reel produce_reel: joining {} rendered beats into the final reel", + segment_files.len() + ); + std::fs::create_dir_all(&app_state.reels_path).context("creating reels dir")?; + let final_path = reel_mp4_path(app_state, key); + let tmp_path = final_path.with_extension("mp4.tmp"); + render::concat_segments(&segment_files, &tmp_path).await?; + std::fs::rename(&tmp_path, &final_path).context("publishing reel mp4")?; + + // Sidecar carries the title so a future cache hit can return it without + // re-running the pipeline. + let sidecar = serde_json::to_vec(&ReelSidecar { + title: script.title.clone(), + }) + .context("serializing reel sidecar")?; + let _ = std::fs::write(reel_sidecar_path(app_state, key), sidecar); + + log::info!( + "reel produce_reel: done in {:.1}s — {} beats → {}", + started.elapsed().as_secs_f64(), + segment_files.len(), + final_path.display() + ); + Ok((script.title, final_path)) +} + +/// Emit a progress stage label via the optional callback. +fn emit_progress(progress: Option<&ProgressFn<'_>>, stage: &'static str) { + if let Some(p) = progress { + p(stage); + } +} + +/// Run the full reel pipeline and publish the MP4 into the cache. +/// Thin wrapper around [`produce_reel`] that wires up job-stage tracking. 
+async fn run_reel_job( + app_state: &AppState, + insight_dao: &Mutex>, + exif_dao: &Mutex>, + job_id: Uuid, + planned: Vec, + meta: ReelMeta, + voice: Option, + key: &str, +) -> anyhow::Result<(String, PathBuf)> { + let progress = move |stage: &'static str| { + set_stage(job_id, stage); + }; + produce_reel( + app_state, + insight_dao, + exif_dao, + planned, + meta, + voice, + key, + ScripterMode::Fast, + Some(&progress), + ) + .await +} + +/// Resolve a media item's library-relative path to a validated absolute path +/// under its library root (works for both photos and clips). +fn resolve_media_path(app_state: &AppState, media: &SegmentMedia) -> Option { + let lib = app_state.library_by_id(media.library_id())?; + let rel = media.rel_path().to_string(); + crate::files::is_valid_full_path(&lib.root_path, &rel, false) +} + +// --- Nightly pre-generation scheduler (Section D) ---------------------------- + +/// Env: "3" (default). The hour (0-23) when the nightly pre-gen batch fires. +/// Clamped to 0-23; invalid values fall back to default. +fn pregen_run_hour() -> u32 { + std::env::var("REEL_PREGEN_HOUR") + .ok() + .and_then(|v| v.trim().parse().ok()) + .filter(|h| *h <= 23) + .unwrap_or(3) +} + +/// Env: "1" (default, Monday). Day of week for weekly pre-gen (0=Sun, 1=Mon, ...). +/// Clamped to 0-6; invalid values fall back to default. +fn pregen_week_dow() -> u32 { + std::env::var("REEL_PREGEN_WEEK_DOW") + .ok() + .and_then(|v| v.trim().parse().ok()) + .filter(|d| *d <= 6) + .unwrap_or(1) +} + +/// Pure: seconds until the next `run_hour:00:00` strictly after `now`. +/// +/// Minute/second-accurate (not just hour-granular): when `now` is already at or +/// past the target this wraps to the same hour tomorrow, so a batch that +/// finishes inside the run hour sleeps ~24h rather than busy-looping (waking, +/// re-running, and re-sleeping 0s) for the rest of that hour. The tradeoff is +/// that booting at or after `run_hour` waits until the next day. 
Recomputed each +/// loop iteration from `Local::now()` so DST shifts are absorbed. +pub(crate) fn secs_until_next_run_hour(now: chrono::DateTime, run_hour: u32) -> u64 { + let now_secs = now.hour() * 3600 + now.minute() * 60 + now.second(); + let target_secs = run_hour * 3600; + let diff = if target_secs > now_secs { + target_secs - now_secs + } else { + 86_400 - now_secs + target_secs + }; + diff as u64 +} + +/// Load pre-gen parameters: tries the user_ai_prefs DB row first, falls back +/// to env vars, then to server-local defaults. +fn load_pregen_params(app_state: &AppState) -> (i32, Option, String) { + // Try DB row first + if let Ok(mut dao) = app_state.user_ai_prefs_dao.lock() { + let ctx = opentelemetry::Context::new(); + if let Ok(Some(prefs)) = dao.get_prefs(&ctx) { + let tz = prefs.tz_offset_minutes.unwrap_or_else(fixed_tz_offset); + let voice = prefs.voice; + let library = prefs.library.unwrap_or_else(|| "all".to_string()); + return (tz, voice, library); + } + } + // Fall back to env (explicit offset overrides auto-detect) + let tz = std::env::var("REEL_PREGEN_TZ_OFFSET_MINUTES") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or_else(fixed_tz_offset); + let voice = std::env::var("REEL_PREGEN_VOICE").ok(); + let library = std::env::var("REEL_PREGEN_LIBRARY") + .ok() + .unwrap_or_else(|| "all".to_string()); + (tz, voice, library) +} + +/// Fixed timezone offset: reads `REEL_PREGEN_TZ_FIXED_MINUTES` (e.g. "-480" +/// for US Eastern) when set, falling back to the system local offset. Using +/// a fixed offset avoids DST shifts changing the pre-gen schedule halfway +/// through the year. +fn fixed_tz_offset() -> i32 { + std::env::var("REEL_PREGEN_TZ_FIXED_MINUTES") + .ok() + .and_then(|v| v.trim().parse().ok()) + .unwrap_or_else(|| chrono::Local::now().offset().local_minus_utc()) +} + +/// Spawn the nightly pre-generation scheduler. Runs behind `REEL_PREGEN_ENABLED`. 
+pub(crate) async fn spawn_pregen_scheduler(app_state: web::Data) { + if std::env::var("REEL_PREGEN_ENABLED").ok() != Some("1".to_string()) { + log::info!("Reel pre-generation scheduler disabled (REEL_PREGEN_ENABLED != 1)"); + return; + } + + let run_hour = pregen_run_hour(); + log::info!( + "Reel pre-generation scheduler enabled, running at hour {} local", + run_hour + ); + + tokio::spawn(async move { + loop { + let now = chrono::Local::now(); + let sleep_secs = secs_until_next_run_hour(now, run_hour); + log::debug!("Next pre-gen run in {}s", sleep_secs); + tokio::time::sleep(std::time::Duration::from_secs(sleep_secs)).await; + + if let Err(e) = run_pregen_batch(&app_state).await { + log::error!("Reel pre-generation batch failed: {}", e); + } + } + }); +} + +/// Run the pre-generation batch for all applicable spans. +async fn run_pregen_batch(app_state: &AppState) -> anyhow::Result<()> { + let now = chrono::Local::now(); + let weekday = now.weekday().num_days_from_sunday(); // 0=Sun, 1=Mon, ... + let day_of_month = now.day(); + + let mut spans = vec!["day"]; + if weekday == pregen_week_dow() { + spans.push("week"); + } + if day_of_month == 1 { + spans.push("month"); + } + + let (tz, voice, library) = load_pregen_params(app_state); + + for span in spans { + if let Err(e) = pregen_one(app_state, span, tz, voice.clone(), &library).await { + log::error!("Pre-gen failed for span={}: {}", span, e); + } + } + + Ok(()) +} + +/// Pre-generate a single reel for the given span. 
+async fn pregen_one( + app_state: &AppState, + span: &str, + tz: i32, + voice: Option, + library: &str, +) -> anyhow::Result<()> { + let memories_span = match span { + "day" => MemoriesSpan::Day, + "week" => MemoriesSpan::Week, + "month" => MemoriesSpan::Month, + _ => MemoriesSpan::Day, + }; + + let selector = ReelSelector::Memories { + span: memories_span, + tz_offset_minutes: tz, + library: if library == "all" { + None + } else { + Some(library.to_string()) + }, + // Must match the on-demand default (create_reel_handler) so the cache + // key — which encodes the raw max_segments — lines up and the on-demand + // cache-hit path serves this pre-generated reel. The client sends no + // max_segments, so it defaults to DEFAULT_MAX_SEGMENTS there too. + max_segments: selector::DEFAULT_MAX_SEGMENTS, + }; + + let exif_dao = app_state.insight_generator.exif_dao(); + let insight_dao = app_state.insight_generator.insight_dao(); + let ctx = opentelemetry::Context::new(); + let (planned, reel_meta) = match selector::resolve(app_state, exif_dao, &ctx, &selector) { + Ok((p, m)) => (p, m), + Err(e) => { + log::warn!("Pre-gen resolve failed for span={}: {}", span, e); + return Ok(()); + } + }; + + if planned.is_empty() { + log::info!("No beats for span={}, skipping", span); + return Ok(()); + } + + // Flatten every media item across beats (in order) into the cache key. + let media: Vec = planned.iter().flat_map(|b| b.media.clone()).collect(); + let key = cache_key(&selector, &media, voice.as_deref()); + // Total media items shown (photos + clips), not beat count. 
+ let media_count = media.len() as i32; + + // Dedup: check if fresh ledger row exists + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Time went backwards") + .as_secs() as i64; + + let max_age_hours = match span { + "week" => REEL_PRECOMPUTED_WEEK_MAX_AGE_HOURS, + "month" => REEL_PRECOMPUTED_MONTH_MAX_AGE_HOURS, + _ => REEL_PRECOMPUTED_DAY_MAX_AGE_HOURS, + }; + let min_generated_at = now - (max_age_hours as i64 * 3600); + + // Skip only when a fresh ledger row points at THIS exact cache key (same + // media, params, render version) and its file still exists. Comparing the + // stored cache_key — not just (span, library) — means a key change from + // selection-logic/params drift that doesn't bump RENDER_VERSION still forces + // a regen within the freshness window, instead of leaving a stale row that + // points at an orphaned reel. + let already_current = { + let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); + matches!( + dao.latest_for(&ctx, span, library), + Ok(Some(row)) + if row.cache_key == key + && row.render_version == RENDER_VERSION as i32 + && row.generated_at >= min_generated_at + ) && reel_mp4_path(app_state, &key).exists() + }; + + if already_current { + log::info!( + "Fresh precomputed reel already current for span={} key={}, skipping", + span, + key + ); + return Ok(()); + } + + // Past the key-aware dedup above, any MP4 already at this key was NOT + // pre-generated by us (it has no matching ledger row) — most likely an + // on-demand fast-scripted reel that happens to share the key. Don't adopt + // it: regenerate so the precomputed reel is the agentic one. produce_reel + // publishes atomically, overwriting whatever is there. (The narrow + // render-succeeded-but-ledger-write-failed crash window just costs one + // redundant re-render next run.) 
+ log::info!("Generating precomputed reel for span={}, key={}", span, key); + let (title, mp4) = produce_reel( + app_state, + insight_dao, + exif_dao, + planned, + reel_meta, + voice.clone(), + &key, + ScripterMode::Agentic, + None, + ) + .await?; + + // Record to ledger, then retire superseded reels for this (span, library) + // — yesterday's daily, an older render-version, etc. — keeping a small + // grace window. Done under one lock so the prune sees the row we just wrote. + let superseded = { + let mut reel_dao = app_state.precomputed_reel_dao.lock().expect("lock"); + reel_dao.record_reel( + &ctx, + &crate::database::models::InsertablePrecomputedReel { + span: span.to_string(), + library_key: library.to_string(), + cache_key: key.clone(), + output_path: mp4.to_string_lossy().to_string(), + title, + media_count, + render_version: RENDER_VERSION as i32, + tz_offset_minutes: tz, + voice: voice.clone(), + generated_at: now, + }, + )?; + reel_dao + .prune_superseded(&ctx, span, library, PREGEN_KEEP_PER_SCOPE) + .unwrap_or_default() + }; + for row in &superseded { + delete_reel_files(&row.output_path); + } + if !superseded.is_empty() { + log::info!( + "Pruned {} superseded precomputed reel(s) for span={}", + superseded.len(), + span + ); + } + + log::info!("Precomputed reel generated for span={}, key={}", span, key); + Ok(()) +} + +// --- On-disk cache sweep ----------------------------------------------------- + +/// Best-effort unlink of a reel's MP4 and its `.json` sidecar. +fn delete_reel_files(mp4_output_path: &str) { + let mp4 = Path::new(mp4_output_path); + let _ = std::fs::remove_file(mp4); + let _ = std::fs::remove_file(mp4.with_extension("json")); +} + +/// Max age (seconds) before an unreferenced reel MP4 is swept. 
+fn reel_cache_max_age_secs() -> u64 { + std::env::var("REEL_CACHE_MAX_AGE_DAYS") + .ok() + .and_then(|v| v.trim().parse::().ok()) + .filter(|d| *d > 0) + .unwrap_or(REEL_CACHE_MAX_AGE_DAYS_DEFAULT) + * 86_400 +} + +/// Spawn the periodic on-disk reel-cache sweeper. Runs independently of the +/// pre-gen scheduler because the on-demand cache grows whether or not pre-gen +/// is enabled. Disable with `REEL_CACHE_SWEEP_ENABLED=0`. +pub(crate) async fn spawn_reel_cache_sweeper(app_state: web::Data) { + if std::env::var("REEL_CACHE_SWEEP_ENABLED").ok().as_deref() == Some("0") { + log::info!("Reel cache sweeper disabled (REEL_CACHE_SWEEP_ENABLED=0)"); + return; + } + tokio::spawn(async move { + // Settle after startup, then sweep on a fixed cadence. + tokio::time::sleep(Duration::from_secs(300)).await; + loop { + let removed = sweep_reel_cache(&app_state); + if removed > 0 { + log::info!("Reel cache sweep removed {removed} stale file(s)"); + } + tokio::time::sleep(Duration::from_secs(REEL_CACHE_SWEEP_INTERVAL_SECS)).await; + } + }); +} + +/// One sweep of `reels_path`. Removes: stale render artifacts (`.mp4.tmp`, +/// `.concat.txt`, orphaned sidecars) from crashed runs; and reel MP4s that no +/// ledger row references, that no live job points at, and that are older than +/// the cache max age (the on-demand cache, which has no ledger row). Returns the +/// number of files removed. Best-effort — any IO error on one entry is skipped. +fn sweep_reel_cache(app_state: &AppState) -> usize { + let dir = Path::new(&app_state.reels_path); + let read_dir = match std::fs::read_dir(dir) { + Ok(rd) => rd, + Err(_) => return 0, // dir not created yet → nothing to sweep + }; + + // Files a ledger row still points at (current pre-gen reels). 
+ let protected: std::collections::HashSet = { + let ctx = opentelemetry::Context::new(); + let mut dao = app_state.precomputed_reel_dao.lock().expect("lock"); + dao.all_cache_keys(&ctx) + .unwrap_or_default() + .into_iter() + .collect() + }; + // Outputs of live in-memory jobs (a Done reel a client may still be fetching). + let active: std::collections::HashSet = { + let jobs = REEL_JOBS.lock().unwrap(); + jobs.values() + .filter_map(|j| j.output_path.as_ref()) + .map(|p| p.to_string_lossy().to_string()) + .collect() + }; + + let now = std::time::SystemTime::now(); + let max_age = Duration::from_secs(reel_cache_max_age_secs()); + let tmp_max_age = Duration::from_secs(REEL_TMP_MAX_AGE_SECS); + let mut removed = 0usize; + + for entry in read_dir.flatten() { + let path = entry.path(); + let Some(name) = path.file_name().and_then(|n| n.to_str()) else { + continue; + }; + let age = entry + .metadata() + .and_then(|m| m.modified()) + .ok() + .and_then(|t| now.duration_since(t).ok()) + .unwrap_or_default(); + + // Transient render artifacts from a crashed run. + if name.ends_with(".mp4.tmp") || name.ends_with(".concat.txt") { + if age > tmp_max_age && std::fs::remove_file(&path).is_ok() { + removed += 1; + } + continue; + } + + // Reel MP4: keep if referenced (ledger or live job) or still recent. + if let Some(key) = name.strip_suffix(".mp4") { + let p = path.to_string_lossy().to_string(); + if protected.contains(key) || active.contains(&p) || age < max_age { + continue; + } + if std::fs::remove_file(&path).is_ok() { + let _ = std::fs::remove_file(path.with_extension("json")); + removed += 1; + } + continue; + } + + // Orphaned sidecar (its MP4 is gone). 
+ if name.ends_with(".json") + && !path.with_extension("mp4").exists() + && age > tmp_max_age + && std::fs::remove_file(&path).is_ok() + { + removed += 1; + } + } + removed +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::libraries::Library; + use chrono::TimeZone; + + fn photo(p: &str, lib: i32) -> SegmentMedia { + SegmentMedia::Photo { + rel_path: p.to_string(), + library_id: lib, + } + } + + fn clip(p: &str, lib: i32) -> SegmentMedia { + SegmentMedia::Clip { + rel_path: p.to_string(), + library_id: lib, + } + } + + fn day_selector() -> ReelSelector { + ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + } + } + + #[test] + fn cache_key_is_stable_for_same_inputs() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let k1 = cache_key(&day_selector(), &media, Some("grandma")); + let k2 = cache_key(&day_selector(), &media, Some("grandma")); + assert_eq!(k1, k2); + // 64-hex blake3. + assert_eq!(k1.len(), 64); + assert!(k1.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn cache_key_changes_with_media_order_voice_and_selector() { + let media = vec![photo("a.jpg", 1), photo("b.jpg", 1)]; + let reordered = vec![photo("b.jpg", 1), photo("a.jpg", 1)]; + let base = cache_key(&day_selector(), &media, Some("grandma")); + // Order matters (the reel sequence differs). + assert_ne!( + base, + cache_key(&day_selector(), &reordered, Some("grandma")) + ); + // Voice matters. + assert_ne!(base, cache_key(&day_selector(), &media, Some("dad"))); + assert_ne!(base, cache_key(&day_selector(), &media, None)); + // Span matters. + let week = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: 0, + library: None, + max_segments: 24, + }; + assert_ne!(base, cache_key(&week, &media, Some("grandma"))); + } + + #[test] + fn cache_key_distinguishes_photo_from_clip() { + // Same path/library used as a still vs a video clip must differ. 
+ let as_photo = vec![photo("v.mp4", 1)]; + let as_clip = vec![clip("v.mp4", 1)]; + assert_ne!( + cache_key(&day_selector(), &as_photo, None), + cache_key(&day_selector(), &as_clip, None) + ); + } + + #[test] + fn is_clip_only_for_single_clip_beat() { + let clip_beat = PlannedBeat { + media: vec![clip("v.mp4", 1)], + date: None, + insight_title: None, + insight_summary: None, + gps: None, + }; + let photo_beat = PlannedBeat { + media: vec![photo("a.jpg", 1), photo("b.jpg", 1)], + date: None, + insight_title: None, + insight_summary: None, + gps: None, + }; + assert!(clip_beat.is_clip()); + assert!(!photo_beat.is_clip()); + } + + #[test] + fn span_phrase_maps_each_span() { + let mk = |span| ReelMeta { + span, + years: vec![], + }; + assert_eq!(mk(MemoriesSpan::Day).span_phrase(), "on this day"); + assert_eq!(mk(MemoriesSpan::Week).span_phrase(), "this week"); + assert_eq!(mk(MemoriesSpan::Month).span_phrase(), "this month"); + } + + #[test] + fn date_label_formats_or_none() { + let beat = PlannedBeat { + media: vec![photo("a.jpg", 1)], + date: Some(1_560_384_000), // 2019-06-13 UTC + insight_title: None, + insight_summary: None, + gps: None, + }; + assert!(beat.date_label().unwrap().contains("2019")); + + let undated = PlannedBeat { + media: vec![photo("a.jpg", 1)], + date: None, + insight_title: None, + insight_summary: None, + gps: None, + }; + assert_eq!(undated.date_label(), None); + } + + #[test] + fn normalize_library_key_returns_id_when_found_numeric() { + let libs = vec![ + Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }, + Library { + id: 7, + name: "archive".to_string(), + root_path: "/tmp/archive".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }, + ]; + assert_eq!(normalize_library_key(&libs, Some("1")), "1"); + } + + #[test] + fn normalize_library_key_returns_id_when_found_by_name() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + 
root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("main")), "1"); + } + + #[test] + fn normalize_library_key_returns_all_when_absent() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, None), "all"); + } + + #[test] + fn normalize_library_key_returns_all_when_empty() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("")), "all"); + } + + #[test] + fn normalize_library_key_returns_all_when_unknown() { + let libs = vec![Library { + id: 1, + name: "main".to_string(), + root_path: "/tmp/main".to_string(), + enabled: true, + excluded_dirs: Vec::new(), + }]; + assert_eq!(normalize_library_key(&libs, Some("missing")), "all"); + } + + #[test] + fn secs_until_next_run_hour_within_run_hour_wraps_to_tomorrow() { + // 03:30, run 3 → already past today's 03:00, so wait until tomorrow + // 03:00 (23h30m). Crucially NOT 0 — that would busy-loop the scheduler + // for the rest of the hour. + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 3, 30, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 3), 23 * 3600 + 30 * 60); + } + + #[test] + fn secs_until_next_run_hour_future_today_counts_minutes() { + // 10:15 → 14:00 is 3h45m, not a whole-hour 4h (minutes count). 
+ let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 10, 15, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 14), 3 * 3600 + 45 * 60); + } + + #[test] + fn secs_until_next_run_hour_past_today_wraps() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 20, 0, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 3), (24 - 20 + 3) * 3600); + } + + #[test] + fn secs_until_next_run_hour_midnight() { + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 0, 0, 0) + .single() + .expect("valid datetime"); + // 0:00, run at 3 → 3 hours + assert_eq!(secs_until_next_run_hour(dt, 3), 3 * 3600); + // 0:00 exactly, run at 0 → wraps to next midnight (not 0, so no busy loop) + assert_eq!(secs_until_next_run_hour(dt, 0), 86_400); + } + + #[test] + fn secs_until_next_run_hour_just_before_target() { + // 23:30, run 0 → 30 minutes to midnight (minute-accurate, not 1h). + let dt = chrono::Local + .with_ymd_and_hms(2026, 6, 13, 23, 30, 0) + .single() + .expect("valid datetime"); + assert_eq!(secs_until_next_run_hour(dt, 0), 30 * 60); + // 23:30, run 23 → already past today's 23:00, wait until tomorrow. + assert_eq!(secs_until_next_run_hour(dt, 23), 86_400 - 30 * 60); + } +} diff --git a/src/reels/render.rs b/src/reels/render.rs new file mode 100644 index 0000000..221df5f --- /dev/null +++ b/src/reels/render.rs @@ -0,0 +1,742 @@ +//! ffmpeg assembly for memory reels. +//! +//! Two-stage, per-segment design: each segment is rendered to its own +//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments +//! are joined with the concat demuxer (stream copy, no re-encode). Rendering +//! per segment — rather than one monster filtergraph — keeps each ffmpeg +//! invocation simple to reason about, parallelizes naturally, and means a +//! video-clip segment type (phase 2) slots in as just a different per-segment +//! builder without touching the concat stage. +//! +//! 
The arg builders are pure (`Vec` out) so the exact ffmpeg command +//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure. + +use anyhow::{Context, Result, bail}; +use std::path::Path; +use tokio::process::Command; + +/// Re-exported so the reel pipeline reaches NVENC detection through this module +/// rather than depending on `video::ffmpeg` directly. +pub use crate::video::ffmpeg::is_nvenc_available; + +/// Reel canvas. Portrait, because reels are watched on a phone held upright — +/// a landscape canvas letterboxes to a thin ~25%-height band there. Each photo +/// is fitted sharp and centered over a blurred, zoomed copy of itself (see +/// [`photo_filter_chain`]) so the frame is always filled regardless of the +/// photo's orientation, without cropping the subject. +pub const REEL_WIDTH: u32 = 1080; +pub const REEL_HEIGHT: u32 = 1920; +pub const REEL_FPS: u32 = 30; + +/// A beat's screen time is its narration length plus a short breath, with a +/// floor so a terse line still lingers. No ceiling: the beat always covers the +/// full narration so speech is never truncated — the scripter is asked to keep +/// lines short instead. +pub const MIN_SEGMENT_SECONDS: f64 = 2.5; +const NARRATION_TAIL_SECONDS: f64 = 0.6; + +/// Fade durations baked into each photo. A held (single-photo) beat gets a +/// gentle dip; burst photos get a much snappier fade so the difference between +/// a held shot and a quick burst is obvious. +const SINGLE_FADE_SECONDS: f64 = 0.35; +const BURST_FADE_SECONDS: f64 = 0.12; + +/// Video-clip framing. Fallback cap on how much of a clip we read when the +/// source length can't be probed; with a known length, a clip instead plays for +/// as much of its beat as its footage allows (see [`clip_beat_plan`]). Its live +/// audio is ducked to `CLIP_DUCK_VOLUME` under the narration. 
+pub const CLIP_SECONDS: f64 = 5.0; +const CLIP_DUCK_VOLUME: f64 = 0.35; + +/// Floor on how long each burst photo stays up, so a long line over many photos +/// doesn't flash them subliminally. If the narration is too short to give every +/// photo this much, the beat is stretched to fit. +const MIN_BURST_PHOTO_SECONDS: f64 = 0.6; + +/// Base screen time for a beat given its narration length: narration + breath, +/// floored. Used as the lower bound on a beat's total duration. +pub fn segment_duration(narration_secs: f64) -> f64 { + let d = narration_secs + NARRATION_TAIL_SECONDS; + if d.is_finite() && d > MIN_SEGMENT_SECONDS { + d + } else { + MIN_SEGMENT_SECONDS + } +} + +/// Split a beat into per-photo durations. The beat lasts at least its narration +/// (so speech isn't cut) and at least `n × MIN_BURST_PHOTO_SECONDS` (so a fast +/// burst stays legible); the photos share that total evenly. Returns +/// `(total_seconds, per_photo_seconds)`. +pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec) { + let n = n_photos.max(1); + let base = segment_duration(narration_secs); + let min_total = n as f64 * MIN_BURST_PHOTO_SECONDS; + let total = if base > min_total { base } else { min_total }; + let each = total / n as f64; + (total, vec![each; n]) +} + +/// Fade length to use for a beat of `n_photos` (gentle when held, snappy in a +/// burst). +fn fade_for(n_photos: usize) -> f64 { + if n_photos > 1 { + BURST_FADE_SECONDS + } else { + SINGLE_FADE_SECONDS + } +} + +/// Options controlling per-segment rendering. +#[derive(Debug, Clone, Copy)] +pub struct SegmentOpts { + pub width: u32, + pub height: u32, + pub fps: u32, + pub nvenc: bool, +} + +impl Default for SegmentOpts { + fn default() -> Self { + Self { + width: REEL_WIDTH, + height: REEL_HEIGHT, + fps: REEL_FPS, + nvenc: false, + } + } +} + +/// Filter chain for one photo (input `idx`) producing the labelled output +/// `[v{idx}]`. 
Splits the still into a background and foreground: the background +/// is scaled to *cover* the canvas and heavily blurred; the foreground is +/// scaled to *fit* and overlaid centered. This fills the portrait frame for any +/// photo orientation — no black bars, no cropping of the subject — then a fade +/// in/out softens the cut. Intermediate labels are suffixed with `idx` so +/// several chains coexist in one `filter_complex`. +/// +/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a +/// true {fps}-frame timeline; otherwise the fade is sampled at the looped +/// still's coarse cadence and duplicated up, which reads as a steppy dip. +fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String { + let (w, h, fps) = (opts.width, opts.height, opts.fps); + let fade_out_start = (duration - fade).max(0.0); + format!( + "[{idx}:v]split=2[bg{idx}][fg{idx}];\ + [bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\ + crop={w}:{h},boxblur=20:2[bgb{idx}];\ + [fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\ + [bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\ + fps={fps},\ + fade=t=in:st=0:d={fade},\ + fade=t=out:st={fade_out_start:.3}:d={fade},\ + setsar=1,format=yuv420p[v{idx}]" + ) +} + +/// Full `filter_complex` for a beat of `per_photo` durations: one chain per +/// photo, concatenated into `[v]`, with the narration (the last input, index +/// `per_photo.len()`) padded with trailing silence into `[a]`. A single-photo +/// beat degenerates to one chain + `concat=n=1` (a passthrough). 
+pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String { + let n = per_photo.len().max(1); + let fade = fade_for(n); + let chains: Vec = per_photo + .iter() + .enumerate() + .map(|(i, &d)| photo_filter_chain(i, opts, d, fade)) + .collect(); + let concat_inputs: String = (0..n).map(|i| format!("[v{i}]")).collect(); + format!( + "{chains};{concat_inputs}concat=n={n}:v=1:a=0[v];[{n}:a]apad[a]", + chains = chains.join(";") + ) +} + +fn video_encoder_args(nvenc: bool) -> Vec { + if nvenc { + // p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path. + [ + "-c:v", + "h264_nvenc", + "-preset", + "p4", + "-cq", + "23", + "-pix_fmt", + "yuv420p", + ] + } else { + [ + "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p", + ] + } + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Build the ffmpeg args that render one beat: each photo looped for its slice +/// of the beat (filled to the portrait canvas with a blurred backdrop), the +/// slices concatenated, and the single narration muxed over the whole thing. +/// `total` bounds the output (and the apad'd audio) to the beat length. +pub fn build_beat_args( + image_paths: &[String], + audio_path: &str, + out_path: &str, + per_photo: &[f64], + total: f64, + opts: &SegmentOpts, +) -> Vec { + let fps = opts.fps.to_string(); + let mut args: Vec = vec!["-y".into()]; + if opts.nvenc { + args.extend(["-hwaccel".into(), "cuda".into()]); + } + // One looped-still input per photo, each bounded to its slice by an input + // `-t`; reading at the target `-framerate` gives the fades real frames to + // ramp across. 
+ for (path, &dur) in image_paths.iter().zip(per_photo.iter()) { + args.extend([ + "-framerate".into(), + fps.clone(), + "-loop".into(), + "1".into(), + "-t".into(), + format!("{dur:.3}"), + "-i".into(), + path.clone(), + ]); + } + args.extend([ + "-i".into(), + audio_path.into(), + "-filter_complex".into(), + beat_filtergraph(opts, per_photo), + "-map".into(), + "[v]".into(), + "-map".into(), + "[a]".into(), + "-t".into(), + format!("{total:.3}"), + // Force constant frame rate so the beat (and the concatenated reel) + // plays at a steady {fps} rather than a variable cadence. + "-r".into(), + fps, + ]); + args.extend(video_encoder_args(opts.nvenc)); + args.extend( + ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"] + .iter() + .map(|s| s.to_string()), + ); + args.push(out_path.into()); + args +} + +/// Build the concat-demuxer args that join rendered segments losslessly. +/// `+faststart` moves the moov atom up front so the reel streams immediately +/// on the mobile client. The output muxer is forced with `-f mp4` because we +/// write to a `.tmp` path (atomic publish) whose extension ffmpeg can't map to +/// a format on its own. +pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec { + [ + "-y", + "-f", + "concat", + "-safe", + "0", + "-i", + list_path, + "-c", + "copy", + "-movflags", + "+faststart", + "-f", + "mp4", + out_path, + ] + .iter() + .map(|s| s.to_string()) + .collect() +} + +/// Render the concat list file body. Each line points the demuxer at one +/// segment; single quotes in paths are escaped per ffmpeg's concat syntax. 
+pub fn build_concat_list(segment_paths: &[String]) -> String {
+    use std::fmt::Write as _;
+
+    let mut out = String::new();
+    for p in segment_paths {
+        // Close the quote, emit an escaped apostrophe, reopen it: ' -> '\''
+        let escaped = p.replace('\'', r"'\''");
+        // Formatting into a `String` cannot fail, so the result is ignored.
+        let _ = writeln!(out, "file '{escaped}'");
+    }
+    out
+}
+
+/// Run `ffmpeg` with `args`. Fails if the process cannot be spawned or exits
+/// non-zero; in the latter case the error carries ffmpeg's stderr. `what`
+/// names the step in both error messages.
+async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
+    let output = Command::new("ffmpeg")
+        .args(args)
+        .output()
+        .await
+        .with_context(|| format!("spawning ffmpeg for {what}"))?;
+    if !output.status.success() {
+        bail!(
+            "ffmpeg {what} failed: {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+    Ok(())
+}
+
+/// Render one beat to `out_path`: its photos shown in sequence (a held shot for
+/// one photo, a quick burst for several) under the single narration in
+/// `audio_path`, whose measured length sets the beat's pacing.
+///
+/// Errors if `image_paths` is empty or if ffmpeg fails.
+pub async fn render_beat(
+    image_paths: &[std::path::PathBuf],
+    audio_path: &Path,
+    out_path: &Path,
+    narration_secs: f64,
+    opts: &SegmentOpts,
+) -> Result<()> {
+    if image_paths.is_empty() {
+        bail!("render_beat called with no images");
+    }
+    let (total, per_photo) = beat_durations(narration_secs, image_paths.len());
+    // ffmpeg takes string args; non-UTF-8 path bytes are replaced lossily.
+    let paths: Vec<String> = image_paths
+        .iter()
+        .map(|p| p.to_string_lossy().into_owned())
+        .collect();
+    let args = build_beat_args(
+        &paths,
+        &audio_path.to_string_lossy(),
+        &out_path.to_string_lossy(),
+        &per_photo,
+        total,
+        opts,
+    );
+    run_ffmpeg(&args, "beat render").await
+}
+
+// --- Video-clip beats --------------------------------------------------------
+
+/// Decide how long the clip plays and how long the whole beat lasts, from the
+/// source video's length (if known) and the narration length. Returns
+/// `(clip_dur, beat_total)`.
+///
+/// The beat always lasts long enough for the full narration. The clip plays for
+/// as much of that beat as its footage covers — so the motion fills the screen
+/// time rather than stopping early.
+/// We only freeze the last frame (the
+/// `beat_total - clip_dur` gap, handled by `tpad` in [`clip_video_filter`]) when
+/// the source video is genuinely shorter than the narration. Capping clip
+/// playback at a fixed length while the narration ran longer was what produced
+/// the second-or-two freeze that read as a glitchy pause before the transition.
+pub fn clip_beat_plan(source_dur: Option<f64>, narration_secs: f64) -> (f64, f64) {
+    let want = segment_duration(narration_secs);
+    let clip_dur = match source_dur {
+        // Known length: play up to the whole beat, but never past the source.
+        Some(d) if d > 0.0 => d.min(want),
+        // Unknown length (or a zero/negative/NaN probe result, which fails the
+        // guard above): read up to the fallback cap; tpad covers any shortfall.
+        _ => want.min(CLIP_SECONDS),
+    };
+    (clip_dur, want.max(clip_dur))
+}
+
+/// Video chain for a clip beat: fill the clip to the portrait canvas (blurred
+/// backdrop, same look as photos), normalize fps, hold the last frame if the
+/// narration outlasts the clip (`tpad`), then fade. Produces `[v]`.
+fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String {
+    let (w, h, fps) = (opts.width, opts.height, opts.fps);
+    let fade = SINGLE_FADE_SECONDS;
+    let hold = (beat_total - clip_dur).max(0.0);
+    // Clamped so a beat shorter than the fade still gets a valid start time.
+    let fade_out_start = (beat_total - fade).max(0.0);
+    // Freeze the final frame to cover narration that runs past the clip.
+    // Gaps of 0.05s or less are not padded.
+    let tpad = if hold > 0.05 {
+        format!(",tpad=stop_mode=clone:stop_duration={hold:.3}")
+    } else {
+        String::new()
+    };
+    format!(
+        "[0:v]split=2[bg][fg];\
+         [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\
+         crop={w}:{h},boxblur=20:2[bgb];\
+         [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\
+         [bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\
+         fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\
+         setsar=1,format=yuv420p[v]"
+    )
+}
+
+/// Audio chain for a clip beat. With a clip audio track, duck it under the
+/// narration and mix; without one, just the narration. Produces `[a]`.
+fn clip_audio_filter(has_audio: bool) -> String {
+    if has_audio {
+        // Input 0 is the clip, input 1 the narration. The narration is padded
+        // with silence, so the mix never ends before the output `-t` cuts it.
+        format!(
+            "[0:a]volume={CLIP_DUCK_VOLUME}[duck];[1:a]apad[narr];\
+             [duck][narr]amix=inputs=2:duration=longest:normalize=0[a]"
+        )
+    } else {
+        "[1:a]apad[a]".to_string()
+    }
+}
+
+/// Full `filter_complex` for a clip beat (input 0 = clip, input 1 = narration).
+pub fn clip_beat_filtergraph(
+    opts: &SegmentOpts,
+    clip_dur: f64,
+    beat_total: f64,
+    has_audio: bool,
+) -> String {
+    format!(
+        "{};{}",
+        clip_video_filter(opts, clip_dur, beat_total),
+        clip_audio_filter(has_audio)
+    )
+}
+
+/// Build the ffmpeg args for a clip beat: the first `clip_dur` seconds of the
+/// source video, filled to the portrait canvas with its live audio ducked under
+/// the narration, bounded to `beat_total`.
+///
+/// Set `has_audio` only when the clip really has an audio stream; the mixed
+/// graph references `[0:a]`.
+pub fn build_clip_beat_args(
+    clip_path: &str,
+    audio_path: &str,
+    out_path: &str,
+    clip_dur: f64,
+    beat_total: f64,
+    has_audio: bool,
+    opts: &SegmentOpts,
+) -> Vec<String> {
+    let fps = opts.fps.to_string();
+    let mut args: Vec<String> = vec!["-y".into()];
+    if opts.nvenc {
+        args.extend(["-hwaccel".into(), "cuda".into()]);
+    }
+    args.extend([
+        // Input `-t` limits the clip to its window; audio has none (apad fills).
+        "-t".into(),
+        format!("{clip_dur:.3}"),
+        "-i".into(),
+        clip_path.into(),
+        "-i".into(),
+        audio_path.into(),
+        "-filter_complex".into(),
+        clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio),
+        "-map".into(),
+        "[v]".into(),
+        "-map".into(),
+        "[a]".into(),
+        // Output `-t` is what ends the beat: the padded audio has no end.
+        "-t".into(),
+        format!("{beat_total:.3}"),
+        "-r".into(),
+        fps,
+    ]);
+    args.extend(video_encoder_args(opts.nvenc));
+    args.extend(
+        ["-c:a", "aac", "-b:a", "160k", "-ar", "48000"]
+            .iter()
+            .map(|s| s.to_string()),
+    );
+    args.push(out_path.into());
+    args
+}
+
+/// Whether a media file has at least one audio stream (so a clip beat knows
+/// whether to mix in live audio). Defaults to `false` on any probe failure.
+pub async fn has_audio_stream(path: &str) -> bool {
+    let probe = Command::new("ffprobe")
+        .args([
+            "-v",
+            "error",
+            "-select_streams",
+            "a",
+            "-show_entries",
+            "stream=index",
+            "-of",
+            "csv=p=0",
+            path,
+        ])
+        .output()
+        .await;
+    match probe {
+        // ffprobe prints one index line per audio stream. Require a clean exit
+        // and non-blank output so a failed probe never reports audio that the
+        // filtergraph would then try to read from `[0:a]`.
+        Ok(out) => out.status.success() && !out.stdout.trim_ascii().is_empty(),
+        Err(_) => false,
+    }
+}
+
+/// Render one clip beat: the opening of `clip_path` under the narration in
+/// `audio_path`. The clip plays for as much of the beat as its footage covers
+/// (see [`clip_beat_plan`]); [`CLIP_SECONDS`] only caps the read when the
+/// source length can't be probed. The beat lasts at least the narration,
+/// freezing the clip's last frame if needed.
+pub async fn render_clip_beat(
+    clip_path: &Path,
+    audio_path: &Path,
+    out_path: &Path,
+    narration_secs: f64,
+    opts: &SegmentOpts,
+) -> Result<()> {
+    let clip_str = clip_path.to_string_lossy().into_owned();
+    // Play the clip for as much of the beat as its footage covers; freeze only
+    // when the source is genuinely shorter than the narration (see clip_beat_plan).
+    // A failed duration probe is treated as "length unknown", not as an error.
+    let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str)
+        .await
+        .ok()
+        .flatten();
+    let (clip_dur, beat_total) = clip_beat_plan(source_dur, narration_secs);
+    let has_audio = has_audio_stream(&clip_str).await;
+
+    let args = build_clip_beat_args(
+        &clip_str,
+        &audio_path.to_string_lossy(),
+        &out_path.to_string_lossy(),
+        clip_dur,
+        beat_total,
+        has_audio,
+        opts,
+    );
+    run_ffmpeg(&args, "clip beat render").await
+}
+
+/// Join rendered segments into the final reel. Writes the concat list into the
+/// same directory as the output so relative paths and cleanup stay local.
+pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> {
+    // `with_extension` swaps only the final extension: `reel.mp4.tmp` becomes
+    // `reel.mp4.concat.txt`, next to the output.
+    let list_path = out_path.with_extension("concat.txt");
+    let body = build_concat_list(segment_paths);
+    tokio::fs::write(&list_path, body)
+        .await
+        .context("writing concat list")?;
+    let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy());
+    let result = run_ffmpeg(&args, "concat").await;
+    // Best-effort cleanup: the list is removed whether or not ffmpeg succeeded,
+    // and a failed removal must not mask the render result.
+    let _ = tokio::fs::remove_file(&list_path).await;
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn segment_duration_floors_short_lines() {
+        // A one-word narration still lingers at the floor.
+        assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS);
+        assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS);
+    }
+
+    #[test]
+    fn segment_duration_covers_full_narration_plus_tail() {
+        // No ceiling: a long line gets its full length so speech isn't cut.
+        assert!((segment_duration(5.0) - 5.6).abs() < 1e-9);
+        assert!((segment_duration(20.0) - 20.6).abs() < 1e-9);
+    }
+
+    #[test]
+    fn segment_duration_rejects_nonfinite() {
+        assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS);
+        assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS);
+    }
+
+    #[test]
+    fn beat_durations_single_photo_matches_base() {
+        let (total, per) = beat_durations(4.0, 1);
+        assert!((total - 4.6).abs() < 1e-9); // narration + tail
+        assert_eq!(per.len(), 1);
+        assert!((per[0] - 4.6).abs() < 1e-9);
+    }
+
+    #[test]
+    fn beat_durations_burst_splits_evenly() {
+        // 5 photos, narration 4.6s base → ~0.92s each (above the 0.6 floor).
+        let (total, per) = beat_durations(4.0, 5);
+        assert!((total - 4.6).abs() < 1e-9);
+        assert_eq!(per.len(), 5);
+        assert!((per.iter().sum::<f64>() - total).abs() < 1e-9);
+        assert!(per.iter().all(|&d| d >= MIN_BURST_PHOTO_SECONDS));
+    }
+
+    #[test]
+    fn beat_durations_stretches_when_narration_too_short_for_burst() {
+        // Floor narration (2.5s) over 10 photos would be 0.25s each — below the
+        // legibility floor, so the beat stretches to 10 × 0.6 = 6s.
+        let (total, per) = beat_durations(0.0, 10);
+        assert!((total - 6.0).abs() < 1e-9);
+        assert!(per.iter().all(|&d| (d - 0.6).abs() < 1e-9));
+    }
+
+    #[test]
+    fn beat_filtergraph_single_photo_fills_portrait_and_holds() {
+        let (_t, per) = beat_durations(4.0, 1);
+        let g = beat_filtergraph(&SegmentOpts::default(), &per);
+        assert!(g.contains("[0:v]split=2[bg0][fg0]"));
+        assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase"));
+        assert!(g.contains("crop=1080:1920"));
+        assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease"));
+        assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
+        // Single photo → concat of one, gentle fade, audio is input 1.
+        assert!(g.contains("concat=n=1:v=1:a=0[v]"));
+        assert!(g.contains("d=0.35")); // SINGLE_FADE
+        assert!(g.contains("[1:a]apad[a]"));
+    }
+
+    #[test]
+    fn beat_filtergraph_burst_chains_concats_and_snappy_fade() {
+        let (_t, per) = beat_durations(4.0, 3);
+        let g = beat_filtergraph(&SegmentOpts::default(), &per);
+        // One chain per photo with index-suffixed labels.
+        assert!(g.contains("[0:v]split") && g.contains("[1:v]split") && g.contains("[2:v]split"));
+        // Concatenated in order, audio is the 4th input (index 3).
+        assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]"));
+        assert!(g.contains("[3:a]apad[a]"));
+        // Burst uses the much snappier fade (vs 0.35 for a held shot).
+        assert!(g.contains("d=0.12"));
+        assert!(!g.contains("d=0.35"));
+    }
+
+    #[test]
+    fn beat_filtergraph_normalizes_fps_before_fading() {
+        // fps must precede the fades on every chain (else the dip looks steppy).
+        let (_t, per) = beat_durations(4.0, 1);
+        let g = beat_filtergraph(&SegmentOpts::default(), &per);
+        let fps_at = g.find("fps=30").expect("fps in graph");
+        let fade_at = g.find("fade=t=in").expect("fade in graph");
+        assert!(fps_at < fade_at);
+    }
+
+    #[test]
+    fn beat_args_one_input_per_photo_plus_audio_bound_by_total() {
+        let (total, per) = beat_durations(4.0, 2);
+        let args = build_beat_args(
+            &["/a.jpg".into(), "/b.jpg".into()],
+            "/n.wav",
+            "/out.mp4",
+            &per,
+            total,
+            &SegmentOpts::default(),
+        );
+        let joined = args.join(" ");
+        // A looped-still input per photo, each with its slice -t, then the audio.
+        assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg"));
+        assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg"));
+        assert!(joined.contains("-i /n.wav"));
+        // Output bounded to the beat total and forced CFR.
+        assert!(joined.contains("-t 4.600"));
+        assert!(joined.contains("-r 30"));
+        assert!(joined.ends_with("/out.mp4"));
+    }
+
+    #[test]
+    fn beat_args_use_nvenc_and_cuda_when_enabled() {
+        let opts = SegmentOpts {
+            nvenc: true,
+            ..SegmentOpts::default()
+        };
+        let (total, per) = beat_durations(3.0, 1);
+        let args = build_beat_args(
+            &["/img.jpg".into()],
+            "/a.wav",
+            "/out.mp4",
+            &per,
+            total,
+            &opts,
+        );
+        let joined = args.join(" ");
+        assert!(joined.contains("-hwaccel cuda"));
+        assert!(joined.contains("h264_nvenc"));
+        assert!(!joined.contains("libx264"));
+    }
+
+    #[test]
+    fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() {
+        // 5s clip, 7s beat → 2s freeze of the last frame, ducked-audio mix.
+        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true);
+        assert!(g.contains("tpad=stop_mode=clone:stop_duration=2.000"));
+        assert!(g.contains("volume=0.35"));
+        assert!(g.contains("amix=inputs=2"));
+        assert!(g.contains("[1:a]apad[narr]"));
+        // Fill applied to the clip too.
+        assert!(g.contains("boxblur"));
+        assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
+    }
+
+    #[test]
+    fn clip_beat_plan_plays_clip_through_the_whole_beat_when_source_is_long() {
+        // 30s source, 4s narration → beat is narration+tail (4.6), and the clip
+        // plays that whole 4.6s of motion: no freeze (clip_dur == beat_total).
+        let (clip_dur, beat_total) = clip_beat_plan(Some(30.0), 4.0);
+        assert!((beat_total - 4.6).abs() < 1e-9);
+        assert!((clip_dur - 4.6).abs() < 1e-9);
+        assert!((beat_total - clip_dur).abs() < 1e-9); // no hold
+    }
+
+    #[test]
+    fn clip_beat_plan_freezes_only_when_source_shorter_than_narration() {
+        // 2s source under a 4s narration → play all 2s, freeze the remainder.
+        let (clip_dur, beat_total) = clip_beat_plan(Some(2.0), 4.0);
+        assert!((clip_dur - 2.0).abs() < 1e-9);
+        assert!((beat_total - 4.6).abs() < 1e-9);
+        assert!(beat_total - clip_dur > 2.0); // unavoidable freeze gap
+    }
+
+    #[test]
+    fn clip_beat_plan_caps_read_when_source_length_unknown() {
+        // Probe failed: read up to the fallback cap, beat still covers narration.
+        let (clip_dur, beat_total) = clip_beat_plan(None, 8.0);
+        assert!((clip_dur - CLIP_SECONDS).abs() < 1e-9);
+        assert!((beat_total - 8.6).abs() < 1e-9);
+    }
+
+    #[test]
+    fn clip_filter_no_tpad_when_clip_covers_the_beat() {
+        // Clip at least as long as the beat → no freeze.
+        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true);
+        assert!(!g.contains("tpad"));
+    }
+
+    #[test]
+    fn clip_filter_narration_only_without_clip_audio() {
+        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false);
+        assert!(!g.contains("amix"));
+        assert!(!g.contains("volume="));
+        assert!(g.contains("[1:a]apad[a]"));
+    }
+
+    #[test]
+    fn clip_beat_args_bound_clip_and_output() {
+        let args = build_clip_beat_args(
+            "/v.mp4",
+            "/n.wav",
+            "/out.mp4",
+            5.0,
+            6.6,
+            true,
+            &SegmentOpts::default(),
+        );
+        let joined = args.join(" ");
+        // Input -t bounds the clip read; output -t bounds the beat.
+        assert!(joined.contains("-t 5.000 -i /v.mp4"));
+        assert!(joined.contains("-i /n.wav"));
+        assert!(joined.contains("-t 6.600"));
+        assert!(joined.contains("-r 30"));
+        assert!(joined.ends_with("/out.mp4"));
+    }
+
+    #[test]
+    fn concat_args_stream_copy_with_faststart_and_forced_muxer() {
+        // Output goes to a .tmp path, so the muxer must be forced — ffmpeg
+        // can't infer mp4 from the extension (the bug this guards against).
+        let args = build_concat_args("/tmp/list.txt", "/out.mp4.tmp");
+        let joined = args.join(" ");
+        assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt"));
+        assert!(joined.contains("-c copy"));
+        assert!(joined.contains("+faststart"));
+        assert!(joined.contains("-f mp4"));
+        // The forced muxer must come before the output path.
+        let f_mp4 = args.windows(2).position(|w| w == ["-f", "mp4"]).unwrap();
+        let out = args.iter().position(|a| a == "/out.mp4.tmp").unwrap();
+        assert!(f_mp4 < out);
+    }
+
+    #[test]
+    fn concat_list_escapes_single_quotes() {
+        let body = build_concat_list(&[
+            "/tmp/seg_000.mp4".into(),
+            "/tmp/own's dir/seg_001.mp4".into(),
+        ]);
+        assert!(body.contains("file '/tmp/seg_000.mp4'\n"));
+        // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax.
+        assert!(body.contains(r"own'\''s"));
+    }
+}
diff --git a/src/reels/script.rs b/src/reels/script.rs
new file mode 100644
index 0000000..38ef9cc
--- /dev/null
+++ b/src/reels/script.rs
@@ -0,0 +1,491 @@
+//! Narration scripting for memory reels.
+//!
+//! One LLM call turns the planned beats (each carrying its date and, where
+//! available, its cached insight) into a short first-person narration line per
+//! beat plus a title for the reel. A beat may show several photos in a quick
+//! burst, so a line narrates the *moment*, not a single frame. We reuse the
+//! cached insight summary as the richest signal rather than re-running vision
+//! at reel time — that keeps reel generation off the GPU's vision slot.
+//!
+//! The prompt builder and response parser are pure so the contract is
+//! unit-testable; `generate_script` wires them to the LLM client.
+//!
+//! The agentic scripter (pre-generation) resolves the backend through the
+//! InsightGenerator, builds a read-only tool set, and runs a tool loop to
+//! ground the narration in retrieved context before asking for the final JSON.
+
+use anyhow::{Context, Result};
+use std::sync::Arc;
+
+use super::{PlannedBeat, ReelMeta};
+use crate::ai::backend::{BackendKind, SamplingOverrides};
+use crate::ai::insight_generator::InsightGenerator;
+use crate::ai::llamacpp::LlamaCppClient;
+use crate::ai::llm_client::{LlmClient, Tool};
+use crate::ai::ollama::ChatMessage;
+
+/// The narration for a whole reel: a title and one line per beat, in order.
+#[derive(Debug, Clone, PartialEq)]
+pub struct ReelScript {
+    pub title: String,
+    pub lines: Vec<String>,
+}
+
+/// System prompt for the single-call scripter ([`generate_script`]).
+const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
+slideshow of someone's own photos set to a spoken voiceover. Write warm, \
+specific, first-person narration as if the person is gently looking back on \
+their own memories. Each line plays over one moment, which may be a quick burst \
+of several photos, so narrate the moment as a whole rather than a single frame. \
+Be concrete and grounded in the details given; never invent names, places, or \
+events that aren't supported. Keep each line to one or two short sentences that \
+can be read aloud in a few seconds. Avoid generic filler like \"what a \
+wonderful day\" — if you have little to go on, simply describe the moment \
+plainly.";
+
+/// Agentic scripter system prompt: richer version that tells the model it may
+/// call read-only tools to ground each line.
+const AGENTIC_SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
+slideshow of someone's own photos set to a spoken voiceover. Write warm, \
+specific, first-person narration as if the person is gently looking back on \
+their own memories. Each line plays over one moment, which may be a quick burst \
+of several photos, so narrate the moment as a whole rather than a single frame. \
+Be concrete and grounded in the details given; never invent names, places, or \
+events that aren't supported. Keep each line to one or two short sentences that \
+can be read aloud in a few seconds. Avoid generic filler like \"what a \
+wonderful day\" — if you have little to go on, simply describe the moment \
+plainly.\n\nYou may call read-only tools (search_rag, search_messages, \
+get_sms_messages, get_calendar_events, get_location_history, reverse_geocode, \
+get_personal_place_at, recall_entities, get_current_datetime) to ground each \
+line in real context — e.g. reverse_geocode a moment's GPS to name the place, \
+or check the calendar/messages around its date. Never invent details. Return \
+ONLY the JSON object, no prose or code fences.";
+
+/// Maximum agentic tool iterations for pre-generation. Tunable via
+/// `REEL_PREGEN_MAX_TOOL_ITERS` (default 8).
+fn reel_pregen_max_tool_iters() -> usize {
+    // Unset, unparsable, or zero values all fall back to the default.
+    std::env::var("REEL_PREGEN_MAX_TOOL_ITERS")
+        .ok()
+        .and_then(|s| s.trim().parse::<usize>().ok())
+        .filter(|x| *x > 0)
+        .unwrap_or(8)
+}
+
+/// Build the (system, user) prompt pair for the scripter. The user message
+/// describes each beat in order and asks for strict JSON back.
+///
+/// Blank (empty or whitespace-only) insight titles and summaries are treated
+/// as absent, so a beat never gets an empty "Known context" line.
+pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) {
+    let mut user = String::new();
+    user.push_str(&format!(
+        "This reel has {} moments surfaced as memories {}.\n\n",
+        beats.len(),
+        meta.span_phrase()
+    ));
+    if !meta.years.is_empty() {
+        let years: Vec<String> = meta.years.iter().map(|y| y.to_string()).collect();
+        user.push_str(&format!("They span the years: {}.\n\n", years.join(", ")));
+    }
+    user.push_str("Moments, in the order they will appear:\n");
+    for (i, beat) in beats.iter().enumerate() {
+        // Moments are numbered from 1, matching the order of "segments".
+        user.push_str(&format!("\n[{}]", i + 1));
+        if let Some(date) = beat.date_label() {
+            user.push_str(&format!(" {date}"));
+        }
+        if beat.is_clip() {
+            user.push_str(" (a video clip)");
+        } else if beat.media.len() > 1 {
+            user.push_str(&format!(" (a burst of {} photos)", beat.media.len()));
+        }
+        user.push('\n');
+        let title = beat
+            .insight_title
+            .as_deref()
+            .map(str::trim)
+            .filter(|t| !t.is_empty());
+        let summary = beat
+            .insight_summary
+            .as_deref()
+            .map(str::trim)
+            .filter(|s| !s.is_empty());
+        match (title, summary) {
+            (Some(t), Some(s)) => user.push_str(&format!(" Known context: {t} — {s}\n")),
+            (Some(t), None) => user.push_str(&format!(" Known context: {t}\n")),
+            (None, Some(s)) => user.push_str(&format!(" Known context: {s}\n")),
+            (None, None) => {
+                user.push_str(" (no extra context — narrate plainly from the date)\n")
+            }
+        }
+    }
+    // NOTE(review): the empty strings in the JSON shape below look like
+    // angle-bracket placeholders that were lost upstream — confirm against the
+    // original source before relying on this prompt text.
+    user.push_str(&format!(
+        "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
+         {{\"title\": \"\", \"segments\": [\"\", \
+         \"\", ... ]}}\n\
+         The \"segments\" array MUST have exactly {} items, one per moment in order.",
+        beats.len()
+    ));
+    (SYSTEM_PROMPT.to_string(), user)
+}
+
+/// Build a richer (system, user) prompt pair for the agentic scripter.
+/// The system prompt tells the model it may call read-only tools to ground each
+/// line. The user message uses the same per-beat enumeration as
+/// `build_script_messages` plus a GPS line per beat when available.
+///
+/// Blank (empty or whitespace-only) insight titles and summaries are treated
+/// as absent, so a beat never gets an empty "Known context" line.
+pub fn build_agentic_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> Vec<ChatMessage> {
+    let mut user = String::new();
+    user.push_str(&format!(
+        "This reel has {} moments surfaced as memories {}.\n\n",
+        beats.len(),
+        meta.span_phrase()
+    ));
+    if !meta.years.is_empty() {
+        let years: Vec<String> = meta.years.iter().map(|y| y.to_string()).collect();
+        user.push_str(&format!("They span the years: {}.\n\n", years.join(", ")));
+    }
+    user.push_str("Moments, in the order they will appear:\n");
+    for (i, beat) in beats.iter().enumerate() {
+        user.push_str(&format!("\n[{}]", i + 1));
+        if let Some(date) = beat.date_label() {
+            user.push_str(&format!(" {date}"));
+        }
+        if beat.is_clip() {
+            user.push_str(" (a video clip)");
+        } else if beat.media.len() > 1 {
+            user.push_str(&format!(" (a burst of {} photos)", beat.media.len()));
+        }
+        // Coordinates give the model something to pass to reverse_geocode.
+        if let Some((lat, lon)) = beat.gps {
+            user.push_str(&format!("\n GPS: {lat:.4}, {lon:.4}"));
+        }
+        user.push('\n');
+        let title = beat
+            .insight_title
+            .as_deref()
+            .map(str::trim)
+            .filter(|t| !t.is_empty());
+        let summary = beat
+            .insight_summary
+            .as_deref()
+            .map(str::trim)
+            .filter(|s| !s.is_empty());
+        match (title, summary) {
+            (Some(t), Some(s)) => user.push_str(&format!(" Known context: {t} — {s}\n")),
+            (Some(t), None) => user.push_str(&format!(" Known context: {t}\n")),
+            (None, Some(s)) => user.push_str(&format!(" Known context: {s}\n")),
+            (None, None) => {
+                user.push_str(" (no extra context — narrate plainly from the date)\n")
+            }
+        }
+    }
+    // NOTE(review): the empty strings in the JSON shape below look like
+    // angle-bracket placeholders that were lost upstream — confirm against the
+    // original source before relying on this prompt text.
+    user.push_str(&format!(
+        "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
+         {{\"title\": \"\", \"segments\": [\"\", \
+         \"\", ... ]}}\n\
+         The \"segments\" array MUST have exactly {} items, one per moment in order.",
+        beats.len()
+    ));
+
+    vec![
+        ChatMessage::system(AGENTIC_SYSTEM_PROMPT.to_string()),
+        ChatMessage::user(user),
+    ]
+}
+
+/// Parse the model's response into a script with exactly `n` lines. Tolerant of
+/// code fences and surrounding prose, and of both `segments: [".."]` and
+/// `segments: [{"narration": ".."}]` shapes. Missing/extra lines are padded or
+/// truncated so the caller always gets `n` aligned to the segments.
+pub fn parse_script_response(raw: &str, n: usize) -> ReelScript {
+    let fallback_line = "A moment worth remembering.";
+    let value = extract_json_object(raw);
+
+    let title = value
+        .as_ref()
+        .and_then(|v| v.get("title"))
+        .and_then(|t| t.as_str())
+        .map(clean_text)
+        .filter(|s| !s.is_empty())
+        .unwrap_or_else(|| "Memories".to_string());
+
+    let mut lines: Vec<String> = value
+        .as_ref()
+        .and_then(|v| v.get("segments"))
+        .and_then(|s| s.as_array())
+        .map(|arr| {
+            arr.iter()
+                .map(|item| {
+                    // Accept a bare string or an object with a "narration" key;
+                    // anything else becomes an empty line, replaced below.
+                    item.as_str()
+                        .or_else(|| item.get("narration").and_then(|n| n.as_str()))
+                        .map(clean_text)
+                        .unwrap_or_default()
+                })
+                .collect()
+        })
+        .unwrap_or_default();
+
+    // Align to exactly n: drop extras, pad shortfalls with a neutral line so
+    // every photo still gets spoken audio.
+    lines.resize(n, fallback_line.to_string());
+    for line in lines.iter_mut() {
+        if line.is_empty() {
+            *line = fallback_line.to_string();
+        }
+    }
+
+    ReelScript { title, lines }
+}
+
+/// Byte index of the `}` that closes the object opened at `start`, ignoring
+/// braces inside JSON strings. `None` if the object never closes.
+fn balanced_object_end(bytes: &[u8], start: usize) -> Option<usize> {
+    let mut depth = 0i32;
+    let mut in_str = false;
+    let mut escaped = false;
+    // Every delimiter is ASCII, so scanning bytes is safe: the bytes of a
+    // multi-byte UTF-8 character never equal an ASCII byte.
+    for (i, &b) in bytes.iter().enumerate().skip(start) {
+        if in_str {
+            if escaped {
+                escaped = false;
+            } else if b == b'\\' {
+                escaped = true;
+            } else if b == b'"' {
+                in_str = false;
+            }
+            continue;
+        }
+        match b {
+            b'"' => in_str = true,
+            b'{' => depth += 1,
+            b'}' => {
+                depth -= 1;
+                if depth == 0 {
+                    return Some(i);
+                }
+            }
+            _ => {}
+        }
+    }
+    None
+}
+
+/// Pull the first parseable top-level JSON object out of a possibly-noisy model
+/// response (code fences, leading prose). Returns None if nothing parses.
+///
+/// A brace span that is balanced but not valid JSON (e.g. `{placeholder}` in
+/// leading prose) is skipped and the scan continues at the next `{`.
+fn extract_json_object(raw: &str) -> Option<serde_json::Value> {
+    // Fast path: the whole thing is valid JSON.
+    if let Ok(v) = serde_json::from_str::<serde_json::Value>(raw.trim()) {
+        return Some(v);
+    }
+    let bytes = raw.as_bytes();
+    let mut from = 0;
+    while let Some(offset) = raw[from..].find('{') {
+        let start = from + offset;
+        let parsed = balanced_object_end(bytes, start)
+            .and_then(|end| serde_json::from_str(&raw[start..=end]).ok());
+        if parsed.is_some() {
+            return parsed;
+        }
+        // `{` is one byte, so `start + 1` is always a char boundary.
+        from = start + 1;
+    }
+    None
+}
+
+/// Collapse whitespace and strip stray markdown/quote decorations a model
+/// sometimes leaves around a line.
+fn clean_text(s: &str) -> String {
+    let trimmed = s.trim().trim_matches('"').trim();
+    trimmed.split_whitespace().collect::<Vec<_>>().join(" ")
+}
+
+/// Generate the reel script via the LLM. Text-only (no images) — the per-beat
+/// context comes from cached insights. The call takes the GPU read lease
+/// internally (see `LlamaCppClient::generate`).
+// NOTE(review): the `Arc` type argument was lost upstream; `LlamaCppClient` is
+// assumed from this file's imports and the doc above — confirm.
+pub async fn generate_script(
+    client: &Arc<LlamaCppClient>,
+    meta: &ReelMeta,
+    beats: &[PlannedBeat],
+) -> Result<ReelScript> {
+    let (system, user) = build_script_messages(meta, beats);
+    let raw = client
+        .generate(&user, Some(&system), None)
+        .await
+        .context("LLM script generation failed")?;
+    Ok(parse_script_response(&raw, beats.len()))
+}
+
+/// Agentic version of script generation: resolves the backend via the
+/// InsightGenerator (honouring LLM_BACKEND, model overrides, etc.), builds
+/// a read-only tool set, runs the tool loop, then parses the JSON response.
+/// Returns the same ReelScript shape. On failure the caller may fall back to
+/// `generate_script`.
+pub async fn generate_script_agentic(
+    generator: &InsightGenerator,
+    meta: &ReelMeta,
+    beats: &[PlannedBeat],
+) -> Result<ReelScript> {
+    // 1. Resolve the backend. Bail if the local model lacks tool-calling.
+    let backend = generator
+        .resolve_backend(
+            BackendKind::Local,
+            &SamplingOverrides {
+                model: None,
+                num_ctx: None,
+                temperature: None,
+                top_p: None,
+                top_k: None,
+                min_p: None,
+                enable_thinking: None,
+            },
+        )
+        .await
+        .context("resolving backend for agentic script")?;
+
+    // 2. Build the read-only tool set. Start from the persona gate (no
+    // persona context, so corrections are closed), force has_vision=false,
+    // then filter out write tools.
+    let gate = generator.current_gate_opts_for_persona(false, None);
+    let all_tools = InsightGenerator::build_tool_definitions(gate);
+    // Whole-reel calls have no single photo and no authenticated user, so the
+    // loop runs execute_tool with empty file/image context and user_id=0. Only
+    // tools that work without that context are useful here — photo/user-bound
+    // tools (get_file_tags, get_faces_in_photo, recall_facts_for_photo,
+    // recall_facts_for_entity) would just no-op or error, burning iterations,
+    // so they're excluded.
+    let read_only_names: std::collections::HashSet<&str> = [
+        "search_rag",
+        "search_messages",
+        "get_sms_messages",
+        "get_calendar_events",
+        "get_location_history",
+        "reverse_geocode",
+        "get_personal_place_at",
+        "recall_entities",
+        "get_current_datetime",
+    ]
+    .into_iter()
+    .collect();
+    let tools: Vec<Tool> = all_tools
+        .into_iter()
+        .filter(|t| read_only_names.contains(t.function.name.as_str()))
+        .collect();
+
+    // 3. Build the agentic prompt messages.
+    let messages = build_agentic_script_messages(meta, beats);
+
+    // 4. Run the tool loop.
+    let max_iter = reel_pregen_max_tool_iters();
+    let raw = generator
+        .run_readonly_tool_loop(&backend, messages, tools, max_iter)
+        .await
+        .context("agentic tool loop failed")?;
+
+    // 5. Strip any think-blocks the model may have emitted, then parse.
+    let raw = crate::ai::llm_client::strip_think_blocks(&raw);
+    Ok(parse_script_response(&raw, beats.len()))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::memories::MemoriesSpan;
+
+    fn meta() -> ReelMeta {
+        ReelMeta {
+            span: MemoriesSpan::Day,
+            years: vec![2019, 2021],
+        }
+    }
+
+    fn planned(n: usize) -> Vec<PlannedBeat> {
+        (0..n)
+            .map(|i| PlannedBeat {
+                media: vec![super::super::SegmentMedia::Photo {
+                    rel_path: format!("p{i}.jpg"),
+                    library_id: 1,
+                }],
+                date: Some(1_560_000_000 + i as i64 * 86_400),
+                insight_title: None,
+                insight_summary: None,
+                gps: None,
+            })
+            .collect()
+    }
+
+    #[test]
+    fn prompt_states_exact_moment_count_and_span() {
+        let (sys, user) = build_script_messages(&meta(), &planned(3));
+        assert!(sys.contains("memory reel"));
+        assert!(user.contains("3 moments"));
+        assert!(user.contains("on this day"));
+        assert!(user.contains("exactly 3 items"));
+        // Each moment gets an indexed entry.
+        assert!(user.contains("[1]") && user.contains("[2]") && user.contains("[3]"));
+    }
+
+    #[test]
+    fn prompt_notes_burst_photo_count() {
+        let mut p = planned(1);
+        p[0].media = vec![
+            super::super::SegmentMedia::Photo {
+                rel_path: "a.jpg".into(),
+                library_id: 1,
+            },
+            super::super::SegmentMedia::Photo {
+                rel_path: "b.jpg".into(),
+                library_id: 1,
+            },
+            super::super::SegmentMedia::Photo {
+                rel_path: "c.jpg".into(),
+                library_id: 1,
+            },
+        ];
+        let (_sys, user) = build_script_messages(&meta(), &p);
+        assert!(user.contains("a burst of 3 photos"));
+    }
+
+    #[test]
+    fn prompt_marks_clip_beats() {
+        let mut p = planned(1);
+        p[0].media = vec![super::super::SegmentMedia::Clip {
+            rel_path: "v.mp4".into(),
+            library_id: 1,
+        }];
+        let (_sys, user) = build_script_messages(&meta(), &p);
+        assert!(user.contains("a video clip"));
+    }
+
+    #[test]
+    fn prompt_includes_insight_context_when_present() {
+        let mut p = planned(1);
+        p[0].insight_title = Some("Lake house weekend".into());
+        p[0].insight_summary = Some("Swimming with the dogs.".into());
+        let (_sys, user) = build_script_messages(&meta(), &p);
+        assert!(user.contains("Lake house weekend — Swimming with the dogs."));
+    }
+
+    #[test]
+    fn parse_plain_json_object() {
+        let raw = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#;
+        let script = parse_script_response(raw, 2);
+        assert_eq!(script.title, "Summer Days");
+        assert_eq!(script.lines, vec!["First line.", "Second line."]);
+    }
+
+    #[test]
+    fn parse_tolerates_code_fences_and_prose() {
+        let raw = "Sure! Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!";
+        let script = parse_script_response(raw, 2);
+        assert_eq!(script.title, "Trip");
+        assert_eq!(script.lines, vec!["A.", "B."]);
+    }
+
+    #[test]
+    fn parse_skips_non_json_braces_before_the_object() {
+        // A balanced-but-invalid span in the prose must not abort the scan.
+        let raw = "Using {placeholders} as asked: {\"title\": \"Trip\", \"segments\": [\"A.\"]}";
+        let script = parse_script_response(raw, 1);
+        assert_eq!(script.title, "Trip");
+        assert_eq!(script.lines, vec!["A."]);
+    }
+
+    #[test]
+    fn parse_accepts_object_segment_shape() {
+        let raw = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#;
+        let script = parse_script_response(raw, 2);
+        assert_eq!(script.lines, vec!["One.", "Two."]);
+    }
+
+    #[test]
+    fn parse_pads_short_and_truncates_long_to_n() {
+        // Model returned 1 line but we have 3 segments → pad with neutral lines.
+        let short = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3);
+        assert_eq!(short.lines.len(), 3);
+        assert_eq!(short.lines[0], "Only one.");
+        assert!(!short.lines[1].is_empty());
+
+        // Model returned 3 but we have 2 → truncate.
+        let long = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2);
+        assert_eq!(long.lines, vec!["a", "b"]);
+    }
+
+    #[test]
+    fn parse_falls_back_on_garbage() {
+        let script = parse_script_response("the model said no", 2);
+        assert_eq!(script.title, "Memories");
+        assert_eq!(script.lines.len(), 2);
+        assert!(script.lines.iter().all(|l| !l.is_empty()));
+    }
+
+    #[test]
+    fn parse_blank_line_replaced_with_fallback() {
+        let script = parse_script_response(r#"{"title":"T","segments":[" ","Real."]}"#, 2);
+        assert!(!script.lines[0].is_empty());
+        assert_eq!(script.lines[1], "Real.");
+    }
+}
diff --git a/src/reels/selector.rs b/src/reels/selector.rs
new file mode 100644
index 0000000..a02cbb8
--- /dev/null
+++ b/src/reels/selector.rs
@@ -0,0 +1,560 @@
+//! Reel selectors: resolve "what goes in the reel" into an ordered media set
+//! plus the metadata the scripter needs. The renderer and scripter are
+//! selector-agnostic, so adding tag- or date-range-based reels later means
+//! adding a variant here, not touching the pipeline.
+//!
+//! Resolution is split in two so the handler can compute a cache key (and
+//! short-circuit on a cache hit) without the per-photo insight lookups:
+//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and
+//! runs in the background job.
+
+use std::path::Path;
+use std::sync::Mutex;
+
+use chrono::{DateTime, Datelike, FixedOffset};
+
+use super::{PlannedBeat, ReelMeta, SegmentMedia};
+use crate::database::{ExifDao, InsightDao};
+use crate::file_types::{is_image_file, is_video_file};
+use crate::memories::{self, MemoriesSpan};
+use crate::state::AppState;
+
+/// Default and hard caps on how many photos a reel covers. The default is an
+/// upper bound on the request; the effective count is usually smaller, set by
+/// the duration budget (see [`budget_segments`]). The hard cap bounds work per
+/// reel regardless.
pub const DEFAULT_MAX_SEGMENTS: usize = 40;
/// Absolute ceiling on narrated segments per reel. [`budget_segments`] never
/// returns more than this, whatever the request or the duration target says.
pub const HARD_MAX_SEGMENTS: usize = 40;

/// Target reel length in seconds, used when `REEL_TARGET_SECONDS` is unset or
/// does not parse to a finite, positive number.
const DEFAULT_TARGET_REEL_SECONDS: f64 = 90.0;

/// Rough average wall-time per segment. Only used to convert the duration
/// target into a segment count.
const EST_SECONDS_PER_SEGMENT: f64 = 5.0;

/// Time gap (seconds) that separates one event from the next when a span's
/// photos are clustered: 4 hours.
const EVENT_GAP_SECONDS: i64 = 4 * 3600;

/// Reads the target reel length from `REEL_TARGET_SECONDS`.
///
/// Falls back to [`DEFAULT_TARGET_REEL_SECONDS`] when the variable is missing,
/// is not a number, or is non-finite / zero / negative.
fn target_reel_seconds() -> f64 {
    std::env::var("REEL_TARGET_SECONDS")
        .ok()
        .and_then(|raw| raw.trim().parse::<f64>().ok())
        .filter(|secs| secs.is_finite() && *secs > 0.0)
        .unwrap_or(DEFAULT_TARGET_REEL_SECONDS)
}

/// How many segments fit the duration budget.
///
/// The result is the smaller of the duration-derived count and
/// `requested_max`, then clamped into `1..=HARD_MAX_SEGMENTS`, so it is never
/// zero, even for `requested_max == 0` or a tiny duration target.
pub fn budget_segments(requested_max: usize) -> usize {
    // `target_reel_seconds` only yields finite positive values, and the
    // float-to-usize `as` cast saturates, so this cannot wrap.
    let by_budget = (target_reel_seconds() / EST_SECONDS_PER_SEGMENT).floor() as usize;
    by_budget.min(requested_max).clamp(1, HARD_MAX_SEGMENTS)
}
/// Pick at most `max` items spread evenly across `items`.
///
/// * `max == 0` returns an empty vector.
/// * When `items` already fits (`items.len() <= max`) it is returned
///   unchanged (cloned).
/// * `max == 1` returns just the first item.
/// * Otherwise exactly `max` items are returned; the first and last input
///   items are always included and the rest are chosen at rounded, evenly
///   spaced indices.
pub fn sample_evenly<T: Clone>(items: &[T], max: usize) -> Vec<T> {
    if max == 0 {
        return Vec::new();
    }
    if items.len() <= max {
        return items.to_vec();
    }
    // From here on `items.len() > max >= 1`, so `items` is non-empty.
    if max == 1 {
        return vec![items[0].clone()];
    }
    let last = items.len() - 1;
    let steps = max - 1;
    (0..max)
        .map(|i| {
            // Round-to-nearest of `i * last / steps`: i == 0 maps to 0 and
            // i == steps maps to `last`, so both endpoints are kept.
            let idx = (i * last + steps / 2) / steps;
            items[idx.min(last)].clone()
        })
        .collect()
}
/// Merge an ordered list of clusters into contiguous groups, preserving order.
///
/// The clusters are split as evenly as possible into `n` runs (group `j`
/// covers clusters `j * c / n .. (j + 1) * c / n`, where `c` is the cluster
/// count), and each run is flattened into a single group, so no item is
/// dropped when `n >= 1`.
///
/// `n` is clamped to the number of clusters, so asking for more groups than
/// there are clusters yields one group per cluster rather than trailing empty
/// groups. `n == 0` (or an empty input) yields an empty result.
fn partition_into_groups<T>(clusters: Vec<Vec<T>>, n: usize) -> Vec<Vec<T>> {
    let total = clusters.len();
    let n = n.min(total);
    let mut remaining = clusters.into_iter();
    (0..n)
        .map(|j| {
            // With n <= total every run has at least one cluster, and the run
            // lengths sum to `total`, so the iterator is drained exactly.
            let start = j * total / n;
            let end = (j + 1) * total / n;
            remaining.by_ref().take(end - start).flatten().collect()
        })
        .collect()
}
/// Divide `n_beats` between photo beats and video-clip beats, returned as
/// `(photo_beats, clip_beats)`.
///
/// * No videos: photos get the whole budget.
/// * No photos: clips get the budget, capped at the number of videos.
/// * Both present: clips get up to half the budget (but at least one), capped
///   at the number of videos; photos get whatever is left.
fn split_beat_budget(n_photos: usize, n_videos: usize, n_beats: usize) -> (usize, usize) {
    match (n_photos, n_videos) {
        (_, 0) => (n_beats, 0),
        (0, videos) => (0, videos.min(n_beats)),
        (_, videos) => {
            let clip_ceiling = std::cmp::max(n_beats / 2, 1);
            let clips = std::cmp::min(videos, clip_ceiling);
            (n_beats.saturating_sub(clips), clips)
        }
    }
}
+ for v in sample_evenly(videos, clip_budget) { + beats.push(PlannedBeat { + media: vec![SegmentMedia::Clip { + rel_path: v.path, + library_id: v.library_id, + }], + date: v.created, + insight_title: None, + insight_summary: None, + gps: None, + }); + } + + // Merge photo and clip beats back into chronological order (undated last). + beats.sort_by(|a, b| match (a.date, b.date) { + (Some(x), Some(y)) => x.cmp(&y), + (Some(_), None) => std::cmp::Ordering::Less, + (None, Some(_)) => std::cmp::Ordering::Greater, + (None, None) => std::cmp::Ordering::Equal, + }); + beats +} + +/// Cheap pass: resolve the selector into an ordered list of media (no insight +/// lookups yet) plus reel metadata. `Err` only on an invalid library param. +pub fn resolve( + app_state: &AppState, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + selector: &ReelSelector, +) -> Result<(Vec, ReelMeta), String> { + match selector { + ReelSelector::Memories { + span, + tz_offset_minutes, + library, + max_segments, + } => { + let client_tz = FixedOffset::east_opt(tz_offset_minutes * 60); + let items = memories::gather_memory_items( + app_state, + exif_dao, + span_context, + *span, + *tz_offset_minutes, + client_tz, + library.as_deref(), + )?; + + // Split into photos and video clips; anything that's neither is + // dropped. Years span both, computed before the budget narrows it. + let years = distinct_years(&items, client_tz); + let meta = ReelMeta { span: *span, years }; + + let (photos, videos): (Vec<_>, Vec<_>) = items + .into_iter() + .filter(|it| { + is_image_file(Path::new(&it.path)) || is_video_file(Path::new(&it.path)) + }) + .partition(|it| is_image_file(Path::new(&it.path))); + + // The budget caps the number of narrated beats (≈ reel length); + // photo beats then burst through several photos and video beats + // play a short clip, so the reel covers the span without running + // minutes long. 
+ let n_beats = budget_segments(*max_segments); + let beats = form_beats(&photos, &videos, n_beats, MAX_BURST_PHOTOS); + Ok((beats, meta)) + } + } +} + +/// Distinct calendar years represented by the selected media, in the client's +/// timezone, ascending. Used to tell the scripter how far back the reel reaches. +fn distinct_years(items: &[memories::MemoryItem], tz: Option) -> Vec { + let mut years: Vec = items + .iter() + .filter_map(|it| it.created) + .filter_map(|ts| DateTime::from_timestamp(ts, 0)) + .map(|dt| match tz { + Some(off) => dt.with_timezone(&off).year(), + None => dt.year(), + }) + .collect(); + years.sort_unstable(); + years.dedup(); + years +} + +/// Background pass: fill each beat's cached insight (title + summary) and +/// GPS coordinates from its lead photo, where one exists. Best-effort — a +/// missing or errored lookup leaves the fields `None` and the scripter +/// narrates from the date alone. +pub fn enrich( + insight_dao: &Mutex>, + exif_dao: &Mutex>, + span_context: &opentelemetry::Context, + beats: &mut [PlannedBeat], +) { + let Ok(mut insight_dao) = insight_dao.lock() else { + return; + }; + let Ok(mut exif_dao) = exif_dao.lock() else { + return; + }; + for beat in beats.iter_mut() { + let rel_path = match beat.media.first() { + Some(SegmentMedia::Photo { rel_path, .. } | SegmentMedia::Clip { rel_path, .. }) => { + rel_path.clone() + } + None => continue, + }; + if let Ok(Some(insight)) = insight_dao.get_insight(span_context, &rel_path) { + beat.insight_title = Some(insight.title); + beat.insight_summary = Some(insight.summary); + } + // Enrich GPS from EXIF when the lead media is a photo. + if let Some(SegmentMedia::Photo { .. 
}) = beat.media.first() + && let Ok(Some(exif)) = exif_dao.get_exif(span_context, &rel_path) + && let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude) + { + beat.gps = Some((lat as f64, lon as f64)); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sample_evenly_returns_all_when_under_cap() { + let v = vec![1, 2, 3]; + assert_eq!(sample_evenly(&v, 5), vec![1, 2, 3]); + assert_eq!(sample_evenly(&v, 3), vec![1, 2, 3]); + } + + #[test] + fn sample_evenly_keeps_endpoints_and_spreads() { + let v: Vec = (0..100).collect(); + let picked = sample_evenly(&v, 5); + assert_eq!(picked.len(), 5); + assert_eq!(picked[0], 0); // first kept + assert_eq!(*picked.last().unwrap(), 99); // last kept + // Strictly increasing, no dupes. + assert!(picked.windows(2).all(|w| w[0] < w[1])); + } + + #[test] + fn sample_evenly_handles_one_and_zero() { + let v: Vec = (0..10).collect(); + assert_eq!(sample_evenly(&v, 1), vec![0]); + assert!(sample_evenly(&v, 0).is_empty()); + } + + #[test] + fn descriptor_is_stable_and_distinguishes_inputs() { + let a = ReelSelector::Memories { + span: MemoriesSpan::Day, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + let b = ReelSelector::Memories { + span: MemoriesSpan::Week, + tz_offset_minutes: -480, + library: None, + max_segments: 24, + }; + assert_eq!(a.descriptor(), a.clone().descriptor()); + assert_ne!(a.descriptor(), b.descriptor()); + assert!(a.descriptor().contains("lib=all")); + } + + #[test] + fn distinct_years_dedupes_and_sorts() { + let items = vec![ + memories::MemoryItem { + path: "a".into(), + created: Some(1_560_000_000), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "b".into(), + created: Some(1_560_086_400), // 2019 + modified: None, + library_id: 1, + }, + memories::MemoryItem { + path: "c".into(), + created: Some(1_623_000_000), // 2021 + modified: None, + library_id: 1, + }, + ]; + assert_eq!(distinct_years(&items, None), vec![2019, 2021]); 
+ } + + // Build an item at a given unix timestamp (seconds) with a chosen extension. + fn item_ext(ts: i64, name: &str, ext: &str) -> memories::MemoryItem { + memories::MemoryItem { + path: format!("{name}.{ext}"), + created: Some(ts), + modified: None, + library_id: 1, + } + } + fn item_at(ts: i64, name: &str) -> memories::MemoryItem { + item_ext(ts, name, "jpg") + } + + #[test] + fn budget_segments_caps_to_duration_target() { + // 90s / 5s ≈ 18, bounded by the request max and hard cap. + assert_eq!(budget_segments(40), 18); + assert_eq!(budget_segments(5), 5); // request asked for fewer + assert_eq!(budget_segments(1000), 18); // hard cap / budget wins + } + + #[test] + fn cluster_by_gap_splits_on_large_jumps() { + // Two photos minutes apart, then one a day later → two events. + let items = vec![ + item_at(1_000_000, "a"), + item_at(1_000_300, "b"), // +5 min → same event + item_at(1_100_000, "c"), // +~27h → new event + ]; + let clusters = cluster_by_gap(&items, EVENT_GAP_SECONDS); + assert_eq!(clusters.len(), 2); + assert_eq!(clusters[0].len(), 2); + assert_eq!(clusters[1].len(), 1); + } + + #[test] + fn photo_beats_one_per_event_when_they_fit() { + // Three well-separated events, budget of 10 → three beats, each holding + // all of its (few) photos. + let items = vec![ + item_at(0, "a"), + item_at(50, "b"), // same event as a + item_at(1_000_000, "c"), + item_at(2_000_000, "d"), + ]; + let beats = form_photo_beats(&items, 10, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 3); + assert_eq!(beats[0].media.len(), 2); // burst of the first event + assert_eq!(beats[1].media.len(), 1); + assert_eq!(beats[2].media.len(), 1); + } + + #[test] + fn photo_beats_merge_events_when_over_budget() { + // Six distinct events but only two beats → adjacent events fold in, and + // every event's photos still appear (capped by the burst max). 
+ let items: Vec = (0..6) + .map(|i| item_at(i as i64 * 1_000_000, &format!("e{i}"))) + .collect(); + let beats = form_photo_beats(&items, 2, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 2); + let shown: usize = beats.iter().map(|b| b.media.len()).sum(); + assert_eq!(shown, 6); // all six moments still shown across two beats + } + + #[test] + fn photo_beats_cap_burst_to_max() { + // One dense event of 30 photos, generous budget → a single beat that + // bursts at most MAX_BURST_PHOTOS, not all 30. + let items: Vec = (0..30) + .map(|i| item_at(i as i64, &format!("p{i}"))) + .collect(); + let beats = form_photo_beats(&items, 18, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 1); + assert_eq!(beats[0].media.len(), MAX_BURST_PHOTOS); + } + + #[test] + fn split_beat_budget_handles_each_mix() { + // Only photos / only videos → that kind gets the whole budget. + assert_eq!(split_beat_budget(10, 0, 18), (18, 0)); + assert_eq!(split_beat_budget(0, 10, 18), (0, 10)); // capped at n_videos + assert_eq!(split_beat_budget(0, 30, 18), (0, 18)); // capped at budget + // Mixed → clips up to half (≥1), photos the rest. + assert_eq!(split_beat_budget(100, 100, 18), (9, 9)); + assert_eq!(split_beat_budget(100, 1, 18), (17, 1)); // few videos + } + + #[test] + fn form_beats_mixes_clip_and_photo_beats_in_time_order() { + let photos = vec![item_at(0, "p0"), item_at(2_000_000, "p1")]; + // A video between the two photo events (in time). + let videos = vec![item_ext(1_000_000, "v0", "mp4")]; + let beats = form_beats(&photos, &videos, 10, MAX_BURST_PHOTOS); + // Two photo events + one clip = three beats, chronological. + assert_eq!(beats.len(), 3); + assert!(!beats[0].is_clip()); // p0 @ t=0 + assert!(beats[1].is_clip()); // v0 @ t=1e6 + assert!(!beats[2].is_clip()); // p1 @ t=2e6 + assert!(matches!(beats[1].media[0], SegmentMedia::Clip { .. 
})); + } + + #[test] + fn form_beats_videos_only_become_clip_beats() { + let videos: Vec = (0..3) + .map(|i| item_ext(i as i64 * 1_000_000, &format!("v{i}"), "mov")) + .collect(); + let beats = form_beats(&[], &videos, 10, MAX_BURST_PHOTOS); + assert_eq!(beats.len(), 3); + assert!(beats.iter().all(|b| b.is_clip())); + } +} diff --git a/src/state.rs b/src/state.rs index e678ad1..33e8e3f 100644 --- a/src/state.rs +++ b/src/state.rs @@ -8,9 +8,10 @@ use crate::ai::turn_registry::TurnRegistry; use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient}; use crate::database::{ CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, InsightGenerationJobDao, KnowledgeDao, - LocationHistoryDao, SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, - SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, SqliteKnowledgeDao, - SqliteLocationHistoryDao, SqliteSearchHistoryDao, connect, + LocationHistoryDao, PrecomputedReelDao, SearchHistoryDao, SqliteCalendarEventDao, + SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, + SqliteKnowledgeDao, SqliteLocationHistoryDao, SqlitePrecomputedReelDao, SqliteSearchHistoryDao, + SqliteUserAiPrefsDao, UserAiPrefsDao, connect, }; use crate::database::{PreviewDao, SqlitePreviewDao}; use crate::faces; @@ -53,6 +54,10 @@ pub struct AppState { pub video_path: String, pub gif_path: String, pub preview_clips_path: String, + /// Directory for cached memory-reel MP4s (+ title sidecars). Derived from + /// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips. + /// Created lazily by the reel pipeline on first render. + pub reels_path: String, pub excluded_dirs: Vec, pub ollama: OllamaClient, /// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only @@ -84,6 +89,14 @@ pub struct AppState { pub clip_client: ClipClient, pub insight_job_dao: Arc>>, pub insight_job_handles: Arc>>, + /// Ledger for precomputed memory reels. 
Written by the nightly agentic + /// job (Section D); read by `GET /reels/precomputed` (Section C). + #[allow(dead_code)] + pub precomputed_reel_dao: Arc>>, + /// User AI preferences (voice, timezone, library). Mirrored by the + /// client; read by the nightly pre-generation scheduler. + #[allow(dead_code)] + pub user_ai_prefs_dao: Arc>>, } impl AppState { @@ -97,6 +110,7 @@ impl AppState { self.libraries.iter().find(|l| l.id == id) } + #[allow(dead_code)] pub fn library_by_name(&self, name: &str) -> Option<&Library> { self.libraries.iter().find(|l| l.name == name) } @@ -125,6 +139,8 @@ impl AppState { clip_client: ClipClient, insight_job_dao: Arc>>, insight_job_handles: Arc>>, + precomputed_reel_dao: Arc>>, + user_ai_prefs_dao: Arc>>, ) -> Self { assert!( !libraries_vec.is_empty(), @@ -141,6 +157,19 @@ impl AppState { preview_dao, ); + // Reels cache dir: explicit env, else a `reels` sibling of the preview + // clips dir (a known-writable, test-safe location). Not created here — + // the reel pipeline does `create_dir_all` before its first write, so + // construction (incl. tests) never touches the filesystem. 
+ let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| { + std::path::Path::new(&preview_clips_path) + .parent() + .map(|p| p.join("reels")) + .unwrap_or_else(|| std::path::PathBuf::from("reels")) + .to_string_lossy() + .to_string() + }); + let library_health = libraries::new_health_map(&libraries_vec); let live_libraries = Arc::new(RwLock::new(libraries_vec.clone())); Self { @@ -155,6 +184,7 @@ impl AppState { video_path, gif_path, preview_clips_path, + reels_path, excluded_dirs, ollama, openrouter, @@ -169,6 +199,8 @@ impl AppState { clip_client, insight_job_dao, insight_job_handles, + precomputed_reel_dao, + user_ai_prefs_dao, } } @@ -249,6 +281,14 @@ impl Default for AppState { let insight_job_handles: Arc>> = Arc::new(Mutex::new(HashMap::new())); + // Initialize precomputed reel DAO (nightly pre-generation ledger) + let precomputed_reel_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))); + + // Initialize user AI preferences DAO (Section E) + let user_ai_prefs_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))); + // Load base path and ensure the primary library row reflects it. 
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env"); let mut seed_conn = connect(); @@ -326,6 +366,8 @@ impl Default for AppState { clip_client, insight_job_dao, insight_job_handles, + precomputed_reel_dao, + user_ai_prefs_dao, ) } } @@ -535,6 +577,8 @@ impl AppState { ClipClient::new(None), // disabled in test Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test Arc::new(Mutex::new(HashMap::new())), // placeholder for test + Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))), // placeholder for test + Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))), // placeholder for test ) } } diff --git a/src/tags.rs b/src/tags.rs index f3e0135..3dc0859 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -168,7 +168,7 @@ async fn get_tags( // this file, so tags added under one library show up under the // others when they hold the same file. Falls back to direct rel_path // match when the file hasn't been hashed yet. - let library = libraries::resolve_library_param(&app_state, request.library.as_deref()) + let library = libraries::resolve_library_param_state(&app_state, request.library.as_deref()) .ok() .flatten() .unwrap_or_else(|| app_state.primary_library()); diff --git a/src/unified_search.rs b/src/unified_search.rs new file mode 100644 index 0000000..0940a92 --- /dev/null +++ b/src/unified_search.rs @@ -0,0 +1,521 @@ +//! `/photos/search/unified?q=` — unified NL photo search. +//! +//! One free-text box that composes the two existing engines instead of making +//! the user pick between them: +//! 1. A grounded local-LLM call ([`crate::ai::nl_query`]) translates the +//! query into a structured filter + a semantic term. +//! 2. Structured filters (tags / EXIF / geo / date / media-type) define the +//! candidate set; the semantic term ranks within it via CLIP. +//! +//! Path A (orchestration): we reuse `clip_search`'s scoring core and the +//! 
existing `ExifDao` / `TagDao` queries, joining on `content_hash`. EXIF rows +//! are the universal candidate carrier — each has `(library_id, file_path, +//! content_hash, date_taken)` — so the structured filter is just a predicate +//! over them, and the CLIP hits (which key on `content_hash`) intersect by +//! hash. No new schema, no surgery on `list_photos`. +//! +//! Degenerate cases collapse to the existing behavior: semantic-only → plain +//! CLIP search; filters-only → a date-sorted filtered listing. +//! +//! Person filtering is intentionally deferred (no person→photos resolver yet). + +use crate::AppState; +use crate::ai::backend::{BackendKind, SamplingOverrides}; +use crate::ai::nl_query::{StructuredQuery, translate_nl_query}; +use crate::clip_search::{ + SearchHit, parse_library_scope, resolve_hits, score_error_response, score_photos, +}; +use crate::data::Claims; +use crate::database::ExifDao; +use crate::file_types::{is_image_file, is_video_file}; +use crate::geo::{forward_geocode, gps_bounding_box, haversine_distance}; +use crate::tags::TagDao; +use actix_web::HttpResponse; +use actix_web::web::{Data, Query}; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; +use std::path::Path; +use std::sync::Mutex; + +#[derive(Debug, Deserialize)] +pub struct UnifiedQuery { + /// Natural-language query. Required; empty triggers 400. + pub q: String, + #[serde(default = "default_limit")] + pub limit: usize, + #[serde(default)] + pub offset: usize, + /// CLIP cosine floor for the semantic ranking stage. Same default as the + /// plain search endpoint. + #[serde(default = "default_threshold")] + pub threshold: f32, + /// Legacy single-library scope (see clip_search). + pub library: Option, + /// Multi-library scope, comma-separated ids. + pub library_ids: Option, + /// Optional model override. 
The client passes the user's currently-selected + /// local model so the translation step reuses a model that's already loaded + /// (avoids a llama-swap eviction / cold start). Falls back to the configured + /// default local model when absent. Local only — no hybrid here. + pub model: Option, +} + +fn default_limit() -> usize { + 20 +} +fn default_threshold() -> f32 { + 0.20 +} + +/// A geocoded place echoed back so the client can show / edit the location +/// filter it actually searched. +#[derive(Debug, Serialize)] +struct ResolvedPlace { + display_name: String, + lat: f64, + lon: f64, + radius_km: f64, +} + +/// How the server interpreted the NL query — echoed to the client to render +/// editable filter chips. tag ids map to the client's existing tag list. +#[derive(Debug, Serialize)] +struct Interpreted { + semantic: Option, + tag_ids: Vec, + exclude_tag_ids: Vec, + /// Words the model treated as tags that don't exist in the vocab; folded + /// into the semantic term and surfaced here so the UI can explain it. + unmatched_tags: Vec, + camera_make: Option, + camera_model: Option, + lens_model: Option, + date_from: Option, + date_to: Option, + media_type: Option, + place: Option, +} + +#[derive(Debug, Serialize)] +struct UnifiedResponse { + query: String, + interpreted: Interpreted, + /// CLIP model version used for ranking; `None` when the query had no + /// semantic term (filters-only). + model_version: Option, + /// Embeddings scored by CLIP (0 when filters-only). + considered: usize, + /// Matches before pagination. + total_matching: usize, + offset: usize, + results: Vec, +} + +#[derive(Debug, Serialize)] +struct ErrorBody { + error: String, +} + +fn bad_request(msg: impl Into) -> HttpResponse { + HttpResponse::BadRequest().json(ErrorBody { error: msg.into() }) +} + +/// Combine the model's semantic term with any tag words that didn't match the +/// vocab, so a hallucinated/non-vocab tag becomes a soft semantic signal +/// rather than being dropped. 
+fn effective_semantic(sq: &StructuredQuery) -> Option { + let mut parts: Vec = Vec::new(); + if let Some(s) = sq.semantic.as_deref() { + parts.push(s.to_string()); + } + parts.extend(sq.unmatched_tags.iter().cloned()); + if parts.is_empty() { + None + } else { + Some(parts.join(" ")) + } +} + +pub async fn unified_search( + _: Claims, + state: Data, + exif_dao: Data>>, + tag_dao: Data>, + query: Query, +) -> HttpResponse { + let nl = query.q.trim().to_string(); + if nl.is_empty() { + return bad_request("query parameter `q` is required"); + } + + let limit = query.limit.clamp(1, 200); + let offset = query.offset; + let threshold = query.threshold.clamp(-1.0, 1.0); + + let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) { + Ok(ids) => ids, + Err(msg) => return bad_request(msg), + }; + + let ctx = opentelemetry::Context::current(); + + // ── 1. Translate the NL query, grounded on the real tag vocabulary ── + let tag_vocab: Vec<(i32, String)> = { + let mut dao = tag_dao.lock().expect("tag dao"); + match dao.get_all_tags(&ctx, None) { + Ok(tags) => tags.into_iter().map(|(_, t)| (t.id, t.name)).collect(), + Err(e) => { + log::warn!("unified_search: get_all_tags failed: {e:?}"); + Vec::new() + } + } + }; + + // Respect env/config for the LLM backend (LLM_BACKEND → ollama or + // llama-swap); local only, no hybrid, per the feature's design. + // + // Translation-model precedence: + // 1. UNIFIED_SEARCH_MODEL env — pin a small, fast model that can stay + // co-resident with CLIP (and the chat model) so translation never + // evicts them. This is the recommended setup on a tight VRAM budget. + // 2. the client-selected model — routes translation to whatever the user + // already has loaded (no swap) when no dedicated model is pinned. + // 3. None → resolve_backend uses the configured default local model. 
+ let translation_model = std::env::var("UNIFIED_SEARCH_MODEL") + .ok() + .filter(|m| !m.trim().is_empty()) + .or_else(|| query.model.clone()) + .filter(|m| !m.trim().is_empty()); + let overrides = SamplingOverrides { + model: translation_model, + num_ctx: None, + temperature: None, + top_p: None, + top_k: None, + min_p: None, + enable_thinking: None, + }; + let backend = match state + .insight_generator + .resolve_backend(BackendKind::Local, &overrides) + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("unified_search: resolve_backend failed: {e:?}"); + return HttpResponse::ServiceUnavailable().json(ErrorBody { + error: "LLM backend unavailable".into(), + }); + } + }; + log::info!("unified_search: translating with model={}", backend.model()); + + let today = chrono::Utc::now().date_naive(); + let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await { + Ok(sq) => sq, + Err(e) => { + log::warn!("unified_search: translate_nl_query failed: {e:?}"); + return HttpResponse::BadGateway().json(ErrorBody { + error: "could not interpret the query".into(), + }); + } + }; + + // ── 2. Forward-geocode the place name into a gps circle ── + let resolved_place = match sq.place.as_deref() { + Some(p) => forward_geocode(p).await.map(|g| ResolvedPlace { + display_name: g.display_name, + lat: g.lat, + lon: g.lon, + radius_km: g.radius_km, + }), + None => None, + }; + let gps = resolved_place.as_ref().map(|p| (p.lat, p.lon, p.radius_km)); + + let semantic = effective_semantic(&sq); + + let has_exif_filter = sq.camera_make.is_some() + || sq.camera_model.is_some() + || sq.lens_model.is_some() + || sq.date_from.is_some() + || sq.date_to.is_some(); + let has_struct = + has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some(); + + // Stage trace: what the model extracted + whether a structured filter is + // active. The chips show this to the user too, but logging it makes the + // "why no results" path debuggable from the server side. 
+ log::info!( + "unified_search: q={nl:?} semantic={:?} tag_ids={:?} exclude={:?} place={:?} gps={:?} date=({:?},{:?}) media={:?} unmatched={:?} has_struct={has_struct}", + sq.semantic, + sq.tag_ids, + sq.exclude_tag_ids, + resolved_place.as_ref().map(|p| p.display_name.as_str()), + gps, + sq.date_from, + sq.date_to, + sq.media_type, + sq.unmatched_tags, + ); + + // ── 3. Build the structured candidate set (EXIF rows passing every + // filter). Skipped entirely for a pure-semantic query. ── + let mut candidate: Vec = Vec::new(); + let mut allowed_hashes: HashSet = HashSet::new(); + if has_struct { + // Tag membership set (rel_path only — same cross-library imprecision + // as the existing /photos tag listing). ANY-mode: a photo matches if + // it carries any of the named tags. ALL-mode over-constrains NL + // queries (the model maps several words to tags and few photos carry + // them all); the semantic term does the precision work instead. + let tag_set: Option> = if sq.tag_ids.is_empty() { + None + } else { + let mut dao = tag_dao.lock().expect("tag dao"); + match dao.get_files_with_any_tag_ids( + sq.tag_ids.clone(), + sq.exclude_tag_ids.clone(), + &ctx, + ) { + Ok(files) => Some(files.into_iter().map(|f| f.file_name).collect()), + Err(e) => { + log::warn!("unified_search: tag filter failed: {e:?}"); + Some(HashSet::new()) + } + } + }; + log::info!( + "unified_search: tag_ids={:?} -> tag_set_files={:?}", + sq.tag_ids, + tag_set.as_ref().map(|s| s.len()) + ); + + // EXIF query handles camera/lens/gps-box/date. With no EXIF filters + // it returns the whole table, which we then narrow by the predicates + // below (tags / media / scope). Fine at personal-library scale. 
+ let gps_bounds = gps.map(|(lat, lon, r)| gps_bounding_box(lat, lon, r)); + let rows = { + let mut dao = exif_dao.lock().expect("exif dao"); + dao.query_by_exif( + &ctx, + None, // scope filtered in-Rust to support multi-library + sq.camera_make.as_deref(), + sq.camera_model.as_deref(), + sq.lens_model.as_deref(), + gps_bounds, + sq.date_from, + sq.date_to, + ) + .unwrap_or_else(|e| { + log::warn!("unified_search: query_by_exif failed: {e:?}"); + Vec::new() + }) + }; + + candidate = rows + .into_iter() + .filter(|row| { + // Library scope. + if !library_ids.is_empty() && !library_ids.contains(&row.library_id) { + return false; + } + // Precise GPS distance (the EXIF query only did a coarse box). + if let Some((lat, lon, radius_km)) = gps { + match (row.gps_latitude, row.gps_longitude) { + (Some(plat), Some(plon)) => { + if haversine_distance(lat, lon, plat as f64, plon as f64) > radius_km { + return false; + } + } + _ => return false, + } + } + // Media type. + if let Some(mt) = sq.media_type.as_deref() { + let p = Path::new(&row.file_path); + let ok = if mt == "video" { + is_video_file(p) + } else { + is_image_file(p) + }; + if !ok { + return false; + } + } + // Tag membership. + if let Some(ts) = &tag_set + && !ts.contains(&row.file_path) + { + return false; + } + true + }) + .collect(); + + allowed_hashes = candidate + .iter() + .filter_map(|r| r.content_hash.clone()) + .collect(); + log::info!( + "unified_search: candidate_rows={} allowed_hashes={}", + candidate.len(), + allowed_hashes.len() + ); + } + + // ── 4. Rank ── + match semantic { + Some(ref sem) => { + // When structured filters are present they ARE the constraint — + // CLIP only ranks within the candidate set. So drop the global + // similarity threshold (it's tuned for whole-library search and + // would pre-discard filter-matching photos that scored just under + // it — e.g. a 2022 beach photo at 0.18 — before the intersection + // ever runs). 
With no filters, keep the user's threshold for the + // plain semantic case. + let clip_threshold = if has_struct { -1.0 } else { threshold }; + let scored = match score_photos( + &state, + &exif_dao, + sem, + &library_ids, + clip_threshold, + None, + ) + .await + { + Ok(s) => s, + Err(e) => return score_error_response(e), + }; + let considered = scored.considered; + let clip_hits = scored.hits.len(); + let hits: Vec<(f32, String)> = if has_struct { + scored + .hits + .into_iter() + .filter(|(_, h)| allowed_hashes.contains(h)) + .collect() + } else { + scored.hits + }; + log::info!( + "unified_search: clip considered={considered} hits={clip_hits} after_struct_filter={}", + hits.len() + ); + let total_matching = hits.len(); + let page = paginate(&hits, offset, limit); + let results = resolve_hits(&exif_dao, &page); + HttpResponse::Ok().json(UnifiedResponse { + query: nl, + interpreted: interpreted(&sq, resolved_place), + model_version: Some(scored.model_version), + considered: scored.considered, + total_matching, + offset, + results, + }) + } + None => { + // Filters-only: no semantic term. Require at least one filter, + // then return the candidate set newest-first. 
/// Return the page `[offset, offset + limit)` of an already-ordered hit list.
///
/// # Parameters
/// * `hits`   - `(score, key)` pairs in the order they should be served.
/// * `offset` - index of the first hit on the requested page.
/// * `limit`  - maximum number of hits on the page.
///
/// # Returns
/// An owned copy of the requested window. The page is empty when `offset` is
/// at or past the end of `hits`, or when `limit` is zero; it is shorter than
/// `limit` when the window runs off the end of the list.
///
/// This function never panics: `offset` and `limit` may be arbitrarily large
/// (they typically come straight from request parameters).
fn paginate(hits: &[(f32, String)], offset: usize, limit: usize) -> Vec<(f32, String)> {
    // `offset + limit` can exceed `usize::MAX` for hostile or sentinel inputs
    // (e.g. `limit = usize::MAX` meaning "everything"). A plain `+` would
    // panic in overflow-checked builds and wrap to a value below `offset`
    // otherwise, so saturate before clamping to the list length.
    let end = offset.saturating_add(limit).min(hits.len());

    // Checked range lookup: `get` yields `None` when `offset > end` (offset
    // past the end of the list), which maps to the empty page.
    hits.get(offset..end)
        .map(<[(f32, String)]>::to_vec)
        .unwrap_or_default()
}
effective_semantic_none_when_empty() { + let sq = StructuredQuery::default(); + assert_eq!(effective_semantic(&sq), None); + } + + #[test] + fn effective_semantic_unmatched_only() { + let sq = StructuredQuery { + unmatched_tags: vec!["disco".into()], + ..Default::default() + }; + assert_eq!(effective_semantic(&sq).as_deref(), Some("disco")); + } + + #[test] + fn paginate_handles_out_of_range_offset() { + let hits = vec![(0.9, "a".to_string()), (0.8, "b".to_string())]; + assert_eq!(paginate(&hits, 5, 10).len(), 0); + assert_eq!(paginate(&hits, 0, 1).len(), 1); + assert_eq!(paginate(&hits, 1, 10).len(), 1); + } +} diff --git a/src/video/ffmpeg.rs b/src/video/ffmpeg.rs index d385cac..019bd86 100644 --- a/src/video/ffmpeg.rs +++ b/src/video/ffmpeg.rs @@ -231,7 +231,7 @@ impl Ffmpeg { /// a hard failure — previously the `parse::` on empty stdout produced /// "cannot parse float from empty string" and poisoned the preview-clip row /// with status=failed, which the watcher would re-queue every full scan. -async fn get_duration_seconds(input_file: &str) -> Result> { +pub async fn get_duration_seconds(input_file: &str) -> Result> { if let Some(d) = probe_duration(input_file, "format=duration").await? { return Ok(Some(d)); }