Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 48a1b753f0 | |||
| f2ab8d3740 | |||
| 6e5898e766 | |||
| 6c315edacc | |||
| 0a40e78528 | |||
| e56235acc5 | |||
| fcbd7e2733 | |||
| e4c875f473 | |||
| 50ed780844 | |||
| 7e21213181 | |||
| 664b3694f8 | |||
| b52b1eb323 | |||
| 19fc1bbdf8 | |||
| ca007a618d | |||
| e4d8d374fb | |||
| 5c9ee56527 | |||
| f707353807 | |||
| b30c8c16d0 | |||
| f5581edf5e | |||
| 65793a2dda | |||
| 299e32b014 | |||
| 6e90f24307 | |||
| 740fc4d841 | |||
| 7715a7a905 | |||
| 42453d5786 | |||
| e3f731b3b2 |
@@ -80,6 +80,16 @@ AGENTIC_CHAT_MAX_ITERATIONS=6
|
|||||||
# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
|
# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
|
||||||
# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180
|
# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180
|
||||||
|
|
||||||
|
# ── Unified search translation model (optional) ─────────────────────────
|
||||||
|
# /photos/search/unified runs one small LLM call to translate a natural-
|
||||||
|
# language query into structured filters + a semantic term, then CLIP-ranks.
|
||||||
|
# That step needs an LLM AND CLIP available at once. On a tight VRAM budget a
|
||||||
|
# large chat model can't co-reside with CLIP, so pin a small, fast model here
|
||||||
|
# (it can stay loaded alongside CLIP and the chat model). Precedence:
|
||||||
|
# UNIFIED_SEARCH_MODEL > the client's selected model > the configured default.
|
||||||
|
# Uses the configured backend (LLM_BACKEND); local only — no hybrid.
|
||||||
|
# UNIFIED_SEARCH_MODEL=qwen3-0.6b
|
||||||
|
|
||||||
# ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ───────────────────
|
# ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ───────────────────
|
||||||
# TTS routes through the same llama-swap proxy (a Chatterbox model id), so it
|
# TTS routes through the same llama-swap proxy (a Chatterbox model id), so it
|
||||||
# only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp.
|
# only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp.
|
||||||
@@ -139,3 +149,31 @@ CLIP_REQUEST_TIMEOUT_SEC=60
|
|||||||
# ── RAG / search ────────────────────────────────────────────────────────
|
# ── RAG / search ────────────────────────────────────────────────────────
|
||||||
# Set to `1` to enable cross-encoder reranking on /search results.
|
# Set to `1` to enable cross-encoder reranking on /search results.
|
||||||
SEARCH_RAG_RERANK=0
|
SEARCH_RAG_RERANK=0
|
||||||
|
|
||||||
|
# ── Nightly reel pre-generation (Phase 3+) ──────────────────────────────
|
||||||
|
# Set to `1` to enable the scheduler. Disabled by default.
|
||||||
|
# REEL_PREGEN_ENABLED=1
|
||||||
|
# Hour (0-23) when the nightly batch fires. Default 3 AM.
|
||||||
|
# REEL_PREGEN_HOUR=3
|
||||||
|
# Day of week for weekly reels (0=Sun, 1=Mon, …). Default Monday.
|
||||||
|
# REEL_PREGEN_WEEK_DOW=1
|
||||||
|
# Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to
|
||||||
|
# the server's local timezone.
|
||||||
|
# REEL_PREGEN_TZ_OFFSET_MINUTES=
|
||||||
|
# Fixed timezone offset — overrides auto-detect to avoid DST shifts.
|
||||||
|
# When set, both the DB fallback and env fallback use this value.
|
||||||
|
# REEL_PREGEN_TZ_FIXED_MINUTES=-480
|
||||||
|
# Voice ID for narration (e.g., "grandma"). Falls back to the value
|
||||||
|
# stored in the user_ai_prefs DB row when set.
|
||||||
|
# REEL_PREGEN_VOICE=
|
||||||
|
# Library filter: a library id (e.g. "1") or "all" for every library.
|
||||||
|
# REEL_PREGEN_LIBRARY=all
|
||||||
|
# Max agentic tool iterations for pre-gen scripter. Default 8.
|
||||||
|
# REEL_PREGEN_MAX_TOOL_ITERS=8
|
||||||
|
#
|
||||||
|
# On-disk reel cache sweep (runs every 24h, independent of pre-gen). Removes
|
||||||
|
# reel MP4s with no ledger row + no live job that are older than the max age —
|
||||||
|
# i.e. the on-demand cache, which otherwise grows forever. Set to 0 to disable.
|
||||||
|
# REEL_CACHE_SWEEP_ENABLED=1
|
||||||
|
# Age (days) before an unreferenced reel MP4 is swept. Default 7.
|
||||||
|
# REEL_CACHE_MAX_AGE_DAYS=7
|
||||||
|
|||||||
Generated
+1
-1
@@ -2051,7 +2051,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "image-api"
|
name = "image-api"
|
||||||
version = "1.3.0"
|
version = "1.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix",
|
"actix",
|
||||||
"actix-cors",
|
"actix-cors",
|
||||||
|
|||||||
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "image-api"
|
name = "image-api"
|
||||||
version = "1.3.0"
|
version = "1.4.0"
|
||||||
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
|
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
-- Tears down the precomputed_reels schema: the index first, then the table it
-- is defined on. IF EXISTS keeps both statements safe to re-run.
DROP INDEX IF EXISTS idx_precomputed_reels_span_library;
|
||||||
|
DROP TABLE IF EXISTS precomputed_reels;
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
-- One row per pre-generated reel. Every column is NOT NULL except `voice`;
-- `render_version` defaults to 1 when the insert omits it.
-- NOTE(review): presumably this is the "ledger" the reel cache sweep consults
-- before deleting MP4s — confirm against the sweep code.
CREATE TABLE precomputed_reels (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
span TEXT NOT NULL,
|
||||||
|
library_key TEXT NOT NULL,
|
||||||
|
cache_key TEXT NOT NULL,
|
||||||
|
output_path TEXT NOT NULL,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
media_count INT NOT NULL,
|
||||||
|
render_version INT NOT NULL DEFAULT 1,
|
||||||
|
tz_offset_minutes INT NOT NULL,
|
||||||
|
voice TEXT,
|
||||||
|
generated_at BIGINT NOT NULL
|
||||||
|
);
|
||||||
|
-- Composite index keyed by (span, library_key) with generated_at descending,
|
-- so rows for one span/library pair are ordered newest-first.
CREATE INDEX idx_precomputed_reels_span_library ON precomputed_reels(span, library_key, generated_at DESC);
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
-- Removes the user_ai_prefs table; IF EXISTS makes this a no-op when it is
-- already gone.
DROP TABLE IF EXISTS user_ai_prefs;
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
-- Single-row preferences table: CHECK(id=1) rejects any row whose id is not 1.
-- `voice`, `tz_offset_minutes` and `library` are nullable; `updated_at` is
-- required on every write.
CREATE TABLE user_ai_prefs (
|
||||||
|
id INTEGER PRIMARY KEY CHECK(id=1),
|
||||||
|
voice TEXT,
|
||||||
|
tz_offset_minutes INTEGER,
|
||||||
|
library TEXT,
|
||||||
|
updated_at BIGINT NOT NULL
|
||||||
|
);
|
||||||
@@ -41,6 +41,10 @@ pub struct SamplingOverrides {
|
|||||||
pub top_p: Option<f32>,
|
pub top_p: Option<f32>,
|
||||||
pub top_k: Option<i32>,
|
pub top_k: Option<i32>,
|
||||||
pub min_p: Option<f32>,
|
pub min_p: Option<f32>,
|
||||||
|
/// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as
|
||||||
|
/// `chat_template_kwargs.enable_thinking`); other backends ignore it.
|
||||||
|
/// `None` leaves the model/template default in place.
|
||||||
|
pub enable_thinking: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SamplingOverrides {
|
impl SamplingOverrides {
|
||||||
@@ -124,6 +128,7 @@ mod tests {
|
|||||||
top_p: None,
|
top_p: None,
|
||||||
top_k: None,
|
top_k: None,
|
||||||
min_p: None,
|
min_p: None,
|
||||||
|
enable_thinking: None,
|
||||||
};
|
};
|
||||||
assert!(!empty.has_sampling());
|
assert!(!empty.has_sampling());
|
||||||
|
|
||||||
@@ -134,6 +139,7 @@ mod tests {
|
|||||||
top_p: None,
|
top_p: None,
|
||||||
top_k: None,
|
top_k: None,
|
||||||
min_p: None,
|
min_p: None,
|
||||||
|
enable_thinking: None,
|
||||||
};
|
};
|
||||||
assert!(with_temp.has_sampling());
|
assert!(with_temp.has_sampling());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -191,11 +191,13 @@ impl ClipClient {
|
|||||||
let resp = match self.client.post(&url).json(&body).send().await {
|
let resp = match self.client.post(&url).json(&body).send().await {
|
||||||
Ok(r) => r,
|
Ok(r) => r,
|
||||||
Err(e) if e.is_timeout() || e.is_connect() => {
|
Err(e) if e.is_timeout() || e.is_connect() => {
|
||||||
|
log::warn!("clip encode_text network error to {url}: {e}");
|
||||||
return Err(ClipError::Transient(anyhow::anyhow!(
|
return Err(ClipError::Transient(anyhow::anyhow!(
|
||||||
"clip client network: {e}"
|
"clip client network: {e}"
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
log::warn!("clip encode_text request error to {url}: {e}");
|
||||||
return Err(ClipError::Transient(anyhow::anyhow!(
|
return Err(ClipError::Transient(anyhow::anyhow!(
|
||||||
"clip client request: {e}"
|
"clip client request: {e}"
|
||||||
)));
|
)));
|
||||||
@@ -210,6 +212,7 @@ impl ClipClient {
|
|||||||
return Ok(body);
|
return Ok(body);
|
||||||
}
|
}
|
||||||
let body_text = resp.text().await.unwrap_or_default();
|
let body_text = resp.text().await.unwrap_or_default();
|
||||||
|
log::warn!("clip encode_text HTTP {status} from {url}: {body_text}");
|
||||||
Err(classify_error_response(status.as_u16(), &body_text))
|
Err(classify_error_response(status.as_u16(), &body_text))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+63
-43
@@ -40,6 +40,12 @@ pub struct GeneratePhotoInsightRequest {
|
|||||||
pub top_k: Option<i32>,
|
pub top_k: Option<i32>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub min_p: Option<f32>,
|
pub min_p: Option<f32>,
|
||||||
|
/// Reasoning toggle for thinking-capable models. Forwarded to the
|
||||||
|
/// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored
|
||||||
|
/// by other backends and the non-agentic (Ollama) path. Only the agentic
|
||||||
|
/// endpoint routes through llama.cpp. None defers to the template default.
|
||||||
|
#[serde(default)]
|
||||||
|
pub enable_thinking: Option<bool>,
|
||||||
/// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
|
/// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
|
||||||
/// OpenRouter chat). Only respected by the agentic endpoint.
|
/// OpenRouter chat). Only respected by the agentic endpoint.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@@ -120,7 +126,7 @@ pub async fn generation_status_handler(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if let Some(ref fp) = query.path {
|
if let Some(ref fp) = query.path {
|
||||||
let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.unwrap_or_else(|| app_state.primary_library());
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
@@ -218,10 +224,11 @@ pub async fn cancel_generation_handler(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if let Some(ref fp) = request.file_path {
|
if let Some(ref fp) = request.file_path {
|
||||||
let library = libraries::resolve_library_param(&app_state, request.library.as_deref())
|
let library =
|
||||||
.ok()
|
libraries::resolve_library_param_state(&app_state, request.library.as_deref())
|
||||||
.flatten()
|
.ok()
|
||||||
.unwrap_or_else(|| app_state.primary_library());
|
.flatten()
|
||||||
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
let normalized = normalize_path(fp);
|
let normalized = normalize_path(fp);
|
||||||
|
|
||||||
// Get active job ids first, then cancel in DB, then abort tasks
|
// Get active job ids first, then cancel in DB, then abort tasks
|
||||||
@@ -580,7 +587,7 @@ pub async fn get_insight_handler(
|
|||||||
|
|
||||||
// Expand to rel_paths sharing content so an insight generated under
|
// Expand to rel_paths sharing content so an insight generated under
|
||||||
// library 1 still shows when the same photo is viewed from library 2.
|
// library 1 still shows when the same photo is viewed from library 2.
|
||||||
let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.unwrap_or_else(|| app_state.primary_library());
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
@@ -867,6 +874,7 @@ pub async fn generate_agentic_insight_handler(
|
|||||||
request.top_p,
|
request.top_p,
|
||||||
request.top_k,
|
request.top_k,
|
||||||
request.min_p,
|
request.min_p,
|
||||||
|
request.enable_thinking,
|
||||||
max_iterations,
|
max_iterations,
|
||||||
request.backend.clone(),
|
request.backend.clone(),
|
||||||
fewshot_examples,
|
fewshot_examples,
|
||||||
@@ -1168,6 +1176,11 @@ pub struct ChatTurnHttpRequest {
|
|||||||
pub top_k: Option<i32>,
|
pub top_k: Option<i32>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub min_p: Option<f32>,
|
pub min_p: Option<f32>,
|
||||||
|
/// Reasoning toggle for thinking-capable models. Forwarded to the
|
||||||
|
/// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored
|
||||||
|
/// by other backends. None defers to the model/template default.
|
||||||
|
#[serde(default)]
|
||||||
|
pub enable_thinking: Option<bool>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub max_iterations: Option<usize>,
|
pub max_iterations: Option<usize>,
|
||||||
/// Per-turn system-prompt override. Ephemeral in append mode,
|
/// Per-turn system-prompt override. Ephemeral in append mode,
|
||||||
@@ -1218,15 +1231,16 @@ pub async fn chat_turn_handler(
|
|||||||
let mut span = tracer.start_with_context("http.insights.chat", &parent_context);
|
let mut span = tracer.start_with_context("http.insights.chat", &parent_context);
|
||||||
span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));
|
span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));
|
||||||
|
|
||||||
let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
|
let library =
|
||||||
Ok(Some(lib)) => lib,
|
match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) {
|
||||||
Ok(None) => app_state.primary_library(),
|
Ok(Some(lib)) => lib,
|
||||||
Err(e) => {
|
Ok(None) => app_state.primary_library(),
|
||||||
return HttpResponse::BadRequest().json(serde_json::json!({
|
Err(e) => {
|
||||||
"error": format!("invalid library: {}", e)
|
return HttpResponse::BadRequest().json(serde_json::json!({
|
||||||
}));
|
"error": format!("invalid library: {}", e)
|
||||||
}
|
}));
|
||||||
};
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Service-token claims (sub: "service:apollo") fall through to
|
// Service-token claims (sub: "service:apollo") fall through to
|
||||||
// user_id=1 — the operator convention. Mobile/web clients have a
|
// user_id=1 — the operator convention. Mobile/web clients have a
|
||||||
@@ -1245,6 +1259,7 @@ pub async fn chat_turn_handler(
|
|||||||
top_p: request.top_p,
|
top_p: request.top_p,
|
||||||
top_k: request.top_k,
|
top_k: request.top_k,
|
||||||
min_p: request.min_p,
|
min_p: request.min_p,
|
||||||
|
enable_thinking: request.enable_thinking,
|
||||||
max_iterations: request.max_iterations,
|
max_iterations: request.max_iterations,
|
||||||
system_prompt: request.system_prompt.clone(),
|
system_prompt: request.system_prompt.clone(),
|
||||||
persona_id: request.persona_id.clone(),
|
persona_id: request.persona_id.clone(),
|
||||||
@@ -1344,15 +1359,16 @@ pub async fn chat_rewind_handler(
|
|||||||
request: web::Json<ChatRewindHttpRequest>,
|
request: web::Json<ChatRewindHttpRequest>,
|
||||||
app_state: web::Data<AppState>,
|
app_state: web::Data<AppState>,
|
||||||
) -> impl Responder {
|
) -> impl Responder {
|
||||||
let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
|
let library =
|
||||||
Ok(Some(lib)) => lib,
|
match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) {
|
||||||
Ok(None) => app_state.primary_library(),
|
Ok(Some(lib)) => lib,
|
||||||
Err(e) => {
|
Ok(None) => app_state.primary_library(),
|
||||||
return HttpResponse::BadRequest().json(serde_json::json!({
|
Err(e) => {
|
||||||
"error": format!("invalid library: {}", e)
|
return HttpResponse::BadRequest().json(serde_json::json!({
|
||||||
}));
|
"error": format!("invalid library: {}", e)
|
||||||
}
|
}));
|
||||||
};
|
}
|
||||||
|
};
|
||||||
|
|
||||||
match app_state
|
match app_state
|
||||||
.insight_chat
|
.insight_chat
|
||||||
@@ -1393,7 +1409,7 @@ pub async fn chat_history_handler(
|
|||||||
// cross-library lookup when the scoped one misses, so a photo
|
// cross-library lookup when the scoped one misses, so a photo
|
||||||
// with no insight in this library but one in another still
|
// with no insight in this library but one in another still
|
||||||
// surfaces (the "show this photo's primary insight" merge case).
|
// surfaces (the "show this photo's primary insight" merge case).
|
||||||
let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.unwrap_or_else(|| app_state.primary_library());
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
@@ -1444,15 +1460,16 @@ pub async fn chat_stream_handler(
|
|||||||
request: web::Json<ChatTurnHttpRequest>,
|
request: web::Json<ChatTurnHttpRequest>,
|
||||||
app_state: web::Data<AppState>,
|
app_state: web::Data<AppState>,
|
||||||
) -> HttpResponse {
|
) -> HttpResponse {
|
||||||
let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
|
let library =
|
||||||
Ok(Some(lib)) => lib,
|
match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) {
|
||||||
Ok(None) => app_state.primary_library(),
|
Ok(Some(lib)) => lib,
|
||||||
Err(e) => {
|
Ok(None) => app_state.primary_library(),
|
||||||
return HttpResponse::BadRequest().json(serde_json::json!({
|
Err(e) => {
|
||||||
"error": format!("invalid library: {}", e)
|
return HttpResponse::BadRequest().json(serde_json::json!({
|
||||||
}));
|
"error": format!("invalid library: {}", e)
|
||||||
}
|
}));
|
||||||
};
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Service-token sub falls through to user_id=1 (see chat_turn_handler).
|
// Service-token sub falls through to user_id=1 (see chat_turn_handler).
|
||||||
let user_id = claims.sub.parse::<i32>().unwrap_or(1);
|
let user_id = claims.sub.parse::<i32>().unwrap_or(1);
|
||||||
@@ -1469,6 +1486,7 @@ pub async fn chat_stream_handler(
|
|||||||
top_p: request.top_p,
|
top_p: request.top_p,
|
||||||
top_k: request.top_k,
|
top_k: request.top_k,
|
||||||
min_p: request.min_p,
|
min_p: request.min_p,
|
||||||
|
enable_thinking: request.enable_thinking,
|
||||||
max_iterations: request.max_iterations,
|
max_iterations: request.max_iterations,
|
||||||
system_prompt: request.system_prompt.clone(),
|
system_prompt: request.system_prompt.clone(),
|
||||||
persona_id: request.persona_id.clone(),
|
persona_id: request.persona_id.clone(),
|
||||||
@@ -1589,15 +1607,16 @@ pub async fn turn_async_handler(
|
|||||||
let mut span = tracer.start_with_context("http.insights.chat_turn_async", &parent_context);
|
let mut span = tracer.start_with_context("http.insights.chat_turn_async", &parent_context);
|
||||||
span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));
|
span.set_attribute(KeyValue::new("file_path", request.file_path.clone()));
|
||||||
|
|
||||||
let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
|
let library =
|
||||||
Ok(Some(lib)) => lib,
|
match libraries::resolve_library_param_state(&app_state, request.library.as_deref()) {
|
||||||
Ok(None) => app_state.primary_library(),
|
Ok(Some(lib)) => lib,
|
||||||
Err(e) => {
|
Ok(None) => app_state.primary_library(),
|
||||||
return HttpResponse::BadRequest().json(serde_json::json!({
|
Err(e) => {
|
||||||
"error": format!("invalid library: {}", e)
|
return HttpResponse::BadRequest().json(serde_json::json!({
|
||||||
}));
|
"error": format!("invalid library: {}", e)
|
||||||
}
|
}));
|
||||||
};
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let user_id = claims.sub.parse::<i32>().unwrap_or(1);
|
let user_id = claims.sub.parse::<i32>().unwrap_or(1);
|
||||||
|
|
||||||
@@ -1613,6 +1632,7 @@ pub async fn turn_async_handler(
|
|||||||
top_p: request.top_p,
|
top_p: request.top_p,
|
||||||
top_k: request.top_k,
|
top_k: request.top_k,
|
||||||
min_p: request.min_p,
|
min_p: request.min_p,
|
||||||
|
enable_thinking: request.enable_thinking,
|
||||||
max_iterations: request.max_iterations,
|
max_iterations: request.max_iterations,
|
||||||
system_prompt: request.system_prompt.clone(),
|
system_prompt: request.system_prompt.clone(),
|
||||||
persona_id: request.persona_id.clone(),
|
persona_id: request.persona_id.clone(),
|
||||||
|
|||||||
@@ -70,6 +70,10 @@ pub struct ChatTurnRequest {
|
|||||||
pub top_p: Option<f32>,
|
pub top_p: Option<f32>,
|
||||||
pub top_k: Option<i32>,
|
pub top_k: Option<i32>,
|
||||||
pub min_p: Option<f32>,
|
pub min_p: Option<f32>,
|
||||||
|
/// Reasoning toggle for thinking-capable models. Forwarded to the
|
||||||
|
/// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored
|
||||||
|
/// by other backends. None defers to the model/template default.
|
||||||
|
pub enable_thinking: Option<bool>,
|
||||||
pub max_iterations: Option<usize>,
|
pub max_iterations: Option<usize>,
|
||||||
/// Per-turn system-prompt override. In append mode (default), applied
|
/// Per-turn system-prompt override. In append mode (default), applied
|
||||||
/// ephemerally — original system message restored before persistence.
|
/// ephemerally — original system message restored before persistence.
|
||||||
@@ -344,6 +348,7 @@ impl InsightChatService {
|
|||||||
top_p: req.top_p,
|
top_p: req.top_p,
|
||||||
top_k: req.top_k,
|
top_k: req.top_k,
|
||||||
min_p: req.min_p,
|
min_p: req.min_p,
|
||||||
|
enable_thinking: req.enable_thinking,
|
||||||
};
|
};
|
||||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
let model_used = backend.model().to_string();
|
let model_used = backend.model().to_string();
|
||||||
@@ -847,6 +852,7 @@ impl InsightChatService {
|
|||||||
top_p: req.top_p,
|
top_p: req.top_p,
|
||||||
top_k: req.top_k,
|
top_k: req.top_k,
|
||||||
min_p: req.min_p,
|
min_p: req.min_p,
|
||||||
|
enable_thinking: req.enable_thinking,
|
||||||
};
|
};
|
||||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
let model_used = backend.model().to_string();
|
let model_used = backend.model().to_string();
|
||||||
@@ -1017,6 +1023,7 @@ impl InsightChatService {
|
|||||||
top_p: req.top_p,
|
top_p: req.top_p,
|
||||||
top_k: req.top_k,
|
top_k: req.top_k,
|
||||||
min_p: req.min_p,
|
min_p: req.min_p,
|
||||||
|
enable_thinking: req.enable_thinking,
|
||||||
};
|
};
|
||||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
let model_used = backend.model().to_string();
|
let model_used = backend.model().to_string();
|
||||||
@@ -1425,6 +1432,7 @@ impl InsightChatService {
|
|||||||
top_p: req.top_p,
|
top_p: req.top_p,
|
||||||
top_k: req.top_k,
|
top_k: req.top_k,
|
||||||
min_p: req.min_p,
|
min_p: req.min_p,
|
||||||
|
enable_thinking: req.enable_thinking,
|
||||||
};
|
};
|
||||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
let model_used = backend.model().to_string();
|
let model_used = backend.model().to_string();
|
||||||
@@ -1607,6 +1615,7 @@ impl InsightChatService {
|
|||||||
top_p: req.top_p,
|
top_p: req.top_p,
|
||||||
top_k: req.top_k,
|
top_k: req.top_k,
|
||||||
min_p: req.min_p,
|
min_p: req.min_p,
|
||||||
|
enable_thinking: req.enable_thinking,
|
||||||
};
|
};
|
||||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
let model_used = backend.model().to_string();
|
let model_used = backend.model().to_string();
|
||||||
|
|||||||
@@ -217,6 +217,13 @@ impl InsightGenerator {
|
|||||||
&self.insight_dao
|
&self.insight_dao
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Accessor for the EXIF DAO (used by the reel scheduler to resolve
|
||||||
|
/// GPS enrichment without creating a separate DB connection).
///
/// Returns a borrow of the shared `Arc<Mutex<…>>` handle; callers clone the
/// `Arc` or lock the `Mutex` themselves.
|
||||||
|
// NOTE(review): presumably allowed because the only caller (the reel
// scheduler) is not part of every build target — confirm.
#[allow(dead_code)]
|
||||||
|
pub fn exif_dao(&self) -> &Arc<Mutex<Box<dyn ExifDao>>> {
|
||||||
|
&self.exif_dao
|
||||||
|
}
|
||||||
|
|
||||||
/// Whether the optional Apollo Places integration is wired up. Drives
|
/// Whether the optional Apollo Places integration is wired up. Drives
|
||||||
/// tool-definition gating (no point offering `get_personal_place_at`
|
/// tool-definition gating (no point offering `get_personal_place_at`
|
||||||
/// when Apollo is unreachable) — exposed publicly so `insight_chat`
|
/// when Apollo is unreachable) — exposed publicly so `insight_chat`
|
||||||
@@ -3926,6 +3933,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
if let Some(ctx) = overrides.num_ctx {
|
if let Some(ctx) = overrides.num_ctx {
|
||||||
c.set_num_ctx(Some(ctx));
|
c.set_num_ctx(Some(ctx));
|
||||||
}
|
}
|
||||||
|
c.set_enable_thinking(overrides.enable_thinking);
|
||||||
Box::new(c)
|
Box::new(c)
|
||||||
} else {
|
} else {
|
||||||
// Pure Ollama local.
|
// Pure Ollama local.
|
||||||
@@ -4057,6 +4065,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
top_p: Option<f32>,
|
top_p: Option<f32>,
|
||||||
top_k: Option<i32>,
|
top_k: Option<i32>,
|
||||||
min_p: Option<f32>,
|
min_p: Option<f32>,
|
||||||
|
enable_thinking: Option<bool>,
|
||||||
max_iterations: usize,
|
max_iterations: usize,
|
||||||
backend: Option<String>,
|
backend: Option<String>,
|
||||||
fewshot_examples: Vec<Vec<ChatMessage>>,
|
fewshot_examples: Vec<Vec<ChatMessage>>,
|
||||||
@@ -4084,6 +4093,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
top_p,
|
top_p,
|
||||||
top_k,
|
top_k,
|
||||||
min_p,
|
min_p,
|
||||||
|
enable_thinking,
|
||||||
};
|
};
|
||||||
let backend = self.resolve_backend(kind, &overrides).await?;
|
let backend = self.resolve_backend(kind, &overrides).await?;
|
||||||
span.set_attribute(KeyValue::new("model", backend.model().to_string()));
|
span.set_attribute(KeyValue::new("model", backend.model().to_string()));
|
||||||
@@ -4497,6 +4507,110 @@ Return ONLY the summary, nothing else."#,
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A read-only agentic tool loop: chat with tools until the model stops
|
||||||
|
/// calling them, then return the final content.
|
||||||
|
///
|
||||||
|
/// This is the loop body extracted from
|
||||||
|
/// `generate_agentic_insight_for_photo` (lines 4316-4377) so it can be
|
||||||
|
/// reused by the reel-scripter without the photo-specific context
|
||||||
|
/// (image_base64, file_path, persona_id). The photo insight loop still
|
||||||
|
/// has its own copy because it threads image/file context through
|
||||||
|
/// `execute_tool`.
|
||||||
|
///
|
||||||
|
/// Calls `execute_tool` with empty file/image context; enabled tools
|
||||||
|
/// never read those fields.
|
||||||
|
///
|
||||||
|
/// Only used by the `reels` module (compiled in `main.rs`, not `lib.rs`),
|
||||||
|
/// so the `#[allow(dead_code)]` suppresses the lib-target warning.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub(crate) async fn run_readonly_tool_loop(
|
||||||
|
&self,
|
||||||
|
backend: &ResolvedBackend,
|
||||||
|
mut messages: Vec<ChatMessage>,
|
||||||
|
tools: Vec<Tool>,
|
||||||
|
max_iter: usize,
|
||||||
|
) -> Result<String> {
|
||||||
|
let mut final_content = String::new();
|
||||||
|
|
||||||
|
for iteration in 0..max_iter {
|
||||||
|
log::info!("Agentic iteration {}/{}", iteration + 1, max_iter);
|
||||||
|
|
||||||
|
let (response, _prompt_tokens, _eval_tokens) = backend
|
||||||
|
.chat()
|
||||||
|
.chat_with_tools(messages.clone(), tools.clone())
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Sanitize tool call arguments before pushing back into history.
|
||||||
|
// Some models occasionally return non-object arguments (bool,
|
||||||
|
// string, null) which Ollama rejects when they are re-sent in
|
||||||
|
// a subsequent request.
|
||||||
|
let mut response = response;
|
||||||
|
if let Some(ref mut tool_calls) = response.tool_calls {
|
||||||
|
for tc in tool_calls.iter_mut() {
|
||||||
|
if !tc.function.arguments.is_object() {
|
||||||
|
log::warn!(
|
||||||
|
"Tool '{}' returned non-object arguments ({:?}), normalising to {{}}",
|
||||||
|
tc.function.name,
|
||||||
|
tc.function.arguments
|
||||||
|
);
|
||||||
|
tc.function.arguments = serde_json::Value::Object(Default::default());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
messages.push(response.clone());
|
||||||
|
|
||||||
|
if let Some(ref tool_calls) = response.tool_calls
|
||||||
|
&& !tool_calls.is_empty()
|
||||||
|
{
|
||||||
|
for tool_call in tool_calls {
|
||||||
|
log::info!(
|
||||||
|
"Agentic tool call [{}]: {} {}",
|
||||||
|
iteration,
|
||||||
|
tool_call.function.name,
|
||||||
|
tool_call.function.arguments
|
||||||
|
);
|
||||||
|
let result = self
|
||||||
|
.execute_tool(
|
||||||
|
&tool_call.function.name,
|
||||||
|
&tool_call.function.arguments,
|
||||||
|
backend,
|
||||||
|
&None,
|
||||||
|
"",
|
||||||
|
0,
|
||||||
|
"",
|
||||||
|
&opentelemetry::Context::new(),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
messages.push(ChatMessage::tool_result(result));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// No tool calls — this is the final answer
|
||||||
|
final_content = response.content;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If loop exhausted without final answer, ask for one
|
||||||
|
if final_content.is_empty() {
|
||||||
|
log::info!(
|
||||||
|
"Agentic loop exhausted after {} iterations, requesting final answer",
|
||||||
|
max_iter
|
||||||
|
);
|
||||||
|
messages.push(ChatMessage::user(
|
||||||
|
"Based on the context gathered, please write the final answer. Return ONLY the JSON object, no prose or code fences.",
|
||||||
|
));
|
||||||
|
let (final_response, _, _) = backend
|
||||||
|
.chat()
|
||||||
|
.chat_with_tools(messages.clone(), vec![])
|
||||||
|
.await?;
|
||||||
|
final_content = final_response.content;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(final_content)
|
||||||
|
}
|
||||||
|
|
||||||
/// Reverse geocode GPS coordinates to human-readable place names
|
/// Reverse geocode GPS coordinates to human-readable place names
|
||||||
async fn reverse_geocode(&self, lat: f64, lon: f64) -> Option<String> {
|
async fn reverse_geocode(&self, lat: f64, lon: f64) -> Option<String> {
|
||||||
let url = format!(
|
let url = format!(
|
||||||
|
|||||||
@@ -64,6 +64,12 @@ pub struct LlamaCppClient {
|
|||||||
top_p: Option<f32>,
|
top_p: Option<f32>,
|
||||||
top_k: Option<i32>,
|
top_k: Option<i32>,
|
||||||
min_p: Option<f32>,
|
min_p: Option<f32>,
|
||||||
|
/// When `Some`, forwarded to llama-server as
|
||||||
|
/// `chat_template_kwargs: {"enable_thinking": <bool>}`. The Jinja chat
|
||||||
|
/// template (e.g. Qwen3) reads this to gate its reasoning block. `None`
|
||||||
|
/// omits the key entirely, leaving the template's own default. Templates
|
||||||
|
/// that don't reference the key ignore it, so sending it is harmless.
|
||||||
|
enable_thinking: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LlamaCppClient {
|
impl LlamaCppClient {
|
||||||
@@ -89,6 +95,7 @@ impl LlamaCppClient {
|
|||||||
top_p: None,
|
top_p: None,
|
||||||
top_k: None,
|
top_k: None,
|
||||||
min_p: None,
|
min_p: None,
|
||||||
|
enable_thinking: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -104,6 +111,12 @@ impl LlamaCppClient {
|
|||||||
self.num_ctx = num_ctx;
|
self.num_ctx = num_ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the reasoning toggle forwarded as `chat_template_kwargs.enable_thinking`.
|
||||||
|
/// `None` leaves the chat template's own default in place.
///
/// Overwrites whatever value was stored before, so passing `None` also clears
/// an earlier `Some(..)`.
|
||||||
|
pub fn set_enable_thinking(&mut self, enable_thinking: Option<bool>) {
|
||||||
|
self.enable_thinking = enable_thinking;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn set_sampling_params(
|
pub fn set_sampling_params(
|
||||||
&mut self,
|
&mut self,
|
||||||
temperature: Option<f32>,
|
temperature: Option<f32>,
|
||||||
@@ -458,6 +471,12 @@ impl LlamaCppClient {
|
|||||||
// via -c, so we silently drop the override here. The config.yaml
|
// via -c, so we silently drop the override here. The config.yaml
|
||||||
// entry is the source of truth for context size.
|
// entry is the source of truth for context size.
|
||||||
let _ = self.num_ctx;
|
let _ = self.num_ctx;
|
||||||
|
// Reasoning toggle for thinking-capable templates (Qwen3 et al.).
|
||||||
|
// llama-server forwards chat_template_kwargs into the Jinja render
|
||||||
|
// (requires --jinja); templates that ignore the key are unaffected.
|
||||||
|
if let Some(think) = self.enable_thinking {
|
||||||
|
v.push(("chat_template_kwargs", json!({ "enable_thinking": think })));
|
||||||
|
}
|
||||||
v
|
v
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ pub mod insight_generator;
|
|||||||
pub mod llamacpp;
|
pub mod llamacpp;
|
||||||
pub mod llm_client;
|
pub mod llm_client;
|
||||||
pub mod local_llm;
|
pub mod local_llm;
|
||||||
|
pub mod nl_query;
|
||||||
pub mod ollama;
|
pub mod ollama;
|
||||||
pub mod openrouter;
|
pub mod openrouter;
|
||||||
pub mod pronunciation;
|
pub mod pronunciation;
|
||||||
|
|||||||
@@ -0,0 +1,408 @@
|
|||||||
|
//! Natural-language → structured-query translation for unified photo search.
|
||||||
|
//!
|
||||||
|
//! The unified search endpoint (`/photos/search/unified`, Phase 2) needs to
|
||||||
|
//! turn a free-text query like *"sunset photos in Italy from last summer"*
|
||||||
|
//! into the structured filter the existing `/photos` engine understands plus
|
||||||
|
//! a semantic term for CLIP ranking. That translation is a single grounded
|
||||||
|
//! LLM call, isolated here so it can be unit-tested without a network or the
|
||||||
|
//! full `InsightGenerator`.
|
||||||
|
//!
|
||||||
|
//! Two-stage design:
|
||||||
|
//! 1. The LLM emits a [`RawNlQuery`] — references are by *name* (tags) and
|
||||||
|
//! dates as ISO strings, never numeric ids it could hallucinate.
|
||||||
|
//! 2. [`resolve_raw_query`] maps names against the real tag vocabulary and
|
||||||
|
//! converts ISO dates to unix seconds, producing a [`StructuredQuery`].
|
||||||
|
//! A tag the model invents that isn't in the vocab is surfaced in
|
||||||
|
//! `unmatched_tags` (the caller folds it back into the semantic term)
|
||||||
|
//! rather than silently dropped — this is the anti-noise guard.
|
||||||
|
//!
|
||||||
|
//! Geocoding of `place` and person filtering are intentionally *not* handled
|
||||||
|
//! here: `place` stays as text for the caller to forward-geocode (async, see
|
||||||
|
//! `geo::forward_geocode`), and person filtering is deferred until a
|
||||||
|
//! person→photos resolver exists.
|
||||||
|
|
||||||
|
use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks};
|
||||||
|
use anyhow::{Result, anyhow};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
/// Raw query object as emitted by the LLM. Tag references are by name
|
||||||
|
/// (resolved against the real vocab in Rust); dates are ISO `YYYY-MM-DD`.
|
||||||
|
/// Every field is optional so a partial / minimal model response still
|
||||||
|
/// deserializes.
|
||||||
|
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
|
||||||
|
pub struct RawNlQuery {
|
||||||
|
/// Visual/scene description handed to CLIP for ranking. The descriptive
|
||||||
|
/// remainder after structured filters are peeled off.
|
||||||
|
#[serde(default)]
|
||||||
|
pub semantic: Option<String>,
|
||||||
|
/// Tag names the photos must have. Matched case-insensitively against
|
||||||
|
/// the supplied vocabulary; non-matches land in `unmatched_tags`.
|
||||||
|
#[serde(default)]
|
||||||
|
pub tags: Vec<String>,
|
||||||
|
/// Tag names the photos must NOT have.
|
||||||
|
#[serde(default)]
|
||||||
|
pub exclude_tags: Vec<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub camera_make: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub camera_model: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub lens_model: Option<String>,
|
||||||
|
/// Free-text place/location name to forward-geocode (e.g. "Italy").
|
||||||
|
#[serde(default)]
|
||||||
|
pub place: Option<String>,
|
||||||
|
/// Inclusive start date, ISO `YYYY-MM-DD`.
|
||||||
|
#[serde(default)]
|
||||||
|
pub date_from: Option<String>,
|
||||||
|
/// Inclusive end date, ISO `YYYY-MM-DD`.
|
||||||
|
#[serde(default)]
|
||||||
|
pub date_to: Option<String>,
|
||||||
|
/// "photo" | "video" — normalized in [`resolve_raw_query`].
|
||||||
|
#[serde(default)]
|
||||||
|
pub media_type: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolved structured query: tag names mapped to ids against the real
|
||||||
|
/// vocab, ISO dates converted to unix seconds. `place` stays as text for the
|
||||||
|
/// caller to forward-geocode into a gps circle. Serializable so the endpoint
|
||||||
|
/// can echo it back to the client as "this is how I read your query"
|
||||||
|
/// (editable filter chips).
|
||||||
|
#[derive(Debug, Clone, Default, PartialEq, Serialize)]
|
||||||
|
pub struct StructuredQuery {
|
||||||
|
pub semantic: Option<String>,
|
||||||
|
pub tag_ids: Vec<i32>,
|
||||||
|
pub exclude_tag_ids: Vec<i32>,
|
||||||
|
/// Tag names the model produced that don't exist in the vocabulary.
|
||||||
|
/// The caller folds these back into the semantic term so the concept
|
||||||
|
/// isn't lost — and surfacing them keeps a hallucinated tag from
|
||||||
|
/// silently filtering the whole library to nothing.
|
||||||
|
pub unmatched_tags: Vec<String>,
|
||||||
|
pub camera_make: Option<String>,
|
||||||
|
pub camera_model: Option<String>,
|
||||||
|
pub lens_model: Option<String>,
|
||||||
|
/// Raw place name awaiting forward-geocoding by the caller.
|
||||||
|
pub place: Option<String>,
|
||||||
|
pub date_from: Option<i64>,
|
||||||
|
pub date_to: Option<i64>,
|
||||||
|
/// Normalized to "photo" | "video"; `None` means no media-type filter.
|
||||||
|
pub media_type: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert an ISO `YYYY-MM-DD` date to a unix timestamp (seconds). With
|
||||||
|
/// `end_of_day`, returns 23:59:59 of that day so a `date_to` filter is
|
||||||
|
/// inclusive of the whole day; otherwise 00:00:00. Returns `None` for any
|
||||||
|
/// unparseable input (the filter is simply omitted rather than erroring).
|
||||||
|
pub fn iso_to_unix(date: &str, end_of_day: bool) -> Option<i64> {
|
||||||
|
let d = chrono::NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok()?;
|
||||||
|
let time = if end_of_day {
|
||||||
|
chrono::NaiveTime::from_hms_opt(23, 59, 59)?
|
||||||
|
} else {
|
||||||
|
chrono::NaiveTime::from_hms_opt(0, 0, 0)?
|
||||||
|
};
|
||||||
|
Some(d.and_time(time).and_utc().timestamp())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Map a loosely worded media type onto the engine's two values.
///
/// The input is trimmed and lower-cased, then looked up in two fixed word
/// lists. A photo word gives `Some("photo")`, a video word `Some("video")`,
/// and everything else (including "all" and the empty string) gives `None`,
/// meaning "no media-type filter".
fn normalize_media_type(raw: &str) -> Option<String> {
    const PHOTO_WORDS: [&str; 6] = ["photo", "photos", "image", "images", "picture", "pictures"];
    const VIDEO_WORDS: [&str; 6] = ["video", "videos", "movie", "movies", "clip", "clips"];

    let word = raw.trim().to_lowercase();
    if PHOTO_WORDS.contains(&word.as_str()) {
        return Some("photo".to_string());
    }
    if VIDEO_WORDS.contains(&word.as_str()) {
        return Some("video".to_string());
    }
    None
}
|
||||||
|
|
||||||
|
/// Resolve a raw LLM query against the real tag vocabulary, producing the
|
||||||
|
/// structured filter. Pure — no network, no LLM — so it carries the
|
||||||
|
/// correctness-critical mapping logic under unit test.
|
||||||
|
///
|
||||||
|
/// `tag_vocab` is `(tag_id, tag_name)` pairs (the shape `TagDao::get_all_tags`
|
||||||
|
/// yields once the count is dropped). Matching is case-insensitive and exact
|
||||||
|
/// on the trimmed name.
|
||||||
|
pub fn resolve_raw_query(raw: RawNlQuery, tag_vocab: &[(i32, String)]) -> StructuredQuery {
|
||||||
|
// Case-insensitive name → id lookup. Built once per call.
|
||||||
|
let lookup: std::collections::HashMap<String, i32> = tag_vocab
|
||||||
|
.iter()
|
||||||
|
.map(|(id, name)| (name.trim().to_lowercase(), *id))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let resolve_names = |names: &[String], ids: &mut Vec<i32>, unmatched: &mut Vec<String>| {
|
||||||
|
for name in names {
|
||||||
|
let key = name.trim().to_lowercase();
|
||||||
|
if key.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match lookup.get(&key) {
|
||||||
|
Some(id) if !ids.contains(id) => ids.push(*id),
|
||||||
|
Some(_) => {} // duplicate, already collected
|
||||||
|
None => {
|
||||||
|
if !unmatched.iter().any(|u| u.eq_ignore_ascii_case(name)) {
|
||||||
|
unmatched.push(name.trim().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut tag_ids = Vec::new();
|
||||||
|
let mut unmatched_tags = Vec::new();
|
||||||
|
resolve_names(&raw.tags, &mut tag_ids, &mut unmatched_tags);
|
||||||
|
|
||||||
|
// Excluded tags that don't match a real tag are simply ignored — you
|
||||||
|
// can't exclude a tag that doesn't exist, and folding them into
|
||||||
|
// `semantic` would make no sense.
|
||||||
|
let mut exclude_tag_ids = Vec::new();
|
||||||
|
let mut exclude_unmatched = Vec::new();
|
||||||
|
resolve_names(
|
||||||
|
&raw.exclude_tags,
|
||||||
|
&mut exclude_tag_ids,
|
||||||
|
&mut exclude_unmatched,
|
||||||
|
);
|
||||||
|
|
||||||
|
let clean = |s: Option<String>| s.map(|v| v.trim().to_string()).filter(|v| !v.is_empty());
|
||||||
|
|
||||||
|
StructuredQuery {
|
||||||
|
semantic: clean(raw.semantic),
|
||||||
|
tag_ids,
|
||||||
|
exclude_tag_ids,
|
||||||
|
unmatched_tags,
|
||||||
|
camera_make: clean(raw.camera_make),
|
||||||
|
camera_model: clean(raw.camera_model),
|
||||||
|
lens_model: clean(raw.lens_model),
|
||||||
|
place: clean(raw.place),
|
||||||
|
date_from: raw.date_from.as_deref().and_then(|d| iso_to_unix(d, false)),
|
||||||
|
date_to: raw.date_to.as_deref().and_then(|d| iso_to_unix(d, true)),
|
||||||
|
media_type: raw.media_type.as_deref().and_then(normalize_media_type),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the grounded system prompt. The model is told the current date (so
|
||||||
|
/// "last summer" resolves) and the exact tag vocabulary (so it uses real
|
||||||
|
/// tags or routes the concept to `semantic` instead of inventing one).
|
||||||
|
fn build_system_prompt(tag_vocab: &[(i32, String)], today: chrono::NaiveDate) -> String {
|
||||||
|
// Cap the vocab dump so a huge library doesn't blow the context window;
|
||||||
|
// the most-used tags are the ones a query is likely to reference.
|
||||||
|
const MAX_TAGS: usize = 400;
|
||||||
|
let mut names: Vec<&str> = tag_vocab.iter().map(|(_, n)| n.as_str()).collect();
|
||||||
|
names.sort_unstable();
|
||||||
|
names.dedup();
|
||||||
|
let shown = names.len().min(MAX_TAGS);
|
||||||
|
let vocab = names[..shown].join(", ");
|
||||||
|
let truncation = if names.len() > MAX_TAGS {
|
||||||
|
format!(" (showing {MAX_TAGS} of {} tags)", names.len())
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
format!(
|
||||||
|
"You translate a user's natural-language photo-search request into a JSON \
|
||||||
|
filter. Today's date is {today}. Respond with ONLY a JSON object, no prose, no \
|
||||||
|
code fences.\n\n\
|
||||||
|
Schema (all fields optional):\n\
|
||||||
|
{{\n \
|
||||||
|
\"semantic\": string|null, // visual scene/subject for image similarity search\n \
|
||||||
|
\"tags\": string[], // ONLY names from the tag list below\n \
|
||||||
|
\"exclude_tags\": string[], // ONLY names from the tag list below\n \
|
||||||
|
\"camera_make\": string|null,\n \
|
||||||
|
\"camera_model\": string|null,\n \
|
||||||
|
\"lens_model\": string|null,\n \
|
||||||
|
\"place\": string|null, // a location name to look up (city, country, landmark)\n \
|
||||||
|
\"date_from\": \"YYYY-MM-DD\"|null, // inclusive\n \
|
||||||
|
\"date_to\": \"YYYY-MM-DD\"|null, // inclusive\n \
|
||||||
|
\"media_type\": \"photo\"|\"video\"|null\n\
|
||||||
|
}}\n\n\
|
||||||
|
Rules:\n\
|
||||||
|
- Put descriptive/visual concepts (\"sunset\", \"crowded beach\", \"red car\") in \"semantic\".\n\
|
||||||
|
- Only use \"tags\"/\"exclude_tags\" values that appear EXACTLY in the tag list. If a \
|
||||||
|
concept isn't a listed tag, put it in \"semantic\" instead — never invent a tag.\n\
|
||||||
|
- Resolve relative dates against today's date (\"last summer\", \"2023\", \"last month\").\n\
|
||||||
|
- Put place/location names in \"place\" (not \"semantic\").\n\
|
||||||
|
- Omit (use null / empty array) anything the request doesn't mention.\n\n\
|
||||||
|
Available tags{truncation}: {vocab}"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract the JSON object from a model response that may include a leading
|
||||||
|
/// `<think>` block, code fences, or trailing prose. Strips the think block
|
||||||
|
/// first (so reasoning that mentions braces can't fool the scan), then
|
||||||
|
/// returns the substring from the first `{` to the last `}` inclusive — or
|
||||||
|
/// the trimmed text if no braces are found (which then fails to parse with a
|
||||||
|
/// clear error).
|
||||||
|
fn extract_json(raw: &str) -> String {
|
||||||
|
let s = strip_think_blocks(raw);
|
||||||
|
let start = s.find('{');
|
||||||
|
let end = s.rfind('}');
|
||||||
|
match (start, end) {
|
||||||
|
(Some(a), Some(b)) if b >= a => s[a..=b].to_string(),
|
||||||
|
_ => s.trim().to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a model response string into a [`StructuredQuery`], resolving names
|
||||||
|
/// against the vocab. Separated from the LLM call so it's unit-testable.
|
||||||
|
pub fn parse_response(response: &str, tag_vocab: &[(i32, String)]) -> Result<StructuredQuery> {
|
||||||
|
let json = extract_json(response);
|
||||||
|
let raw: RawNlQuery = serde_json::from_str(&json)
|
||||||
|
.map_err(|e| anyhow!("failed to parse NL query JSON: {e}; raw response: {response:?}"))?;
|
||||||
|
Ok(resolve_raw_query(raw, tag_vocab))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Translate a natural-language query into a [`StructuredQuery`] via one
|
||||||
|
/// grounded LLM call. The `client` is any configured backend (the unified
|
||||||
|
/// endpoint passes the resolved chat backend); `tag_vocab` grounds the tag
|
||||||
|
/// mapping; `today` anchors relative-date resolution.
|
||||||
|
pub async fn translate_nl_query(
|
||||||
|
client: &dyn LlmClient,
|
||||||
|
nl: &str,
|
||||||
|
tag_vocab: &[(i32, String)],
|
||||||
|
today: chrono::NaiveDate,
|
||||||
|
) -> Result<StructuredQuery> {
|
||||||
|
let system = build_system_prompt(tag_vocab, today);
|
||||||
|
let messages = vec![ChatMessage::system(system), ChatMessage::user(nl)];
|
||||||
|
let (msg, _, _) = client.chat_with_tools(messages, Vec::<Tool>::new()).await?;
|
||||||
|
parse_response(&msg.content, tag_vocab)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-tag vocabulary; "Sunset" is mixed-case on purpose to exercise
    /// case-insensitive matching.
    fn vocab() -> Vec<(i32, String)> {
        [(1, "beach"), (2, "Sunset"), (3, "family")]
            .iter()
            .map(|(id, name)| (*id, name.to_string()))
            .collect()
    }

    /// Raw query that carries nothing but the given tag names.
    fn with_tags(tags: &[&str]) -> RawNlQuery {
        RawNlQuery {
            tags: tags.iter().map(|t| t.to_string()).collect(),
            ..Default::default()
        }
    }

    #[test]
    fn iso_to_unix_start_and_end_of_day() {
        // 2023-01-01 UTC midnight = 1672531200; the day ends 86399 s later.
        let midnight: i64 = 1_672_531_200;
        assert_eq!(iso_to_unix("2023-01-01", false), Some(midnight));
        assert_eq!(iso_to_unix("2023-01-01", true), Some(midnight + 86_399));
    }

    #[test]
    fn iso_to_unix_rejects_garbage() {
        for bad in ["last summer", "2023-13-99", ""] {
            assert_eq!(iso_to_unix(bad, false), None);
        }
    }

    #[test]
    fn resolve_matches_tags_case_insensitively() {
        let q = resolve_raw_query(with_tags(&["BEACH", "sunset"]), &vocab());
        assert_eq!(q.tag_ids, vec![1, 2]);
        assert!(q.unmatched_tags.is_empty());
    }

    #[test]
    fn resolve_surfaces_unmatched_tags_not_silently_dropped() {
        // A non-vocab tag has to be reported so the caller can fold it into
        // the semantic term; it must never act as a hard filter.
        let q = resolve_raw_query(with_tags(&["beach", "golden hour"]), &vocab());
        assert_eq!(q.tag_ids, vec![1]);
        assert_eq!(q.unmatched_tags, vec!["golden hour".to_string()]);
    }

    #[test]
    fn resolve_dedups_repeated_tags() {
        let q = resolve_raw_query(with_tags(&["beach", "Beach", "beach"]), &vocab());
        assert_eq!(q.tag_ids, vec![1]);
    }

    #[test]
    fn resolve_normalizes_media_type_and_dates() {
        let (from, to) = ("2023-06-01", "2023-06-30");
        let raw = RawNlQuery {
            media_type: Some("Videos".to_string()),
            date_from: Some(from.to_string()),
            date_to: Some(to.to_string()),
            ..Default::default()
        };
        let q = resolve_raw_query(raw, &vocab());
        assert_eq!(q.media_type.as_deref(), Some("video"));
        assert_eq!(q.date_from, iso_to_unix(from, false));
        assert_eq!(q.date_to, iso_to_unix(to, true));
    }

    #[test]
    fn resolve_media_type_all_is_no_filter() {
        let raw = RawNlQuery {
            media_type: Some("all".to_string()),
            ..Default::default()
        };
        let q = resolve_raw_query(raw, &vocab());
        assert_eq!(q.media_type, None);
    }

    #[test]
    fn resolve_trims_and_empties_to_none() {
        let raw = RawNlQuery {
            semantic: Some(" ".to_string()),
            camera_make: Some(" Fujifilm ".to_string()),
            place: Some("".to_string()),
            ..Default::default()
        };
        let q = resolve_raw_query(raw, &vocab());
        assert_eq!(q.semantic, None);
        assert_eq!(q.camera_make.as_deref(), Some("Fujifilm"));
        assert_eq!(q.place, None);
    }

    #[test]
    fn parse_response_handles_code_fences_and_prose() {
        let resp = "Here is the filter:\n```json\n{\"semantic\":\"sunset\",\"tags\":[\"beach\"]}\n```\nDone.";
        let q = parse_response(resp, &vocab()).expect("parse");
        assert_eq!(q.semantic.as_deref(), Some("sunset"));
        assert_eq!(q.tag_ids, vec![1]);
    }

    #[test]
    fn parse_response_handles_think_block_then_json() {
        let resp = "<think>user wants beach sunsets</think>{\"tags\":[\"beach\",\"sunset\"]}";
        let q = parse_response(resp, &vocab()).expect("parse");
        assert_eq!(q.tag_ids, vec![1, 2]);
    }

    #[test]
    fn parse_response_errors_on_non_json() {
        let outcome = parse_response("I cannot help with that.", &vocab());
        assert!(outcome.is_err());
    }

    #[test]
    fn build_system_prompt_includes_date_and_vocab() {
        let today = chrono::NaiveDate::from_ymd_opt(2026, 6, 14).unwrap();
        let prompt = build_system_prompt(&vocab(), today);
        assert!(
            prompt.contains("2026-06-14"),
            "prompt should state today's date"
        );
        assert!(prompt.contains("beach"), "prompt should list the vocab");
        assert!(
            prompt.contains("never invent a tag"),
            "prompt should warn against inventing tags"
        );
    }
}
|
||||||
+36
-1
@@ -23,6 +23,7 @@ use std::time::{Duration, Instant};
|
|||||||
use tokio::sync::Semaphore;
|
use tokio::sync::Semaphore;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::ai::llamacpp::LlamaCppClient;
|
||||||
use crate::data::Claims;
|
use crate::data::Claims;
|
||||||
use crate::file_types::{is_audio_file, is_video_file};
|
use crate::file_types::{is_audio_file, is_video_file};
|
||||||
use crate::files::is_valid_full_path;
|
use crate::files::is_valid_full_path;
|
||||||
@@ -473,6 +474,40 @@ pub struct TtsJobStatusResponse {
|
|||||||
pub error: Option<String>,
|
pub error: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Synthesize speech honoring the global single-GPU serialization
|
||||||
|
/// (`TTS_PERMIT`) and the GPU write lease, exactly as the speech-job path does.
|
||||||
|
/// Queues on the permit rather than fast-failing, so callers wait their turn
|
||||||
|
/// instead of contending. Text is run through the same markdown/emoji cleanup +
|
||||||
|
/// pronunciation pipeline as the HTTP handlers. Reused by the memory-reel
|
||||||
|
/// pipeline to narrate each segment without racing a user's TTS request on the
|
||||||
|
/// Chatterbox GPU.
|
||||||
|
pub async fn synthesize_serialized(
|
||||||
|
client: &LlamaCppClient,
|
||||||
|
text: &str,
|
||||||
|
voice: Option<&str>,
|
||||||
|
format: &str,
|
||||||
|
exaggeration: Option<f32>,
|
||||||
|
) -> anyhow::Result<Vec<u8>> {
|
||||||
|
let prepared = prepare_for_tts(text);
|
||||||
|
if prepared.is_empty() {
|
||||||
|
anyhow::bail!("nothing to synthesize after cleanup");
|
||||||
|
}
|
||||||
|
// Clamp to Chatterbox's documented range, matching the HTTP handlers
|
||||||
|
// (which clamp before forwarding; this path bypasses them).
|
||||||
|
let exaggeration = exaggeration.map(|x| x.clamp(0.25, 2.0));
|
||||||
|
// Queue rather than fast-fail (mirrors create_speech_job_handler).
|
||||||
|
let _permit = TTS_PERMIT
|
||||||
|
.acquire()
|
||||||
|
.await
|
||||||
|
.map_err(|_| anyhow::anyhow!("TTS permit closed"))?;
|
||||||
|
// Wait for the LLM side to release the GPU before the request timeout
|
||||||
|
// starts (see ai::gpu).
|
||||||
|
let _gpu = crate::ai::gpu::tts_lease().await;
|
||||||
|
client
|
||||||
|
.text_to_speech(&prepared, voice, format, exaggeration, None, None)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
/// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses.
|
/// POST /tts/speech/jobs — durable variant of /tts/speech for long syntheses.
|
||||||
/// Returns 202 + a job id immediately; the synth queues on the single GPU
|
/// Returns 202 + a job id immediately; the synth queues on the single GPU
|
||||||
/// permit (instead of fast-failing 429) and the client polls the job until
|
/// permit (instead of fast-failing 429) and the client polls the job until
|
||||||
@@ -985,7 +1020,7 @@ pub async fn create_voice_from_library_handler(
|
|||||||
let voice_name =
|
let voice_name =
|
||||||
append_ref_window(&voice_name, ref_start, ref_duration.round().max(1.0) as u32);
|
append_ref_window(&voice_name, ref_start, ref_duration.round().max(1.0) as u32);
|
||||||
|
|
||||||
let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) {
|
let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
|
||||||
Ok(Some(l)) => l,
|
Ok(Some(l)) => l,
|
||||||
Ok(None) => app_state.primary_library(),
|
Ok(None) => app_state.primary_library(),
|
||||||
Err(msg) => {
|
Err(msg) => {
|
||||||
|
|||||||
@@ -336,6 +336,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
args.top_p,
|
args.top_p,
|
||||||
args.top_k,
|
args.top_k,
|
||||||
args.min_p,
|
args.min_p,
|
||||||
|
None, // enable_thinking: leave model/template default
|
||||||
args.max_iterations,
|
args.max_iterations,
|
||||||
None,
|
None,
|
||||||
Vec::new(),
|
Vec::new(),
|
||||||
|
|||||||
+214
-184
@@ -124,65 +124,161 @@ fn dot(a: &[f32], b: &[f32]) -> f32 {
|
|||||||
a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
|
a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn search_photos(
|
/// Failure modes of [`score_photos`]. Carries enough to let each caller pick
|
||||||
state: web::Data<AppState>,
|
/// an appropriate HTTP status (the CLIP service being down is a 502, a
|
||||||
exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
|
/// disabled feature is a 503, a rejected query is a 400, a DB failure 500).
|
||||||
query: web::Query<SearchQuery>,
|
pub enum ScoreError {
|
||||||
) -> ActixResult<HttpResponse> {
|
/// CLIP search isn't configured at all (no Apollo endpoint).
|
||||||
let q_text = query.q.trim().to_string();
|
Disabled,
|
||||||
if q_text.is_empty() {
|
/// The query was rejected by the encoder (client error).
|
||||||
return Ok(HttpResponse::BadRequest().json(SearchError {
|
Rejected(String),
|
||||||
error: "query parameter `q` is required".into(),
|
/// The CLIP service is transiently unavailable (upstream error).
|
||||||
}));
|
Unavailable(String),
|
||||||
}
|
/// The encoder returned an embedding we couldn't decode.
|
||||||
|
MalformedEmbedding,
|
||||||
|
/// A database / index load failure.
|
||||||
|
Internal(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result of scoring the whole library against a query embedding: the
/// resolved model version, how many embeddings were considered, and every
/// `(score, content_hash)` above threshold, sorted by descending score.
/// Pagination and path resolution are the caller's job (see [`resolve_hits`])
/// so this core can be reused for both the plain search endpoint and the
/// unified endpoint (which filters by hash before paginating).
///
/// Derives `Debug` and `Clone` so callers can log a result or keep a copy
/// while slicing a page out of `hits`.
#[derive(Debug, Clone)]
pub struct ScoredPhotos {
    /// Model version reported by the encoder for the query embedding.
    pub model_version: String,
    /// Number of stored embeddings loaded for scoring (before thresholding).
    pub considered: usize,
    /// `(cosine_score, content_hash)` pairs, descending by score.
    pub hits: Vec<(f32, String)>,
}
|
||||||
|
|
||||||
|
/// Encode `q_text` via CLIP and score it against every stored embedding in
|
||||||
|
/// the given library scope. Returns all matches above `threshold`, sorted by
|
||||||
|
/// descending similarity. Pure of HTTP concerns so it's shared by
|
||||||
|
/// `search_photos` and the unified search endpoint.
|
||||||
|
pub async fn score_photos(
|
||||||
|
state: &AppState,
|
||||||
|
exif_dao: &Mutex<Box<dyn ExifDao>>,
|
||||||
|
q_text: &str,
|
||||||
|
library_ids: &[i32],
|
||||||
|
threshold: f32,
|
||||||
|
model_version: Option<&str>,
|
||||||
|
) -> Result<ScoredPhotos, ScoreError> {
|
||||||
if !state.clip_client.is_enabled() {
|
if !state.clip_client.is_enabled() {
|
||||||
return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
|
return Err(ScoreError::Disabled);
|
||||||
error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(),
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let limit = query.limit.clamp(1, 200);
|
// 1. Encode the query text. Fast — Apollo's text encoder is ~50ms on CPU.
|
||||||
let offset = query.offset;
|
let query_resp = match state.clip_client.encode_text(q_text).await {
|
||||||
let threshold = query.threshold.clamp(-1.0, 1.0);
|
|
||||||
|
|
||||||
// 1. Encode the query text. Fast — Apollo's text encoder is ~50ms
|
|
||||||
// on CPU. Bail with a clear error message if Apollo's down so the
|
|
||||||
// user sees "service unavailable" rather than empty results.
|
|
||||||
let query_resp = match state.clip_client.encode_text(&q_text).await {
|
|
||||||
Ok(r) => r,
|
Ok(r) => r,
|
||||||
Err(ClipError::Permanent(e)) => {
|
Err(ClipError::Permanent(e)) => return Err(ScoreError::Rejected(e.to_string())),
|
||||||
return Ok(HttpResponse::BadRequest().json(SearchError {
|
Err(ClipError::Transient(e)) => return Err(ScoreError::Unavailable(e.to_string())),
|
||||||
error: format!("query rejected: {e}"),
|
Err(ClipError::Disabled) => return Err(ScoreError::Disabled),
|
||||||
}));
|
|
||||||
}
|
|
||||||
Err(ClipError::Transient(e)) => {
|
|
||||||
return Ok(HttpResponse::BadGateway().json(SearchError {
|
|
||||||
error: format!("CLIP service unavailable: {e}"),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
Err(ClipError::Disabled) => {
|
|
||||||
return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
|
|
||||||
error: "CLIP service disabled".into(),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
// decode_embedding works on raw bytes; the wire format is b64.
|
// decode_embedding works on raw bytes; the wire format is b64.
|
||||||
let query_bytes = base64::engine::general_purpose::STANDARD
|
let query_bytes = base64::engine::general_purpose::STANDARD
|
||||||
.decode(query_resp.embedding.as_bytes())
|
.decode(query_resp.embedding.as_bytes())
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
let query_vec = match decode_embedding(&query_bytes) {
|
let query_vec = decode_embedding(&query_bytes).ok_or(ScoreError::MalformedEmbedding)?;
|
||||||
Some(v) => v,
|
|
||||||
None => {
|
|
||||||
return Ok(HttpResponse::BadGateway().json(SearchError {
|
|
||||||
error: "CLIP service returned a malformed query embedding".into(),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// 2. Decide which library scope to search. `library_ids` (multi)
|
// 2. Pull the (hash, embedding) matrix under the dao lock, release
|
||||||
// wins over the legacy `library` (single) when both are present;
|
// before scoring. The caller-supplied `model_version` (or the live
|
||||||
// either / both empty falls back to "every enabled library".
|
// engine's) forces a strict join so a mid-flight model swap can't mix
|
||||||
let library_ids: Vec<i32> = if let Some(raw) = query.library_ids.as_deref() {
|
// geometries.
|
||||||
|
let ctx = opentelemetry::Context::current();
|
||||||
|
let rows: Vec<(String, Vec<u8>)> = {
|
||||||
|
let mut dao = exif_dao.lock().expect("exif dao");
|
||||||
|
dao.list_clip_index(
|
||||||
|
&ctx,
|
||||||
|
library_ids,
|
||||||
|
model_version.or(Some(&query_resp.model_version)),
|
||||||
|
)
|
||||||
|
.map_err(|e| {
|
||||||
|
log::warn!("clip_search: list_clip_index failed: {:?}", e);
|
||||||
|
ScoreError::Internal("failed to load search index".into())
|
||||||
|
})?
|
||||||
|
};
|
||||||
|
let considered = rows.len();
|
||||||
|
|
||||||
|
// 3. Score. Keep all matches and sort at the end (~microseconds at 14k).
|
||||||
|
let mut hits: Vec<(f32, String)> = Vec::with_capacity(considered);
|
||||||
|
for (hash, blob) in rows {
|
||||||
|
let Some(emb) = decode_embedding(&blob) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
if emb.len() != query_vec.len() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let sim = dot(&emb, &query_vec);
|
||||||
|
if sim < threshold {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
hits.push((sim, hash));
|
||||||
|
}
|
||||||
|
hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
|
|
||||||
|
Ok(ScoredPhotos {
|
||||||
|
model_version: query_resp.model_version,
|
||||||
|
considered,
|
||||||
|
hits,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve a page of `(score, content_hash)` pairs back to [`SearchHit`]s
|
||||||
|
/// (each carrying `library_id` + `rel_path`). Hashes that no longer resolve
|
||||||
|
/// to a row are skipped. Shared by both endpoints.
|
||||||
|
pub fn resolve_hits(
|
||||||
|
exif_dao: &Mutex<Box<dyn ExifDao>>,
|
||||||
|
scored: &[(f32, String)],
|
||||||
|
) -> Vec<SearchHit> {
|
||||||
|
if scored.is_empty() {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
let ctx = opentelemetry::Context::current();
|
||||||
|
let hashes: Vec<String> = scored.iter().map(|(_, h)| h.clone()).collect();
|
||||||
|
let mut dao = exif_dao.lock().expect("exif dao");
|
||||||
|
let path_map = dao
|
||||||
|
.get_rel_paths_for_hashes(&ctx, &hashes)
|
||||||
|
.unwrap_or_else(|e| {
|
||||||
|
log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
|
||||||
|
std::collections::HashMap::new()
|
||||||
|
});
|
||||||
|
|
||||||
|
let mut results = Vec::with_capacity(scored.len());
|
||||||
|
for (score, hash) in scored {
|
||||||
|
let row = match dao.find_by_content_hash(&ctx, hash) {
|
||||||
|
Ok(Some(r)) => r,
|
||||||
|
Ok(None) => continue,
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("clip_search: find_by_content_hash failed for {hash}: {e:?}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// Prefer get_rel_paths_for_hashes's first entry (shares image_exif's
|
||||||
|
// natural order), falling back to the ImageExif row.
|
||||||
|
let rel_path = path_map
|
||||||
|
.get(hash)
|
||||||
|
.and_then(|paths| paths.first().cloned())
|
||||||
|
.unwrap_or(row.file_path);
|
||||||
|
results.push(SearchHit {
|
||||||
|
library_id: row.library_id,
|
||||||
|
rel_path,
|
||||||
|
content_hash: hash.clone(),
|
||||||
|
score: *score,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
results
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the `library_ids` (multi) / `library` (single) scope params into a
|
||||||
|
/// deduped id list. Empty = "every enabled library". Shared so the unified
|
||||||
|
/// endpoint scopes CLIP identically.
|
||||||
|
pub fn parse_library_scope(
|
||||||
|
library_ids: Option<&str>,
|
||||||
|
library: Option<i32>,
|
||||||
|
) -> Result<Vec<i32>, String> {
|
||||||
|
if let Some(raw) = library_ids {
|
||||||
let mut out: Vec<i32> = Vec::new();
|
let mut out: Vec<i32> = Vec::new();
|
||||||
for piece in raw.split(',') {
|
for piece in raw.split(',') {
|
||||||
let trimmed = piece.trim();
|
let trimmed = piece.trim();
|
||||||
@@ -195,158 +291,92 @@ pub async fn search_photos(
|
|||||||
out.push(id);
|
out.push(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => return Err(format!("invalid library_ids entry: {trimmed:?}")),
|
||||||
return Ok(HttpResponse::BadRequest().json(SearchError {
|
|
||||||
error: format!("invalid library_ids entry: {trimmed:?}"),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out
|
Ok(out)
|
||||||
} else if let Some(id) = query.library {
|
} else if let Some(id) = library {
|
||||||
vec![id]
|
Ok(vec![id])
|
||||||
} else {
|
} else {
|
||||||
Vec::new()
|
Ok(Vec::new())
|
||||||
};
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 3. Pull the (hash, embedding) matrix. Lock contention here is
|
pub async fn search_photos(
|
||||||
// bounded — one big SELECT under a mutex Arc<Mutex<dyn ExifDao>>
|
state: web::Data<AppState>,
|
||||||
// and then we release before scoring. If this becomes a hotspot
|
exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
|
||||||
// we'll cache the decoded matrix in AppState with TTL.
|
query: web::Query<SearchQuery>,
|
||||||
let ctx = opentelemetry::Context::current();
|
) -> ActixResult<HttpResponse> {
|
||||||
let rows: Vec<(String, Vec<u8>)> = {
|
let q_text = query.q.trim().to_string();
|
||||||
let mut dao = exif_dao.lock().expect("exif dao");
|
if q_text.is_empty() {
|
||||||
match dao.list_clip_index(
|
return Ok(HttpResponse::BadRequest().json(SearchError {
|
||||||
&ctx,
|
error: "query parameter `q` is required".into(),
|
||||||
&library_ids,
|
|
||||||
query
|
|
||||||
.model_version
|
|
||||||
.as_deref()
|
|
||||||
.or(Some(&query_resp.model_version)),
|
|
||||||
) {
|
|
||||||
Ok(r) => r,
|
|
||||||
Err(e) => {
|
|
||||||
log::warn!("clip_search: list_clip_index failed: {:?}", e);
|
|
||||||
return Ok(HttpResponse::InternalServerError().json(SearchError {
|
|
||||||
error: "failed to load search index".into(),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let considered = rows.len();
|
|
||||||
if considered == 0 {
|
|
||||||
return Ok(HttpResponse::Ok().json(SearchResponse {
|
|
||||||
query: q_text,
|
|
||||||
model_version: query_resp.model_version,
|
|
||||||
threshold,
|
|
||||||
considered,
|
|
||||||
total_matching: 0,
|
|
||||||
offset,
|
|
||||||
results: Vec::new(),
|
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4. Score. Cap the loop's transient allocation; we keep all scores
|
let limit = query.limit.clamp(1, 200);
|
||||||
// and sort at the end. With ~14k entries the sort is microseconds.
|
let offset = query.offset;
|
||||||
let mut scored: Vec<(f32, String)> = Vec::with_capacity(considered);
|
let threshold = query.threshold.clamp(-1.0, 1.0);
|
||||||
for (hash, blob) in rows {
|
|
||||||
let Some(emb) = decode_embedding(&blob) else {
|
let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
|
||||||
continue;
|
Ok(ids) => ids,
|
||||||
};
|
Err(msg) => return Ok(HttpResponse::BadRequest().json(SearchError { error: msg })),
|
||||||
if emb.len() != query_vec.len() {
|
};
|
||||||
continue;
|
|
||||||
}
|
let scored = match score_photos(
|
||||||
let sim = dot(&emb, &query_vec);
|
&state,
|
||||||
if sim < threshold {
|
&exif_dao,
|
||||||
continue;
|
&q_text,
|
||||||
}
|
&library_ids,
|
||||||
scored.push((sim, hash));
|
threshold,
|
||||||
}
|
query.model_version.as_deref(),
|
||||||
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
)
|
||||||
let total_matching = scored.len();
|
.await
|
||||||
// Pagination — slice the sorted list at `[offset, offset+limit)`.
|
{
|
||||||
// Offsets past the end produce empty pages rather than an error so
|
Ok(s) => s,
|
||||||
// the client can stop fetching naturally on "load more" past the end.
|
Err(e) => return Ok(score_error_response(e)),
|
||||||
let scored: Vec<(f32, String)> = if offset >= total_matching {
|
};
|
||||||
|
|
||||||
|
let total_matching = scored.hits.len();
|
||||||
|
// Pagination — slice the sorted list at `[offset, offset+limit)`. Offsets
|
||||||
|
// past the end produce empty pages so "load more" stops naturally.
|
||||||
|
let page: Vec<(f32, String)> = if offset >= total_matching {
|
||||||
Vec::new()
|
Vec::new()
|
||||||
} else {
|
} else {
|
||||||
let end = (offset + limit).min(total_matching);
|
let end = (offset + limit).min(total_matching);
|
||||||
scored[offset..end].to_vec()
|
scored.hits[offset..end].to_vec()
|
||||||
};
|
};
|
||||||
|
let results = resolve_hits(&exif_dao, &page);
|
||||||
if scored.is_empty() {
|
|
||||||
return Ok(HttpResponse::Ok().json(SearchResponse {
|
|
||||||
query: q_text,
|
|
||||||
model_version: query_resp.model_version,
|
|
||||||
threshold,
|
|
||||||
considered,
|
|
||||||
total_matching,
|
|
||||||
offset,
|
|
||||||
results: Vec::new(),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
// 5. Resolve each surviving hash back to a `(library_id, rel_path)`.
|
|
||||||
// `get_rel_paths_by_hash` returns every rel_path; we pick the first
|
|
||||||
// one for the result. Apollo / the UI can fetch alternatives via
|
|
||||||
// /image/metadata when needed.
|
|
||||||
let hashes: Vec<String> = scored.iter().map(|(_, h)| h.clone()).collect();
|
|
||||||
let path_map = {
|
|
||||||
let mut dao = exif_dao.lock().expect("exif dao");
|
|
||||||
match dao.get_rel_paths_for_hashes(&ctx, &hashes) {
|
|
||||||
Ok(m) => m,
|
|
||||||
Err(e) => {
|
|
||||||
log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
|
|
||||||
return Ok(HttpResponse::InternalServerError().json(SearchError {
|
|
||||||
error: "failed to resolve photo paths".into(),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// We need (library_id, rel_path) — get_rel_paths_for_hashes only
|
|
||||||
// returns rel_paths. Cross-reference via find_by_content_hash to
|
|
||||||
// pick the library too. Single call per surviving hash; cheap at
|
|
||||||
// top-20.
|
|
||||||
let mut results = Vec::with_capacity(scored.len());
|
|
||||||
{
|
|
||||||
let mut dao = exif_dao.lock().expect("exif dao");
|
|
||||||
for (score, hash) in scored {
|
|
||||||
let row = match dao.find_by_content_hash(&ctx, &hash) {
|
|
||||||
Ok(Some(r)) => r,
|
|
||||||
Ok(None) => continue,
|
|
||||||
Err(e) => {
|
|
||||||
log::warn!(
|
|
||||||
"clip_search: find_by_content_hash failed for {}: {:?}",
|
|
||||||
hash,
|
|
||||||
e
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
// Prefer get_rel_paths_for_hashes's first entry if it
|
|
||||||
// exists (it shares semantics with `image_exif`'s natural
|
|
||||||
// order), falling back to the ImageExif row.
|
|
||||||
let rel_path = path_map
|
|
||||||
.get(&hash)
|
|
||||||
.and_then(|paths| paths.first().cloned())
|
|
||||||
.unwrap_or(row.file_path);
|
|
||||||
results.push(SearchHit {
|
|
||||||
library_id: row.library_id,
|
|
||||||
rel_path,
|
|
||||||
content_hash: hash,
|
|
||||||
score,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(HttpResponse::Ok().json(SearchResponse {
|
Ok(HttpResponse::Ok().json(SearchResponse {
|
||||||
query: q_text,
|
query: q_text,
|
||||||
model_version: query_resp.model_version,
|
model_version: scored.model_version,
|
||||||
threshold,
|
threshold,
|
||||||
considered,
|
considered: scored.considered,
|
||||||
total_matching,
|
total_matching,
|
||||||
offset,
|
offset,
|
||||||
results,
|
results,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Map a [`ScoreError`] to the HTTP response `search_photos` historically
|
||||||
|
/// returned for each failure mode. Reused by the unified endpoint.
|
||||||
|
pub fn score_error_response(e: ScoreError) -> HttpResponse {
|
||||||
|
match e {
|
||||||
|
ScoreError::Disabled => HttpResponse::ServiceUnavailable().json(SearchError {
|
||||||
|
error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(),
|
||||||
|
}),
|
||||||
|
ScoreError::Rejected(msg) => HttpResponse::BadRequest().json(SearchError {
|
||||||
|
error: format!("query rejected: {msg}"),
|
||||||
|
}),
|
||||||
|
ScoreError::Unavailable(msg) => HttpResponse::BadGateway().json(SearchError {
|
||||||
|
error: format!("CLIP service unavailable: {msg}"),
|
||||||
|
}),
|
||||||
|
ScoreError::MalformedEmbedding => HttpResponse::BadGateway().json(SearchError {
|
||||||
|
error: "CLIP service returned a malformed query embedding".into(),
|
||||||
|
}),
|
||||||
|
ScoreError::Internal(msg) => {
|
||||||
|
HttpResponse::InternalServerError().json(SearchError { error: msg })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -51,10 +51,12 @@ pub mod knowledge_dao;
|
|||||||
pub mod location_dao;
|
pub mod location_dao;
|
||||||
pub mod models;
|
pub mod models;
|
||||||
pub mod persona_dao;
|
pub mod persona_dao;
|
||||||
|
pub mod precomputed_reel_dao;
|
||||||
pub mod preview_dao;
|
pub mod preview_dao;
|
||||||
pub mod reconcile;
|
pub mod reconcile;
|
||||||
pub mod schema;
|
pub mod schema;
|
||||||
pub mod search_dao;
|
pub mod search_dao;
|
||||||
|
pub mod user_ai_prefs_dao;
|
||||||
|
|
||||||
pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
|
pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
|
||||||
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
|
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
|
||||||
@@ -66,8 +68,10 @@ pub use knowledge_dao::{
|
|||||||
};
|
};
|
||||||
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
|
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
|
||||||
pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};
|
pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};
|
||||||
|
pub use precomputed_reel_dao::{PrecomputedReelDao, SqlitePrecomputedReelDao};
|
||||||
pub use preview_dao::{PreviewDao, SqlitePreviewDao};
|
pub use preview_dao::{PreviewDao, SqlitePreviewDao};
|
||||||
pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao};
|
pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao};
|
||||||
|
pub use user_ai_prefs_dao::{SqliteUserAiPrefsDao, UserAiPrefsDao};
|
||||||
|
|
||||||
pub trait UserDao {
|
pub trait UserDao {
|
||||||
fn create_user(&mut self, user: &str, password: &str) -> Option<User>;
|
fn create_user(&mut self, user: &str, password: &str) -> Option<User>;
|
||||||
|
|||||||
+55
-1
@@ -1,6 +1,7 @@
|
|||||||
use crate::database::schema::{
|
use crate::database::schema::{
|
||||||
entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs,
|
entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs,
|
||||||
libraries, personas, photo_insights, users, video_preview_clips,
|
libraries, personas, photo_insights, precomputed_reels, user_ai_prefs, users,
|
||||||
|
video_preview_clips,
|
||||||
};
|
};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
@@ -505,3 +506,56 @@ pub struct InsightGenerationJob {
|
|||||||
pub result_insight_id: Option<i32>,
|
pub result_insight_id: Option<i32>,
|
||||||
pub error_message: Option<String>,
|
pub error_message: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Precomputed reels -------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Insertable)]
|
||||||
|
#[diesel(table_name = precomputed_reels)]
|
||||||
|
pub struct InsertablePrecomputedReel {
|
||||||
|
pub span: String,
|
||||||
|
pub library_key: String,
|
||||||
|
pub cache_key: String,
|
||||||
|
pub output_path: String,
|
||||||
|
pub title: String,
|
||||||
|
pub media_count: i32,
|
||||||
|
pub render_version: i32,
|
||||||
|
pub tz_offset_minutes: i32,
|
||||||
|
pub voice: Option<String>,
|
||||||
|
pub generated_at: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Queryable, Clone, Debug)]
|
||||||
|
pub struct PrecomputedReel {
|
||||||
|
pub id: i32,
|
||||||
|
pub span: String,
|
||||||
|
pub library_key: String,
|
||||||
|
pub cache_key: String,
|
||||||
|
pub output_path: String,
|
||||||
|
pub title: String,
|
||||||
|
pub media_count: i32,
|
||||||
|
pub render_version: i32,
|
||||||
|
pub tz_offset_minutes: i32,
|
||||||
|
pub voice: Option<String>,
|
||||||
|
pub generated_at: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- User AI preferences (Section E) ----------------------------------------
|
||||||
|
|
||||||
|
#[derive(Queryable, Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||||
|
#[diesel(table_name = user_ai_prefs)]
|
||||||
|
pub struct UserAiPrefs {
|
||||||
|
pub id: i32,
|
||||||
|
pub voice: Option<String>,
|
||||||
|
pub tz_offset_minutes: Option<i32>,
|
||||||
|
pub library: Option<String>,
|
||||||
|
pub updated_at: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||||
|
#[diesel(table_name = user_ai_prefs)]
|
||||||
|
pub struct UpsertUserAiPrefs {
|
||||||
|
pub voice: Option<String>,
|
||||||
|
pub tz_offset_minutes: Option<i32>,
|
||||||
|
pub library: Option<String>,
|
||||||
|
pub updated_at: i64,
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,439 @@
|
|||||||
|
use diesel::prelude::*;
|
||||||
|
use diesel::sqlite::SqliteConnection;
|
||||||
|
use std::ops::DerefMut;
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use crate::database::models::{InsertablePrecomputedReel, PrecomputedReel};
|
||||||
|
use crate::database::schema;
|
||||||
|
use crate::database::{DbError, DbErrorKind, connect};
|
||||||
|
use crate::otel::trace_db_call;
|
||||||
|
|
||||||
|
/// Ledger for precomputed memory reels. The nightly agentic job writes a
|
||||||
|
/// row after each successful render; the `GET /reels/precomputed` handler
|
||||||
|
/// reads it to gate on freshness and serve the cached MP4.
|
||||||
|
pub trait PrecomputedReelDao: Sync + Send {
|
||||||
|
/// Insert a precomputed reel row. Returns the new row's id.
|
||||||
|
/// Written by the nightly agentic job (Section D).
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn record_reel(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
row: &InsertablePrecomputedReel,
|
||||||
|
) -> Result<i32, DbError>;
|
||||||
|
|
||||||
|
/// Find the latest precomputed reel for the given (span, library_key).
|
||||||
|
fn latest_for(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
span: &str,
|
||||||
|
library_key: &str,
|
||||||
|
) -> Result<Option<PrecomputedReel>, DbError>;
|
||||||
|
|
||||||
|
/// Return true when a fresh precomputed reel exists for the given
|
||||||
|
/// (span, library_key, render_version) that was generated at or after
|
||||||
|
/// `min_generated_at`. Used as a fast existence gate before falling
|
||||||
|
/// back to `latest_for` (avoids a second query path).
|
||||||
|
fn exists_fresh(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
span: &str,
|
||||||
|
library_key: &str,
|
||||||
|
render_version: i32,
|
||||||
|
min_generated_at: i64,
|
||||||
|
) -> Result<bool, DbError>;
|
||||||
|
|
||||||
|
/// Delete all but the newest `keep` rows for (span, library_key), returning
|
||||||
|
/// the deleted rows so the caller can unlink their output files. Used by the
|
||||||
|
/// nightly job to retire superseded reels (e.g. yesterday's daily).
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn prune_superseded(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
span: &str,
|
||||||
|
library_key: &str,
|
||||||
|
keep: usize,
|
||||||
|
) -> Result<Vec<PrecomputedReel>, DbError>;
|
||||||
|
|
||||||
|
/// Every cache_key currently in the ledger. Used by the on-disk cache sweep
|
||||||
|
/// to protect files a ledger row still points at.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError>;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SqlitePrecomputedReelDao {
|
||||||
|
connection: Arc<Mutex<SqliteConnection>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for SqlitePrecomputedReelDao {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SqlitePrecomputedReelDao {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
connection: Arc::new(Mutex::new(connect())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
|
||||||
|
Self { connection: conn }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PrecomputedReelDao for SqlitePrecomputedReelDao {
|
||||||
|
fn record_reel(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
row: &InsertablePrecomputedReel,
|
||||||
|
) -> Result<i32, DbError> {
|
||||||
|
trace_db_call(context, "insert", "record_reel", |_span| {
|
||||||
|
use schema::precomputed_reels::dsl;
|
||||||
|
|
||||||
|
let mut connection = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock PrecomputedReelDao");
|
||||||
|
|
||||||
|
diesel::insert_into(dsl::precomputed_reels)
|
||||||
|
.values(row)
|
||||||
|
.execute(connection.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to insert reel: {}", e))?;
|
||||||
|
|
||||||
|
dsl::precomputed_reels
|
||||||
|
.order(dsl::id.desc())
|
||||||
|
.select(dsl::id)
|
||||||
|
.first::<i32>(connection.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to get reel id: {}", e))
|
||||||
|
})
|
||||||
|
.map_err(|e| DbError::log(DbErrorKind::InsertError, e))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn latest_for(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
span: &str,
|
||||||
|
library_key: &str,
|
||||||
|
) -> Result<Option<PrecomputedReel>, DbError> {
|
||||||
|
trace_db_call(context, "query", "latest_for", |_span| {
|
||||||
|
use schema::precomputed_reels::dsl;
|
||||||
|
|
||||||
|
let mut connection = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock PrecomputedReelDao");
|
||||||
|
|
||||||
|
dsl::precomputed_reels
|
||||||
|
.filter(dsl::span.eq(span))
|
||||||
|
.filter(dsl::library_key.eq(library_key))
|
||||||
|
.order(dsl::generated_at.desc())
|
||||||
|
.first::<PrecomputedReel>(connection.deref_mut())
|
||||||
|
.optional()
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to get latest reel: {}", e))
|
||||||
|
})
|
||||||
|
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn exists_fresh(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
span: &str,
|
||||||
|
library_key: &str,
|
||||||
|
render_version: i32,
|
||||||
|
min_generated_at: i64,
|
||||||
|
) -> Result<bool, DbError> {
|
||||||
|
trace_db_call(context, "query", "exists_fresh", |_span| {
|
||||||
|
use schema::precomputed_reels::dsl;
|
||||||
|
|
||||||
|
let mut connection = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock PrecomputedReelDao");
|
||||||
|
|
||||||
|
let count: i64 = dsl::precomputed_reels
|
||||||
|
.filter(dsl::span.eq(span))
|
||||||
|
.filter(dsl::library_key.eq(library_key))
|
||||||
|
.filter(dsl::render_version.eq(render_version))
|
||||||
|
.filter(dsl::generated_at.ge(min_generated_at))
|
||||||
|
.count()
|
||||||
|
.get_result(connection.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to check fresh reel: {}", e))?;
|
||||||
|
|
||||||
|
Ok(count > 0)
|
||||||
|
})
|
||||||
|
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prune_superseded(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
span: &str,
|
||||||
|
library_key: &str,
|
||||||
|
keep: usize,
|
||||||
|
) -> Result<Vec<PrecomputedReel>, DbError> {
|
||||||
|
trace_db_call(context, "delete", "prune_superseded", |_span| {
|
||||||
|
use schema::precomputed_reels::dsl;
|
||||||
|
|
||||||
|
let mut connection = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock PrecomputedReelDao");
|
||||||
|
|
||||||
|
// Newest first; everything past `keep` is superseded. The table
|
||||||
|
// holds at most a handful of rows per (span, library), so loading
|
||||||
|
// and slicing in Rust is cheaper than a correlated subquery.
|
||||||
|
let mut rows: Vec<PrecomputedReel> = dsl::precomputed_reels
|
||||||
|
.filter(dsl::span.eq(span))
|
||||||
|
.filter(dsl::library_key.eq(library_key))
|
||||||
|
.order(dsl::generated_at.desc())
|
||||||
|
.load::<PrecomputedReel>(connection.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to load reels for prune: {}", e))?;
|
||||||
|
|
||||||
|
let stale = rows.split_off(rows.len().min(keep));
|
||||||
|
if !stale.is_empty() {
|
||||||
|
let ids: Vec<i32> = stale.iter().map(|r| r.id).collect();
|
||||||
|
diesel::delete(dsl::precomputed_reels.filter(dsl::id.eq_any(ids)))
|
||||||
|
.execute(connection.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to delete superseded reels: {}", e))?;
|
||||||
|
}
|
||||||
|
Ok(stale)
|
||||||
|
})
|
||||||
|
.map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError> {
|
||||||
|
trace_db_call(context, "query", "all_cache_keys", |_span| {
|
||||||
|
use schema::precomputed_reels::dsl;
|
||||||
|
|
||||||
|
let mut connection = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock PrecomputedReelDao");
|
||||||
|
|
||||||
|
dsl::precomputed_reels
|
||||||
|
.select(dsl::cache_key)
|
||||||
|
.load::<String>(connection.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to load cache keys: {}", e))
|
||||||
|
})
|
||||||
|
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use diesel::Connection;
    use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};

    const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();

    /// Build a DAO over a fresh in-memory SQLite database with every
    /// migration applied.
    fn setup_dao() -> SqlitePrecomputedReelDao {
        let mut conn = SqliteConnection::establish(":memory:")
            .expect("Unable to create in-memory db connection");
        conn.run_pending_migrations(DB_MIGRATIONS)
            .expect("Failure running DB migrations");
        let shared = Arc::new(Mutex::new(conn));
        SqlitePrecomputedReelDao::from_connection(shared)
    }

    /// Empty telemetry context for DAO calls.
    fn ctx() -> opentelemetry::Context {
        opentelemetry::Context::new()
    }

    /// Baseline row: span "day", library "1", render version 1,
    /// generated at 1_000_000.
    fn sample_row() -> InsertablePrecomputedReel {
        InsertablePrecomputedReel {
            span: String::from("day"),
            library_key: String::from("1"),
            cache_key: String::from("abc123"),
            output_path: String::from("/tmp/reel.mp4"),
            title: String::from("Test Reel"),
            media_count: 10,
            render_version: 1,
            tz_offset_minutes: 0,
            voice: Some(String::from("default")),
            generated_at: 1_000_000,
        }
    }

    #[test]
    fn record_reel_inserts_and_returns_id() {
        let mut dao = setup_dao();
        let ctx = ctx();

        let new_id = dao.record_reel(&ctx, &sample_row()).unwrap();
        assert!(new_id > 0, "should return a positive id");
    }

    #[test]
    fn record_reel_returns_increasing_ids() {
        let mut dao = setup_dao();
        let ctx = ctx();
        let row = sample_row();

        let first = dao.record_reel(&ctx, &row).unwrap();
        let second = dao.record_reel(&ctx, &row).unwrap();
        assert!(second > first, "each insert should get a higher id");
    }

    #[test]
    fn latest_for_returns_latest() {
        let mut dao = setup_dao();
        let ctx = ctx();

        // Insert an older row followed by a newer one.
        for generated_at in [1_000_000, 2_000_000] {
            let row = InsertablePrecomputedReel {
                generated_at,
                ..sample_row()
            };
            dao.record_reel(&ctx, &row).unwrap();
        }

        let newest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap();
        assert_eq!(newest.generated_at, 2_000_000);
    }

    #[test]
    fn latest_for_scoped_by_span_and_library() {
        let mut dao = setup_dao();
        let ctx = ctx();

        let daily = InsertablePrecomputedReel {
            span: String::from("day"),
            library_key: String::from("1"),
            generated_at: 1_000_000,
            ..sample_row()
        };
        let weekly = InsertablePrecomputedReel {
            span: String::from("week"),
            library_key: String::from("1"),
            generated_at: 2_000_000,
            ..sample_row()
        };
        dao.record_reel(&ctx, &daily).unwrap();
        dao.record_reel(&ctx, &weekly).unwrap();

        let found_day = dao.latest_for(&ctx, "day", "1").unwrap().unwrap();
        assert_eq!(found_day.span, "day");

        let found_week = dao.latest_for(&ctx, "week", "1").unwrap().unwrap();
        assert_eq!(found_week.span, "week");

        // A library with no rows yields None.
        let absent = dao.latest_for(&ctx, "day", "99").unwrap();
        assert!(absent.is_none());
    }

    #[test]
    fn latest_for_returns_none_when_no_rows() {
        let mut dao = setup_dao();
        let ctx = ctx();

        let found = dao.latest_for(&ctx, "day", "1").unwrap();
        assert!(found.is_none());
    }

    #[test]
    fn exists_fresh_returns_true_when_present() {
        let mut dao = setup_dao();
        let ctx = ctx();

        dao.record_reel(&ctx, &sample_row()).unwrap();

        let fresh = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap();
        assert!(fresh, "should find the row we just inserted");
    }

    #[test]
    fn exists_fresh_returns_false_when_missing() {
        let mut dao = setup_dao();
        let ctx = ctx();

        let fresh = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap();
        assert!(!fresh, "should not find anything in empty table");
    }

    #[test]
    fn exists_fresh_respects_min_generated_at() {
        let mut dao = setup_dao();
        let ctx = ctx();

        dao.record_reel(&ctx, &sample_row()).unwrap();

        // Cutoff earlier than the row's timestamp: the row counts as fresh.
        let before_cutoff = dao.exists_fresh(&ctx, "day", "1", 1, 500_000).unwrap();
        assert!(before_cutoff);

        // Cutoff later than the row's timestamp: the row is stale.
        let after_cutoff = dao.exists_fresh(&ctx, "day", "1", 1, 2_000_000).unwrap();
        assert!(!after_cutoff);
    }

    #[test]
    fn exists_fresh_respects_render_version() {
        let mut dao = setup_dao();
        let ctx = ctx();

        let v1 = InsertablePrecomputedReel {
            render_version: 1,
            ..sample_row()
        };
        dao.record_reel(&ctx, &v1).unwrap();

        assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap());
        assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap());
    }

    #[test]
    fn prune_superseded_keeps_newest_and_returns_deleted() {
        let mut dao = setup_dao();
        let ctx = ctx();

        // Three day/lib1 reels at increasing timestamps, plus an unrelated one.
        for (step, key) in (0i64..).zip(["k1", "k2", "k3"].iter()) {
            let row = InsertablePrecomputedReel {
                cache_key: key.to_string(),
                generated_at: 1_000_000 + step * 1000,
                ..sample_row()
            };
            dao.record_reel(&ctx, &row).unwrap();
        }
        let unrelated = InsertablePrecomputedReel {
            library_key: String::from("2"),
            cache_key: String::from("other"),
            ..sample_row()
        };
        dao.record_reel(&ctx, &unrelated).unwrap();

        // Keep the newest 2 of (day, "1"); k1 (oldest) is superseded.
        let removed = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
        assert_eq!(removed.len(), 1);
        assert_eq!(removed[0].cache_key, "k1");

        // The newest 2 survive; the other-library row is untouched.
        let remaining = dao.all_cache_keys(&ctx).unwrap();
        assert_eq!(remaining.len(), 3);
        for expected in ["k2", "k3", "other"].iter() {
            assert!(remaining.contains(&expected.to_string()));
        }
        assert!(!remaining.contains(&"k1".to_string()));
    }

    #[test]
    fn prune_superseded_noop_when_within_keep() {
        let mut dao = setup_dao();
        let ctx = ctx();

        dao.record_reel(&ctx, &sample_row()).unwrap();

        let removed = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
        assert!(removed.is_empty());
        assert_eq!(dao.all_cache_keys(&ctx).unwrap().len(), 1);
    }
}
|
||||||
@@ -266,6 +266,16 @@ diesel::table! {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
diesel::table! {
    // Single-row table (the DAO always writes id = 1) mirroring the latest
    // client AI request parameters. Column order must stay as declared.
    user_ai_prefs (id) {
        id -> Integer,
        voice -> Nullable<Text>,
        tz_offset_minutes -> Nullable<Integer>,
        library -> Nullable<Text>,
        updated_at -> BigInt,
    }
}
|
||||||
|
|
||||||
diesel::table! {
|
diesel::table! {
|
||||||
video_preview_clips (id) {
|
video_preview_clips (id) {
|
||||||
id -> Integer,
|
id -> Integer,
|
||||||
@@ -294,6 +304,22 @@ diesel::table! {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
diesel::table! {
    // Pre-generated reel records. NOTE(review): the DAO tests prune rows per
    // (span, library_key), keeping the newest by generated_at — confirm
    // against the DAO implementation. Column order must stay as declared.
    precomputed_reels (id) {
        id -> Integer,
        span -> Text,
        library_key -> Text,
        cache_key -> Text,
        output_path -> Text,
        title -> Text,
        media_count -> Integer,
        render_version -> Integer,
        tz_offset_minutes -> Integer,
        voice -> Nullable<Text>,
        generated_at -> BigInt,
    }
}
|
||||||
|
|
||||||
diesel::joinable!(entity_facts -> photo_insights (source_insight_id));
|
diesel::joinable!(entity_facts -> photo_insights (source_insight_id));
|
||||||
diesel::joinable!(entity_photo_links -> entities (entity_id));
|
diesel::joinable!(entity_photo_links -> entities (entity_id));
|
||||||
diesel::joinable!(entity_photo_links -> libraries (library_id));
|
diesel::joinable!(entity_photo_links -> libraries (library_id));
|
||||||
@@ -322,9 +348,11 @@ diesel::allow_tables_to_appear_in_same_query!(
|
|||||||
personas,
|
personas,
|
||||||
persons,
|
persons,
|
||||||
photo_insights,
|
photo_insights,
|
||||||
|
precomputed_reels,
|
||||||
search_history,
|
search_history,
|
||||||
tagged_photo,
|
tagged_photo,
|
||||||
tags,
|
tags,
|
||||||
|
user_ai_prefs,
|
||||||
users,
|
users,
|
||||||
video_preview_clips,
|
video_preview_clips,
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -0,0 +1,206 @@
|
|||||||
|
use diesel::prelude::*;
|
||||||
|
use diesel::sqlite::SqliteConnection;
|
||||||
|
use std::ops::DerefMut;
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use crate::database::models::{UpsertUserAiPrefs, UserAiPrefs};
|
||||||
|
use crate::database::schema;
|
||||||
|
use crate::database::{DbError, DbErrorKind, connect};
|
||||||
|
use crate::otel::trace_db_call;
|
||||||
|
|
||||||
|
/// Generic single-row table that passively mirrors the latest client AI
|
||||||
|
/// request parameters (voice, timezone, library). Read by the nightly
|
||||||
|
/// pre-generation scheduler (Section D) to pick up user preferences.
|
||||||
|
pub trait UserAiPrefsDao: Sync + Send {
|
||||||
|
/// Read the single row; `None` when it hasn't been populated yet.
|
||||||
|
fn get_prefs(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
) -> Result<Option<UserAiPrefs>, DbError>;
|
||||||
|
|
||||||
|
/// Upsert the single row (id is always 1).
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn upsert_prefs(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
prefs: &UpsertUserAiPrefs,
|
||||||
|
) -> Result<(), DbError>;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SqliteUserAiPrefsDao {
|
||||||
|
connection: Arc<Mutex<SqliteConnection>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for SqliteUserAiPrefsDao {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SqliteUserAiPrefsDao {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
connection: Arc::new(Mutex::new(connect())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
|
||||||
|
Self { connection: conn }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UserAiPrefsDao for SqliteUserAiPrefsDao {
|
||||||
|
fn get_prefs(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
) -> Result<Option<UserAiPrefs>, DbError> {
|
||||||
|
trace_db_call(context, "query", "get_prefs", |_span| {
|
||||||
|
use schema::user_ai_prefs::dsl;
|
||||||
|
|
||||||
|
let mut connection = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock UserAiPrefsDao");
|
||||||
|
|
||||||
|
dsl::user_ai_prefs
|
||||||
|
.first::<UserAiPrefs>(connection.deref_mut())
|
||||||
|
.optional()
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to get prefs: {}", e))
|
||||||
|
})
|
||||||
|
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn upsert_prefs(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
prefs: &UpsertUserAiPrefs,
|
||||||
|
) -> Result<(), DbError> {
|
||||||
|
trace_db_call(context, "upsert", "upsert_prefs", |_span| {
|
||||||
|
use schema::user_ai_prefs::dsl;
|
||||||
|
|
||||||
|
let mut connection = self
|
||||||
|
.connection
|
||||||
|
.lock()
|
||||||
|
.expect("Unable to lock UserAiPrefsDao");
|
||||||
|
|
||||||
|
// Single-row table (id=1): one atomic upsert. The explicit id=1
|
||||||
|
// makes the conflict target deterministic so the second call
|
||||||
|
// updates in place rather than tripping the CHECK(id=1) constraint,
|
||||||
|
// and real insert errors surface instead of being swallowed into a
|
||||||
|
// separate update branch. The columns are set explicitly (rather
|
||||||
|
// than via AsChangeset) so a None field overwrites to NULL — the
|
||||||
|
// row mirrors the latest request exactly, not a merge of past ones.
|
||||||
|
diesel::insert_into(dsl::user_ai_prefs)
|
||||||
|
.values((dsl::id.eq(1), prefs))
|
||||||
|
.on_conflict(dsl::id)
|
||||||
|
.do_update()
|
||||||
|
.set((
|
||||||
|
dsl::voice.eq(&prefs.voice),
|
||||||
|
dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes),
|
||||||
|
dsl::library.eq(&prefs.library),
|
||||||
|
dsl::updated_at.eq(&prefs.updated_at),
|
||||||
|
))
|
||||||
|
.execute(connection.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
.map_err(|e| DbError::log(DbErrorKind::InsertError, e))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use diesel::Connection;
    use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};

    const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();

    /// DAO backed by a throwaway in-memory database with every embedded
    /// migration applied.
    fn setup_dao() -> SqliteUserAiPrefsDao {
        let mut conn = SqliteConnection::establish(":memory:")
            .expect("Unable to create in-memory db connection");
        conn.run_pending_migrations(DB_MIGRATIONS)
            .expect("Failure running DB migrations");
        let shared = Arc::new(Mutex::new(conn));
        SqliteUserAiPrefsDao::from_connection(shared)
    }

    /// Empty tracing context for DAO calls.
    fn ctx() -> opentelemetry::Context {
        opentelemetry::Context::new()
    }

    #[test]
    fn get_prefs_returns_none_when_empty() {
        let mut dao = setup_dao();
        assert!(dao.get_prefs(&ctx()).unwrap().is_none());
    }

    #[test]
    fn upsert_prefs_inserts_row() {
        let mut dao = setup_dao();
        let stamp = 1_700_000_000i64;
        let wanted = UpsertUserAiPrefs {
            voice: Some("grandma".to_string()),
            tz_offset_minutes: Some(-480),
            library: Some("1".to_string()),
            updated_at: stamp,
        };
        dao.upsert_prefs(&ctx(), &wanted).unwrap();

        let stored = dao.get_prefs(&ctx()).unwrap().unwrap();
        assert_eq!(stored.id, 1);
        assert_eq!(stored.voice.as_deref(), Some("grandma"));
        assert_eq!(stored.tz_offset_minutes, Some(-480));
        assert_eq!(stored.library.as_deref(), Some("1"));
        assert_eq!(stored.updated_at, stamp);
    }

    #[test]
    fn upsert_prefs_replaces_existing() {
        let mut dao = setup_dao();
        let (first_stamp, second_stamp) = (1_700_000_000i64, 1_800_000_000i64);

        let initial = UpsertUserAiPrefs {
            voice: Some("grandma".to_string()),
            tz_offset_minutes: Some(-480),
            library: Some("1".to_string()),
            updated_at: first_stamp,
        };
        dao.upsert_prefs(&ctx(), &initial).unwrap();

        // The second write drops `library`, which must come back as NULL.
        let replacement = UpsertUserAiPrefs {
            voice: Some("dad".to_string()),
            tz_offset_minutes: Some(-300),
            library: None,
            updated_at: second_stamp,
        };
        dao.upsert_prefs(&ctx(), &replacement).unwrap();

        let stored = dao.get_prefs(&ctx()).unwrap().unwrap();
        assert_eq!(stored.voice.as_deref(), Some("dad"));
        assert_eq!(stored.tz_offset_minutes, Some(-300));
        assert!(stored.library.is_none());
        assert_eq!(stored.updated_at, second_stamp);
    }

    #[test]
    fn upsert_partial_fields() {
        let mut dao = setup_dao();
        let stamp = 1_700_000_000i64;

        let sparse = UpsertUserAiPrefs {
            voice: None,
            tz_offset_minutes: Some(-480),
            library: None,
            updated_at: stamp,
        };
        dao.upsert_prefs(&ctx(), &sparse).unwrap();

        let stored = dao.get_prefs(&ctx()).unwrap().unwrap();
        assert_eq!(stored.tz_offset_minutes, Some(-480));
        assert!(stored.voice.is_none());
        assert!(stored.library.is_none());
    }
}
|
||||||
+3
-3
@@ -234,7 +234,7 @@ async fn list_exact_handler(
|
|||||||
let span = global_tracer().start_with_context("duplicates.list_exact", &context);
|
let span = global_tracer().start_with_context("duplicates.list_exact", &context);
|
||||||
let span_context = opentelemetry::Context::current_with_span(span);
|
let span_context = opentelemetry::Context::current_with_span(span);
|
||||||
|
|
||||||
let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.map(|l| l.id);
|
.map(|l| l.id);
|
||||||
@@ -265,7 +265,7 @@ async fn list_perceptual_handler(
|
|||||||
let span = global_tracer().start_with_context("duplicates.list_perceptual", &context);
|
let span = global_tracer().start_with_context("duplicates.list_perceptual", &context);
|
||||||
let span_context = opentelemetry::Context::current_with_span(span);
|
let span_context = opentelemetry::Context::current_with_span(span);
|
||||||
|
|
||||||
let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.map(|l| l.id);
|
.map(|l| l.id);
|
||||||
@@ -449,7 +449,7 @@ async fn list_folder_pairs_handler(
|
|||||||
let span = global_tracer().start_with_context("duplicates.list_folder_pairs", &context);
|
let span = global_tracer().start_with_context("duplicates.list_folder_pairs", &context);
|
||||||
let span_context = opentelemetry::Context::current_with_span(span);
|
let span_context = opentelemetry::Context::current_with_span(span);
|
||||||
|
|
||||||
let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.map(|l| l.id);
|
.map(|l| l.id);
|
||||||
|
|||||||
+10
-9
@@ -1755,7 +1755,7 @@ async fn stats_handler<D: FaceDao>(
|
|||||||
let span = global_tracer().start_with_context("faces.stats", &context);
|
let span = global_tracer().start_with_context("faces.stats", &context);
|
||||||
let span_context = opentelemetry::Context::current_with_span(span);
|
let span_context = opentelemetry::Context::current_with_span(span);
|
||||||
|
|
||||||
let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.map(|l| l.id);
|
.map(|l| l.id);
|
||||||
@@ -1782,11 +1782,12 @@ async fn list_faces_handler<D: FaceDao>(
|
|||||||
let normalized_path = normalize_path(&query.path);
|
let normalized_path = normalize_path(&query.path);
|
||||||
// resolve_library_param returns Option<&Library>; clone so the result
|
// resolve_library_param returns Option<&Library>; clone so the result
|
||||||
// is owned (matching the primary_library fallback's type).
|
// is owned (matching the primary_library fallback's type).
|
||||||
let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library: Library =
|
||||||
.ok()
|
libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.flatten()
|
.ok()
|
||||||
.cloned()
|
.flatten()
|
||||||
.unwrap_or_else(|| app_state.primary_library().clone());
|
.cloned()
|
||||||
|
.unwrap_or_else(|| app_state.primary_library().clone());
|
||||||
|
|
||||||
let mut dao = face_dao.lock().expect("face dao lock");
|
let mut dao = face_dao.lock().expect("face dao lock");
|
||||||
let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
|
let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
|
||||||
@@ -1870,7 +1871,7 @@ async fn create_face_handler<D: FaceDao>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let normalized_path = normalize_path(&body.path);
|
let normalized_path = normalize_path(&body.path);
|
||||||
let library: Library = match libraries::resolve_library_param(
|
let library: Library = match libraries::resolve_library_param_state(
|
||||||
&app_state,
|
&app_state,
|
||||||
body.library.as_ref().map(|i| i.to_string()).as_deref(),
|
body.library.as_ref().map(|i| i.to_string()).as_deref(),
|
||||||
) {
|
) {
|
||||||
@@ -2192,7 +2193,7 @@ async fn list_persons_handler<D: FaceDao>(
|
|||||||
let span = global_tracer().start_with_context("persons.list", &context);
|
let span = global_tracer().start_with_context("persons.list", &context);
|
||||||
let span_context = opentelemetry::Context::current_with_span(span);
|
let span_context = opentelemetry::Context::current_with_span(span);
|
||||||
|
|
||||||
let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.map(|l| l.id);
|
.map(|l| l.id);
|
||||||
@@ -2345,7 +2346,7 @@ async fn person_faces_handler<D: FaceDao>(
|
|||||||
let context = extract_context_from_request(&request);
|
let context = extract_context_from_request(&request);
|
||||||
let span = global_tracer().start_with_context("persons.faces", &context);
|
let span = global_tracer().start_with_context("persons.faces", &context);
|
||||||
let span_context = opentelemetry::Context::current_with_span(span);
|
let span_context = opentelemetry::Context::current_with_span(span);
|
||||||
let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.map(|l| l.id);
|
.map(|l| l.id);
|
||||||
|
|||||||
+9
-9
@@ -275,14 +275,14 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
|
|||||||
// Resolve the optional library filter. Unknown values return 400. A
|
// Resolve the optional library filter. Unknown values return 400. A
|
||||||
// `None` result means "union across all libraries" and downstream
|
// `None` result means "union across all libraries" and downstream
|
||||||
// walks iterate every configured library root.
|
// walks iterate every configured library root.
|
||||||
let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref())
|
let library =
|
||||||
{
|
match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
|
||||||
Ok(lib) => lib,
|
Ok(lib) => lib,
|
||||||
Err(msg) => {
|
Err(msg) => {
|
||||||
log::warn!("Rejecting /photos request: {}", msg);
|
log::warn!("Rejecting /photos request: {}", msg);
|
||||||
return HttpResponse::BadRequest().body(msg);
|
return HttpResponse::BadRequest().body(msg);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let span_context = opentelemetry::Context::current_with_span(span);
|
let span_context = opentelemetry::Context::current_with_span(span);
|
||||||
|
|
||||||
@@ -1238,7 +1238,7 @@ pub async fn list_exif_summary(
|
|||||||
// Resolve the library filter up front so a bad id/name 400s before we
|
// Resolve the library filter up front so a bad id/name 400s before we
|
||||||
// ever take the DAO mutex. None == union across all libraries.
|
// ever take the DAO mutex. None == union across all libraries.
|
||||||
let library_filter =
|
let library_filter =
|
||||||
match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) {
|
match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
|
||||||
Ok(lib) => lib.map(|l| l.id),
|
Ok(lib) => lib.map(|l| l.id),
|
||||||
Err(msg) => {
|
Err(msg) => {
|
||||||
span.set_status(Status::error(msg.clone()));
|
span.set_status(Status::error(msg.clone()));
|
||||||
|
|||||||
+172
@@ -1,4 +1,5 @@
|
|||||||
/// Geographic calculation utilities for GPS-based search
|
/// Geographic calculation utilities for GPS-based search
|
||||||
|
use serde::Deserialize;
|
||||||
use std::f64;
|
use std::f64;
|
||||||
|
|
||||||
/// Calculate distance between two GPS coordinates using the Haversine formula.
|
/// Calculate distance between two GPS coordinates using the Haversine formula.
|
||||||
@@ -61,6 +62,140 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Result of forward-geocoding a free-text place query.
///
/// The filter pipeline only understands a *circle* (`gps_lat`/`gps_lon`/
/// `gps_radius_km`), whereas a place may be a single address or an entire
/// country. Nominatim's bounding box is therefore collapsed into a
/// centroid-centred circle that covers it (see [`bbox_to_circle`]), letting
/// "Portland" and "Italy" alike reuse the existing circle filter without a
/// schema change.
#[derive(Debug, Clone, PartialEq)]
pub struct GeoPlace {
    /// Canonical name Nominatim reports for the match (e.g. "Italia").
    pub display_name: String,
    /// Latitude of the centroid, decimal degrees.
    pub lat: f64,
    /// Longitude of the centroid, decimal degrees.
    pub lon: f64,
    /// Radius in km of the centroid-centred circle covering the matched
    /// area. Never below [`MIN_PLACE_RADIUS_KM`], so that a point result
    /// (microscopic bounding box) still produces a usable circle.
    pub radius_km: f64,
}

/// Lower bound applied to a geocoded place's radius. A point result such as
/// a street address has a near-zero bounding box, and without this floor the
/// circle filter would match nothing.
pub const MIN_PLACE_RADIUS_KM: f64 = 0.5;
|
||||||
|
|
||||||
|
/// Collapse a bounding box into the centroid + circumscribing radius.
|
||||||
|
///
|
||||||
|
/// Input is Nominatim's `boundingbox` order: `(south_lat, north_lat,
|
||||||
|
/// west_lon, east_lon)`. The radius is the *largest* great-circle distance
|
||||||
|
/// from the centroid to any of the four corners, so the resulting circle
|
||||||
|
/// fully covers the box. (The corners aren't equidistant on a sphere —
|
||||||
|
/// longitude lines converge toward the poles, so the equator-facing edge's
|
||||||
|
/// corners are farthest; taking the max guarantees coverage in either
|
||||||
|
/// hemisphere.)
|
||||||
|
///
|
||||||
|
/// Pure and exact (no flooring) so it can be unit-tested directly; callers
|
||||||
|
/// apply [`MIN_PLACE_RADIUS_KM`] when turning the result into a filter.
|
||||||
|
pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) {
|
||||||
|
let center_lat = (south + north) / 2.0;
|
||||||
|
let center_lon = (west + east) / 2.0;
|
||||||
|
let radius_km = [(south, west), (south, east), (north, west), (north, east)]
|
||||||
|
.iter()
|
||||||
|
.map(|(clat, clon)| haversine_distance(center_lat, center_lon, *clat, *clon))
|
||||||
|
.fold(0.0_f64, f64::max);
|
||||||
|
(center_lat, center_lon, radius_km)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Raw Nominatim `/search` result. `lat`/`lon` arrive as strings and
|
||||||
|
/// `boundingbox` as a 4-element string array `[south, north, west, east]`.
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct NominatimSearchResult {
|
||||||
|
lat: String,
|
||||||
|
lon: String,
|
||||||
|
display_name: String,
|
||||||
|
boundingbox: Option<[String; 4]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forward-geocode a free-text place name to a [`GeoPlace`] via the public
|
||||||
|
/// OpenStreetMap Nominatim `/search` endpoint.
|
||||||
|
///
|
||||||
|
/// Mirrors `InsightGenerator::reverse_geocode`'s error posture: any network,
|
||||||
|
/// HTTP, or parse failure returns `None` rather than propagating, so a flaky
|
||||||
|
/// geocoder degrades the query to "no location filter" instead of failing it.
|
||||||
|
///
|
||||||
|
/// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1
|
||||||
|
/// request/second; callers doing this interactively should cache results.
|
||||||
|
pub async fn forward_geocode(query: &str) -> Option<GeoPlace> {
|
||||||
|
let q = query.trim();
|
||||||
|
if q.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let response = match client
|
||||||
|
.get("https://nominatim.openstreetmap.org/search")
|
||||||
|
.query(&[("format", "json"), ("limit", "1"), ("q", q)])
|
||||||
|
.header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(resp) => resp,
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("Forward geocoding network error for {q:?}: {e}");
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if !response.status().is_success() {
|
||||||
|
log::warn!(
|
||||||
|
"Forward geocoding HTTP error for {q:?}: {}",
|
||||||
|
response.status()
|
||||||
|
);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let results: Vec<NominatimSearchResult> = match response.json().await {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("Forward geocoding JSON parse error for {q:?}: {e}");
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let top = results.into_iter().next()?;
|
||||||
|
let lat: f64 = top.lat.parse().ok()?;
|
||||||
|
let lon: f64 = top.lon.parse().ok()?;
|
||||||
|
|
||||||
|
// Prefer the bounding box (handles large places); fall back to a
|
||||||
|
// point + floor radius when Nominatim omits it.
|
||||||
|
let (center_lat, center_lon, radius_km) = match &top.boundingbox {
|
||||||
|
Some([s, n, w, e]) => match (s.parse(), n.parse(), w.parse(), e.parse()) {
|
||||||
|
(Ok(s), Ok(n), Ok(w), Ok(e)) => bbox_to_circle(s, n, w, e),
|
||||||
|
_ => (lat, lon, 0.0),
|
||||||
|
},
|
||||||
|
None => (lat, lon, 0.0),
|
||||||
|
};
|
||||||
|
|
||||||
|
let place = GeoPlace {
|
||||||
|
display_name: top.display_name,
|
||||||
|
lat: center_lat,
|
||||||
|
lon: center_lon,
|
||||||
|
radius_km: radius_km.max(MIN_PLACE_RADIUS_KM),
|
||||||
|
};
|
||||||
|
log::info!(
|
||||||
|
"Forward geocoded {q:?} -> {} ({:.4}, {:.4}, r={:.1}km)",
|
||||||
|
place.display_name,
|
||||||
|
place.lat,
|
||||||
|
place.lon,
|
||||||
|
place.radius_km
|
||||||
|
);
|
||||||
|
Some(place)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -118,4 +253,41 @@ mod tests {
|
|||||||
distance
|
distance
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_bbox_to_circle_centroid() {
    // A box placed symmetrically about (10, 20) must report exactly that
    // point as its centre, with a non-degenerate radius.
    let (lat, lon, radius) = bbox_to_circle(9.0, 11.0, 19.0, 21.0);

    let lat_error = (lat - 10.0).abs();
    let lon_error = (lon - 20.0).abs();

    assert!(lat_error < 1e-9, "centroid lat, got {lat}");
    assert!(lon_error < 1e-9, "centroid lon, got {lon}");
    assert!(radius > 0.0, "radius should be positive, got {radius}");
}
|
||||||
|
|
||||||
|
#[test]
fn test_bbox_to_circle_covers_corner() {
    // Coverage check: no corner of the box may lie farther from the
    // centroid than the returned radius (plus a small float tolerance).
    let (south, north, west, east) = (40.0, 42.0, -74.0, -72.0);
    let (lat, lon, radius) = bbox_to_circle(south, north, west, east);

    let limit = radius + 1e-6;
    let corners = [(south, west), (south, east), (north, west), (north, east)];
    for (clat, clon) in corners {
        let d = haversine_distance(lat, lon, clat, clon);
        assert!(
            d <= limit,
            "corner ({clat},{clon}) at {d}km should be within radius {radius}km"
        );
    }
}
|
||||||
|
|
||||||
|
#[test]
fn test_bbox_to_circle_country_vs_city_scale() {
    // The radius has to track place size: a country-scale box should come
    // out more than ten times wider than a city-scale one.
    let country = bbox_to_circle(35.5, 47.1, 6.6, 18.5).2; // ~Italy
    let city = bbox_to_circle(45.4, 45.6, -122.8, -122.5).2; // ~Portland

    let threshold = city * 10.0;
    assert!(
        country > threshold,
        "country radius {country}km should dwarf city radius {city}km"
    );
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ pub async fn get_image(
|
|||||||
|
|
||||||
// Resolve library from query param; default to primary so clients that
|
// Resolve library from query param; default to primary so clients that
|
||||||
// don't yet send `library=` continue to work.
|
// don't yet send `library=` continue to work.
|
||||||
let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) {
|
let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
|
||||||
Ok(Some(lib)) => lib,
|
Ok(Some(lib)) => lib,
|
||||||
Ok(None) => app_state.primary_library(),
|
Ok(None) => app_state.primary_library(),
|
||||||
Err(msg) => {
|
Err(msg) => {
|
||||||
@@ -492,7 +492,7 @@ pub async fn get_file_metadata(
|
|||||||
let span_context =
|
let span_context =
|
||||||
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
||||||
|
|
||||||
let library = libraries::resolve_library_param(&app_state, path.library.as_deref())
|
let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.unwrap_or_else(|| app_state.primary_library());
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
@@ -580,7 +580,7 @@ pub async fn set_image_gps(
|
|||||||
let span_context =
|
let span_context =
|
||||||
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
||||||
|
|
||||||
let library = libraries::resolve_library_param(&app_state, body.library.as_deref())
|
let library = libraries::resolve_library_param_state(&app_state, body.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.unwrap_or_else(|| app_state.primary_library());
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
@@ -746,7 +746,7 @@ pub async fn get_full_exif(
|
|||||||
let context = extract_context_from_request(&request);
|
let context = extract_context_from_request(&request);
|
||||||
let mut span = tracer.start_with_context("get_full_exif", &context);
|
let mut span = tracer.start_with_context("get_full_exif", &context);
|
||||||
|
|
||||||
let library = libraries::resolve_library_param(&app_state, path.library.as_deref())
|
let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.unwrap_or_else(|| app_state.primary_library());
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
@@ -888,7 +888,8 @@ pub async fn set_image_date(
|
|||||||
let span_context =
|
let span_context =
|
||||||
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
||||||
|
|
||||||
let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) {
|
let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref())
|
||||||
|
{
|
||||||
Ok(Some(lib)) => lib,
|
Ok(Some(lib)) => lib,
|
||||||
Ok(None) => app_state.primary_library(),
|
Ok(None) => app_state.primary_library(),
|
||||||
Err(msg) => {
|
Err(msg) => {
|
||||||
@@ -941,7 +942,8 @@ pub async fn clear_image_date(
|
|||||||
let span_context =
|
let span_context =
|
||||||
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
||||||
|
|
||||||
let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) {
|
let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref())
|
||||||
|
{
|
||||||
Ok(Some(lib)) => lib,
|
Ok(Some(lib)) => lib,
|
||||||
Ok(None) => app_state.primary_library(),
|
Ok(None) => app_state.primary_library(),
|
||||||
Err(msg) => {
|
Err(msg) => {
|
||||||
@@ -1001,7 +1003,7 @@ pub async fn upload_image(
|
|||||||
// Resolve the optional library selector. Absent → primary library
|
// Resolve the optional library selector. Absent → primary library
|
||||||
// (backwards-compatible with clients that don't yet send `library=`).
|
// (backwards-compatible with clients that don't yet send `library=`).
|
||||||
let target_library =
|
let target_library =
|
||||||
match libraries::resolve_library_param(&app_state, query.library.as_deref()) {
|
match libraries::resolve_library_param_state(&app_state, query.library.as_deref()) {
|
||||||
Ok(Some(lib)) => lib,
|
Ok(Some(lib)) => lib,
|
||||||
Ok(None) => app_state.primary_library(),
|
Ok(None) => app_state.primary_library(),
|
||||||
Err(msg) => {
|
Err(msg) => {
|
||||||
|
|||||||
@@ -67,10 +67,11 @@ pub async fn generate_video(
|
|||||||
let context = extract_context_from_request(&request);
|
let context = extract_context_from_request(&request);
|
||||||
let mut span = tracer.start_with_context("generate_video", &context);
|
let mut span = tracer.start_with_context("generate_video", &context);
|
||||||
|
|
||||||
let preferred_library = libraries::resolve_library_param(&app_state, body.library.as_deref())
|
let preferred_library =
|
||||||
.ok()
|
libraries::resolve_library_param_state(&app_state, body.library.as_deref())
|
||||||
.flatten()
|
.ok()
|
||||||
.unwrap_or_else(|| app_state.primary_library());
|
.flatten()
|
||||||
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
|
|
||||||
// Try the resolved library first, then fall back to any other library
|
// Try the resolved library first, then fall back to any other library
|
||||||
// that actually contains the file — handles union-mode requests where
|
// that actually contains the file — handles union-mode requests where
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ pub mod tags;
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub mod testhelpers;
|
pub mod testhelpers;
|
||||||
pub mod thumbnails;
|
pub mod thumbnails;
|
||||||
|
pub mod unified_search;
|
||||||
pub mod utils;
|
pub mod utils;
|
||||||
pub mod video;
|
pub mod video;
|
||||||
|
|
||||||
|
|||||||
+42
-37
@@ -291,11 +291,11 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Resolve a library request parameter (accepts numeric id as string or name)
|
/// Resolve a library request parameter (accepts numeric id as string or name)
|
||||||
/// against the configured libraries. Returns `Ok(None)` when the param is
|
/// against a list of libraries. Returns `Ok(None)` when the param is
|
||||||
/// absent, meaning "span all libraries". Returns `Err` when a value is
|
/// absent, meaning "span all libraries". Returns `Err` when a value is
|
||||||
/// provided but does not match any library.
|
/// provided but does not match any library.
|
||||||
pub fn resolve_library_param<'a>(
|
pub fn resolve_library_param<'a>(
|
||||||
state: &'a AppState,
|
libs: &'a [Library],
|
||||||
param: Option<&str>,
|
param: Option<&str>,
|
||||||
) -> Result<Option<&'a Library>, String> {
|
) -> Result<Option<&'a Library>, String> {
|
||||||
let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else {
|
let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else {
|
||||||
@@ -303,18 +303,29 @@ pub fn resolve_library_param<'a>(
|
|||||||
};
|
};
|
||||||
|
|
||||||
if let Ok(id) = raw.parse::<i32>() {
|
if let Ok(id) = raw.parse::<i32>() {
|
||||||
return state
|
return libs
|
||||||
.library_by_id(id)
|
.iter()
|
||||||
|
.find(|l| l.id == id)
|
||||||
.map(Some)
|
.map(Some)
|
||||||
.ok_or_else(|| format!("unknown library id: {}", id));
|
.ok_or_else(|| format!("unknown library id: {}", id));
|
||||||
}
|
}
|
||||||
|
|
||||||
state
|
libs.iter()
|
||||||
.library_by_name(raw)
|
.find(|l| l.name == raw)
|
||||||
.map(Some)
|
.map(Some)
|
||||||
.ok_or_else(|| format!("unknown library name: {}", raw))
|
.ok_or_else(|| format!("unknown library name: {}", raw))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve a library request parameter against the AppState's libraries.
|
||||||
|
/// Returns `Ok(None)` when the param is absent, meaning "span all libraries".
|
||||||
|
/// Returns `Err` when a value is provided but does not match any library.
|
||||||
|
pub fn resolve_library_param_state<'a>(
|
||||||
|
state: &'a AppState,
|
||||||
|
param: Option<&str>,
|
||||||
|
) -> Result<Option<&'a Library>, String> {
|
||||||
|
resolve_library_param(&state.libraries, param)
|
||||||
|
}
|
||||||
|
|
||||||
/// Health of a library at a point in time. Probed at the top of each
|
/// Health of a library at a point in time. Probed at the top of each
|
||||||
/// file-watcher tick. The `Stale` state is the "be conservative" signal:
|
/// file-watcher tick. The `Stale` state is the "be conservative" signal:
|
||||||
/// destructive paths (ingest writes, future move-handoff and orphan GC in
|
/// destructive paths (ingest writes, future move-handoff and orphan GC in
|
||||||
@@ -662,12 +673,6 @@ mod tests {
|
|||||||
assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg"));
|
assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg"));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn state_with_libraries(libs: Vec<Library>) -> AppState {
|
|
||||||
let mut state = AppState::test_state();
|
|
||||||
state.libraries = libs;
|
|
||||||
state
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sample_libraries() -> Vec<Library> {
|
fn sample_libraries() -> Vec<Library> {
|
||||||
vec![
|
vec![
|
||||||
Library {
|
Library {
|
||||||
@@ -687,52 +692,52 @@ mod tests {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[test]
|
||||||
async fn resolve_library_param_absent_is_union() {
|
fn resolve_library_param_absent_is_union() {
|
||||||
let state = state_with_libraries(sample_libraries());
|
let libs = sample_libraries();
|
||||||
assert!(matches!(resolve_library_param(&state, None), Ok(None)));
|
assert!(matches!(resolve_library_param(&libs, None), Ok(None)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[test]
|
||||||
async fn resolve_library_param_empty_or_whitespace_is_union() {
|
fn resolve_library_param_empty_or_whitespace_is_union() {
|
||||||
let state = state_with_libraries(sample_libraries());
|
let libs = sample_libraries();
|
||||||
assert!(matches!(resolve_library_param(&state, Some("")), Ok(None)));
|
assert!(matches!(resolve_library_param(&libs, Some("")), Ok(None)));
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
resolve_library_param(&state, Some(" ")),
|
resolve_library_param(&libs, Some(" ")),
|
||||||
Ok(None)
|
Ok(None)
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[test]
|
||||||
async fn resolve_library_param_numeric_id_matches() {
|
fn resolve_library_param_numeric_id_matches() {
|
||||||
let state = state_with_libraries(sample_libraries());
|
let libs = sample_libraries();
|
||||||
let lib = resolve_library_param(&state, Some("7"))
|
let lib = resolve_library_param(&libs, Some("7"))
|
||||||
.expect("valid id")
|
.expect("valid id")
|
||||||
.expect("some library");
|
.expect("some library");
|
||||||
assert_eq!(lib.id, 7);
|
assert_eq!(lib.id, 7);
|
||||||
assert_eq!(lib.name, "archive");
|
assert_eq!(lib.name, "archive");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[test]
|
||||||
async fn resolve_library_param_name_matches() {
|
fn resolve_library_param_name_matches() {
|
||||||
let state = state_with_libraries(sample_libraries());
|
let libs = sample_libraries();
|
||||||
let lib = resolve_library_param(&state, Some("main"))
|
let lib = resolve_library_param(&libs, Some("main"))
|
||||||
.expect("valid name")
|
.expect("valid name")
|
||||||
.expect("some library");
|
.expect("some library");
|
||||||
assert_eq!(lib.id, 1);
|
assert_eq!(lib.id, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[test]
|
||||||
async fn resolve_library_param_unknown_id_errs() {
|
fn resolve_library_param_unknown_id_errs() {
|
||||||
let state = state_with_libraries(sample_libraries());
|
let libs = sample_libraries();
|
||||||
let err = resolve_library_param(&state, Some("999")).unwrap_err();
|
let err = resolve_library_param(&libs, Some("999")).unwrap_err();
|
||||||
assert!(err.contains("unknown library id"));
|
assert!(err.contains("unknown library id"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[test]
|
||||||
async fn resolve_library_param_unknown_name_errs() {
|
fn resolve_library_param_unknown_name_errs() {
|
||||||
let state = state_with_libraries(sample_libraries());
|
let libs = sample_libraries();
|
||||||
let err = resolve_library_param(&state, Some("missing")).unwrap_err();
|
let err = resolve_library_param(&libs, Some("missing")).unwrap_err();
|
||||||
assert!(err.contains("unknown library name"));
|
assert!(err.contains("unknown library name"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+19
@@ -54,6 +54,7 @@ mod perceptual_hash;
|
|||||||
mod state;
|
mod state;
|
||||||
mod tags;
|
mod tags;
|
||||||
mod thumbnails;
|
mod thumbnails;
|
||||||
|
mod unified_search;
|
||||||
mod utils;
|
mod utils;
|
||||||
mod video;
|
mod video;
|
||||||
mod watcher;
|
mod watcher;
|
||||||
@@ -62,6 +63,7 @@ mod knowledge;
|
|||||||
mod memories;
|
mod memories;
|
||||||
mod otel;
|
mod otel;
|
||||||
mod personas;
|
mod personas;
|
||||||
|
mod reels;
|
||||||
mod service;
|
mod service;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod testhelpers;
|
mod testhelpers;
|
||||||
@@ -266,6 +268,11 @@ fn main() -> std::io::Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Spawn the nightly pre-generation scheduler (Section D).
|
||||||
|
reels::spawn_pregen_scheduler(app_state.clone()).await;
|
||||||
|
// Spawn the on-disk reel-cache sweeper (bounds pre-gen + on-demand reels).
|
||||||
|
reels::spawn_reel_cache_sweeper(app_state.clone()).await;
|
||||||
|
|
||||||
HttpServer::new(move || {
|
HttpServer::new(move || {
|
||||||
let user_dao = SqliteUserDao::new();
|
let user_dao = SqliteUserDao::new();
|
||||||
let favorites_dao = SqliteFavoriteDao::new();
|
let favorites_dao = SqliteFavoriteDao::new();
|
||||||
@@ -327,6 +334,13 @@ fn main() -> std::io::Result<()> {
|
|||||||
web::resource("/photos/search")
|
web::resource("/photos/search")
|
||||||
.route(web::get().to(clip_search::search_photos)),
|
.route(web::get().to(clip_search::search_photos)),
|
||||||
)
|
)
|
||||||
|
.service(
|
||||||
|
// Unified natural-language search: LLM translates the
|
||||||
|
// query into structured filters + a semantic term, then
|
||||||
|
// filters constrain and CLIP ranks. See src/unified_search.rs.
|
||||||
|
web::resource("/photos/search/unified")
|
||||||
|
.route(web::get().to(unified_search::unified_search::<SqliteTagDao>)),
|
||||||
|
)
|
||||||
.service(web::resource("/file/move").post(move_file::<RealFileSystem>))
|
.service(web::resource("/file/move").post(move_file::<RealFileSystem>))
|
||||||
.service(handlers::image::get_image)
|
.service(handlers::image::get_image)
|
||||||
.service(handlers::image::upload_image)
|
.service(handlers::image::upload_image)
|
||||||
@@ -344,6 +358,11 @@ fn main() -> std::io::Result<()> {
|
|||||||
.service(handlers::image::clear_image_date)
|
.service(handlers::image::clear_image_date)
|
||||||
.service(handlers::image::get_full_exif)
|
.service(handlers::image::get_full_exif)
|
||||||
.service(memories::list_memories)
|
.service(memories::list_memories)
|
||||||
|
.service(reels::create_reel_handler)
|
||||||
|
.service(reels::reel_status_handler)
|
||||||
|
.service(reels::reel_video_handler)
|
||||||
|
.service(reels::precomputed_reel_handler)
|
||||||
|
.service(reels::precomputed_video_handler)
|
||||||
.service(ai::generate_insight_handler)
|
.service(ai::generate_insight_handler)
|
||||||
.service(ai::generate_agentic_insight_handler)
|
.service(ai::generate_agentic_insight_handler)
|
||||||
.service(ai::generation_status_handler)
|
.service(ai::generation_status_handler)
|
||||||
|
|||||||
+57
-29
@@ -349,12 +349,6 @@ pub async fn list_memories(
|
|||||||
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
|
||||||
|
|
||||||
let span_mode = q.span.unwrap_or(MemoriesSpan::Day);
|
let span_mode = q.span.unwrap_or(MemoriesSpan::Day);
|
||||||
let span_token = match span_mode {
|
|
||||||
MemoriesSpan::Day => "day",
|
|
||||||
MemoriesSpan::Week => "week",
|
|
||||||
MemoriesSpan::Month => "month",
|
|
||||||
};
|
|
||||||
let years_back: i32 = DEFAULT_YEARS_BACK;
|
|
||||||
|
|
||||||
// The SQL filter expects a signed offset in minutes from UTC; default
|
// The SQL filter expects a signed offset in minutes from UTC; default
|
||||||
// 0 (UTC) when the client didn't send a hint. We also keep a chrono
|
// 0 (UTC) when the client didn't send a hint. We also keep a chrono
|
||||||
@@ -366,18 +360,66 @@ pub async fn list_memories(
|
|||||||
.timezone_offset_minutes
|
.timezone_offset_minutes
|
||||||
.and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60));
|
.and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60));
|
||||||
|
|
||||||
debug!(
|
let items = match gather_memory_items(
|
||||||
"list_memories: span={:?} tz_offset_min={} years_back={}",
|
&app_state,
|
||||||
span_mode, tz_offset_minutes, years_back
|
&exif_dao,
|
||||||
);
|
&span_context,
|
||||||
|
span_mode,
|
||||||
let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) {
|
tz_offset_minutes,
|
||||||
Ok(lib) => lib,
|
client_timezone,
|
||||||
|
q.library.as_deref(),
|
||||||
|
) {
|
||||||
|
Ok(items) => items,
|
||||||
Err(msg) => {
|
Err(msg) => {
|
||||||
warn!("Rejecting /memories request: {}", msg);
|
warn!("Rejecting /memories request: {}", msg);
|
||||||
return HttpResponse::BadRequest().body(msg);
|
return HttpResponse::BadRequest().body(msg);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
span.add_event(
|
||||||
|
"memories_scanned",
|
||||||
|
vec![
|
||||||
|
KeyValue::new("span", format!("{:?}", span_mode)),
|
||||||
|
KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()),
|
||||||
|
KeyValue::new("result_count", items.len().to_string()),
|
||||||
|
KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
|
||||||
|
KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
span.set_status(Status::Ok);
|
||||||
|
|
||||||
|
HttpResponse::Ok().json(MemoriesResponse { items })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve an "on this day/week/month across past years" window into an
|
||||||
|
/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the
|
||||||
|
/// memory-reel selector so both honour the same library resolution, per-library
|
||||||
|
/// exclusions, timezone handling, and sort order. Returns `Err(message)` only
|
||||||
|
/// when the `library` param is invalid (callers map that to 400); per-library
|
||||||
|
/// query/lock failures are logged and skipped, matching the handler's
|
||||||
|
/// best-effort behaviour.
|
||||||
|
pub fn gather_memory_items(
|
||||||
|
app_state: &AppState,
|
||||||
|
exif_dao: &Mutex<Box<dyn ExifDao>>,
|
||||||
|
span_context: &opentelemetry::Context,
|
||||||
|
span_mode: MemoriesSpan,
|
||||||
|
tz_offset_minutes: i32,
|
||||||
|
client_timezone: Option<FixedOffset>,
|
||||||
|
library_param: Option<&str>,
|
||||||
|
) -> Result<Vec<MemoryItem>, String> {
|
||||||
|
let span_token = match span_mode {
|
||||||
|
MemoriesSpan::Day => "day",
|
||||||
|
MemoriesSpan::Week => "week",
|
||||||
|
MemoriesSpan::Month => "month",
|
||||||
|
};
|
||||||
|
let years_back: i32 = DEFAULT_YEARS_BACK;
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"gather_memory_items: span={:?} tz_offset_min={} years_back={}",
|
||||||
|
span_mode, tz_offset_minutes, years_back
|
||||||
|
);
|
||||||
|
|
||||||
|
let library = crate::libraries::resolve_library_param_state(app_state, library_param)?;
|
||||||
let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
|
let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
|
||||||
Some(lib) => vec![lib],
|
Some(lib) => vec![lib],
|
||||||
None => app_state.libraries.iter().collect(),
|
None => app_state.libraries.iter().collect(),
|
||||||
@@ -394,7 +436,7 @@ pub async fn list_memories(
|
|||||||
|
|
||||||
let rows = match exif_dao.lock() {
|
let rows = match exif_dao.lock() {
|
||||||
Ok(mut dao) => match dao.get_memories_in_window(
|
Ok(mut dao) => match dao.get_memories_in_window(
|
||||||
&span_context,
|
span_context,
|
||||||
lib.id,
|
lib.id,
|
||||||
span_token,
|
span_token,
|
||||||
years_back,
|
years_back,
|
||||||
@@ -469,21 +511,7 @@ pub async fn list_memories(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let items: Vec<MemoryItem> = memories_with_dates.into_iter().map(|(m, _)| m).collect();
|
Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect())
|
||||||
|
|
||||||
span.add_event(
|
|
||||||
"memories_scanned",
|
|
||||||
vec![
|
|
||||||
KeyValue::new("span", format!("{:?}", span_mode)),
|
|
||||||
KeyValue::new("years_back", years_back.to_string()),
|
|
||||||
KeyValue::new("result_count", items.len().to_string()),
|
|
||||||
KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
|
|
||||||
KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
|
|
||||||
],
|
|
||||||
);
|
|
||||||
span.set_status(Status::Ok);
|
|
||||||
|
|
||||||
HttpResponse::Ok().json(MemoriesResponse { items })
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
+1568
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,742 @@
|
|||||||
|
//! ffmpeg assembly for memory reels.
|
||||||
|
//!
|
||||||
|
//! Two-stage, per-segment design: each segment is rendered to its own
|
||||||
|
//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments
|
||||||
|
//! are joined with the concat demuxer (stream copy, no re-encode). Rendering
|
||||||
|
//! per segment — rather than one monster filtergraph — keeps each ffmpeg
|
||||||
|
//! invocation simple to reason about, parallelizes naturally, and means a
|
||||||
|
//! video-clip segment type (phase 2) slots in as just a different per-segment
|
||||||
|
//! builder without touching the concat stage.
|
||||||
|
//!
|
||||||
|
//! The arg builders are pure (`Vec<String>` out) so the exact ffmpeg command
|
||||||
|
//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure.
|
||||||
|
|
||||||
|
use anyhow::{Context, Result, bail};
|
||||||
|
use std::path::Path;
|
||||||
|
use tokio::process::Command;
|
||||||
|
|
||||||
|
/// Re-exported so the reel pipeline reaches NVENC detection through this module
|
||||||
|
/// rather than depending on `video::ffmpeg` directly.
|
||||||
|
pub use crate::video::ffmpeg::is_nvenc_available;
|
||||||
|
|
||||||
|
/// Reel canvas. Portrait, because reels are watched on a phone held upright —
|
||||||
|
/// a landscape canvas letterboxes to a thin ~25%-height band there. Each photo
|
||||||
|
/// is fitted sharp and centered over a blurred, zoomed copy of itself (see
|
||||||
|
/// [`photo_filter_chain`]) so the frame is always filled regardless of the
|
||||||
|
/// photo's orientation, without cropping the subject.
|
||||||
|
pub const REEL_WIDTH: u32 = 1080;
|
||||||
|
pub const REEL_HEIGHT: u32 = 1920;
|
||||||
|
pub const REEL_FPS: u32 = 30;
|
||||||
|
|
||||||
|
/// A beat's screen time is its narration length plus a short breath, with a
|
||||||
|
/// floor so a terse line still lingers. No ceiling: the beat always covers the
|
||||||
|
/// full narration so speech is never truncated — the scripter is asked to keep
|
||||||
|
/// lines short instead.
|
||||||
|
pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
|
||||||
|
const NARRATION_TAIL_SECONDS: f64 = 0.6;
|
||||||
|
|
||||||
|
/// Fade durations baked into each photo. A held (single-photo) beat gets a
|
||||||
|
/// gentle dip; burst photos get a much snappier fade so the difference between
|
||||||
|
/// a held shot and a quick burst is obvious.
|
||||||
|
const SINGLE_FADE_SECONDS: f64 = 0.35;
|
||||||
|
const BURST_FADE_SECONDS: f64 = 0.12;
|
||||||
|
|
||||||
|
/// Video-clip framing. Fallback cap on how much of a clip we read when the
|
||||||
|
/// source length can't be probed; with a known length, a clip instead plays for
|
||||||
|
/// as much of its beat as its footage allows (see [`clip_beat_plan`]). Its live
|
||||||
|
/// audio is ducked to `CLIP_DUCK_VOLUME` under the narration.
|
||||||
|
pub const CLIP_SECONDS: f64 = 5.0;
|
||||||
|
const CLIP_DUCK_VOLUME: f64 = 0.35;
|
||||||
|
|
||||||
|
/// Floor on how long each burst photo stays up, so a long line over many photos
|
||||||
|
/// doesn't flash them subliminally. If the narration is too short to give every
|
||||||
|
/// photo this much, the beat is stretched to fit.
|
||||||
|
const MIN_BURST_PHOTO_SECONDS: f64 = 0.6;
|
||||||
|
|
||||||
|
/// Base screen time for a beat given its narration length: narration + breath,
|
||||||
|
/// floored. Used as the lower bound on a beat's total duration.
|
||||||
|
pub fn segment_duration(narration_secs: f64) -> f64 {
|
||||||
|
let d = narration_secs + NARRATION_TAIL_SECONDS;
|
||||||
|
if d.is_finite() && d > MIN_SEGMENT_SECONDS {
|
||||||
|
d
|
||||||
|
} else {
|
||||||
|
MIN_SEGMENT_SECONDS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Split a beat into per-photo durations. The beat lasts at least its narration
|
||||||
|
/// (so speech isn't cut) and at least `n × MIN_BURST_PHOTO_SECONDS` (so a fast
|
||||||
|
/// burst stays legible); the photos share that total evenly. Returns
|
||||||
|
/// `(total_seconds, per_photo_seconds)`.
|
||||||
|
pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec<f64>) {
|
||||||
|
let n = n_photos.max(1);
|
||||||
|
let base = segment_duration(narration_secs);
|
||||||
|
let min_total = n as f64 * MIN_BURST_PHOTO_SECONDS;
|
||||||
|
let total = if base > min_total { base } else { min_total };
|
||||||
|
let each = total / n as f64;
|
||||||
|
(total, vec![each; n])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fade length to use for a beat of `n_photos` (gentle when held, snappy in a
|
||||||
|
/// burst).
|
||||||
|
fn fade_for(n_photos: usize) -> f64 {
|
||||||
|
if n_photos > 1 {
|
||||||
|
BURST_FADE_SECONDS
|
||||||
|
} else {
|
||||||
|
SINGLE_FADE_SECONDS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Options controlling per-segment rendering.
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub struct SegmentOpts {
|
||||||
|
pub width: u32,
|
||||||
|
pub height: u32,
|
||||||
|
pub fps: u32,
|
||||||
|
pub nvenc: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for SegmentOpts {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
width: REEL_WIDTH,
|
||||||
|
height: REEL_HEIGHT,
|
||||||
|
fps: REEL_FPS,
|
||||||
|
nvenc: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Filter chain for one photo (input `idx`) producing the labelled output
|
||||||
|
/// `[v{idx}]`. Splits the still into a background and foreground: the background
|
||||||
|
/// is scaled to *cover* the canvas and heavily blurred; the foreground is
|
||||||
|
/// scaled to *fit* and overlaid centered. This fills the portrait frame for any
|
||||||
|
/// photo orientation — no black bars, no cropping of the subject — then a fade
|
||||||
|
/// in/out softens the cut. Intermediate labels are suffixed with `idx` so
|
||||||
|
/// several chains coexist in one `filter_complex`.
|
||||||
|
///
|
||||||
|
/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a
|
||||||
|
/// true {fps}-frame timeline; otherwise the fade is sampled at the looped
|
||||||
|
/// still's coarse cadence and duplicated up, which reads as a steppy dip.
|
||||||
|
fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String {
|
||||||
|
let (w, h, fps) = (opts.width, opts.height, opts.fps);
|
||||||
|
let fade_out_start = (duration - fade).max(0.0);
|
||||||
|
format!(
|
||||||
|
"[{idx}:v]split=2[bg{idx}][fg{idx}];\
|
||||||
|
[bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\
|
||||||
|
crop={w}:{h},boxblur=20:2[bgb{idx}];\
|
||||||
|
[fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\
|
||||||
|
[bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\
|
||||||
|
fps={fps},\
|
||||||
|
fade=t=in:st=0:d={fade},\
|
||||||
|
fade=t=out:st={fade_out_start:.3}:d={fade},\
|
||||||
|
setsar=1,format=yuv420p[v{idx}]"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Full `filter_complex` for a beat of `per_photo` durations: one chain per
|
||||||
|
/// photo, concatenated into `[v]`, with the narration (the last input, index
|
||||||
|
/// `per_photo.len()`) padded with trailing silence into `[a]`. A single-photo
|
||||||
|
/// beat degenerates to one chain + `concat=n=1` (a passthrough).
|
||||||
|
pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String {
|
||||||
|
let n = per_photo.len().max(1);
|
||||||
|
let fade = fade_for(n);
|
||||||
|
let chains: Vec<String> = per_photo
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, &d)| photo_filter_chain(i, opts, d, fade))
|
||||||
|
.collect();
|
||||||
|
let concat_inputs: String = (0..n).map(|i| format!("[v{i}]")).collect();
|
||||||
|
format!(
|
||||||
|
"{chains};{concat_inputs}concat=n={n}:v=1:a=0[v];[{n}:a]apad[a]",
|
||||||
|
chains = chains.join(";")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Encoder flags for the video stream: `h264_nvenc` (preset p4, cq 23) when
/// `nvenc` is set, otherwise `libx264` (crf 21, veryfast). Both variants pin
/// the pixel format to `yuv420p`.
fn video_encoder_args(nvenc: bool) -> Vec<String> {
    // p4 is the balanced NVENC preset; cq 23 is roughly libx264 crf 21.
    let flags: &[&str] = if nvenc {
        &[
            "-c:v",
            "h264_nvenc",
            "-preset",
            "p4",
            "-cq",
            "23",
            "-pix_fmt",
            "yuv420p",
        ]
    } else {
        &[
            "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p",
        ]
    };
    flags.iter().copied().map(String::from).collect()
}
|
||||||
|
|
||||||
|
/// Build the ffmpeg args that render one beat: each photo looped for its slice
|
||||||
|
/// of the beat (filled to the portrait canvas with a blurred backdrop), the
|
||||||
|
/// slices concatenated, and the single narration muxed over the whole thing.
|
||||||
|
/// `total` bounds the output (and the apad'd audio) to the beat length.
|
||||||
|
pub fn build_beat_args(
|
||||||
|
image_paths: &[String],
|
||||||
|
audio_path: &str,
|
||||||
|
out_path: &str,
|
||||||
|
per_photo: &[f64],
|
||||||
|
total: f64,
|
||||||
|
opts: &SegmentOpts,
|
||||||
|
) -> Vec<String> {
|
||||||
|
let fps = opts.fps.to_string();
|
||||||
|
let mut args: Vec<String> = vec!["-y".into()];
|
||||||
|
if opts.nvenc {
|
||||||
|
args.extend(["-hwaccel".into(), "cuda".into()]);
|
||||||
|
}
|
||||||
|
// One looped-still input per photo, each bounded to its slice by an input
|
||||||
|
// `-t`; reading at the target `-framerate` gives the fades real frames to
|
||||||
|
// ramp across.
|
||||||
|
for (path, &dur) in image_paths.iter().zip(per_photo.iter()) {
|
||||||
|
args.extend([
|
||||||
|
"-framerate".into(),
|
||||||
|
fps.clone(),
|
||||||
|
"-loop".into(),
|
||||||
|
"1".into(),
|
||||||
|
"-t".into(),
|
||||||
|
format!("{dur:.3}"),
|
||||||
|
"-i".into(),
|
||||||
|
path.clone(),
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
args.extend([
|
||||||
|
"-i".into(),
|
||||||
|
audio_path.into(),
|
||||||
|
"-filter_complex".into(),
|
||||||
|
beat_filtergraph(opts, per_photo),
|
||||||
|
"-map".into(),
|
||||||
|
"[v]".into(),
|
||||||
|
"-map".into(),
|
||||||
|
"[a]".into(),
|
||||||
|
"-t".into(),
|
||||||
|
format!("{total:.3}"),
|
||||||
|
// Force constant frame rate so the beat (and the concatenated reel)
|
||||||
|
// plays at a steady {fps} rather than a variable cadence.
|
||||||
|
"-r".into(),
|
||||||
|
fps,
|
||||||
|
]);
|
||||||
|
args.extend(video_encoder_args(opts.nvenc));
|
||||||
|
args.extend(
|
||||||
|
["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string()),
|
||||||
|
);
|
||||||
|
args.push(out_path.into());
|
||||||
|
args
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the ffmpeg args that join already-rendered segments with the concat
/// demuxer, copying streams instead of re-encoding.
///
/// `list_path` is the concat list file and `out_path` the destination.
/// `+faststart` relocates the moov atom to the front so playback can begin
/// before the whole file has arrived. The muxer is forced with `-f mp4`
/// because the destination is a `.tmp` path (atomic publish) whose extension
/// ffmpeg cannot map to a format on its own.
pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec<String> {
    let input_side = ["-y", "-f", "concat", "-safe", "0", "-i", list_path];
    let output_side = [
        "-c",
        "copy",
        "-movflags",
        "+faststart",
        "-f",
        "mp4",
        out_path,
    ];
    input_side
        .iter()
        .chain(output_side.iter())
        .map(|flag| String::from(*flag))
        .collect()
}
|
||||||
|
|
||||||
|
/// Produce the body of the concat list file: one `file '<path>'` line per
/// segment, in order, each terminated by a newline. Single quotes inside a
/// path are escaped as `'\''`, as ffmpeg's concat syntax requires.
pub fn build_concat_list(segment_paths: &[String]) -> String {
    segment_paths
        .iter()
        .map(|path| format!("file '{}'\n", path.replace('\'', r"'\''")))
        .collect()
}
|
||||||
|
|
||||||
|
async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
|
||||||
|
let output = Command::new("ffmpeg")
|
||||||
|
.args(args)
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("spawning ffmpeg for {what}"))?;
|
||||||
|
if !output.status.success() {
|
||||||
|
bail!(
|
||||||
|
"ffmpeg {what} failed: {}",
|
||||||
|
String::from_utf8_lossy(&output.stderr)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render one beat to `out_path`: its photos shown in sequence (a held shot for
|
||||||
|
/// one photo, a quick burst for several) under the single narration in
|
||||||
|
/// `audio_path`, whose measured length sets the beat's pacing.
|
||||||
|
pub async fn render_beat(
|
||||||
|
image_paths: &[std::path::PathBuf],
|
||||||
|
audio_path: &Path,
|
||||||
|
out_path: &Path,
|
||||||
|
narration_secs: f64,
|
||||||
|
opts: &SegmentOpts,
|
||||||
|
) -> Result<()> {
|
||||||
|
if image_paths.is_empty() {
|
||||||
|
bail!("render_beat called with no images");
|
||||||
|
}
|
||||||
|
let (total, per_photo) = beat_durations(narration_secs, image_paths.len());
|
||||||
|
let paths: Vec<String> = image_paths
|
||||||
|
.iter()
|
||||||
|
.map(|p| p.to_string_lossy().to_string())
|
||||||
|
.collect();
|
||||||
|
let args = build_beat_args(
|
||||||
|
&paths,
|
||||||
|
&audio_path.to_string_lossy(),
|
||||||
|
&out_path.to_string_lossy(),
|
||||||
|
&per_photo,
|
||||||
|
total,
|
||||||
|
opts,
|
||||||
|
);
|
||||||
|
run_ffmpeg(&args, "beat render").await
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Video-clip beats --------------------------------------------------------
|
||||||
|
|
||||||
|
/// Decide how long the clip plays and how long the whole beat lasts, from the
|
||||||
|
/// source video's length (if known) and the narration length. Returns
|
||||||
|
/// `(clip_dur, beat_total)`.
|
||||||
|
///
|
||||||
|
/// The beat always lasts long enough for the full narration. The clip plays for
|
||||||
|
/// as much of that beat as its footage covers — so the motion fills the screen
|
||||||
|
/// time rather than stopping early. We only freeze the last frame (the
|
||||||
|
/// `beat_total - clip_dur` gap, handled by `tpad` in [`clip_video_filter`]) when
|
||||||
|
/// the source video is genuinely shorter than the narration. Capping clip
|
||||||
|
/// playback at a fixed length while the narration ran longer was what produced
|
||||||
|
/// the second-or-two freeze that read as a glitchy pause before the transition.
|
||||||
|
pub fn clip_beat_plan(source_dur: Option<f64>, narration_secs: f64) -> (f64, f64) {
|
||||||
|
let want = segment_duration(narration_secs);
|
||||||
|
let clip_dur = match source_dur {
|
||||||
|
// Known length: play up to the whole beat, but never past the source.
|
||||||
|
Some(d) if d > 0.0 => d.min(want),
|
||||||
|
// Unknown length: read up to the fallback cap; tpad covers any shortfall.
|
||||||
|
_ => want.min(CLIP_SECONDS),
|
||||||
|
};
|
||||||
|
(clip_dur, want.max(clip_dur))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Video chain for a clip beat: fill the clip to the portrait canvas (blurred
|
||||||
|
/// backdrop, same look as photos), normalize fps, hold the last frame if the
|
||||||
|
/// narration outlasts the clip (`tpad`), then fade. Produces `[v]`.
|
||||||
|
fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String {
|
||||||
|
let (w, h, fps) = (opts.width, opts.height, opts.fps);
|
||||||
|
let fade = SINGLE_FADE_SECONDS;
|
||||||
|
let hold = (beat_total - clip_dur).max(0.0);
|
||||||
|
let fade_out_start = (beat_total - fade).max(0.0);
|
||||||
|
// Freeze the final frame to cover narration that runs past the clip.
|
||||||
|
let tpad = if hold > 0.05 {
|
||||||
|
format!(",tpad=stop_mode=clone:stop_duration={hold:.3}")
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
format!(
|
||||||
|
"[0:v]split=2[bg][fg];\
|
||||||
|
[bg]scale={w}:{h}:force_original_aspect_ratio=increase,\
|
||||||
|
crop={w}:{h},boxblur=20:2[bgb];\
|
||||||
|
[fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\
|
||||||
|
[bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\
|
||||||
|
fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\
|
||||||
|
setsar=1,format=yuv420p[v]"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Audio chain for a clip beat. With a clip audio track, duck it under the
|
||||||
|
/// narration and mix; without one, just the narration. Produces `[a]`.
|
||||||
|
fn clip_audio_filter(has_audio: bool) -> String {
|
||||||
|
if has_audio {
|
||||||
|
format!(
|
||||||
|
"[0:a]volume={CLIP_DUCK_VOLUME}[duck];[1:a]apad[narr];\
|
||||||
|
[duck][narr]amix=inputs=2:duration=longest:normalize=0[a]"
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
"[1:a]apad[a]".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Full `filter_complex` for a clip beat (input 0 = clip, input 1 = narration).
|
||||||
|
pub fn clip_beat_filtergraph(
|
||||||
|
opts: &SegmentOpts,
|
||||||
|
clip_dur: f64,
|
||||||
|
beat_total: f64,
|
||||||
|
has_audio: bool,
|
||||||
|
) -> String {
|
||||||
|
format!(
|
||||||
|
"{};{}",
|
||||||
|
clip_video_filter(opts, clip_dur, beat_total),
|
||||||
|
clip_audio_filter(has_audio)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the ffmpeg args for a clip beat: the first `clip_dur` seconds of the
|
||||||
|
/// source video, filled to the portrait canvas with its live audio ducked under
|
||||||
|
/// the narration, bounded to `beat_total`.
|
||||||
|
pub fn build_clip_beat_args(
|
||||||
|
clip_path: &str,
|
||||||
|
audio_path: &str,
|
||||||
|
out_path: &str,
|
||||||
|
clip_dur: f64,
|
||||||
|
beat_total: f64,
|
||||||
|
has_audio: bool,
|
||||||
|
opts: &SegmentOpts,
|
||||||
|
) -> Vec<String> {
|
||||||
|
let fps = opts.fps.to_string();
|
||||||
|
let mut args: Vec<String> = vec!["-y".into()];
|
||||||
|
if opts.nvenc {
|
||||||
|
args.extend(["-hwaccel".into(), "cuda".into()]);
|
||||||
|
}
|
||||||
|
args.extend([
|
||||||
|
// Input `-t` limits the clip to its window; audio has none (apad fills).
|
||||||
|
"-t".into(),
|
||||||
|
format!("{clip_dur:.3}"),
|
||||||
|
"-i".into(),
|
||||||
|
clip_path.into(),
|
||||||
|
"-i".into(),
|
||||||
|
audio_path.into(),
|
||||||
|
"-filter_complex".into(),
|
||||||
|
clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio),
|
||||||
|
"-map".into(),
|
||||||
|
"[v]".into(),
|
||||||
|
"-map".into(),
|
||||||
|
"[a]".into(),
|
||||||
|
"-t".into(),
|
||||||
|
format!("{beat_total:.3}"),
|
||||||
|
"-r".into(),
|
||||||
|
fps,
|
||||||
|
]);
|
||||||
|
args.extend(video_encoder_args(opts.nvenc));
|
||||||
|
args.extend(
|
||||||
|
["-c:a", "aac", "-b:a", "160k", "-ar", "48000"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string()),
|
||||||
|
);
|
||||||
|
args.push(out_path.into());
|
||||||
|
args
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether a media file has at least one audio stream (so a clip beat knows
|
||||||
|
/// whether to mix in live audio). Defaults to `false` on any probe failure.
|
||||||
|
pub async fn has_audio_stream(path: &str) -> bool {
|
||||||
|
Command::new("ffprobe")
|
||||||
|
.args([
|
||||||
|
"-v",
|
||||||
|
"error",
|
||||||
|
"-select_streams",
|
||||||
|
"a",
|
||||||
|
"-show_entries",
|
||||||
|
"stream=index",
|
||||||
|
"-of",
|
||||||
|
"csv=p=0",
|
||||||
|
path,
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
.map(|out| !out.stdout.is_empty())
|
||||||
|
.unwrap_or(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render one clip beat: a section of `clip_path` (capped at [`CLIP_SECONDS`],
|
||||||
|
/// and to the source length) under the narration in `audio_path`. The beat
|
||||||
|
/// lasts at least the narration, freezing the clip's last frame if needed.
|
||||||
|
pub async fn render_clip_beat(
|
||||||
|
clip_path: &Path,
|
||||||
|
audio_path: &Path,
|
||||||
|
out_path: &Path,
|
||||||
|
narration_secs: f64,
|
||||||
|
opts: &SegmentOpts,
|
||||||
|
) -> Result<()> {
|
||||||
|
let clip_str = clip_path.to_string_lossy().to_string();
|
||||||
|
// Play the clip for as much of the beat as its footage covers; freeze only
|
||||||
|
// when the source is genuinely shorter than the narration (see clip_beat_plan).
|
||||||
|
let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str)
|
||||||
|
.await
|
||||||
|
.ok()
|
||||||
|
.flatten();
|
||||||
|
let (clip_dur, beat_total) = clip_beat_plan(source_dur, narration_secs);
|
||||||
|
let has_audio = has_audio_stream(&clip_str).await;
|
||||||
|
|
||||||
|
let args = build_clip_beat_args(
|
||||||
|
&clip_str,
|
||||||
|
&audio_path.to_string_lossy(),
|
||||||
|
&out_path.to_string_lossy(),
|
||||||
|
clip_dur,
|
||||||
|
beat_total,
|
||||||
|
has_audio,
|
||||||
|
opts,
|
||||||
|
);
|
||||||
|
run_ffmpeg(&args, "clip beat render").await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Join rendered segments into the final reel. Writes the concat list into the
|
||||||
|
/// same directory as the output so relative paths and cleanup stay local.
|
||||||
|
pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> {
|
||||||
|
let list_path = out_path.with_extension("concat.txt");
|
||||||
|
let body = build_concat_list(segment_paths);
|
||||||
|
tokio::fs::write(&list_path, body)
|
||||||
|
.await
|
||||||
|
.context("writing concat list")?;
|
||||||
|
let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy());
|
||||||
|
let result = run_ffmpeg(&args, "concat").await;
|
||||||
|
let _ = tokio::fs::remove_file(&list_path).await;
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Float equality within the tolerance used throughout these tests.
    fn close(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < 1e-9
    }

    #[test]
    fn segment_duration_floors_short_lines() {
        // A one-word (or empty) narration still lingers at the floor.
        for secs in [0.5, 0.0] {
            assert_eq!(segment_duration(secs), MIN_SEGMENT_SECONDS);
        }
    }

    #[test]
    fn segment_duration_covers_full_narration_plus_tail() {
        // No ceiling: a long line gets its full length so speech isn't cut.
        assert!(close(segment_duration(5.0), 5.6));
        assert!(close(segment_duration(20.0), 20.6));
    }

    #[test]
    fn segment_duration_rejects_nonfinite() {
        for secs in [f64::NAN, f64::INFINITY] {
            assert_eq!(segment_duration(secs), MIN_SEGMENT_SECONDS);
        }
    }

    #[test]
    fn beat_durations_single_photo_matches_base() {
        let (total, per) = beat_durations(4.0, 1);
        assert!(close(total, 4.6)); // narration + tail
        assert_eq!(per.len(), 1);
        assert!(close(per[0], 4.6));
    }

    #[test]
    fn beat_durations_burst_splits_evenly() {
        // 5 photos, narration 4.6s base → ~0.92s each (above the 0.6 floor).
        let (total, per) = beat_durations(4.0, 5);
        assert!(close(total, 4.6));
        assert_eq!(per.len(), 5);
        assert!(close(per.iter().sum::<f64>(), total));
        assert!(per.iter().all(|&slice| slice >= MIN_BURST_PHOTO_SECONDS));
    }

    #[test]
    fn beat_durations_stretches_when_narration_too_short_for_burst() {
        // Floor narration (2.5s) over 10 photos would be 0.25s each — below the
        // legibility floor, so the beat stretches to 10 × 0.6 = 6s.
        let (total, per) = beat_durations(0.0, 10);
        assert!(close(total, 6.0));
        assert!(per.iter().all(|&slice| close(slice, 0.6)));
    }

    #[test]
    fn beat_filtergraph_single_photo_fills_portrait_and_holds() {
        let (_, per) = beat_durations(4.0, 1);
        let graph = beat_filtergraph(&SegmentOpts::default(), &per);
        let expected = [
            "[0:v]split=2[bg0][fg0]",
            "scale=1080:1920:force_original_aspect_ratio=increase",
            "crop=1080:1920",
            "scale=1080:1920:force_original_aspect_ratio=decrease",
            "overlay=(W-w)/2:(H-h)/2",
            // Single photo → concat of one, gentle fade, audio is input 1.
            "concat=n=1:v=1:a=0[v]",
            "d=0.35", // SINGLE_FADE
            "[1:a]apad[a]",
        ];
        for needle in expected {
            assert!(graph.contains(needle));
        }
    }

    #[test]
    fn beat_filtergraph_burst_chains_concats_and_snappy_fade() {
        let (_, per) = beat_durations(4.0, 3);
        let graph = beat_filtergraph(&SegmentOpts::default(), &per);
        // One chain per photo with index-suffixed labels.
        for split in ["[0:v]split", "[1:v]split", "[2:v]split"] {
            assert!(graph.contains(split));
        }
        // Concatenated in order, audio is the 4th input (index 3).
        assert!(graph.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]"));
        assert!(graph.contains("[3:a]apad[a]"));
        // Burst uses the much snappier fade (vs 0.35 for a held shot).
        assert!(graph.contains("d=0.12"));
        assert!(!graph.contains("d=0.35"));
    }

    #[test]
    fn beat_filtergraph_normalizes_fps_before_fading() {
        // fps must precede the fades on every chain (else the dip looks steppy).
        let (_, per) = beat_durations(4.0, 1);
        let graph = beat_filtergraph(&SegmentOpts::default(), &per);
        let fps_pos = graph.find("fps=30").expect("fps in graph");
        let fade_pos = graph.find("fade=t=in").expect("fade in graph");
        assert!(fps_pos < fade_pos);
    }

    #[test]
    fn beat_args_one_input_per_photo_plus_audio_bound_by_total() {
        let (total, per) = beat_durations(4.0, 2);
        let cmdline = build_beat_args(
            &["/a.jpg".into(), "/b.jpg".into()],
            "/n.wav",
            "/out.mp4",
            &per,
            total,
            &SegmentOpts::default(),
        )
        .join(" ");
        // A looped-still input per photo, each with its slice -t, then the audio.
        assert!(cmdline.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg"));
        assert!(cmdline.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg"));
        assert!(cmdline.contains("-i /n.wav"));
        // Output bounded to the beat total and forced CFR.
        assert!(cmdline.contains("-t 4.600"));
        assert!(cmdline.contains("-r 30"));
        assert!(cmdline.ends_with("/out.mp4"));
    }

    #[test]
    fn beat_args_use_nvenc_and_cuda_when_enabled() {
        let opts = SegmentOpts {
            nvenc: true,
            ..SegmentOpts::default()
        };
        let (total, per) = beat_durations(3.0, 1);
        let cmdline = build_beat_args(
            &["/img.jpg".into()],
            "/a.wav",
            "/out.mp4",
            &per,
            total,
            &opts,
        )
        .join(" ");
        assert!(cmdline.contains("-hwaccel cuda"));
        assert!(cmdline.contains("h264_nvenc"));
        assert!(!cmdline.contains("libx264"));
    }

    #[test]
    fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() {
        // 5s clip, 7s beat → 2s freeze of the last frame, ducked-audio mix.
        let graph = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true);
        let expected = [
            "tpad=stop_mode=clone:stop_duration=2.000",
            "volume=0.35",
            "amix=inputs=2",
            "[1:a]apad[narr]",
            // Fill applied to the clip too.
            "boxblur",
            "overlay=(W-w)/2:(H-h)/2",
        ];
        for needle in expected {
            assert!(graph.contains(needle));
        }
    }

    #[test]
    fn clip_beat_plan_plays_clip_through_the_whole_beat_when_source_is_long() {
        // 30s source, 4s narration → beat is narration+tail (4.6), and the clip
        // plays that whole 4.6s of motion: no freeze (clip_dur == beat_total).
        let (clip_dur, beat_total) = clip_beat_plan(Some(30.0), 4.0);
        assert!(close(beat_total, 4.6));
        assert!(close(clip_dur, 4.6));
        assert!(close(beat_total, clip_dur)); // no hold
    }

    #[test]
    fn clip_beat_plan_freezes_only_when_source_shorter_than_narration() {
        // 2s source under a 4s narration → play all 2s, freeze the remainder.
        let (clip_dur, beat_total) = clip_beat_plan(Some(2.0), 4.0);
        assert!(close(clip_dur, 2.0));
        assert!(close(beat_total, 4.6));
        assert!(beat_total - clip_dur > 2.0); // unavoidable freeze gap
    }

    #[test]
    fn clip_beat_plan_caps_read_when_source_length_unknown() {
        // Probe failed: read up to the fallback cap, beat still covers narration.
        let (clip_dur, beat_total) = clip_beat_plan(None, 8.0);
        assert!(close(clip_dur, CLIP_SECONDS));
        assert!(close(beat_total, 8.6));
    }

    #[test]
    fn clip_filter_no_tpad_when_clip_covers_the_beat() {
        // Clip at least as long as the beat → no freeze.
        let graph = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true);
        assert!(!graph.contains("tpad"));
    }

    #[test]
    fn clip_filter_narration_only_without_clip_audio() {
        let graph = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false);
        assert!(!graph.contains("amix"));
        assert!(!graph.contains("volume="));
        assert!(graph.contains("[1:a]apad[a]"));
    }

    #[test]
    fn clip_beat_args_bound_clip_and_output() {
        let cmdline = build_clip_beat_args(
            "/v.mp4",
            "/n.wav",
            "/out.mp4",
            5.0,
            6.6,
            true,
            &SegmentOpts::default(),
        )
        .join(" ");
        // Input -t bounds the clip read; output -t bounds the beat.
        assert!(cmdline.contains("-t 5.000 -i /v.mp4"));
        assert!(cmdline.contains("-i /n.wav"));
        assert!(cmdline.contains("-t 6.600"));
        assert!(cmdline.contains("-r 30"));
        assert!(cmdline.ends_with("/out.mp4"));
    }

    #[test]
    fn concat_args_stream_copy_with_faststart_and_forced_muxer() {
        // Output goes to a .tmp path, so the muxer must be forced — ffmpeg
        // can't infer mp4 from the extension (the bug this guards against).
        let args = build_concat_args("/tmp/list.txt", "/out.mp4.tmp");
        let cmdline = args.join(" ");
        for needle in [
            "-f concat -safe 0 -i /tmp/list.txt",
            "-c copy",
            "+faststart",
            "-f mp4",
        ] {
            assert!(cmdline.contains(needle));
        }
        // The forced muxer must come before the output path.
        let muxer_at = args.windows(2).position(|w| w == ["-f", "mp4"]).unwrap();
        let output_at = args.iter().position(|a| a == "/out.mp4.tmp").unwrap();
        assert!(muxer_at < output_at);
    }

    #[test]
    fn concat_list_escapes_single_quotes() {
        let listing = build_concat_list(&[
            "/tmp/seg_000.mp4".into(),
            "/tmp/own's dir/seg_001.mp4".into(),
        ]);
        assert!(listing.contains("file '/tmp/seg_000.mp4'\n"));
        // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax.
        assert!(listing.contains(r"own'\''s"));
    }
}
|
||||||
@@ -0,0 +1,491 @@
|
|||||||
|
//! Narration scripting for memory reels.
|
||||||
|
//!
|
||||||
|
//! One LLM call turns the planned beats (each carrying its date and, where
|
||||||
|
//! available, its cached insight) into a short first-person narration line per
|
||||||
|
//! beat plus a title for the reel. A beat may show several photos in a quick
|
||||||
|
//! burst, so a line narrates the *moment*, not a single frame. We reuse the
|
||||||
|
//! cached insight summary as the richest signal rather than re-running vision
|
||||||
|
//! at reel time — that keeps reel generation off the GPU's vision slot.
|
||||||
|
//!
|
||||||
|
//! The prompt builder and response parser are pure so the contract is
|
||||||
|
//! unit-testable; `generate_script` wires them to the LLM client.
|
||||||
|
//!
|
||||||
|
//! The agentic scripter (pre-generation) resolves the backend through the
|
||||||
|
//! InsightGenerator, builds a read-only tool set, and runs a tool loop to
|
||||||
|
//! ground the narration in retrieved context before asking for the final JSON.
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use super::{PlannedBeat, ReelMeta};
|
||||||
|
use crate::ai::backend::{BackendKind, SamplingOverrides};
|
||||||
|
use crate::ai::insight_generator::InsightGenerator;
|
||||||
|
use crate::ai::llamacpp::LlamaCppClient;
|
||||||
|
use crate::ai::llm_client::{LlmClient, Tool};
|
||||||
|
use crate::ai::ollama::ChatMessage;
|
||||||
|
|
||||||
|
/// Narration for an entire reel: its title plus one spoken line per beat.
#[derive(Debug, Clone, PartialEq)]
pub struct ReelScript {
    /// Short title for the reel.
    pub title: String,
    /// One narration line per beat, in the order the beats play.
    pub lines: Vec<String>,
}
|
||||||
|
|
||||||
|
/// System prompt for the plain (non-agentic) scripter: sets the narrator's
/// voice and forbids invented details. Laid out one sentence per group; the
/// trailing `\` continuations join everything into a single line at runtime.
const SYSTEM_PROMPT: &str = "\
    You are narrating a personal memory reel — a short slideshow of someone's own \
    photos set to a spoken voiceover. \
    Write warm, specific, first-person narration as if the person is gently looking \
    back on their own memories. \
    Each line plays over one moment, which may be a quick burst of several photos, \
    so narrate the moment as a whole rather than a single frame. \
    Be concrete and grounded in the details given; never invent names, places, or \
    events that aren't supported. \
    Keep each line to one or two short sentences that can be read aloud in a few \
    seconds. \
    Avoid generic filler like \"what a wonderful day\" — if you have little to go \
    on, simply describe the moment plainly.";
|
||||||
|
|
||||||
|
/// System prompt for the agentic scripter. It repeats the plain scripter's
/// narrator instructions, then adds a second paragraph listing the read-only
/// tools the model may call to ground each line and restating the JSON-only
/// output rule. Laid out one sentence per group; the trailing `\`
/// continuations join each paragraph into a single line at runtime.
const AGENTIC_SYSTEM_PROMPT: &str = "\
    You are narrating a personal memory reel — a short slideshow of someone's own \
    photos set to a spoken voiceover. \
    Write warm, specific, first-person narration as if the person is gently looking \
    back on their own memories. \
    Each line plays over one moment, which may be a quick burst of several photos, \
    so narrate the moment as a whole rather than a single frame. \
    Be concrete and grounded in the details given; never invent names, places, or \
    events that aren't supported. \
    Keep each line to one or two short sentences that can be read aloud in a few \
    seconds. \
    Avoid generic filler like \"what a wonderful day\" — if you have little to go \
    on, simply describe the moment plainly.\n\n\
    You may call read-only tools (search_rag, search_messages, get_sms_messages, \
    get_calendar_events, get_location_history, reverse_geocode, \
    get_personal_place_at, recall_entities, get_current_datetime) to ground each \
    line in real context — e.g. reverse_geocode a moment's GPS to name the place, \
    or check the calendar/messages around its date. \
    Never invent details. \
    Return ONLY the JSON object, no prose or code fences.";
|
||||||
|
|
||||||
|
/// Upper bound on agentic tool-loop iterations during pre-generation.
///
/// Reads `REEL_PREGEN_MAX_TOOL_ITERS`; the value is trimmed and must parse as
/// a positive integer. When the variable is unset, unparsable, or zero the
/// default of 8 is used.
fn reel_pregen_max_tool_iters() -> usize {
    const DEFAULT_ITERS: usize = 8;

    match std::env::var("REEL_PREGEN_MAX_TOOL_ITERS") {
        Ok(raw) => match raw.trim().parse::<usize>() {
            Ok(limit) if limit > 0 => limit,
            _ => DEFAULT_ITERS,
        },
        Err(_) => DEFAULT_ITERS,
    }
}
|
||||||
|
|
||||||
|
/// Build the (system, user) prompt pair for the scripter. The user message
|
||||||
|
/// describes each beat in order and asks for strict JSON back.
|
||||||
|
pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) {
|
||||||
|
let mut user = String::new();
|
||||||
|
user.push_str(&format!(
|
||||||
|
"This reel has {} moments surfaced as memories {}.\n\n",
|
||||||
|
beats.len(),
|
||||||
|
meta.span_phrase()
|
||||||
|
));
|
||||||
|
if !meta.years.is_empty() {
|
||||||
|
let years: Vec<String> = meta.years.iter().map(|y| y.to_string()).collect();
|
||||||
|
user.push_str(&format!("They span the years: {}.\n\n", years.join(", ")));
|
||||||
|
}
|
||||||
|
user.push_str("Moments, in the order they will appear:\n");
|
||||||
|
for (i, beat) in beats.iter().enumerate() {
|
||||||
|
user.push_str(&format!("\n[{}]", i + 1));
|
||||||
|
if let Some(date) = beat.date_label() {
|
||||||
|
user.push_str(&format!(" {date}"));
|
||||||
|
}
|
||||||
|
if beat.is_clip() {
|
||||||
|
user.push_str(" (a video clip)");
|
||||||
|
} else if beat.media.len() > 1 {
|
||||||
|
user.push_str(&format!(" (a burst of {} photos)", beat.media.len()));
|
||||||
|
}
|
||||||
|
user.push('\n');
|
||||||
|
match (&beat.insight_title, &beat.insight_summary) {
|
||||||
|
(Some(t), Some(s)) if !s.trim().is_empty() => {
|
||||||
|
user.push_str(&format!(" Known context: {t} — {s}\n"));
|
||||||
|
}
|
||||||
|
(Some(t), _) => user.push_str(&format!(" Known context: {t}\n")),
|
||||||
|
(_, Some(s)) if !s.trim().is_empty() => {
|
||||||
|
user.push_str(&format!(" Known context: {s}\n"));
|
||||||
|
}
|
||||||
|
_ => user.push_str(" (no extra context — narrate plainly from the date)\n"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
user.push_str(&format!(
|
||||||
|
"\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
|
||||||
|
{{\"title\": \"<short reel title>\", \"segments\": [\"<line for moment 1>\", \
|
||||||
|
\"<line for moment 2>\", ... ]}}\n\
|
||||||
|
The \"segments\" array MUST have exactly {} items, one per moment in order.",
|
||||||
|
beats.len()
|
||||||
|
));
|
||||||
|
(SYSTEM_PROMPT.to_string(), user)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a richer (system, user) prompt pair for the agentic scripter. The
|
||||||
|
/// system prompt tells the model it may call read-only tools to ground each
|
||||||
|
/// line. The user message uses the same per-beat enumeration as
|
||||||
|
/// `build_script_messages` plus a GPS line per beat when available.
|
||||||
|
pub fn build_agentic_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> Vec<ChatMessage> {
|
||||||
|
let mut user = String::new();
|
||||||
|
user.push_str(&format!(
|
||||||
|
"This reel has {} moments surfaced as memories {}.\n\n",
|
||||||
|
beats.len(),
|
||||||
|
meta.span_phrase()
|
||||||
|
));
|
||||||
|
if !meta.years.is_empty() {
|
||||||
|
let years: Vec<String> = meta.years.iter().map(|y| y.to_string()).collect();
|
||||||
|
user.push_str(&format!("They span the years: {}.\n\n", years.join(", ")));
|
||||||
|
}
|
||||||
|
user.push_str("Moments, in the order they will appear:\n");
|
||||||
|
for (i, beat) in beats.iter().enumerate() {
|
||||||
|
user.push_str(&format!("\n[{}]", i + 1));
|
||||||
|
if let Some(date) = beat.date_label() {
|
||||||
|
user.push_str(&format!(" {date}"));
|
||||||
|
}
|
||||||
|
if beat.is_clip() {
|
||||||
|
user.push_str(" (a video clip)");
|
||||||
|
} else if beat.media.len() > 1 {
|
||||||
|
user.push_str(&format!(" (a burst of {} photos)", beat.media.len()));
|
||||||
|
}
|
||||||
|
if let Some((lat, lon)) = beat.gps {
|
||||||
|
user.push_str(&format!("\n GPS: {:.4}, {:.4}", lat, lon));
|
||||||
|
}
|
||||||
|
user.push('\n');
|
||||||
|
match (&beat.insight_title, &beat.insight_summary) {
|
||||||
|
(Some(t), Some(s)) if !s.trim().is_empty() => {
|
||||||
|
user.push_str(&format!(" Known context: {t} — {s}\n"));
|
||||||
|
}
|
||||||
|
(Some(t), _) => user.push_str(&format!(" Known context: {t}\n")),
|
||||||
|
(_, Some(s)) if !s.trim().is_empty() => {
|
||||||
|
user.push_str(&format!(" Known context: {s}\n"));
|
||||||
|
}
|
||||||
|
_ => user.push_str(" (no extra context — narrate plainly from the date)\n"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
user.push_str(&format!(
|
||||||
|
"\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
|
||||||
|
{{\"title\": \"<short reel title>\", \"segments\": [\"<line for moment 1>\", \
|
||||||
|
\"<line for moment 2>\", ... ]}}\n\
|
||||||
|
The \"segments\" array MUST have exactly {} items, one per moment in order.",
|
||||||
|
beats.len()
|
||||||
|
));
|
||||||
|
|
||||||
|
vec![
|
||||||
|
ChatMessage::system(AGENTIC_SYSTEM_PROMPT.to_string()),
|
||||||
|
ChatMessage::user(user),
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the model's response into a script with exactly `n` lines. Tolerant of
|
||||||
|
/// code fences and surrounding prose, and of both `segments: [".."]` and
|
||||||
|
/// `segments: [{"narration": ".."}]` shapes. Missing/extra lines are padded or
|
||||||
|
/// truncated so the caller always gets `n` aligned to the segments.
|
||||||
|
pub fn parse_script_response(raw: &str, n: usize) -> ReelScript {
|
||||||
|
let fallback_line = "A moment worth remembering.";
|
||||||
|
let value = extract_json_object(raw);
|
||||||
|
|
||||||
|
let title = value
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|v| v.get("title"))
|
||||||
|
.and_then(|t| t.as_str())
|
||||||
|
.map(clean_text)
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
.unwrap_or_else(|| "Memories".to_string());
|
||||||
|
|
||||||
|
let mut lines: Vec<String> = value
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|v| v.get("segments"))
|
||||||
|
.and_then(|s| s.as_array())
|
||||||
|
.map(|arr| {
|
||||||
|
arr.iter()
|
||||||
|
.map(|item| {
|
||||||
|
let text = item
|
||||||
|
.as_str()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.or_else(|| {
|
||||||
|
item.get("narration")
|
||||||
|
.and_then(|n| n.as_str())
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
clean_text(&text)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// Align to exactly n: drop extras, pad shortfalls with a neutral line so
|
||||||
|
// every photo still gets spoken audio.
|
||||||
|
lines.truncate(n);
|
||||||
|
while lines.len() < n {
|
||||||
|
lines.push(fallback_line.to_string());
|
||||||
|
}
|
||||||
|
for line in lines.iter_mut() {
|
||||||
|
if line.is_empty() {
|
||||||
|
*line = fallback_line.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ReelScript { title, lines }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pull the first balanced top-level JSON object out of a possibly-noisy model
|
||||||
|
/// response (code fences, leading prose). Returns None if nothing parses.
|
||||||
|
fn extract_json_object(raw: &str) -> Option<serde_json::Value> {
|
||||||
|
// Fast path: the whole thing is valid JSON.
|
||||||
|
if let Ok(v) = serde_json::from_str::<serde_json::Value>(raw.trim()) {
|
||||||
|
return Some(v);
|
||||||
|
}
|
||||||
|
// Otherwise scan for the first '{' ... matching '}' span, ignoring braces
|
||||||
|
// inside strings.
|
||||||
|
let bytes = raw.as_bytes();
|
||||||
|
let start = raw.find('{')?;
|
||||||
|
let mut depth = 0i32;
|
||||||
|
let mut in_str = false;
|
||||||
|
let mut escaped = false;
|
||||||
|
for i in start..bytes.len() {
|
||||||
|
let c = bytes[i] as char;
|
||||||
|
if in_str {
|
||||||
|
if escaped {
|
||||||
|
escaped = false;
|
||||||
|
} else if c == '\\' {
|
||||||
|
escaped = true;
|
||||||
|
} else if c == '"' {
|
||||||
|
in_str = false;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match c {
|
||||||
|
'"' => in_str = true,
|
||||||
|
'{' => depth += 1,
|
||||||
|
'}' => {
|
||||||
|
depth -= 1;
|
||||||
|
if depth == 0 {
|
||||||
|
return serde_json::from_str(&raw[start..=i]).ok();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tidy one line of model output: trim it, strip any double quotes wrapped
/// around it, and collapse every internal run of whitespace to a single space.
fn clean_text(s: &str) -> String {
    let core = s.trim().trim_matches('"').trim();
    let mut tidy = String::with_capacity(core.len());
    for word in core.split_whitespace() {
        if !tidy.is_empty() {
            tidy.push(' ');
        }
        tidy.push_str(word);
    }
    tidy
}
|
||||||
|
|
||||||
|
/// Generate the reel script via the LLM. Text-only (no images) — the per-beat
|
||||||
|
/// context comes from cached insights. The call takes the GPU read lease
|
||||||
|
/// internally (see `LlamaCppClient::generate`).
|
||||||
|
pub async fn generate_script(
|
||||||
|
client: &Arc<LlamaCppClient>,
|
||||||
|
meta: &ReelMeta,
|
||||||
|
beats: &[PlannedBeat],
|
||||||
|
) -> Result<ReelScript> {
|
||||||
|
let (system, user) = build_script_messages(meta, beats);
|
||||||
|
let raw = client
|
||||||
|
.generate(&user, Some(&system), None)
|
||||||
|
.await
|
||||||
|
.context("LLM script generation failed")?;
|
||||||
|
Ok(parse_script_response(&raw, beats.len()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Agentic version of script generation: resolves the backend via the
|
||||||
|
/// InsightGenerator (honouring LLM_BACKEND, model overrides, etc.), builds
|
||||||
|
/// a read-only tool set, runs the tool loop, then parses the JSON response.
|
||||||
|
/// Returns the same ReelScript shape. On failure the caller may fall back to
|
||||||
|
/// `generate_script`.
|
||||||
|
pub async fn generate_script_agentic(
|
||||||
|
generator: &InsightGenerator,
|
||||||
|
meta: &ReelMeta,
|
||||||
|
beats: &[PlannedBeat],
|
||||||
|
) -> Result<ReelScript> {
|
||||||
|
// 1. Resolve the backend. Bail if the local model lacks tool-calling.
|
||||||
|
let backend = generator
|
||||||
|
.resolve_backend(
|
||||||
|
BackendKind::Local,
|
||||||
|
&SamplingOverrides {
|
||||||
|
model: None,
|
||||||
|
num_ctx: None,
|
||||||
|
temperature: None,
|
||||||
|
top_p: None,
|
||||||
|
top_k: None,
|
||||||
|
min_p: None,
|
||||||
|
enable_thinking: None,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("resolving backend for agentic script")?;
|
||||||
|
|
||||||
|
// 2. Build the read-only tool set. Start from the persona gate (no
|
||||||
|
// persona context, so corrections are closed), force has_vision=false,
|
||||||
|
// then filter out write tools.
|
||||||
|
let gate = generator.current_gate_opts_for_persona(false, None);
|
||||||
|
let all_tools = InsightGenerator::build_tool_definitions(gate);
|
||||||
|
// Whole-reel calls have no single photo and no authenticated user, so the
|
||||||
|
// loop runs execute_tool with empty file/image context and user_id=0. Only
|
||||||
|
// tools that work without that context are useful here — photo/user-bound
|
||||||
|
// tools (get_file_tags, get_faces_in_photo, recall_facts_for_photo,
|
||||||
|
// recall_facts_for_entity) would just no-op or error, burning iterations,
|
||||||
|
// so they're excluded.
|
||||||
|
let read_only_names: std::collections::HashSet<&str> = [
|
||||||
|
"search_rag",
|
||||||
|
"search_messages",
|
||||||
|
"get_sms_messages",
|
||||||
|
"get_calendar_events",
|
||||||
|
"get_location_history",
|
||||||
|
"reverse_geocode",
|
||||||
|
"get_personal_place_at",
|
||||||
|
"recall_entities",
|
||||||
|
"get_current_datetime",
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
let tools: Vec<Tool> = all_tools
|
||||||
|
.into_iter()
|
||||||
|
.filter(|t| read_only_names.contains(t.function.name.as_str()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// 3. Build the agentic prompt messages.
|
||||||
|
let messages = build_agentic_script_messages(meta, beats);
|
||||||
|
|
||||||
|
// 4. Run the tool loop.
|
||||||
|
let max_iter = reel_pregen_max_tool_iters();
|
||||||
|
let raw = generator
|
||||||
|
.run_readonly_tool_loop(&backend, messages, tools, max_iter)
|
||||||
|
.await
|
||||||
|
.context("agentic tool loop failed")?;
|
||||||
|
|
||||||
|
// 5. Strip any think-blocks the model may have emitted, then parse.
|
||||||
|
let raw = crate::ai::llm_client::strip_think_blocks(&raw);
|
||||||
|
Ok(parse_script_response(&raw, beats.len()))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::memories::MemoriesSpan;

    /// Metadata fixture: a day-span reel reaching across 2019 and 2021.
    fn meta() -> ReelMeta {
        ReelMeta {
            span: MemoriesSpan::Day,
            years: vec![2019, 2021],
        }
    }

    /// A photo media entry in library 1.
    fn photo(rel_path: &str) -> super::super::SegmentMedia {
        super::super::SegmentMedia::Photo {
            rel_path: rel_path.to_string(),
            library_id: 1,
        }
    }

    /// `n` single-photo beats, one day apart, with no insight or GPS context.
    fn planned(n: usize) -> Vec<PlannedBeat> {
        let mut beats = Vec::with_capacity(n);
        for i in 0..n {
            beats.push(PlannedBeat {
                media: vec![photo(&format!("p{i}.jpg"))],
                date: Some(1_560_000_000 + i as i64 * 86_400),
                insight_title: None,
                insight_summary: None,
                gps: None,
            });
        }
        beats
    }

    #[test]
    fn prompt_states_exact_moment_count_and_span() {
        let (system_msg, user_msg) = build_script_messages(&meta(), &planned(3));
        assert!(system_msg.contains("memory reel"));
        assert!(user_msg.contains("3 moments"));
        assert!(user_msg.contains("on this day"));
        assert!(user_msg.contains("exactly 3 items"));
        // Every moment is listed under its own index tag.
        for tag in ["[1]", "[2]", "[3]"] {
            assert!(user_msg.contains(tag));
        }
    }

    #[test]
    fn prompt_notes_burst_photo_count() {
        let mut beats = planned(1);
        beats[0].media = vec![photo("a.jpg"), photo("b.jpg"), photo("c.jpg")];
        let (_system_msg, user_msg) = build_script_messages(&meta(), &beats);
        assert!(user_msg.contains("a burst of 3 photos"));
    }

    #[test]
    fn prompt_marks_clip_beats() {
        let mut beats = planned(1);
        beats[0].media = vec![super::super::SegmentMedia::Clip {
            rel_path: "v.mp4".into(),
            library_id: 1,
        }];
        let (_system_msg, user_msg) = build_script_messages(&meta(), &beats);
        assert!(user_msg.contains("a video clip"));
    }

    #[test]
    fn prompt_includes_insight_context_when_present() {
        let mut beats = planned(1);
        beats[0].insight_title = Some("Lake house weekend".into());
        beats[0].insight_summary = Some("Swimming with the dogs.".into());
        let (_system_msg, user_msg) = build_script_messages(&meta(), &beats);
        assert!(user_msg.contains("Lake house weekend — Swimming with the dogs."));
    }

    #[test]
    fn parse_plain_json_object() {
        let reply = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#;
        let parsed = parse_script_response(reply, 2);
        assert_eq!(parsed.title, "Summer Days");
        assert_eq!(parsed.lines, vec!["First line.", "Second line."]);
    }

    #[test]
    fn parse_tolerates_code_fences_and_prose() {
        let reply = "Sure! Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!";
        let parsed = parse_script_response(reply, 2);
        assert_eq!(parsed.title, "Trip");
        assert_eq!(parsed.lines, vec!["A.", "B."]);
    }

    #[test]
    fn parse_accepts_object_segment_shape() {
        let reply = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#;
        let parsed = parse_script_response(reply, 2);
        assert_eq!(parsed.lines, vec!["One.", "Two."]);
    }

    #[test]
    fn parse_pads_short_and_truncates_long_to_n() {
        // One line returned for three segments → padded with non-empty filler.
        let padded = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3);
        assert_eq!(padded.lines.len(), 3);
        assert_eq!(padded.lines[0], "Only one.");
        assert!(!padded.lines[1].is_empty());

        // Three lines returned for two segments → the extra line is cut.
        let trimmed = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2);
        assert_eq!(trimmed.lines, vec!["a", "b"]);
    }

    #[test]
    fn parse_falls_back_on_garbage() {
        let parsed = parse_script_response("the model said no", 2);
        assert_eq!(parsed.title, "Memories");
        assert_eq!(parsed.lines.len(), 2);
        for line in &parsed.lines {
            assert!(!line.is_empty());
        }
    }

    #[test]
    fn parse_blank_line_replaced_with_fallback() {
        let parsed = parse_script_response(r#"{"title":"T","segments":[" ","Real."]}"#, 2);
        assert!(!parsed.lines[0].is_empty());
        assert_eq!(parsed.lines[1], "Real.");
    }
}
|
||||||
@@ -0,0 +1,560 @@
|
|||||||
|
//! Reel selectors: resolve "what goes in the reel" into an ordered media set
|
||||||
|
//! plus the metadata the scripter needs. The renderer and scripter are
|
||||||
|
//! selector-agnostic, so adding tag- or date-range-based reels later means
|
||||||
|
//! adding a variant here, not touching the pipeline.
|
||||||
|
//!
|
||||||
|
//! Resolution is split in two so the handler can compute a cache key (and
|
||||||
|
//! short-circuit on a cache hit) without the per-photo insight lookups:
|
||||||
|
//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and
|
||||||
|
//! runs in the background job.
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
|
||||||
|
use chrono::{DateTime, Datelike, FixedOffset};
|
||||||
|
|
||||||
|
use super::{PlannedBeat, ReelMeta, SegmentMedia};
|
||||||
|
use crate::database::{ExifDao, InsightDao};
|
||||||
|
use crate::file_types::{is_image_file, is_video_file};
|
||||||
|
use crate::memories::{self, MemoriesSpan};
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
/// Default request ceiling on how many segments a reel covers. The effective
/// count is usually smaller because the duration budget also applies (see
/// [`budget_segments`]).
pub const DEFAULT_MAX_SEGMENTS: usize = 40;

/// Absolute ceiling on segments per reel; bounds the work done for a single
/// reel no matter what the request or the duration budget say.
pub const HARD_MAX_SEGMENTS: usize = 40;

/// Reel length to aim for, in seconds. Week and especially month spans can
/// surface hundreds of photos; at a few seconds of narration each an uncapped
/// reel would run for minutes, so the segment count is limited to stay near
/// this length. Override with `REEL_TARGET_SECONDS`.
const DEFAULT_TARGET_REEL_SECONDS: f64 = 90.0;

/// Rough average wall-time of one photo segment (a short narration line plus
/// the silent tail). Used only to convert the duration target into a segment
/// count; the real per-segment time is the measured narration length.
const EST_SECONDS_PER_SEGMENT: f64 = 5.0;

/// Gap that separates one "event/moment" from the next when clustering a
/// span's photos. Photos within a few hours count as the same occasion, while
/// gaps across days or years are far larger, so each instance forms its own
/// cluster. Four hours splits e.g. a morning hike from an evening dinner.
const EVENT_GAP_SECONDS: i64 = 4 * 60 * 60;
|
||||||
|
|
||||||
|
fn target_reel_seconds() -> f64 {
|
||||||
|
std::env::var("REEL_TARGET_SECONDS")
|
||||||
|
.ok()
|
||||||
|
.and_then(|s| s.trim().parse::<f64>().ok())
|
||||||
|
.filter(|x| x.is_finite() && *x > 0.0)
|
||||||
|
.unwrap_or(DEFAULT_TARGET_REEL_SECONDS)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// How many photo segments fit the duration budget, bounded by the request's
|
||||||
|
/// max and the hard cap. This is what keeps week/month reels from running long.
|
||||||
|
pub fn budget_segments(requested_max: usize) -> usize {
|
||||||
|
let by_budget = (target_reel_seconds() / EST_SECONDS_PER_SEGMENT).floor() as usize;
|
||||||
|
by_budget.min(requested_max).clamp(1, HARD_MAX_SEGMENTS)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// What a reel is built from. v1 ships the memories (on this day/week/month)
|
||||||
|
/// selector; tag and date-range variants slot in here later.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum ReelSelector {
|
||||||
|
Memories {
|
||||||
|
span: MemoriesSpan,
|
||||||
|
tz_offset_minutes: i32,
|
||||||
|
library: Option<String>,
|
||||||
|
max_segments: usize,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReelSelector {
|
||||||
|
/// Stable string identity for the cache key. Captures everything that
|
||||||
|
/// changes *which* media is selected (but not the non-deterministic
|
||||||
|
/// narration, which can't be part of a pre-render key).
|
||||||
|
pub fn descriptor(&self) -> String {
|
||||||
|
match self {
|
||||||
|
ReelSelector::Memories {
|
||||||
|
span,
|
||||||
|
tz_offset_minutes,
|
||||||
|
library,
|
||||||
|
max_segments,
|
||||||
|
} => format!(
|
||||||
|
"memories:span={:?}:tz={}:lib={}:max={}",
|
||||||
|
span,
|
||||||
|
tz_offset_minutes,
|
||||||
|
library.as_deref().unwrap_or("all"),
|
||||||
|
max_segments
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Choose up to `max` items spaced evenly over `items`, always including the
/// first and the last when `max >= 2`.
///
/// * `max == 0` yields an empty vector.
/// * Input that already fits within `max` is returned as a copy.
/// * `max == 1` on longer input yields just the first item.
pub fn sample_evenly<T: Clone>(items: &[T], max: usize) -> Vec<T> {
    match max {
        0 => Vec::new(),
        _ if items.len() <= max => items.to_vec(),
        1 => vec![items[0].clone()],
        _ => {
            let last = items.len() - 1;
            let steps = max - 1;
            let mut picked = Vec::with_capacity(max);
            for slot in 0..max {
                // Map slot 0..=steps onto index 0..=last, rounding to nearest,
                // so both endpoints are hit exactly.
                let idx = (slot * last + steps / 2) / steps;
                picked.push(items[idx.min(last)].clone());
            }
            picked
        }
    }
}
|
||||||
|
|
||||||
|
/// Group time-sorted items into events by gap: a new event starts whenever the
|
||||||
|
/// jump from the previous photo exceeds `gap_seconds`. Preserves order; items
|
||||||
|
/// without a timestamp extend the current event.
|
||||||
|
fn cluster_by_gap(
|
||||||
|
items: &[memories::MemoryItem],
|
||||||
|
gap_seconds: i64,
|
||||||
|
) -> Vec<Vec<memories::MemoryItem>> {
|
||||||
|
let mut clusters: Vec<Vec<memories::MemoryItem>> = Vec::new();
|
||||||
|
let mut prev_ts: Option<i64> = None;
|
||||||
|
for it in items {
|
||||||
|
let starts_new = match (prev_ts, it.created) {
|
||||||
|
(Some(p), Some(c)) => c - p > gap_seconds,
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
if starts_new || clusters.is_empty() {
|
||||||
|
clusters.push(Vec::new());
|
||||||
|
}
|
||||||
|
clusters.last_mut().unwrap().push(it.clone());
|
||||||
|
if let Some(c) = it.created {
|
||||||
|
prev_ts = Some(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clusters
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Upper limit on the photos one beat flashes through. Capping the burst stops
/// a single huge event from dominating and keeps each photo on screen long
/// enough to register at the per-beat narration length (see render's beat
/// timing).
pub const MAX_BURST_PHOTOS: usize = 10;
|
||||||
|
|
||||||
|
/// Merge a list of (time-ordered) event clusters into exactly `n` contiguous
|
||||||
|
/// groups, so a span with more events than the beat budget still covers the
|
||||||
|
/// whole timeline — adjacent events fold together into one beat rather than
|
||||||
|
/// getting dropped. `n` must be ≥ 1 and ≤ clusters.len().
|
||||||
|
fn partition_into_groups(
|
||||||
|
clusters: Vec<Vec<memories::MemoryItem>>,
|
||||||
|
n: usize,
|
||||||
|
) -> Vec<Vec<memories::MemoryItem>> {
|
||||||
|
let c = clusters.len();
|
||||||
|
let mut clusters = clusters.into_iter();
|
||||||
|
(0..n)
|
||||||
|
.map(|j| {
|
||||||
|
// Even contiguous split of c clusters into n groups.
|
||||||
|
let start = j * c / n;
|
||||||
|
let end = (j + 1) * c / n;
|
||||||
|
let take = end.saturating_sub(start).max(1);
|
||||||
|
(0..take)
|
||||||
|
.flat_map(|_| clusters.next().into_iter().flatten())
|
||||||
|
.collect()
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Turn photo items into `n_beats` photo beats. Clusters photos into events by
|
||||||
|
/// time gap; if there are more events than beats, adjacent events are merged so
|
||||||
|
/// the whole span is still covered. Each beat then flashes up to `max_burst`
|
||||||
|
/// photos (an even spread of its group) under one narration line — so a
|
||||||
|
/// week/month reel *shows* all its moments without a narrated (and timed)
|
||||||
|
/// segment per photo.
|
||||||
|
fn form_photo_beats(
|
||||||
|
items: &[memories::MemoryItem],
|
||||||
|
n_beats: usize,
|
||||||
|
max_burst: usize,
|
||||||
|
) -> Vec<PlannedBeat> {
|
||||||
|
if n_beats == 0 || items.is_empty() {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
let clusters = cluster_by_gap(items, EVENT_GAP_SECONDS);
|
||||||
|
// One beat per event when they fit; otherwise fold adjacent events together
|
||||||
|
// into exactly n_beats groups.
|
||||||
|
let groups = if clusters.len() <= n_beats {
|
||||||
|
clusters
|
||||||
|
} else {
|
||||||
|
partition_into_groups(clusters, n_beats)
|
||||||
|
};
|
||||||
|
|
||||||
|
groups
|
||||||
|
.into_iter()
|
||||||
|
.filter(|g| !g.is_empty())
|
||||||
|
.map(|group| {
|
||||||
|
let shown = sample_evenly(&group, max_burst);
|
||||||
|
let date = shown.first().and_then(|it| it.created);
|
||||||
|
PlannedBeat {
|
||||||
|
media: shown
|
||||||
|
.into_iter()
|
||||||
|
.map(|it| SegmentMedia::Photo {
|
||||||
|
rel_path: it.path,
|
||||||
|
library_id: it.library_id,
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
date,
|
||||||
|
insight_title: None,
|
||||||
|
insight_summary: None,
|
||||||
|
gps: None,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Split the beat budget between photo beats and video-clip beats.
///
/// Returns `(photo_beats, clip_beats)`; the two never add up to more than
/// `n_beats`.
///
/// * No budget (`n_beats == 0`): `(0, 0)`.
/// * No videos: photos get the whole budget.
/// * No photos: clips get the budget, capped at the number of videos.
/// * Both present: clips are individually valuable (motion + live audio), so
///   they get up to half the budget — at least one — and never more than there
///   are videos; photos take the rest.
fn split_beat_budget(n_photos: usize, n_videos: usize, n_beats: usize) -> (usize, usize) {
    // Without this guard the "at least one clip" rule below would hand out a
    // beat that the budget does not have.
    if n_beats == 0 {
        return (0, 0);
    }
    if n_videos == 0 {
        return (n_beats, 0);
    }
    if n_photos == 0 {
        return (0, n_beats.min(n_videos));
    }
    let clip_beats = n_videos.min((n_beats / 2).max(1));
    let photo_beats = n_beats.saturating_sub(clip_beats);
    (photo_beats, clip_beats)
}
|
||||||
|
|
||||||
|
/// Build the reel's beats from a span's photos and videos under a beat budget.
|
||||||
|
/// Videos become one-clip beats (sampled across time if there are more than the
|
||||||
|
/// clip budget); photos cluster into burst beats. The two are merged back into
|
||||||
|
/// chronological order so the reel reads as the span unfolded.
|
||||||
|
pub fn form_beats(
|
||||||
|
photos: &[memories::MemoryItem],
|
||||||
|
videos: &[memories::MemoryItem],
|
||||||
|
n_beats: usize,
|
||||||
|
max_burst: usize,
|
||||||
|
) -> Vec<PlannedBeat> {
|
||||||
|
if n_beats == 0 {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
let (photo_budget, clip_budget) = split_beat_budget(photos.len(), videos.len(), n_beats);
|
||||||
|
|
||||||
|
let mut beats = form_photo_beats(photos, photo_budget, max_burst);
|
||||||
|
|
||||||
|
// One clip beat per chosen video, spread across the span's videos.
|
||||||
|
for v in sample_evenly(videos, clip_budget) {
|
||||||
|
beats.push(PlannedBeat {
|
||||||
|
media: vec![SegmentMedia::Clip {
|
||||||
|
rel_path: v.path,
|
||||||
|
library_id: v.library_id,
|
||||||
|
}],
|
||||||
|
date: v.created,
|
||||||
|
insight_title: None,
|
||||||
|
insight_summary: None,
|
||||||
|
gps: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge photo and clip beats back into chronological order (undated last).
|
||||||
|
beats.sort_by(|a, b| match (a.date, b.date) {
|
||||||
|
(Some(x), Some(y)) => x.cmp(&y),
|
||||||
|
(Some(_), None) => std::cmp::Ordering::Less,
|
||||||
|
(None, Some(_)) => std::cmp::Ordering::Greater,
|
||||||
|
(None, None) => std::cmp::Ordering::Equal,
|
||||||
|
});
|
||||||
|
beats
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cheap pass: resolve the selector into an ordered list of media (no insight
|
||||||
|
/// lookups yet) plus reel metadata. `Err` only on an invalid library param.
|
||||||
|
pub fn resolve(
|
||||||
|
app_state: &AppState,
|
||||||
|
exif_dao: &Mutex<Box<dyn ExifDao>>,
|
||||||
|
span_context: &opentelemetry::Context,
|
||||||
|
selector: &ReelSelector,
|
||||||
|
) -> Result<(Vec<PlannedBeat>, ReelMeta), String> {
|
||||||
|
match selector {
|
||||||
|
ReelSelector::Memories {
|
||||||
|
span,
|
||||||
|
tz_offset_minutes,
|
||||||
|
library,
|
||||||
|
max_segments,
|
||||||
|
} => {
|
||||||
|
let client_tz = FixedOffset::east_opt(tz_offset_minutes * 60);
|
||||||
|
let items = memories::gather_memory_items(
|
||||||
|
app_state,
|
||||||
|
exif_dao,
|
||||||
|
span_context,
|
||||||
|
*span,
|
||||||
|
*tz_offset_minutes,
|
||||||
|
client_tz,
|
||||||
|
library.as_deref(),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// Split into photos and video clips; anything that's neither is
|
||||||
|
// dropped. Years span both, computed before the budget narrows it.
|
||||||
|
let years = distinct_years(&items, client_tz);
|
||||||
|
let meta = ReelMeta { span: *span, years };
|
||||||
|
|
||||||
|
let (photos, videos): (Vec<_>, Vec<_>) = items
|
||||||
|
.into_iter()
|
||||||
|
.filter(|it| {
|
||||||
|
is_image_file(Path::new(&it.path)) || is_video_file(Path::new(&it.path))
|
||||||
|
})
|
||||||
|
.partition(|it| is_image_file(Path::new(&it.path)));
|
||||||
|
|
||||||
|
// The budget caps the number of narrated beats (≈ reel length);
|
||||||
|
// photo beats then burst through several photos and video beats
|
||||||
|
// play a short clip, so the reel covers the span without running
|
||||||
|
// minutes long.
|
||||||
|
let n_beats = budget_segments(*max_segments);
|
||||||
|
let beats = form_beats(&photos, &videos, n_beats, MAX_BURST_PHOTOS);
|
||||||
|
Ok((beats, meta))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Distinct calendar years represented by the selected media, in the client's
|
||||||
|
/// timezone, ascending. Used to tell the scripter how far back the reel reaches.
|
||||||
|
fn distinct_years(items: &[memories::MemoryItem], tz: Option<FixedOffset>) -> Vec<i32> {
|
||||||
|
let mut years: Vec<i32> = items
|
||||||
|
.iter()
|
||||||
|
.filter_map(|it| it.created)
|
||||||
|
.filter_map(|ts| DateTime::from_timestamp(ts, 0))
|
||||||
|
.map(|dt| match tz {
|
||||||
|
Some(off) => dt.with_timezone(&off).year(),
|
||||||
|
None => dt.year(),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
years.sort_unstable();
|
||||||
|
years.dedup();
|
||||||
|
years
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Background pass: fill each beat's cached insight (title + summary) and
|
||||||
|
/// GPS coordinates from its lead photo, where one exists. Best-effort — a
|
||||||
|
/// missing or errored lookup leaves the fields `None` and the scripter
|
||||||
|
/// narrates from the date alone.
|
||||||
|
pub fn enrich(
|
||||||
|
insight_dao: &Mutex<Box<dyn InsightDao>>,
|
||||||
|
exif_dao: &Mutex<Box<dyn ExifDao>>,
|
||||||
|
span_context: &opentelemetry::Context,
|
||||||
|
beats: &mut [PlannedBeat],
|
||||||
|
) {
|
||||||
|
let Ok(mut insight_dao) = insight_dao.lock() else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
let Ok(mut exif_dao) = exif_dao.lock() else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
for beat in beats.iter_mut() {
|
||||||
|
let rel_path = match beat.media.first() {
|
||||||
|
Some(SegmentMedia::Photo { rel_path, .. } | SegmentMedia::Clip { rel_path, .. }) => {
|
||||||
|
rel_path.clone()
|
||||||
|
}
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
if let Ok(Some(insight)) = insight_dao.get_insight(span_context, &rel_path) {
|
||||||
|
beat.insight_title = Some(insight.title);
|
||||||
|
beat.insight_summary = Some(insight.summary);
|
||||||
|
}
|
||||||
|
// Enrich GPS from EXIF when the lead media is a photo.
|
||||||
|
if let Some(SegmentMedia::Photo { .. }) = beat.media.first()
|
||||||
|
&& let Ok(Some(exif)) = exif_dao.get_exif(span_context, &rel_path)
|
||||||
|
&& let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude)
|
||||||
|
{
|
||||||
|
beat.gps = Some((lat as f64, lon as f64));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Memory item at unix timestamp `ts` (seconds) named `{name}.{ext}`.
    fn item_ext(ts: i64, name: &str, ext: &str) -> memories::MemoryItem {
        memories::MemoryItem {
            path: format!("{name}.{ext}"),
            created: Some(ts),
            modified: None,
            library_id: 1,
        }
    }

    /// JPEG memory item at unix timestamp `ts`.
    fn item_at(ts: i64, name: &str) -> memories::MemoryItem {
        item_ext(ts, name, "jpg")
    }

    #[test]
    fn sample_evenly_returns_all_when_under_cap() {
        let input = vec![1, 2, 3];
        assert_eq!(sample_evenly(&input, 5), vec![1, 2, 3]);
        assert_eq!(sample_evenly(&input, 3), vec![1, 2, 3]);
    }

    #[test]
    fn sample_evenly_keeps_endpoints_and_spreads() {
        let input: Vec<i32> = (0..100).collect();
        let picked = sample_evenly(&input, 5);
        assert_eq!(picked.len(), 5);
        assert_eq!(picked[0], 0); // first kept
        assert_eq!(*picked.last().unwrap(), 99); // last kept
        // Strictly increasing, hence no duplicates.
        assert!(picked.windows(2).all(|pair| pair[0] < pair[1]));
    }

    #[test]
    fn sample_evenly_handles_one_and_zero() {
        let input: Vec<i32> = (0..10).collect();
        assert_eq!(sample_evenly(&input, 1), vec![0]);
        assert!(sample_evenly(&input, 0).is_empty());
    }

    #[test]
    fn descriptor_is_stable_and_distinguishes_inputs() {
        let selector_for = |span: MemoriesSpan| ReelSelector::Memories {
            span,
            tz_offset_minutes: -480,
            library: None,
            max_segments: 24,
        };
        let day = selector_for(MemoriesSpan::Day);
        let week = selector_for(MemoriesSpan::Week);
        assert_eq!(day.descriptor(), day.clone().descriptor());
        assert_ne!(day.descriptor(), week.descriptor());
        assert!(day.descriptor().contains("lib=all"));
    }

    #[test]
    fn distinct_years_dedupes_and_sorts() {
        // Two timestamps in 2019 and one in 2021.
        let items: Vec<memories::MemoryItem> =
            [("a", 1_560_000_000), ("b", 1_560_086_400), ("c", 1_623_000_000)]
                .into_iter()
                .map(|(path, ts)| memories::MemoryItem {
                    path: path.into(),
                    created: Some(ts),
                    modified: None,
                    library_id: 1,
                })
                .collect();
        assert_eq!(distinct_years(&items, None), vec![2019, 2021]);
    }

    #[test]
    fn budget_segments_caps_to_duration_target() {
        // 90s / 5s ≈ 18, bounded by the request max and the hard cap.
        assert_eq!(budget_segments(40), 18);
        assert_eq!(budget_segments(5), 5); // request asked for fewer
        assert_eq!(budget_segments(1000), 18); // hard cap / budget wins
    }

    #[test]
    fn cluster_by_gap_splits_on_large_jumps() {
        // Two photos minutes apart, then one about a day later → two events.
        let items = vec![
            item_at(1_000_000, "a"),
            item_at(1_000_300, "b"), // +5 min → same event
            item_at(1_100_000, "c"), // +~27h → new event
        ];
        let events = cluster_by_gap(&items, EVENT_GAP_SECONDS);
        assert_eq!(events.len(), 2);
        assert_eq!(events[0].len(), 2);
        assert_eq!(events[1].len(), 1);
    }

    #[test]
    fn photo_beats_one_per_event_when_they_fit() {
        // Three well-separated events with a budget of 10 → three beats, each
        // holding all of its (few) photos.
        let items = vec![
            item_at(0, "a"),
            item_at(50, "b"), // same event as a
            item_at(1_000_000, "c"),
            item_at(2_000_000, "d"),
        ];
        let beats = form_photo_beats(&items, 10, MAX_BURST_PHOTOS);
        assert_eq!(beats.len(), 3);
        assert_eq!(beats[0].media.len(), 2); // burst of the first event
        assert_eq!(beats[1].media.len(), 1);
        assert_eq!(beats[2].media.len(), 1);
    }

    #[test]
    fn photo_beats_merge_events_when_over_budget() {
        // Six distinct events but only two beats → adjacent events fold
        // together and every event's photos still appear (within the burst cap).
        let mut items: Vec<memories::MemoryItem> = Vec::new();
        for i in 0..6 {
            items.push(item_at(i as i64 * 1_000_000, &format!("e{i}")));
        }
        let beats = form_photo_beats(&items, 2, MAX_BURST_PHOTOS);
        assert_eq!(beats.len(), 2);
        let shown: usize = beats.iter().map(|beat| beat.media.len()).sum();
        assert_eq!(shown, 6); // all six moments still shown across two beats
    }

    #[test]
    fn photo_beats_cap_burst_to_max() {
        // One dense event of 30 photos with a generous budget → a single beat
        // that bursts at most MAX_BURST_PHOTOS, not all 30.
        let mut items: Vec<memories::MemoryItem> = Vec::new();
        for i in 0..30 {
            items.push(item_at(i as i64, &format!("p{i}")));
        }
        let beats = form_photo_beats(&items, 18, MAX_BURST_PHOTOS);
        assert_eq!(beats.len(), 1);
        assert_eq!(beats[0].media.len(), MAX_BURST_PHOTOS);
    }

    #[test]
    fn split_beat_budget_handles_each_mix() {
        // Only photos / only videos → that kind gets the whole budget.
        assert_eq!(split_beat_budget(10, 0, 18), (18, 0));
        assert_eq!(split_beat_budget(0, 10, 18), (0, 10)); // capped at n_videos
        assert_eq!(split_beat_budget(0, 30, 18), (0, 18)); // capped at budget
        // Mixed → clips up to half (≥1), photos the rest.
        assert_eq!(split_beat_budget(100, 100, 18), (9, 9));
        assert_eq!(split_beat_budget(100, 1, 18), (17, 1)); // few videos
    }

    #[test]
    fn form_beats_mixes_clip_and_photo_beats_in_time_order() {
        let photos = vec![item_at(0, "p0"), item_at(2_000_000, "p1")];
        // A video that falls between the two photo events in time.
        let videos = vec![item_ext(1_000_000, "v0", "mp4")];
        let beats = form_beats(&photos, &videos, 10, MAX_BURST_PHOTOS);
        // Two photo events + one clip = three beats, chronological.
        assert_eq!(beats.len(), 3);
        assert!(!beats[0].is_clip()); // p0 @ t=0
        assert!(beats[1].is_clip()); // v0 @ t=1e6
        assert!(!beats[2].is_clip()); // p1 @ t=2e6
        assert!(matches!(beats[1].media[0], SegmentMedia::Clip { .. }));
    }

    #[test]
    fn form_beats_videos_only_become_clip_beats() {
        let mut videos: Vec<memories::MemoryItem> = Vec::new();
        for i in 0..3 {
            videos.push(item_ext(i as i64 * 1_000_000, &format!("v{i}"), "mov"));
        }
        let beats = form_beats(&[], &videos, 10, MAX_BURST_PHOTOS);
        assert_eq!(beats.len(), 3);
        for beat in &beats {
            assert!(beat.is_clip());
        }
    }
}
|
||||||
+47
-3
@@ -8,9 +8,10 @@ use crate::ai::turn_registry::TurnRegistry;
|
|||||||
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
|
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
|
||||||
use crate::database::{
|
use crate::database::{
|
||||||
CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, InsightGenerationJobDao, KnowledgeDao,
|
CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, InsightGenerationJobDao, KnowledgeDao,
|
||||||
LocationHistoryDao, SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao,
|
LocationHistoryDao, PrecomputedReelDao, SearchHistoryDao, SqliteCalendarEventDao,
|
||||||
SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, SqliteKnowledgeDao,
|
SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao,
|
||||||
SqliteLocationHistoryDao, SqliteSearchHistoryDao, connect,
|
SqliteKnowledgeDao, SqliteLocationHistoryDao, SqlitePrecomputedReelDao, SqliteSearchHistoryDao,
|
||||||
|
SqliteUserAiPrefsDao, UserAiPrefsDao, connect,
|
||||||
};
|
};
|
||||||
use crate::database::{PreviewDao, SqlitePreviewDao};
|
use crate::database::{PreviewDao, SqlitePreviewDao};
|
||||||
use crate::faces;
|
use crate::faces;
|
||||||
@@ -53,6 +54,10 @@ pub struct AppState {
|
|||||||
pub video_path: String,
|
pub video_path: String,
|
||||||
pub gif_path: String,
|
pub gif_path: String,
|
||||||
pub preview_clips_path: String,
|
pub preview_clips_path: String,
|
||||||
|
/// Directory for cached memory-reel MP4s (+ title sidecars). Derived from
|
||||||
|
/// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips.
|
||||||
|
/// Created lazily by the reel pipeline on first render.
|
||||||
|
pub reels_path: String,
|
||||||
pub excluded_dirs: Vec<String>,
|
pub excluded_dirs: Vec<String>,
|
||||||
pub ollama: OllamaClient,
|
pub ollama: OllamaClient,
|
||||||
/// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
|
/// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
|
||||||
@@ -84,6 +89,14 @@ pub struct AppState {
|
|||||||
pub clip_client: ClipClient,
|
pub clip_client: ClipClient,
|
||||||
pub insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
|
pub insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
|
||||||
pub insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
|
pub insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
|
||||||
|
/// Ledger for precomputed memory reels. Written by the nightly agentic
|
||||||
|
/// job (Section D); read by `GET /reels/precomputed` (Section C).
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub precomputed_reel_dao: Arc<Mutex<Box<dyn PrecomputedReelDao>>>,
|
||||||
|
/// User AI preferences (voice, timezone, library). Mirrored by the
|
||||||
|
/// client; read by the nightly pre-generation scheduler.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub user_ai_prefs_dao: Arc<Mutex<Box<dyn UserAiPrefsDao>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AppState {
|
impl AppState {
|
||||||
@@ -97,6 +110,7 @@ impl AppState {
|
|||||||
self.libraries.iter().find(|l| l.id == id)
|
self.libraries.iter().find(|l| l.id == id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
pub fn library_by_name(&self, name: &str) -> Option<&Library> {
|
pub fn library_by_name(&self, name: &str) -> Option<&Library> {
|
||||||
self.libraries.iter().find(|l| l.name == name)
|
self.libraries.iter().find(|l| l.name == name)
|
||||||
}
|
}
|
||||||
@@ -125,6 +139,8 @@ impl AppState {
|
|||||||
clip_client: ClipClient,
|
clip_client: ClipClient,
|
||||||
insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
|
insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
|
||||||
insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
|
insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
|
||||||
|
precomputed_reel_dao: Arc<Mutex<Box<dyn PrecomputedReelDao>>>,
|
||||||
|
user_ai_prefs_dao: Arc<Mutex<Box<dyn UserAiPrefsDao>>>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
assert!(
|
assert!(
|
||||||
!libraries_vec.is_empty(),
|
!libraries_vec.is_empty(),
|
||||||
@@ -141,6 +157,19 @@ impl AppState {
|
|||||||
preview_dao,
|
preview_dao,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Reels cache dir: explicit env, else a `reels` sibling of the preview
|
||||||
|
// clips dir (a known-writable, test-safe location). Not created here —
|
||||||
|
// the reel pipeline does `create_dir_all` before its first write, so
|
||||||
|
// construction (incl. tests) never touches the filesystem.
|
||||||
|
let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| {
|
||||||
|
std::path::Path::new(&preview_clips_path)
|
||||||
|
.parent()
|
||||||
|
.map(|p| p.join("reels"))
|
||||||
|
.unwrap_or_else(|| std::path::PathBuf::from("reels"))
|
||||||
|
.to_string_lossy()
|
||||||
|
.to_string()
|
||||||
|
});
|
||||||
|
|
||||||
let library_health = libraries::new_health_map(&libraries_vec);
|
let library_health = libraries::new_health_map(&libraries_vec);
|
||||||
let live_libraries = Arc::new(RwLock::new(libraries_vec.clone()));
|
let live_libraries = Arc::new(RwLock::new(libraries_vec.clone()));
|
||||||
Self {
|
Self {
|
||||||
@@ -155,6 +184,7 @@ impl AppState {
|
|||||||
video_path,
|
video_path,
|
||||||
gif_path,
|
gif_path,
|
||||||
preview_clips_path,
|
preview_clips_path,
|
||||||
|
reels_path,
|
||||||
excluded_dirs,
|
excluded_dirs,
|
||||||
ollama,
|
ollama,
|
||||||
openrouter,
|
openrouter,
|
||||||
@@ -169,6 +199,8 @@ impl AppState {
|
|||||||
clip_client,
|
clip_client,
|
||||||
insight_job_dao,
|
insight_job_dao,
|
||||||
insight_job_handles,
|
insight_job_handles,
|
||||||
|
precomputed_reel_dao,
|
||||||
|
user_ai_prefs_dao,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -249,6 +281,14 @@ impl Default for AppState {
|
|||||||
let insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>> =
|
let insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>> =
|
||||||
Arc::new(Mutex::new(HashMap::new()));
|
Arc::new(Mutex::new(HashMap::new()));
|
||||||
|
|
||||||
|
// Initialize precomputed reel DAO (nightly pre-generation ledger)
|
||||||
|
let precomputed_reel_dao: Arc<Mutex<Box<dyn PrecomputedReelDao>>> =
|
||||||
|
Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new())));
|
||||||
|
|
||||||
|
// Initialize user AI preferences DAO (Section E)
|
||||||
|
let user_ai_prefs_dao: Arc<Mutex<Box<dyn UserAiPrefsDao>>> =
|
||||||
|
Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new())));
|
||||||
|
|
||||||
// Load base path and ensure the primary library row reflects it.
|
// Load base path and ensure the primary library row reflects it.
|
||||||
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
|
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
|
||||||
let mut seed_conn = connect();
|
let mut seed_conn = connect();
|
||||||
@@ -326,6 +366,8 @@ impl Default for AppState {
|
|||||||
clip_client,
|
clip_client,
|
||||||
insight_job_dao,
|
insight_job_dao,
|
||||||
insight_job_handles,
|
insight_job_handles,
|
||||||
|
precomputed_reel_dao,
|
||||||
|
user_ai_prefs_dao,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -535,6 +577,8 @@ impl AppState {
|
|||||||
ClipClient::new(None), // disabled in test
|
ClipClient::new(None), // disabled in test
|
||||||
Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test
|
Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test
|
||||||
Arc::new(Mutex::new(HashMap::new())), // placeholder for test
|
Arc::new(Mutex::new(HashMap::new())), // placeholder for test
|
||||||
|
Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))), // placeholder for test
|
||||||
|
Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))), // placeholder for test
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+1
-1
@@ -168,7 +168,7 @@ async fn get_tags<D: TagDao>(
|
|||||||
// this file, so tags added under one library show up under the
|
// this file, so tags added under one library show up under the
|
||||||
// others when they hold the same file. Falls back to direct rel_path
|
// others when they hold the same file. Falls back to direct rel_path
|
||||||
// match when the file hasn't been hashed yet.
|
// match when the file hasn't been hashed yet.
|
||||||
let library = libraries::resolve_library_param(&app_state, request.library.as_deref())
|
let library = libraries::resolve_library_param_state(&app_state, request.library.as_deref())
|
||||||
.ok()
|
.ok()
|
||||||
.flatten()
|
.flatten()
|
||||||
.unwrap_or_else(|| app_state.primary_library());
|
.unwrap_or_else(|| app_state.primary_library());
|
||||||
|
|||||||
@@ -0,0 +1,521 @@
|
|||||||
|
//! `/photos/search/unified?q=<natural language>` — unified NL photo search.
|
||||||
|
//!
|
||||||
|
//! One free-text box that composes the two existing engines instead of making
|
||||||
|
//! the user pick between them:
|
||||||
|
//! 1. A grounded local-LLM call ([`crate::ai::nl_query`]) translates the
|
||||||
|
//! query into a structured filter + a semantic term.
|
||||||
|
//! 2. Structured filters (tags / EXIF / geo / date / media-type) define the
|
||||||
|
//! candidate set; the semantic term ranks within it via CLIP.
|
||||||
|
//!
|
||||||
|
//! Path A (orchestration): we reuse `clip_search`'s scoring core and the
|
||||||
|
//! existing `ExifDao` / `TagDao` queries, joining on `content_hash`. EXIF rows
|
||||||
|
//! are the universal candidate carrier — each has `(library_id, file_path,
|
||||||
|
//! content_hash, date_taken)` — so the structured filter is just a predicate
|
||||||
|
//! over them, and the CLIP hits (which key on `content_hash`) intersect by
|
||||||
|
//! hash. No new schema, no surgery on `list_photos`.
|
||||||
|
//!
|
||||||
|
//! Degenerate cases collapse to the existing behavior: semantic-only → plain
|
||||||
|
//! CLIP search; filters-only → a date-sorted filtered listing.
|
||||||
|
//!
|
||||||
|
//! Person filtering is intentionally deferred (no person→photos resolver yet).
|
||||||
|
|
||||||
|
use crate::AppState;
|
||||||
|
use crate::ai::backend::{BackendKind, SamplingOverrides};
|
||||||
|
use crate::ai::nl_query::{StructuredQuery, translate_nl_query};
|
||||||
|
use crate::clip_search::{
|
||||||
|
SearchHit, parse_library_scope, resolve_hits, score_error_response, score_photos,
|
||||||
|
};
|
||||||
|
use crate::data::Claims;
|
||||||
|
use crate::database::ExifDao;
|
||||||
|
use crate::file_types::{is_image_file, is_video_file};
|
||||||
|
use crate::geo::{forward_geocode, gps_bounding_box, haversine_distance};
|
||||||
|
use crate::tags::TagDao;
|
||||||
|
use actix_web::HttpResponse;
|
||||||
|
use actix_web::web::{Data, Query};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct UnifiedQuery {
|
||||||
|
/// Natural-language query. Required; empty triggers 400.
|
||||||
|
pub q: String,
|
||||||
|
#[serde(default = "default_limit")]
|
||||||
|
pub limit: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
pub offset: usize,
|
||||||
|
/// CLIP cosine floor for the semantic ranking stage. Same default as the
|
||||||
|
/// plain search endpoint.
|
||||||
|
#[serde(default = "default_threshold")]
|
||||||
|
pub threshold: f32,
|
||||||
|
/// Legacy single-library scope (see clip_search).
|
||||||
|
pub library: Option<i32>,
|
||||||
|
/// Multi-library scope, comma-separated ids.
|
||||||
|
pub library_ids: Option<String>,
|
||||||
|
/// Optional model override. The client passes the user's currently-selected
|
||||||
|
/// local model so the translation step reuses a model that's already loaded
|
||||||
|
/// (avoids a llama-swap eviction / cold start). Falls back to the configured
|
||||||
|
/// default local model when absent. Local only — no hybrid here.
|
||||||
|
pub model: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serde default for `UnifiedQuery::limit`: the page size used when the
/// client does not send one.
fn default_limit() -> usize {
    const DEFAULT_PAGE_SIZE: usize = 20;
    DEFAULT_PAGE_SIZE
}
|
||||||
|
/// Serde default for `UnifiedQuery::threshold`: the CLIP cosine floor used
/// when the client does not send one.
fn default_threshold() -> f32 {
    const DEFAULT_CLIP_FLOOR: f32 = 0.20;
    DEFAULT_CLIP_FLOOR
}
|
||||||
|
|
||||||
|
/// A geocoded place echoed back so the client can show / edit the location
|
||||||
|
/// filter it actually searched.
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
struct ResolvedPlace {
|
||||||
|
display_name: String,
|
||||||
|
lat: f64,
|
||||||
|
lon: f64,
|
||||||
|
radius_km: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// How the server interpreted the NL query — echoed to the client to render
|
||||||
|
/// editable filter chips. tag ids map to the client's existing tag list.
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
struct Interpreted {
|
||||||
|
semantic: Option<String>,
|
||||||
|
tag_ids: Vec<i32>,
|
||||||
|
exclude_tag_ids: Vec<i32>,
|
||||||
|
/// Words the model treated as tags that don't exist in the vocab; folded
|
||||||
|
/// into the semantic term and surfaced here so the UI can explain it.
|
||||||
|
unmatched_tags: Vec<String>,
|
||||||
|
camera_make: Option<String>,
|
||||||
|
camera_model: Option<String>,
|
||||||
|
lens_model: Option<String>,
|
||||||
|
date_from: Option<i64>,
|
||||||
|
date_to: Option<i64>,
|
||||||
|
media_type: Option<String>,
|
||||||
|
place: Option<ResolvedPlace>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
struct UnifiedResponse {
|
||||||
|
query: String,
|
||||||
|
interpreted: Interpreted,
|
||||||
|
/// CLIP model version used for ranking; `None` when the query had no
|
||||||
|
/// semantic term (filters-only).
|
||||||
|
model_version: Option<String>,
|
||||||
|
/// Embeddings scored by CLIP (0 when filters-only).
|
||||||
|
considered: usize,
|
||||||
|
/// Matches before pagination.
|
||||||
|
total_matching: usize,
|
||||||
|
offset: usize,
|
||||||
|
results: Vec<SearchHit>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
struct ErrorBody {
|
||||||
|
error: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bad_request(msg: impl Into<String>) -> HttpResponse {
|
||||||
|
HttpResponse::BadRequest().json(ErrorBody { error: msg.into() })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Combine the model's semantic term with any tag words that didn't match the
|
||||||
|
/// vocab, so a hallucinated/non-vocab tag becomes a soft semantic signal
|
||||||
|
/// rather than being dropped.
|
||||||
|
fn effective_semantic(sq: &StructuredQuery) -> Option<String> {
|
||||||
|
let mut parts: Vec<String> = Vec::new();
|
||||||
|
if let Some(s) = sq.semantic.as_deref() {
|
||||||
|
parts.push(s.to_string());
|
||||||
|
}
|
||||||
|
parts.extend(sq.unmatched_tags.iter().cloned());
|
||||||
|
if parts.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(parts.join(" "))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn unified_search<TagD: TagDao>(
|
||||||
|
_: Claims,
|
||||||
|
state: Data<AppState>,
|
||||||
|
exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
|
||||||
|
tag_dao: Data<Mutex<TagD>>,
|
||||||
|
query: Query<UnifiedQuery>,
|
||||||
|
) -> HttpResponse {
|
||||||
|
let nl = query.q.trim().to_string();
|
||||||
|
if nl.is_empty() {
|
||||||
|
return bad_request("query parameter `q` is required");
|
||||||
|
}
|
||||||
|
|
||||||
|
let limit = query.limit.clamp(1, 200);
|
||||||
|
let offset = query.offset;
|
||||||
|
let threshold = query.threshold.clamp(-1.0, 1.0);
|
||||||
|
|
||||||
|
let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
|
||||||
|
Ok(ids) => ids,
|
||||||
|
Err(msg) => return bad_request(msg),
|
||||||
|
};
|
||||||
|
|
||||||
|
let ctx = opentelemetry::Context::current();
|
||||||
|
|
||||||
|
// ── 1. Translate the NL query, grounded on the real tag vocabulary ──
|
||||||
|
let tag_vocab: Vec<(i32, String)> = {
|
||||||
|
let mut dao = tag_dao.lock().expect("tag dao");
|
||||||
|
match dao.get_all_tags(&ctx, None) {
|
||||||
|
Ok(tags) => tags.into_iter().map(|(_, t)| (t.id, t.name)).collect(),
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("unified_search: get_all_tags failed: {e:?}");
|
||||||
|
Vec::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Respect env/config for the LLM backend (LLM_BACKEND → ollama or
|
||||||
|
// llama-swap); local only, no hybrid, per the feature's design.
|
||||||
|
//
|
||||||
|
// Translation-model precedence:
|
||||||
|
// 1. UNIFIED_SEARCH_MODEL env — pin a small, fast model that can stay
|
||||||
|
// co-resident with CLIP (and the chat model) so translation never
|
||||||
|
// evicts them. This is the recommended setup on a tight VRAM budget.
|
||||||
|
// 2. the client-selected model — routes translation to whatever the user
|
||||||
|
// already has loaded (no swap) when no dedicated model is pinned.
|
||||||
|
// 3. None → resolve_backend uses the configured default local model.
|
||||||
|
let translation_model = std::env::var("UNIFIED_SEARCH_MODEL")
|
||||||
|
.ok()
|
||||||
|
.filter(|m| !m.trim().is_empty())
|
||||||
|
.or_else(|| query.model.clone())
|
||||||
|
.filter(|m| !m.trim().is_empty());
|
||||||
|
let overrides = SamplingOverrides {
|
||||||
|
model: translation_model,
|
||||||
|
num_ctx: None,
|
||||||
|
temperature: None,
|
||||||
|
top_p: None,
|
||||||
|
top_k: None,
|
||||||
|
min_p: None,
|
||||||
|
enable_thinking: None,
|
||||||
|
};
|
||||||
|
let backend = match state
|
||||||
|
.insight_generator
|
||||||
|
.resolve_backend(BackendKind::Local, &overrides)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(b) => b,
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("unified_search: resolve_backend failed: {e:?}");
|
||||||
|
return HttpResponse::ServiceUnavailable().json(ErrorBody {
|
||||||
|
error: "LLM backend unavailable".into(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
log::info!("unified_search: translating with model={}", backend.model());
|
||||||
|
|
||||||
|
let today = chrono::Utc::now().date_naive();
|
||||||
|
let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await {
|
||||||
|
Ok(sq) => sq,
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("unified_search: translate_nl_query failed: {e:?}");
|
||||||
|
return HttpResponse::BadGateway().json(ErrorBody {
|
||||||
|
error: "could not interpret the query".into(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// ── 2. Forward-geocode the place name into a gps circle ──
|
||||||
|
let resolved_place = match sq.place.as_deref() {
|
||||||
|
Some(p) => forward_geocode(p).await.map(|g| ResolvedPlace {
|
||||||
|
display_name: g.display_name,
|
||||||
|
lat: g.lat,
|
||||||
|
lon: g.lon,
|
||||||
|
radius_km: g.radius_km,
|
||||||
|
}),
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
let gps = resolved_place.as_ref().map(|p| (p.lat, p.lon, p.radius_km));
|
||||||
|
|
||||||
|
let semantic = effective_semantic(&sq);
|
||||||
|
|
||||||
|
let has_exif_filter = sq.camera_make.is_some()
|
||||||
|
|| sq.camera_model.is_some()
|
||||||
|
|| sq.lens_model.is_some()
|
||||||
|
|| sq.date_from.is_some()
|
||||||
|
|| sq.date_to.is_some();
|
||||||
|
let has_struct =
|
||||||
|
has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some();
|
||||||
|
|
||||||
|
// Stage trace: what the model extracted + whether a structured filter is
|
||||||
|
// active. The chips show this to the user too, but logging it makes the
|
||||||
|
// "why no results" path debuggable from the server side.
|
||||||
|
log::info!(
|
||||||
|
"unified_search: q={nl:?} semantic={:?} tag_ids={:?} exclude={:?} place={:?} gps={:?} date=({:?},{:?}) media={:?} unmatched={:?} has_struct={has_struct}",
|
||||||
|
sq.semantic,
|
||||||
|
sq.tag_ids,
|
||||||
|
sq.exclude_tag_ids,
|
||||||
|
resolved_place.as_ref().map(|p| p.display_name.as_str()),
|
||||||
|
gps,
|
||||||
|
sq.date_from,
|
||||||
|
sq.date_to,
|
||||||
|
sq.media_type,
|
||||||
|
sq.unmatched_tags,
|
||||||
|
);
|
||||||
|
|
||||||
|
// ── 3. Build the structured candidate set (EXIF rows passing every
|
||||||
|
// filter). Skipped entirely for a pure-semantic query. ──
|
||||||
|
let mut candidate: Vec<crate::database::models::ImageExif> = Vec::new();
|
||||||
|
let mut allowed_hashes: HashSet<String> = HashSet::new();
|
||||||
|
if has_struct {
|
||||||
|
// Tag membership set (rel_path only — same cross-library imprecision
|
||||||
|
// as the existing /photos tag listing). ANY-mode: a photo matches if
|
||||||
|
// it carries any of the named tags. ALL-mode over-constrains NL
|
||||||
|
// queries (the model maps several words to tags and few photos carry
|
||||||
|
// them all); the semantic term does the precision work instead.
|
||||||
|
let tag_set: Option<HashSet<String>> = if sq.tag_ids.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let mut dao = tag_dao.lock().expect("tag dao");
|
||||||
|
match dao.get_files_with_any_tag_ids(
|
||||||
|
sq.tag_ids.clone(),
|
||||||
|
sq.exclude_tag_ids.clone(),
|
||||||
|
&ctx,
|
||||||
|
) {
|
||||||
|
Ok(files) => Some(files.into_iter().map(|f| f.file_name).collect()),
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("unified_search: tag filter failed: {e:?}");
|
||||||
|
Some(HashSet::new())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
log::info!(
|
||||||
|
"unified_search: tag_ids={:?} -> tag_set_files={:?}",
|
||||||
|
sq.tag_ids,
|
||||||
|
tag_set.as_ref().map(|s| s.len())
|
||||||
|
);
|
||||||
|
|
||||||
|
// EXIF query handles camera/lens/gps-box/date. With no EXIF filters
|
||||||
|
// it returns the whole table, which we then narrow by the predicates
|
||||||
|
// below (tags / media / scope). Fine at personal-library scale.
|
||||||
|
let gps_bounds = gps.map(|(lat, lon, r)| gps_bounding_box(lat, lon, r));
|
||||||
|
let rows = {
|
||||||
|
let mut dao = exif_dao.lock().expect("exif dao");
|
||||||
|
dao.query_by_exif(
|
||||||
|
&ctx,
|
||||||
|
None, // scope filtered in-Rust to support multi-library
|
||||||
|
sq.camera_make.as_deref(),
|
||||||
|
sq.camera_model.as_deref(),
|
||||||
|
sq.lens_model.as_deref(),
|
||||||
|
gps_bounds,
|
||||||
|
sq.date_from,
|
||||||
|
sq.date_to,
|
||||||
|
)
|
||||||
|
.unwrap_or_else(|e| {
|
||||||
|
log::warn!("unified_search: query_by_exif failed: {e:?}");
|
||||||
|
Vec::new()
|
||||||
|
})
|
||||||
|
};
|
||||||
|
|
||||||
|
candidate = rows
|
||||||
|
.into_iter()
|
||||||
|
.filter(|row| {
|
||||||
|
// Library scope.
|
||||||
|
if !library_ids.is_empty() && !library_ids.contains(&row.library_id) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Precise GPS distance (the EXIF query only did a coarse box).
|
||||||
|
if let Some((lat, lon, radius_km)) = gps {
|
||||||
|
match (row.gps_latitude, row.gps_longitude) {
|
||||||
|
(Some(plat), Some(plon)) => {
|
||||||
|
if haversine_distance(lat, lon, plat as f64, plon as f64) > radius_km {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => return false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Media type.
|
||||||
|
if let Some(mt) = sq.media_type.as_deref() {
|
||||||
|
let p = Path::new(&row.file_path);
|
||||||
|
let ok = if mt == "video" {
|
||||||
|
is_video_file(p)
|
||||||
|
} else {
|
||||||
|
is_image_file(p)
|
||||||
|
};
|
||||||
|
if !ok {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Tag membership.
|
||||||
|
if let Some(ts) = &tag_set
|
||||||
|
&& !ts.contains(&row.file_path)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
true
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
allowed_hashes = candidate
|
||||||
|
.iter()
|
||||||
|
.filter_map(|r| r.content_hash.clone())
|
||||||
|
.collect();
|
||||||
|
log::info!(
|
||||||
|
"unified_search: candidate_rows={} allowed_hashes={}",
|
||||||
|
candidate.len(),
|
||||||
|
allowed_hashes.len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── 4. Rank ──
|
||||||
|
match semantic {
|
||||||
|
Some(ref sem) => {
|
||||||
|
// When structured filters are present they ARE the constraint —
|
||||||
|
// CLIP only ranks within the candidate set. So drop the global
|
||||||
|
// similarity threshold (it's tuned for whole-library search and
|
||||||
|
// would pre-discard filter-matching photos that scored just under
|
||||||
|
// it — e.g. a 2022 beach photo at 0.18 — before the intersection
|
||||||
|
// ever runs). With no filters, keep the user's threshold for the
|
||||||
|
// plain semantic case.
|
||||||
|
let clip_threshold = if has_struct { -1.0 } else { threshold };
|
||||||
|
let scored = match score_photos(
|
||||||
|
&state,
|
||||||
|
&exif_dao,
|
||||||
|
sem,
|
||||||
|
&library_ids,
|
||||||
|
clip_threshold,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(e) => return score_error_response(e),
|
||||||
|
};
|
||||||
|
let considered = scored.considered;
|
||||||
|
let clip_hits = scored.hits.len();
|
||||||
|
let hits: Vec<(f32, String)> = if has_struct {
|
||||||
|
scored
|
||||||
|
.hits
|
||||||
|
.into_iter()
|
||||||
|
.filter(|(_, h)| allowed_hashes.contains(h))
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
scored.hits
|
||||||
|
};
|
||||||
|
log::info!(
|
||||||
|
"unified_search: clip considered={considered} hits={clip_hits} after_struct_filter={}",
|
||||||
|
hits.len()
|
||||||
|
);
|
||||||
|
let total_matching = hits.len();
|
||||||
|
let page = paginate(&hits, offset, limit);
|
||||||
|
let results = resolve_hits(&exif_dao, &page);
|
||||||
|
HttpResponse::Ok().json(UnifiedResponse {
|
||||||
|
query: nl,
|
||||||
|
interpreted: interpreted(&sq, resolved_place),
|
||||||
|
model_version: Some(scored.model_version),
|
||||||
|
considered: scored.considered,
|
||||||
|
total_matching,
|
||||||
|
offset,
|
||||||
|
results,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
// Filters-only: no semantic term. Require at least one filter,
|
||||||
|
// then return the candidate set newest-first.
|
||||||
|
if !has_struct {
|
||||||
|
return bad_request("query had no searchable terms");
|
||||||
|
}
|
||||||
|
candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken));
|
||||||
|
let total_matching = candidate.len();
|
||||||
|
log::info!("unified_search: filters-only matches={total_matching}");
|
||||||
|
let end = (offset + limit).min(total_matching);
|
||||||
|
let results: Vec<SearchHit> = if offset >= total_matching {
|
||||||
|
Vec::new()
|
||||||
|
} else {
|
||||||
|
candidate[offset..end]
|
||||||
|
.iter()
|
||||||
|
.map(|r| SearchHit {
|
||||||
|
library_id: r.library_id,
|
||||||
|
rel_path: r.file_path.clone(),
|
||||||
|
content_hash: r.content_hash.clone().unwrap_or_default(),
|
||||||
|
score: 0.0,
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
HttpResponse::Ok().json(UnifiedResponse {
|
||||||
|
query: nl,
|
||||||
|
interpreted: interpreted(&sq, resolved_place),
|
||||||
|
model_version: None,
|
||||||
|
considered: 0,
|
||||||
|
total_matching,
|
||||||
|
offset,
|
||||||
|
results,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the page `[offset, offset + limit)` of an already-sorted hit list
/// as an owned `Vec`.
///
/// Out-of-range requests are tolerated: an `offset` at or past the end yields
/// an empty page, and a `limit` that runs past the end is truncated. The page
/// is taken with `skip`/`take` rather than by computing `offset + limit`, so
/// no combination of arguments can overflow.
fn paginate(hits: &[(f32, String)], offset: usize, limit: usize) -> Vec<(f32, String)> {
    hits.iter().skip(offset).take(limit).cloned().collect()
}
|
||||||
|
|
||||||
|
fn interpreted(sq: &StructuredQuery, place: Option<ResolvedPlace>) -> Interpreted {
|
||||||
|
Interpreted {
|
||||||
|
semantic: sq.semantic.clone(),
|
||||||
|
tag_ids: sq.tag_ids.clone(),
|
||||||
|
exclude_tag_ids: sq.exclude_tag_ids.clone(),
|
||||||
|
unmatched_tags: sq.unmatched_tags.clone(),
|
||||||
|
camera_make: sq.camera_make.clone(),
|
||||||
|
camera_model: sq.camera_model.clone(),
|
||||||
|
lens_model: sq.lens_model.clone(),
|
||||||
|
date_from: sq.date_from,
|
||||||
|
date_to: sq.date_to,
|
||||||
|
media_type: sq.media_type.clone(),
|
||||||
|
place,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ai::nl_query::StructuredQuery;

    /// Build a query that carries only a semantic term and unmatched tag
    /// words; every other field keeps its default.
    fn query_with(semantic: Option<&str>, unmatched: &[&str]) -> StructuredQuery {
        StructuredQuery {
            semantic: semantic.map(Into::into),
            unmatched_tags: unmatched.iter().map(|&word| word.into()).collect(),
            ..Default::default()
        }
    }

    #[test]
    fn effective_semantic_combines_semantic_and_unmatched() {
        let combined = effective_semantic(&query_with(Some("sunset"), &["golden hour"]));
        assert_eq!(combined.as_deref(), Some("sunset golden hour"));
    }

    #[test]
    fn effective_semantic_none_when_empty() {
        assert_eq!(effective_semantic(&StructuredQuery::default()), None);
    }

    #[test]
    fn effective_semantic_unmatched_only() {
        let combined = effective_semantic(&query_with(None, &["disco"]));
        assert_eq!(combined.as_deref(), Some("disco"));
    }

    #[test]
    fn paginate_handles_out_of_range_offset() {
        let hits = vec![(0.9, "a".to_string()), (0.8, "b".to_string())];
        // (offset, limit, expected page length)
        for (offset, limit, expected_len) in [(5, 10, 0), (0, 1, 1), (1, 10, 1)] {
            assert_eq!(paginate(&hits, offset, limit).len(), expected_len);
        }
    }
}
|
||||||
+1
-1
@@ -231,7 +231,7 @@ impl Ffmpeg {
|
|||||||
/// a hard failure — previously the `parse::<f64>` on empty stdout produced
|
/// a hard failure — previously the `parse::<f64>` on empty stdout produced
|
||||||
/// "cannot parse float from empty string" and poisoned the preview-clip row
|
/// "cannot parse float from empty string" and poisoned the preview-clip row
|
||||||
/// with status=failed, which the watcher would re-queue every full scan.
|
/// with status=failed, which the watcher would re-queue every full scan.
|
||||||
async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
|
pub async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
|
||||||
if let Some(d) = probe_duration(input_file, "format=duration").await? {
|
if let Some(d) = probe_duration(input_file, "format=duration").await? {
|
||||||
return Ok(Some(d));
|
return Ok(Some(d));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user