//! `/photos/search?q=` — CLIP semantic photo search. //! //! The route lives outside `files.rs` to keep that 1500+ line module //! focused on EXIF / tag listing. The flow is: //! //! 1. Parse query params (`q`, `limit`, `threshold`, optional `library`). //! 2. Call Apollo's `/api/internal/clip/encode_text` to get the query //! vector (L2-normalized 768-d f32 for ViT-L/14). //! 3. Load every `(content_hash, clip_embedding)` for the scope from //! `image_exif` via `ExifDao::list_clip_index`. ~28–43 MB for a 14k //! library at ViT-L/14; loaded fresh per request — fast enough for //! v1, optimize via an AppState cache later if needed. //! 4. Dot product (= cosine since both sides are L2-normalized), filter //! above `threshold`, top-K by score. //! 5. Resolve each surviving hash back to a `(library_id, rel_path)` so //! the frontend can render the photo / hand off to the carousel. //! //! Response shape is intentionally minimal — paths + score — so the //! frontend can reuse existing PhotoGrid rendering by joining against //! `/api/photos/match` (or calling `/image/metadata` lazily). Don't //! bake camera/EXIF metadata into this route; it would force a fan-out //! per result and balloon the response. use crate::AppState; use crate::ai::clip_client::ClipError; use crate::database::ExifDao; use actix_web::{HttpResponse, Result as ActixResult, web}; use base64::Engine; use serde::{Deserialize, Serialize}; use std::sync::Mutex; #[derive(Debug, Deserialize)] pub struct SearchQuery { /// Natural-language query. Required; empty triggers 400. pub q: String, /// Max results to return in this page. Capped to 200 server-side. /// Defaults to 20. Pair with `offset` for pagination. #[serde(default = "default_limit")] pub limit: usize, /// Zero-based offset into the sorted-and-filtered result set. The /// scoring loop still runs over the full embedding matrix on every /// page (cheap at personal-library scale — sub-100ms — and avoids /// stateful pagination cursors). Defaults to 0. #[serde(default)] pub offset: usize, /// Cosine-similarity floor below which results are dropped. /// 0.20 is the rough "this is plausibly relevant" line for OpenAI /// CLIP; tunable per call when sweeping. Defaults to 0.20. #[serde(default = "default_threshold")] pub threshold: f32, /// Optional single-library scope. Legacy param — new clients pass /// `library_ids` instead so multi-select scopes (Apollo's HUD library /// chips, FileViewer-React's library picker) actually filter. Kept /// for back-compat; `library_ids` wins when both are supplied. pub library: Option, /// Optional multi-library scope, comma-separated id list /// (`?library_ids=1,3`). Empty / omitted = every enabled library /// (the historical default). Apollo and FileViewer-React both send /// this when 2+ libraries are selected; the single-library case /// works through either param interchangeably. pub library_ids: Option, /// Optional model-version filter. Defaults to the live engine's /// version (queried lazily). Forces a strict join so mid-flight /// model swaps can't mix geometries in a single response. #[serde(default)] pub model_version: Option, } fn default_limit() -> usize { 20 } fn default_threshold() -> f32 { 0.20 } #[derive(Debug, Serialize)] pub struct SearchHit { pub library_id: i32, pub rel_path: String, pub content_hash: String, /// Cosine similarity in [-1, 1]. In practice OpenAI CLIP returns /// 0.10–0.40 for the typical photo library. pub score: f32, } #[derive(Debug, Serialize)] pub struct SearchResponse { pub query: String, pub model_version: String, pub threshold: f32, /// Total embeddings scored (= every photo in scope with a stored /// embedding). Same value across pages of the same query. pub considered: usize, /// Count of results above threshold, before pagination. Lets the /// client decide whether a "Load more" button is meaningful and /// stop fetching when ``offset + results.len() >= total_matching``. pub total_matching: usize, pub offset: usize, pub results: Vec, } #[derive(Debug, Serialize)] struct SearchError { error: String, } /// Decode a stored `clip_embedding` BLOB back into a `Vec`. Returns /// `None` on malformed bytes — those rows get skipped rather than /// failing the whole query. fn decode_embedding(bytes: &[u8]) -> Option> { if bytes.is_empty() || bytes.len() % 4 != 0 { return None; } let mut out = Vec::with_capacity(bytes.len() / 4); for chunk in bytes.chunks_exact(4) { out.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])); } Some(out) } #[inline] fn dot(a: &[f32], b: &[f32]) -> f32 { a.iter().zip(b.iter()).map(|(x, y)| x * y).sum() } pub async fn search_photos( state: web::Data, exif_dao: web::Data>>, query: web::Query, ) -> ActixResult { let q_text = query.q.trim().to_string(); if q_text.is_empty() { return Ok(HttpResponse::BadRequest().json(SearchError { error: "query parameter `q` is required".into(), })); } if !state.clip_client.is_enabled() { return Ok(HttpResponse::ServiceUnavailable().json(SearchError { error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(), })); } let limit = query.limit.clamp(1, 200); let offset = query.offset; let threshold = query.threshold.clamp(-1.0, 1.0); // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms // on CPU. Bail with a clear error message if Apollo's down so the // user sees "service unavailable" rather than empty results. let query_resp = match state.clip_client.encode_text(&q_text).await { Ok(r) => r, Err(ClipError::Permanent(e)) => { return Ok(HttpResponse::BadRequest().json(SearchError { error: format!("query rejected: {e}"), })); } Err(ClipError::Transient(e)) => { return Ok(HttpResponse::BadGateway().json(SearchError { error: format!("CLIP service unavailable: {e}"), })); } Err(ClipError::Disabled) => { return Ok(HttpResponse::ServiceUnavailable().json(SearchError { error: "CLIP service disabled".into(), })); } }; // decode_embedding works on raw bytes; the wire format is b64. let query_bytes = base64::engine::general_purpose::STANDARD .decode(query_resp.embedding.as_bytes()) .unwrap_or_default(); let query_vec = match decode_embedding(&query_bytes) { Some(v) => v, None => { return Ok(HttpResponse::BadGateway().json(SearchError { error: "CLIP service returned a malformed query embedding".into(), })); } }; // 2. Decide which library scope to search. `library_ids` (multi) // wins over the legacy `library` (single) when both are present; // either / both empty falls back to "every enabled library". let library_ids: Vec = if let Some(raw) = query.library_ids.as_deref() { let mut out: Vec = Vec::new(); for piece in raw.split(',') { let trimmed = piece.trim(); if trimmed.is_empty() { continue; } match trimmed.parse::() { Ok(id) => { if !out.contains(&id) { out.push(id); } } Err(_) => { return Ok(HttpResponse::BadRequest().json(SearchError { error: format!("invalid library_ids entry: {trimmed:?}"), })); } } } out } else if let Some(id) = query.library { vec![id] } else { Vec::new() }; // 3. Pull the (hash, embedding) matrix. Lock contention here is // bounded — one big SELECT under a mutex Arc> // and then we release before scoring. If this becomes a hotspot // we'll cache the decoded matrix in AppState with TTL. let ctx = opentelemetry::Context::current(); let rows: Vec<(String, Vec)> = { let mut dao = exif_dao.lock().expect("exif dao"); match dao.list_clip_index( &ctx, &library_ids, query .model_version .as_deref() .or(Some(&query_resp.model_version)), ) { Ok(r) => r, Err(e) => { log::warn!("clip_search: list_clip_index failed: {:?}", e); return Ok(HttpResponse::InternalServerError().json(SearchError { error: "failed to load search index".into(), })); } } }; let considered = rows.len(); if considered == 0 { return Ok(HttpResponse::Ok().json(SearchResponse { query: q_text, model_version: query_resp.model_version, threshold, considered, total_matching: 0, offset, results: Vec::new(), })); } // 4. Score. Cap the loop's transient allocation; we keep all scores // and sort at the end. With ~14k entries the sort is microseconds. let mut scored: Vec<(f32, String)> = Vec::with_capacity(considered); for (hash, blob) in rows { let Some(emb) = decode_embedding(&blob) else { continue; }; if emb.len() != query_vec.len() { continue; } let sim = dot(&emb, &query_vec); if sim < threshold { continue; } scored.push((sim, hash)); } scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); let total_matching = scored.len(); // Pagination — slice the sorted list at `[offset, offset+limit)`. // Offsets past the end produce empty pages rather than an error so // the client can stop fetching naturally on "load more" past the end. let scored: Vec<(f32, String)> = if offset >= total_matching { Vec::new() } else { let end = (offset + limit).min(total_matching); scored[offset..end].to_vec() }; if scored.is_empty() { return Ok(HttpResponse::Ok().json(SearchResponse { query: q_text, model_version: query_resp.model_version, threshold, considered, total_matching, offset, results: Vec::new(), })); } // 5. Resolve each surviving hash back to a `(library_id, rel_path)`. // `get_rel_paths_by_hash` returns every rel_path; we pick the first // one for the result. Apollo / the UI can fetch alternatives via // /image/metadata when needed. let hashes: Vec = scored.iter().map(|(_, h)| h.clone()).collect(); let path_map = { let mut dao = exif_dao.lock().expect("exif dao"); match dao.get_rel_paths_for_hashes(&ctx, &hashes) { Ok(m) => m, Err(e) => { log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e); return Ok(HttpResponse::InternalServerError().json(SearchError { error: "failed to resolve photo paths".into(), })); } } }; // We need (library_id, rel_path) — get_rel_paths_for_hashes only // returns rel_paths. Cross-reference via find_by_content_hash to // pick the library too. Single call per surviving hash; cheap at // top-20. let mut results = Vec::with_capacity(scored.len()); { let mut dao = exif_dao.lock().expect("exif dao"); for (score, hash) in scored { let row = match dao.find_by_content_hash(&ctx, &hash) { Ok(Some(r)) => r, Ok(None) => continue, Err(e) => { log::warn!( "clip_search: find_by_content_hash failed for {}: {:?}", hash, e ); continue; } }; // Prefer get_rel_paths_for_hashes's first entry if it // exists (it shares semantics with `image_exif`'s natural // order), falling back to the ImageExif row. let rel_path = path_map .get(&hash) .and_then(|paths| paths.first().cloned()) .unwrap_or(row.file_path); results.push(SearchHit { library_id: row.library_id, rel_path, content_hash: hash, score, }); } } Ok(HttpResponse::Ok().json(SearchResponse { query: q_text, model_version: query_resp.model_version, threshold, considered, total_matching, offset, results, })) }