//! `/photos/search/unified?q=` — unified NL photo search. //! //! One free-text box that composes the two existing engines instead of making //! the user pick between them: //! 1. A grounded local-LLM call ([`crate::ai::nl_query`]) translates the //! query into a structured filter + a semantic term. //! 2. Structured filters (tags / EXIF / geo / date / media-type) define the //! candidate set; the semantic term ranks within it via CLIP. //! //! Path A (orchestration): we reuse `clip_search`'s scoring core and the //! existing `ExifDao` / `TagDao` queries, joining on `content_hash`. EXIF rows //! are the universal candidate carrier — each has `(library_id, file_path, //! content_hash, date_taken)` — so the structured filter is just a predicate //! over them, and the CLIP hits (which key on `content_hash`) intersect by //! hash. No new schema, no surgery on `list_photos`. //! //! Degenerate cases collapse to the existing behavior: semantic-only → plain //! CLIP search; filters-only → a date-sorted filtered listing. //! //! Person filtering is intentionally deferred (no person→photos resolver yet). use crate::AppState; use crate::ai::backend::{BackendKind, SamplingOverrides}; use crate::ai::nl_query::{StructuredQuery, translate_nl_query}; use crate::clip_search::{ SearchHit, parse_library_scope, resolve_hits, score_error_response, score_photos, }; use crate::data::Claims; use crate::database::ExifDao; use crate::file_types::{is_image_file, is_video_file}; use crate::geo::{forward_geocode, gps_bounding_box, haversine_distance}; use crate::tags::TagDao; use actix_web::HttpResponse; use actix_web::web::{Data, Query}; use serde::{Deserialize, Serialize}; use std::collections::HashSet; use std::path::Path; use std::sync::Mutex; #[derive(Debug, Deserialize)] pub struct UnifiedQuery { /// Natural-language query. Required; empty triggers 400. pub q: String, #[serde(default = "default_limit")] pub limit: usize, #[serde(default)] pub offset: usize, /// CLIP cosine floor for the semantic ranking stage. Same default as the /// plain search endpoint. #[serde(default = "default_threshold")] pub threshold: f32, /// Legacy single-library scope (see clip_search). pub library: Option, /// Multi-library scope, comma-separated ids. pub library_ids: Option, /// Optional model override. The client passes the user's currently-selected /// local model so the translation step reuses a model that's already loaded /// (avoids a llama-swap eviction / cold start). Falls back to the configured /// default local model when absent. Local only — no hybrid here. pub model: Option, } fn default_limit() -> usize { 20 } fn default_threshold() -> f32 { 0.20 } /// A geocoded place echoed back so the client can show / edit the location /// filter it actually searched. #[derive(Debug, Serialize)] struct ResolvedPlace { display_name: String, lat: f64, lon: f64, radius_km: f64, } /// How the server interpreted the NL query — echoed to the client to render /// editable filter chips. tag ids map to the client's existing tag list. #[derive(Debug, Serialize)] struct Interpreted { semantic: Option, tag_ids: Vec, exclude_tag_ids: Vec, /// Words the model treated as tags that don't exist in the vocab; folded /// into the semantic term and surfaced here so the UI can explain it. unmatched_tags: Vec, camera_make: Option, camera_model: Option, lens_model: Option, date_from: Option, date_to: Option, media_type: Option, place: Option, } #[derive(Debug, Serialize)] struct UnifiedResponse { query: String, interpreted: Interpreted, /// CLIP model version used for ranking; `None` when the query had no /// semantic term (filters-only). model_version: Option, /// Embeddings scored by CLIP (0 when filters-only). considered: usize, /// Matches before pagination. total_matching: usize, offset: usize, results: Vec, } #[derive(Debug, Serialize)] struct ErrorBody { error: String, } fn bad_request(msg: impl Into) -> HttpResponse { HttpResponse::BadRequest().json(ErrorBody { error: msg.into() }) } /// Combine the model's semantic term with any tag words that didn't match the /// vocab, so a hallucinated/non-vocab tag becomes a soft semantic signal /// rather than being dropped. fn effective_semantic(sq: &StructuredQuery) -> Option { let mut parts: Vec = Vec::new(); if let Some(s) = sq.semantic.as_deref() { parts.push(s.to_string()); } parts.extend(sq.unmatched_tags.iter().cloned()); if parts.is_empty() { None } else { Some(parts.join(" ")) } } pub async fn unified_search( _: Claims, state: Data, exif_dao: Data>>, tag_dao: Data>, query: Query, ) -> HttpResponse { let nl = query.q.trim().to_string(); if nl.is_empty() { return bad_request("query parameter `q` is required"); } let limit = query.limit.clamp(1, 200); let offset = query.offset; let threshold = query.threshold.clamp(-1.0, 1.0); let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) { Ok(ids) => ids, Err(msg) => return bad_request(msg), }; let ctx = opentelemetry::Context::current(); // ── 1. Translate the NL query, grounded on the real tag vocabulary ── let tag_vocab: Vec<(i32, String)> = { let mut dao = tag_dao.lock().expect("tag dao"); match dao.get_all_tags(&ctx, None) { Ok(tags) => tags.into_iter().map(|(_, t)| (t.id, t.name)).collect(), Err(e) => { log::warn!("unified_search: get_all_tags failed: {e:?}"); Vec::new() } } }; // Respect env/config for the LLM backend (LLM_BACKEND → ollama or // llama-swap); local only, no hybrid, per the feature's design. The // client-supplied model (the user's current selection) routes translation // to an already-loaded model when possible; otherwise resolve_backend // falls back to the configured default. let overrides = SamplingOverrides { model: query.model.clone().filter(|m| !m.is_empty()), num_ctx: None, temperature: None, top_p: None, top_k: None, min_p: None, }; let backend = match state .insight_generator .resolve_backend(BackendKind::Local, &overrides) .await { Ok(b) => b, Err(e) => { log::warn!("unified_search: resolve_backend failed: {e:?}"); return HttpResponse::ServiceUnavailable().json(ErrorBody { error: "LLM backend unavailable".into(), }); } }; let today = chrono::Utc::now().date_naive(); let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await { Ok(sq) => sq, Err(e) => { log::warn!("unified_search: translate_nl_query failed: {e:?}"); return HttpResponse::BadGateway().json(ErrorBody { error: "could not interpret the query".into(), }); } }; // ── 2. Forward-geocode the place name into a gps circle ── let resolved_place = match sq.place.as_deref() { Some(p) => forward_geocode(p).await.map(|g| ResolvedPlace { display_name: g.display_name, lat: g.lat, lon: g.lon, radius_km: g.radius_km, }), None => None, }; let gps = resolved_place.as_ref().map(|p| (p.lat, p.lon, p.radius_km)); let semantic = effective_semantic(&sq); let has_exif_filter = sq.camera_make.is_some() || sq.camera_model.is_some() || sq.lens_model.is_some() || sq.date_from.is_some() || sq.date_to.is_some(); let has_struct = has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some(); // Stage trace: what the model extracted + whether a structured filter is // active. The chips show this to the user too, but logging it makes the // "why no results" path debuggable from the server side. log::info!( "unified_search: q={nl:?} semantic={:?} tag_ids={:?} exclude={:?} place={:?} gps={:?} date=({:?},{:?}) media={:?} unmatched={:?} has_struct={has_struct}", sq.semantic, sq.tag_ids, sq.exclude_tag_ids, resolved_place.as_ref().map(|p| p.display_name.as_str()), gps, sq.date_from, sq.date_to, sq.media_type, sq.unmatched_tags, ); // ── 3. Build the structured candidate set (EXIF rows passing every // filter). Skipped entirely for a pure-semantic query. ── let mut candidate: Vec = Vec::new(); let mut allowed_hashes: HashSet = HashSet::new(); if has_struct { // Tag membership set (rel_path only — same cross-library imprecision // as the existing /photos tag listing). ALL-mode: the photo must // carry every named tag. let tag_set: Option> = if sq.tag_ids.is_empty() { None } else { let mut dao = tag_dao.lock().expect("tag dao"); match dao.get_files_with_all_tag_ids( sq.tag_ids.clone(), sq.exclude_tag_ids.clone(), &ctx, ) { Ok(files) => Some(files.into_iter().map(|f| f.file_name).collect()), Err(e) => { log::warn!("unified_search: tag filter failed: {e:?}"); Some(HashSet::new()) } } }; log::info!( "unified_search: tag_ids={:?} -> tag_set_files={:?}", sq.tag_ids, tag_set.as_ref().map(|s| s.len()) ); // EXIF query handles camera/lens/gps-box/date. With no EXIF filters // it returns the whole table, which we then narrow by the predicates // below (tags / media / scope). Fine at personal-library scale. let gps_bounds = gps.map(|(lat, lon, r)| gps_bounding_box(lat, lon, r)); let rows = { let mut dao = exif_dao.lock().expect("exif dao"); dao.query_by_exif( &ctx, None, // scope filtered in-Rust to support multi-library sq.camera_make.as_deref(), sq.camera_model.as_deref(), sq.lens_model.as_deref(), gps_bounds, sq.date_from, sq.date_to, ) .unwrap_or_else(|e| { log::warn!("unified_search: query_by_exif failed: {e:?}"); Vec::new() }) }; candidate = rows .into_iter() .filter(|row| { // Library scope. if !library_ids.is_empty() && !library_ids.contains(&row.library_id) { return false; } // Precise GPS distance (the EXIF query only did a coarse box). if let Some((lat, lon, radius_km)) = gps { match (row.gps_latitude, row.gps_longitude) { (Some(plat), Some(plon)) => { if haversine_distance(lat, lon, plat as f64, plon as f64) > radius_km { return false; } } _ => return false, } } // Media type. if let Some(mt) = sq.media_type.as_deref() { let p = Path::new(&row.file_path); let ok = if mt == "video" { is_video_file(p) } else { is_image_file(p) }; if !ok { return false; } } // Tag membership. if let Some(ts) = &tag_set && !ts.contains(&row.file_path) { return false; } true }) .collect(); allowed_hashes = candidate .iter() .filter_map(|r| r.content_hash.clone()) .collect(); log::info!( "unified_search: candidate_rows={} allowed_hashes={}", candidate.len(), allowed_hashes.len() ); } // ── 4. Rank ── match semantic { Some(ref sem) => { // Semantic term present: CLIP-rank, then keep only hits that pass // the structured filters (by content_hash). let scored = match score_photos(&state, &exif_dao, sem, &library_ids, threshold, None).await { Ok(s) => s, Err(e) => return score_error_response(e), }; let considered = scored.considered; let clip_hits = scored.hits.len(); let hits: Vec<(f32, String)> = if has_struct { scored .hits .into_iter() .filter(|(_, h)| allowed_hashes.contains(h)) .collect() } else { scored.hits }; log::info!( "unified_search: clip considered={considered} hits={clip_hits} after_struct_filter={}", hits.len() ); let total_matching = hits.len(); let page = paginate(&hits, offset, limit); let results = resolve_hits(&exif_dao, &page); HttpResponse::Ok().json(UnifiedResponse { query: nl, interpreted: interpreted(&sq, resolved_place), model_version: Some(scored.model_version), considered: scored.considered, total_matching, offset, results, }) } None => { // Filters-only: no semantic term. Require at least one filter, // then return the candidate set newest-first. if !has_struct { return bad_request("query had no searchable terms"); } candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken)); let total_matching = candidate.len(); log::info!("unified_search: filters-only matches={total_matching}"); let end = (offset + limit).min(total_matching); let results: Vec = if offset >= total_matching { Vec::new() } else { candidate[offset..end] .iter() .map(|r| SearchHit { library_id: r.library_id, rel_path: r.file_path.clone(), content_hash: r.content_hash.clone().unwrap_or_default(), score: 0.0, }) .collect() }; HttpResponse::Ok().json(UnifiedResponse { query: nl, interpreted: interpreted(&sq, resolved_place), model_version: None, considered: 0, total_matching, offset, results, }) } } } /// Slice a sorted hit list at `[offset, offset+limit)`, tolerating /// out-of-range offsets (empty page). fn paginate(hits: &[(f32, String)], offset: usize, limit: usize) -> Vec<(f32, String)> { if offset >= hits.len() { return Vec::new(); } let end = (offset + limit).min(hits.len()); hits[offset..end].to_vec() } fn interpreted(sq: &StructuredQuery, place: Option) -> Interpreted { Interpreted { semantic: sq.semantic.clone(), tag_ids: sq.tag_ids.clone(), exclude_tag_ids: sq.exclude_tag_ids.clone(), unmatched_tags: sq.unmatched_tags.clone(), camera_make: sq.camera_make.clone(), camera_model: sq.camera_model.clone(), lens_model: sq.lens_model.clone(), date_from: sq.date_from, date_to: sq.date_to, media_type: sq.media_type.clone(), place, } } #[cfg(test)] mod tests { use super::*; use crate::ai::nl_query::StructuredQuery; #[test] fn effective_semantic_combines_semantic_and_unmatched() { let sq = StructuredQuery { semantic: Some("sunset".into()), unmatched_tags: vec!["golden hour".into()], ..Default::default() }; assert_eq!( effective_semantic(&sq).as_deref(), Some("sunset golden hour") ); } #[test] fn effective_semantic_none_when_empty() { let sq = StructuredQuery::default(); assert_eq!(effective_semantic(&sq), None); } #[test] fn effective_semantic_unmatched_only() { let sq = StructuredQuery { unmatched_tags: vec!["disco".into()], ..Default::default() }; assert_eq!(effective_semantic(&sq).as_deref(), Some("disco")); } #[test] fn paginate_handles_out_of_range_offset() { let hits = vec![(0.9, "a".to_string()), (0.8, "b".to_string())]; assert_eq!(paginate(&hits, 5, 10).len(), 0); assert_eq!(paginate(&hits, 0, 1).len(), 1); assert_eq!(paginate(&hits, 1, 10).len(), 1); } }