Unified NL search Phase 2: /photos/search/unified endpoint
Composes the two existing engines (Path A orchestration): - Translate NL -> StructuredQuery via local LLM, respecting LLM_BACKEND (resolve_backend(Local) -> ollama or llama-swap; no hybrid). - Forward-geocode the place name into a gps circle. - Structured filters (tags/EXIF/geo/date/media) build a candidate set of EXIF rows; CLIP ranks within it, joined by content_hash. Degenerate cases match existing behavior: semantic-only -> plain CLIP; filters-only -> date-sorted. - Echoes the interpreted query (incl. resolved place) for editable client chips. Refactor: extracted reusable cores from clip_search (score_photos, resolve_hits, parse_library_scope, score_error_response) shared by both endpoints. Removed the Phase 1 allow-until-wired attributes now that nl_query + geo are consumed. fmt + clippy clean; 23 backend tests pass (7 geo, 12 nl_query, 4 unified). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,452 @@
|
||||
//! `/photos/search/unified?q=<natural language>` — unified NL photo search.
//!
//! One free-text box that composes the two existing engines instead of making
//! the user pick between them:
//!   1. A grounded local-LLM call ([`crate::ai::nl_query`]) translates the
//!      query into a structured filter + a semantic term.
//!   2. Structured filters (tags / EXIF / geo / date / media-type) define the
//!      candidate set; the semantic term ranks within it via CLIP.
//!
//! Path A (orchestration): we reuse `clip_search`'s scoring core and the
//! existing `ExifDao` / `TagDao` queries, joining on `content_hash`. EXIF rows
//! are the universal candidate carrier — each has `(library_id, file_path,
//! content_hash, date_taken)` — so the structured filter is just a predicate
//! over them, and the CLIP hits (which key on `content_hash`) intersect by
//! hash. No new schema, no surgery on `list_photos`.
//!
//! Degenerate cases collapse to the existing behavior: semantic-only → plain
//! CLIP search; filters-only → a date-sorted filtered listing.
//!
//! Person filtering is intentionally deferred (no person→photos resolver yet).
|
||||
|
||||
use crate::AppState;
|
||||
use crate::ai::backend::{BackendKind, SamplingOverrides};
|
||||
use crate::ai::nl_query::{StructuredQuery, translate_nl_query};
|
||||
use crate::clip_search::{
|
||||
SearchHit, parse_library_scope, resolve_hits, score_error_response, score_photos,
|
||||
};
|
||||
use crate::data::Claims;
|
||||
use crate::database::ExifDao;
|
||||
use crate::file_types::{is_image_file, is_video_file};
|
||||
use crate::geo::{forward_geocode, gps_bounding_box, haversine_distance};
|
||||
use crate::tags::TagDao;
|
||||
use actix_web::HttpResponse;
|
||||
use actix_web::web::{Data, Query};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::sync::Mutex;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct UnifiedQuery {
|
||||
/// Natural-language query. Required; empty triggers 400.
|
||||
pub q: String,
|
||||
#[serde(default = "default_limit")]
|
||||
pub limit: usize,
|
||||
#[serde(default)]
|
||||
pub offset: usize,
|
||||
/// CLIP cosine floor for the semantic ranking stage. Same default as the
|
||||
/// plain search endpoint.
|
||||
#[serde(default = "default_threshold")]
|
||||
pub threshold: f32,
|
||||
/// Legacy single-library scope (see clip_search).
|
||||
pub library: Option<i32>,
|
||||
/// Multi-library scope, comma-separated ids.
|
||||
pub library_ids: Option<String>,
|
||||
}
|
||||
|
||||
/// Serde default for `UnifiedQuery::limit`: the page size used when the
/// client does not send one.
fn default_limit() -> usize {
    const DEFAULT_PAGE_SIZE: usize = 20;
    DEFAULT_PAGE_SIZE
}
|
||||
/// Serde default for `UnifiedQuery::threshold`: the CLIP cosine floor used
/// when the client does not send one.
fn default_threshold() -> f32 {
    const DEFAULT_COSINE_FLOOR: f32 = 0.20;
    DEFAULT_COSINE_FLOOR
}
|
||||
|
||||
/// A geocoded place echoed back so the client can show / edit the location
|
||||
/// filter it actually searched.
|
||||
#[derive(Debug, Serialize)]
|
||||
struct ResolvedPlace {
|
||||
display_name: String,
|
||||
lat: f64,
|
||||
lon: f64,
|
||||
radius_km: f64,
|
||||
}
|
||||
|
||||
/// How the server interpreted the NL query — echoed to the client to render
|
||||
/// editable filter chips. tag ids map to the client's existing tag list.
|
||||
#[derive(Debug, Serialize)]
|
||||
struct Interpreted {
|
||||
semantic: Option<String>,
|
||||
tag_ids: Vec<i32>,
|
||||
exclude_tag_ids: Vec<i32>,
|
||||
/// Words the model treated as tags that don't exist in the vocab; folded
|
||||
/// into the semantic term and surfaced here so the UI can explain it.
|
||||
unmatched_tags: Vec<String>,
|
||||
camera_make: Option<String>,
|
||||
camera_model: Option<String>,
|
||||
lens_model: Option<String>,
|
||||
date_from: Option<i64>,
|
||||
date_to: Option<i64>,
|
||||
media_type: Option<String>,
|
||||
place: Option<ResolvedPlace>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct UnifiedResponse {
|
||||
query: String,
|
||||
interpreted: Interpreted,
|
||||
/// CLIP model version used for ranking; `None` when the query had no
|
||||
/// semantic term (filters-only).
|
||||
model_version: Option<String>,
|
||||
/// Embeddings scored by CLIP (0 when filters-only).
|
||||
considered: usize,
|
||||
/// Matches before pagination.
|
||||
total_matching: usize,
|
||||
offset: usize,
|
||||
results: Vec<SearchHit>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct ErrorBody {
|
||||
error: String,
|
||||
}
|
||||
|
||||
fn bad_request(msg: impl Into<String>) -> HttpResponse {
|
||||
HttpResponse::BadRequest().json(ErrorBody { error: msg.into() })
|
||||
}
|
||||
|
||||
/// Combine the model's semantic term with any tag words that didn't match the
|
||||
/// vocab, so a hallucinated/non-vocab tag becomes a soft semantic signal
|
||||
/// rather than being dropped.
|
||||
fn effective_semantic(sq: &StructuredQuery) -> Option<String> {
|
||||
let mut parts: Vec<String> = Vec::new();
|
||||
if let Some(s) = sq.semantic.as_deref() {
|
||||
parts.push(s.to_string());
|
||||
}
|
||||
parts.extend(sq.unmatched_tags.iter().cloned());
|
||||
if parts.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(parts.join(" "))
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn unified_search<TagD: TagDao>(
|
||||
_: Claims,
|
||||
state: Data<AppState>,
|
||||
exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
|
||||
tag_dao: Data<Mutex<TagD>>,
|
||||
query: Query<UnifiedQuery>,
|
||||
) -> HttpResponse {
|
||||
let nl = query.q.trim().to_string();
|
||||
if nl.is_empty() {
|
||||
return bad_request("query parameter `q` is required");
|
||||
}
|
||||
|
||||
let limit = query.limit.clamp(1, 200);
|
||||
let offset = query.offset;
|
||||
let threshold = query.threshold.clamp(-1.0, 1.0);
|
||||
|
||||
let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
|
||||
Ok(ids) => ids,
|
||||
Err(msg) => return bad_request(msg),
|
||||
};
|
||||
|
||||
let ctx = opentelemetry::Context::current();
|
||||
|
||||
// ── 1. Translate the NL query, grounded on the real tag vocabulary ──
|
||||
let tag_vocab: Vec<(i32, String)> = {
|
||||
let mut dao = tag_dao.lock().expect("tag dao");
|
||||
match dao.get_all_tags(&ctx, None) {
|
||||
Ok(tags) => tags.into_iter().map(|(_, t)| (t.id, t.name)).collect(),
|
||||
Err(e) => {
|
||||
log::warn!("unified_search: get_all_tags failed: {e:?}");
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Respect env/config for the LLM backend (LLM_BACKEND → ollama or
|
||||
// llama-swap); local only, no hybrid, per the feature's design.
|
||||
let overrides = SamplingOverrides {
|
||||
model: None,
|
||||
num_ctx: None,
|
||||
temperature: None,
|
||||
top_p: None,
|
||||
top_k: None,
|
||||
min_p: None,
|
||||
};
|
||||
let backend = match state
|
||||
.insight_generator
|
||||
.resolve_backend(BackendKind::Local, &overrides)
|
||||
.await
|
||||
{
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
log::warn!("unified_search: resolve_backend failed: {e:?}");
|
||||
return HttpResponse::ServiceUnavailable().json(ErrorBody {
|
||||
error: "LLM backend unavailable".into(),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let today = chrono::Utc::now().date_naive();
|
||||
let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await {
|
||||
Ok(sq) => sq,
|
||||
Err(e) => {
|
||||
log::warn!("unified_search: translate_nl_query failed: {e:?}");
|
||||
return HttpResponse::BadGateway().json(ErrorBody {
|
||||
error: "could not interpret the query".into(),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// ── 2. Forward-geocode the place name into a gps circle ──
|
||||
let resolved_place = match sq.place.as_deref() {
|
||||
Some(p) => forward_geocode(p).await.map(|g| ResolvedPlace {
|
||||
display_name: g.display_name,
|
||||
lat: g.lat,
|
||||
lon: g.lon,
|
||||
radius_km: g.radius_km,
|
||||
}),
|
||||
None => None,
|
||||
};
|
||||
let gps = resolved_place.as_ref().map(|p| (p.lat, p.lon, p.radius_km));
|
||||
|
||||
let semantic = effective_semantic(&sq);
|
||||
|
||||
let has_exif_filter = sq.camera_make.is_some()
|
||||
|| sq.camera_model.is_some()
|
||||
|| sq.lens_model.is_some()
|
||||
|| sq.date_from.is_some()
|
||||
|| sq.date_to.is_some();
|
||||
let has_struct =
|
||||
has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some();
|
||||
|
||||
// ── 3. Build the structured candidate set (EXIF rows passing every
|
||||
// filter). Skipped entirely for a pure-semantic query. ──
|
||||
let mut candidate: Vec<crate::database::models::ImageExif> = Vec::new();
|
||||
let mut allowed_hashes: HashSet<String> = HashSet::new();
|
||||
if has_struct {
|
||||
// Tag membership set (rel_path only — same cross-library imprecision
|
||||
// as the existing /photos tag listing). ALL-mode: the photo must
|
||||
// carry every named tag.
|
||||
let tag_set: Option<HashSet<String>> = if sq.tag_ids.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let mut dao = tag_dao.lock().expect("tag dao");
|
||||
match dao.get_files_with_all_tag_ids(
|
||||
sq.tag_ids.clone(),
|
||||
sq.exclude_tag_ids.clone(),
|
||||
&ctx,
|
||||
) {
|
||||
Ok(files) => Some(files.into_iter().map(|f| f.file_name).collect()),
|
||||
Err(e) => {
|
||||
log::warn!("unified_search: tag filter failed: {e:?}");
|
||||
Some(HashSet::new())
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// EXIF query handles camera/lens/gps-box/date. With no EXIF filters
|
||||
// it returns the whole table, which we then narrow by the predicates
|
||||
// below (tags / media / scope). Fine at personal-library scale.
|
||||
let gps_bounds = gps.map(|(lat, lon, r)| gps_bounding_box(lat, lon, r));
|
||||
let rows = {
|
||||
let mut dao = exif_dao.lock().expect("exif dao");
|
||||
dao.query_by_exif(
|
||||
&ctx,
|
||||
None, // scope filtered in-Rust to support multi-library
|
||||
sq.camera_make.as_deref(),
|
||||
sq.camera_model.as_deref(),
|
||||
sq.lens_model.as_deref(),
|
||||
gps_bounds,
|
||||
sq.date_from,
|
||||
sq.date_to,
|
||||
)
|
||||
.unwrap_or_else(|e| {
|
||||
log::warn!("unified_search: query_by_exif failed: {e:?}");
|
||||
Vec::new()
|
||||
})
|
||||
};
|
||||
|
||||
candidate = rows
|
||||
.into_iter()
|
||||
.filter(|row| {
|
||||
// Library scope.
|
||||
if !library_ids.is_empty() && !library_ids.contains(&row.library_id) {
|
||||
return false;
|
||||
}
|
||||
// Precise GPS distance (the EXIF query only did a coarse box).
|
||||
if let Some((lat, lon, radius_km)) = gps {
|
||||
match (row.gps_latitude, row.gps_longitude) {
|
||||
(Some(plat), Some(plon)) => {
|
||||
if haversine_distance(lat, lon, plat as f64, plon as f64) > radius_km {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
_ => return false,
|
||||
}
|
||||
}
|
||||
// Media type.
|
||||
if let Some(mt) = sq.media_type.as_deref() {
|
||||
let p = Path::new(&row.file_path);
|
||||
let ok = if mt == "video" {
|
||||
is_video_file(p)
|
||||
} else {
|
||||
is_image_file(p)
|
||||
};
|
||||
if !ok {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Tag membership.
|
||||
if let Some(ts) = &tag_set
|
||||
&& !ts.contains(&row.file_path)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
true
|
||||
})
|
||||
.collect();
|
||||
|
||||
allowed_hashes = candidate
|
||||
.iter()
|
||||
.filter_map(|r| r.content_hash.clone())
|
||||
.collect();
|
||||
}
|
||||
|
||||
// ── 4. Rank ──
|
||||
match semantic {
|
||||
Some(ref sem) => {
|
||||
// Semantic term present: CLIP-rank, then keep only hits that pass
|
||||
// the structured filters (by content_hash).
|
||||
let scored =
|
||||
match score_photos(&state, &exif_dao, sem, &library_ids, threshold, None).await {
|
||||
Ok(s) => s,
|
||||
Err(e) => return score_error_response(e),
|
||||
};
|
||||
let hits: Vec<(f32, String)> = if has_struct {
|
||||
scored
|
||||
.hits
|
||||
.into_iter()
|
||||
.filter(|(_, h)| allowed_hashes.contains(h))
|
||||
.collect()
|
||||
} else {
|
||||
scored.hits
|
||||
};
|
||||
let total_matching = hits.len();
|
||||
let page = paginate(&hits, offset, limit);
|
||||
let results = resolve_hits(&exif_dao, &page);
|
||||
HttpResponse::Ok().json(UnifiedResponse {
|
||||
query: nl,
|
||||
interpreted: interpreted(&sq, resolved_place),
|
||||
model_version: Some(scored.model_version),
|
||||
considered: scored.considered,
|
||||
total_matching,
|
||||
offset,
|
||||
results,
|
||||
})
|
||||
}
|
||||
None => {
|
||||
// Filters-only: no semantic term. Require at least one filter,
|
||||
// then return the candidate set newest-first.
|
||||
if !has_struct {
|
||||
return bad_request("query had no searchable terms");
|
||||
}
|
||||
candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken));
|
||||
let total_matching = candidate.len();
|
||||
let end = (offset + limit).min(total_matching);
|
||||
let results: Vec<SearchHit> = if offset >= total_matching {
|
||||
Vec::new()
|
||||
} else {
|
||||
candidate[offset..end]
|
||||
.iter()
|
||||
.map(|r| SearchHit {
|
||||
library_id: r.library_id,
|
||||
rel_path: r.file_path.clone(),
|
||||
content_hash: r.content_hash.clone().unwrap_or_default(),
|
||||
score: 0.0,
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
HttpResponse::Ok().json(UnifiedResponse {
|
||||
query: nl,
|
||||
interpreted: interpreted(&sq, resolved_place),
|
||||
model_version: None,
|
||||
considered: 0,
|
||||
total_matching,
|
||||
offset,
|
||||
results,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the page `[offset, offset + limit)` of an already-sorted hit list
/// as an owned `Vec`.
///
/// Out-of-range requests are tolerated: an `offset` at or past the end, or a
/// `limit` of 0, yields an empty page, and a `limit` that runs past the end
/// is truncated. The window is taken with `skip`/`take` rather than by
/// computing `offset + limit`, so no combination of arguments can overflow.
fn paginate(hits: &[(f32, String)], offset: usize, limit: usize) -> Vec<(f32, String)> {
    hits.iter().skip(offset).take(limit).cloned().collect()
}
|
||||
|
||||
fn interpreted(sq: &StructuredQuery, place: Option<ResolvedPlace>) -> Interpreted {
|
||||
Interpreted {
|
||||
semantic: sq.semantic.clone(),
|
||||
tag_ids: sq.tag_ids.clone(),
|
||||
exclude_tag_ids: sq.exclude_tag_ids.clone(),
|
||||
unmatched_tags: sq.unmatched_tags.clone(),
|
||||
camera_make: sq.camera_make.clone(),
|
||||
camera_model: sq.camera_model.clone(),
|
||||
lens_model: sq.lens_model.clone(),
|
||||
date_from: sq.date_from,
|
||||
date_to: sq.date_to,
|
||||
media_type: sq.media_type.clone(),
|
||||
place,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ai::nl_query::StructuredQuery;

    /// The semantic term comes first, followed by the unmatched tag words,
    /// separated by a single space.
    #[test]
    fn effective_semantic_combines_semantic_and_unmatched() {
        let sq = StructuredQuery {
            semantic: Some(String::from("sunset")),
            unmatched_tags: vec![String::from("golden hour")],
            ..Default::default()
        };
        let combined = effective_semantic(&sq);
        assert_eq!(combined, Some(String::from("sunset golden hour")));
    }

    /// With no semantic term and no unmatched tags there is nothing to rank by.
    #[test]
    fn effective_semantic_none_when_empty() {
        assert!(effective_semantic(&StructuredQuery::default()).is_none());
    }

    /// Unmatched tag words alone are enough to form a semantic term.
    #[test]
    fn effective_semantic_unmatched_only() {
        let sq = StructuredQuery {
            unmatched_tags: vec![String::from("disco")],
            ..Default::default()
        };
        let combined = effective_semantic(&sq);
        assert_eq!(combined, Some(String::from("disco")));
    }

    /// An offset past the end gives an empty page; in-range windows are
    /// truncated to the available hits.
    #[test]
    fn paginate_handles_out_of_range_offset() {
        let hits: Vec<(f32, String)> = [(0.9, "a"), (0.8, "b")]
            .into_iter()
            .map(|(score, hash)| (score, hash.to_string()))
            .collect();
        assert!(paginate(&hits, 5, 10).is_empty());
        assert_eq!(paginate(&hits, 0, 1).len(), 1);
        assert_eq!(paginate(&hits, 1, 10).len(), 1);
    }
}
|
||||
Reference in New Issue
Block a user