From 09ccb347c7be5c5177c7bbe580f6c3268a85d970 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 00:44:16 -0400 Subject: [PATCH 1/9] =?UTF-8?q?Unified=20NL=20search=20Phase=201:=20NL?= =?UTF-8?q?=E2=86=92structured-query=20translator=20+=20forward=20geocodin?= =?UTF-8?q?g?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foundation for the /photos/search/unified endpoint (Phase 2). Two new, fully unit-tested pieces, not yet wired into a route (allow-until-wired, mirroring llm_client.rs): - ai/nl_query.rs: translate a free-text query into a StructuredQuery via one grounded LLM call. Two-stage — the model emits names/ISO dates, then a pure resolve step maps tag names against the real vocab and converts dates to unix seconds. Hallucinated (non-vocab) tags are surfaced in unmatched_tags rather than silently used as hard filters — the anti-noise guard. 12 tests. - geo::forward_geocode + bbox_to_circle: resolve a place name to a circle via Nominatim /search, collapsing the bounding box to centroid + circumscribing radius so "Portland" and "Italy" both map onto the existing gps circle filter with no schema change. Radius is the max centroid-to-corner distance (corners aren't equidistant on a sphere). 4 tests. fmt + clippy clean; 19 new tests pass. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ai/mod.rs | 1 + src/ai/nl_query.rs | 414 +++++++++++++++++++++++++++++++++++++++++++++ src/geo.rs | 180 ++++++++++++++++++++ 3 files changed, 595 insertions(+) create mode 100644 src/ai/nl_query.rs diff --git a/src/ai/mod.rs b/src/ai/mod.rs index c5302fb..7d0802e 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -10,6 +10,7 @@ pub mod insight_generator; pub mod llamacpp; pub mod llm_client; pub mod local_llm; +pub mod nl_query; pub mod ollama; pub mod openrouter; pub mod pronunciation; diff --git a/src/ai/nl_query.rs b/src/ai/nl_query.rs new file mode 100644 index 0000000..a94fc06 --- /dev/null +++ b/src/ai/nl_query.rs @@ -0,0 +1,414 @@ +//! Natural-language → structured-query translation for unified photo search. +//! +//! The unified search endpoint (`/photos/search/unified`, Phase 2) needs to +//! turn a free-text query like *"sunset photos in Italy from last summer"* +//! into the structured filter the existing `/photos` engine understands plus +//! a semantic term for CLIP ranking. That translation is a single grounded +//! LLM call, isolated here so it can be unit-tested without a network or the +//! full `InsightGenerator`. +//! +//! Two-stage design: +//! 1. The LLM emits a [`RawNlQuery`] — references are by *name* (tags) and +//! dates as ISO strings, never numeric ids it could hallucinate. +//! 2. [`resolve_raw_query`] maps names against the real tag vocabulary and +//! converts ISO dates to unix seconds, producing a [`StructuredQuery`]. +//! A tag the model invents that isn't in the vocab is surfaced in +//! `unmatched_tags` (the caller folds it back into the semantic term) +//! rather than silently dropped — this is the anti-noise guard. +//! +//! Geocoding of `place` and person filtering are intentionally *not* handled +//! here: `place` stays as text for the caller to forward-geocode (async, see +//! `geo::forward_geocode`), and person filtering is deferred until a +//! person→photos resolver exists. + +// Phase 1: this module is fully implemented and unit-tested, but its first +// consumer (the `/photos/search/unified` endpoint) lands in Phase 2. Mirrors +// llm_client.rs's allow-until-wired pattern so the bin target stays +// clippy-clean in the interim; remove when the endpoint is added. +#![allow(dead_code)] + +use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks}; +use anyhow::{Result, anyhow}; +use serde::{Deserialize, Serialize}; + +/// Raw query object as emitted by the LLM. Tag references are by name +/// (resolved against the real vocab in Rust); dates are ISO `YYYY-MM-DD`. +/// Every field is optional so a partial / minimal model response still +/// deserializes. +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +pub struct RawNlQuery { + /// Visual/scene description handed to CLIP for ranking. The descriptive + /// remainder after structured filters are peeled off. + #[serde(default)] + pub semantic: Option, + /// Tag names the photos must have. Matched case-insensitively against + /// the supplied vocabulary; non-matches land in `unmatched_tags`. + #[serde(default)] + pub tags: Vec, + /// Tag names the photos must NOT have. + #[serde(default)] + pub exclude_tags: Vec, + #[serde(default)] + pub camera_make: Option, + #[serde(default)] + pub camera_model: Option, + #[serde(default)] + pub lens_model: Option, + /// Free-text place/location name to forward-geocode (e.g. "Italy"). + #[serde(default)] + pub place: Option, + /// Inclusive start date, ISO `YYYY-MM-DD`. + #[serde(default)] + pub date_from: Option, + /// Inclusive end date, ISO `YYYY-MM-DD`. + #[serde(default)] + pub date_to: Option, + /// "photo" | "video" — normalized in [`resolve_raw_query`]. + #[serde(default)] + pub media_type: Option, +} + +/// Resolved structured query: tag names mapped to ids against the real +/// vocab, ISO dates converted to unix seconds. `place` stays as text for the +/// caller to forward-geocode into a gps circle. Serializable so the endpoint +/// can echo it back to the client as "this is how I read your query" +/// (editable filter chips). +#[derive(Debug, Clone, Default, PartialEq, Serialize)] +pub struct StructuredQuery { + pub semantic: Option, + pub tag_ids: Vec, + pub exclude_tag_ids: Vec, + /// Tag names the model produced that don't exist in the vocabulary. + /// The caller folds these back into the semantic term so the concept + /// isn't lost — and surfacing them keeps a hallucinated tag from + /// silently filtering the whole library to nothing. + pub unmatched_tags: Vec, + pub camera_make: Option, + pub camera_model: Option, + pub lens_model: Option, + /// Raw place name awaiting forward-geocoding by the caller. + pub place: Option, + pub date_from: Option, + pub date_to: Option, + /// Normalized to "photo" | "video"; `None` means no media-type filter. + pub media_type: Option, +} + +/// Convert an ISO `YYYY-MM-DD` date to a unix timestamp (seconds). With +/// `end_of_day`, returns 23:59:59 of that day so a `date_to` filter is +/// inclusive of the whole day; otherwise 00:00:00. Returns `None` for any +/// unparseable input (the filter is simply omitted rather than erroring). +pub fn iso_to_unix(date: &str, end_of_day: bool) -> Option { + let d = chrono::NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok()?; + let time = if end_of_day { + chrono::NaiveTime::from_hms_opt(23, 59, 59)? + } else { + chrono::NaiveTime::from_hms_opt(0, 0, 0)? + }; + Some(d.and_time(time).and_utc().timestamp()) +} + +/// Normalize a free-form media-type string to the engine's vocabulary. +/// Anything that isn't clearly photo or video (including "all") yields +/// `None` — no filter. +fn normalize_media_type(raw: &str) -> Option { + match raw.trim().to_lowercase().as_str() { + "photo" | "photos" | "image" | "images" | "picture" | "pictures" => { + Some("photo".to_string()) + } + "video" | "videos" | "movie" | "movies" | "clip" | "clips" => Some("video".to_string()), + _ => None, + } +} + +/// Resolve a raw LLM query against the real tag vocabulary, producing the +/// structured filter. Pure — no network, no LLM — so it carries the +/// correctness-critical mapping logic under unit test. +/// +/// `tag_vocab` is `(tag_id, tag_name)` pairs (the shape `TagDao::get_all_tags` +/// yields once the count is dropped). Matching is case-insensitive and exact +/// on the trimmed name. +pub fn resolve_raw_query(raw: RawNlQuery, tag_vocab: &[(i32, String)]) -> StructuredQuery { + // Case-insensitive name → id lookup. Built once per call. + let lookup: std::collections::HashMap = tag_vocab + .iter() + .map(|(id, name)| (name.trim().to_lowercase(), *id)) + .collect(); + + let resolve_names = |names: &[String], ids: &mut Vec, unmatched: &mut Vec| { + for name in names { + let key = name.trim().to_lowercase(); + if key.is_empty() { + continue; + } + match lookup.get(&key) { + Some(id) if !ids.contains(id) => ids.push(*id), + Some(_) => {} // duplicate, already collected + None => { + if !unmatched.iter().any(|u| u.eq_ignore_ascii_case(name)) { + unmatched.push(name.trim().to_string()); + } + } + } + } + }; + + let mut tag_ids = Vec::new(); + let mut unmatched_tags = Vec::new(); + resolve_names(&raw.tags, &mut tag_ids, &mut unmatched_tags); + + // Excluded tags that don't match a real tag are simply ignored — you + // can't exclude a tag that doesn't exist, and folding them into + // `semantic` would make no sense. + let mut exclude_tag_ids = Vec::new(); + let mut exclude_unmatched = Vec::new(); + resolve_names( + &raw.exclude_tags, + &mut exclude_tag_ids, + &mut exclude_unmatched, + ); + + let clean = |s: Option| s.map(|v| v.trim().to_string()).filter(|v| !v.is_empty()); + + StructuredQuery { + semantic: clean(raw.semantic), + tag_ids, + exclude_tag_ids, + unmatched_tags, + camera_make: clean(raw.camera_make), + camera_model: clean(raw.camera_model), + lens_model: clean(raw.lens_model), + place: clean(raw.place), + date_from: raw.date_from.as_deref().and_then(|d| iso_to_unix(d, false)), + date_to: raw.date_to.as_deref().and_then(|d| iso_to_unix(d, true)), + media_type: raw.media_type.as_deref().and_then(normalize_media_type), + } +} + +/// Build the grounded system prompt. The model is told the current date (so +/// "last summer" resolves) and the exact tag vocabulary (so it uses real +/// tags or routes the concept to `semantic` instead of inventing one). +fn build_system_prompt(tag_vocab: &[(i32, String)], today: chrono::NaiveDate) -> String { + // Cap the vocab dump so a huge library doesn't blow the context window; + // the most-used tags are the ones a query is likely to reference. + const MAX_TAGS: usize = 400; + let mut names: Vec<&str> = tag_vocab.iter().map(|(_, n)| n.as_str()).collect(); + names.sort_unstable(); + names.dedup(); + let shown = names.len().min(MAX_TAGS); + let vocab = names[..shown].join(", "); + let truncation = if names.len() > MAX_TAGS { + format!(" (showing {MAX_TAGS} of {} tags)", names.len()) + } else { + String::new() + }; + + format!( + "You translate a user's natural-language photo-search request into a JSON \ +filter. Today's date is {today}. Respond with ONLY a JSON object, no prose, no \ +code fences.\n\n\ +Schema (all fields optional):\n\ +{{\n \ +\"semantic\": string|null, // visual scene/subject for image similarity search\n \ +\"tags\": string[], // ONLY names from the tag list below\n \ +\"exclude_tags\": string[], // ONLY names from the tag list below\n \ +\"camera_make\": string|null,\n \ +\"camera_model\": string|null,\n \ +\"lens_model\": string|null,\n \ +\"place\": string|null, // a location name to look up (city, country, landmark)\n \ +\"date_from\": \"YYYY-MM-DD\"|null, // inclusive\n \ +\"date_to\": \"YYYY-MM-DD\"|null, // inclusive\n \ +\"media_type\": \"photo\"|\"video\"|null\n\ +}}\n\n\ +Rules:\n\ +- Put descriptive/visual concepts (\"sunset\", \"crowded beach\", \"red car\") in \"semantic\".\n\ +- Only use \"tags\"/\"exclude_tags\" values that appear EXACTLY in the tag list. If a \ +concept isn't a listed tag, put it in \"semantic\" instead — never invent a tag.\n\ +- Resolve relative dates against today's date (\"last summer\", \"2023\", \"last month\").\n\ +- Put place/location names in \"place\" (not \"semantic\").\n\ +- Omit (use null / empty array) anything the request doesn't mention.\n\n\ +Available tags{truncation}: {vocab}" + ) +} + +/// Extract the JSON object from a model response that may include a leading +/// `` block, code fences, or trailing prose. Strips the think block +/// first (so reasoning that mentions braces can't fool the scan), then +/// returns the substring from the first `{` to the last `}` inclusive — or +/// the trimmed text if no braces are found (which then fails to parse with a +/// clear error). +fn extract_json(raw: &str) -> String { + let s = strip_think_blocks(raw); + let start = s.find('{'); + let end = s.rfind('}'); + match (start, end) { + (Some(a), Some(b)) if b >= a => s[a..=b].to_string(), + _ => s.trim().to_string(), + } +} + +/// Parse a model response string into a [`StructuredQuery`], resolving names +/// against the vocab. Separated from the LLM call so it's unit-testable. +pub fn parse_response(response: &str, tag_vocab: &[(i32, String)]) -> Result { + let json = extract_json(response); + let raw: RawNlQuery = serde_json::from_str(&json) + .map_err(|e| anyhow!("failed to parse NL query JSON: {e}; raw response: {response:?}"))?; + Ok(resolve_raw_query(raw, tag_vocab)) +} + +/// Translate a natural-language query into a [`StructuredQuery`] via one +/// grounded LLM call. The `client` is any configured backend (the unified +/// endpoint passes the resolved chat backend); `tag_vocab` grounds the tag +/// mapping; `today` anchors relative-date resolution. +pub async fn translate_nl_query( + client: &dyn LlmClient, + nl: &str, + tag_vocab: &[(i32, String)], + today: chrono::NaiveDate, +) -> Result { + let system = build_system_prompt(tag_vocab, today); + let messages = vec![ChatMessage::system(system), ChatMessage::user(nl)]; + let (msg, _, _) = client.chat_with_tools(messages, Vec::::new()).await?; + parse_response(&msg.content, tag_vocab) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn vocab() -> Vec<(i32, String)> { + vec![ + (1, "beach".to_string()), + (2, "Sunset".to_string()), // mixed case to exercise case-insensitivity + (3, "family".to_string()), + ] + } + + #[test] + fn iso_to_unix_start_and_end_of_day() { + // 2023-01-01 UTC midnight = 1672531200. + assert_eq!(iso_to_unix("2023-01-01", false), Some(1_672_531_200)); + // End of that day is 86399 seconds later. + assert_eq!( + iso_to_unix("2023-01-01", true), + Some(1_672_531_200 + 86_399) + ); + } + + #[test] + fn iso_to_unix_rejects_garbage() { + assert_eq!(iso_to_unix("last summer", false), None); + assert_eq!(iso_to_unix("2023-13-99", false), None); + assert_eq!(iso_to_unix("", false), None); + } + + #[test] + fn resolve_matches_tags_case_insensitively() { + let raw = RawNlQuery { + tags: vec!["BEACH".to_string(), "sunset".to_string()], + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.tag_ids, vec![1, 2]); + assert!(q.unmatched_tags.is_empty()); + } + + #[test] + fn resolve_surfaces_unmatched_tags_not_silently_dropped() { + // A hallucinated / non-vocab tag must be surfaced so the caller can + // fold it into semantic — never silently used as a hard filter. + let raw = RawNlQuery { + tags: vec!["beach".to_string(), "golden hour".to_string()], + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.tag_ids, vec![1]); + assert_eq!(q.unmatched_tags, vec!["golden hour".to_string()]); + } + + #[test] + fn resolve_dedups_repeated_tags() { + let raw = RawNlQuery { + tags: vec![ + "beach".to_string(), + "Beach".to_string(), + "beach".to_string(), + ], + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.tag_ids, vec![1]); + } + + #[test] + fn resolve_normalizes_media_type_and_dates() { + let raw = RawNlQuery { + media_type: Some("Videos".to_string()), + date_from: Some("2023-06-01".to_string()), + date_to: Some("2023-06-30".to_string()), + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.media_type.as_deref(), Some("video")); + assert_eq!(q.date_from, iso_to_unix("2023-06-01", false)); + assert_eq!(q.date_to, iso_to_unix("2023-06-30", true)); + } + + #[test] + fn resolve_media_type_all_is_no_filter() { + let raw = RawNlQuery { + media_type: Some("all".to_string()), + ..Default::default() + }; + assert_eq!(resolve_raw_query(raw, &vocab()).media_type, None); + } + + #[test] + fn resolve_trims_and_empties_to_none() { + let raw = RawNlQuery { + semantic: Some(" ".to_string()), + camera_make: Some(" Fujifilm ".to_string()), + place: Some("".to_string()), + ..Default::default() + }; + let q = resolve_raw_query(raw, &vocab()); + assert_eq!(q.semantic, None); + assert_eq!(q.camera_make.as_deref(), Some("Fujifilm")); + assert_eq!(q.place, None); + } + + #[test] + fn parse_response_handles_code_fences_and_prose() { + let resp = "Here is the filter:\n```json\n{\"semantic\":\"sunset\",\"tags\":[\"beach\"]}\n```\nDone."; + let q = parse_response(resp, &vocab()).expect("parse"); + assert_eq!(q.semantic.as_deref(), Some("sunset")); + assert_eq!(q.tag_ids, vec![1]); + } + + #[test] + fn parse_response_handles_think_block_then_json() { + let resp = "user wants beach sunsets{\"tags\":[\"beach\",\"sunset\"]}"; + let q = parse_response(resp, &vocab()).expect("parse"); + assert_eq!(q.tag_ids, vec![1, 2]); + } + + #[test] + fn parse_response_errors_on_non_json() { + assert!(parse_response("I cannot help with that.", &vocab()).is_err()); + } + + #[test] + fn build_system_prompt_includes_date_and_vocab() { + let today = chrono::NaiveDate::from_ymd_opt(2026, 6, 14).unwrap(); + let prompt = build_system_prompt(&vocab(), today); + assert!( + prompt.contains("2026-06-14"), + "prompt should state today's date" + ); + assert!(prompt.contains("beach"), "prompt should list the vocab"); + assert!( + prompt.contains("never invent a tag"), + "prompt should warn against inventing tags" + ); + } +} diff --git a/src/geo.rs b/src/geo.rs index 46cc1dc..b7ef9d1 100644 --- a/src/geo.rs +++ b/src/geo.rs @@ -1,4 +1,5 @@ /// Geographic calculation utilities for GPS-based search +use serde::Deserialize; use std::f64; /// Calculate distance between two GPS coordinates using the Haversine formula. @@ -61,6 +62,148 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f ) } +/// A place resolved from a free-text query via forward geocoding. +/// +/// The filter pipeline searches a *circle* (`gps_lat`/`gps_lon`/ +/// `gps_radius_km`), but a place can be anything from a single address to +/// a whole country. We collapse Nominatim's bounding box into the smallest +/// circle that circumscribes it (see [`bbox_to_circle`]) so "Portland" and +/// "Italy" both map onto the existing circle filter without a schema change. +// Phase 1: forward geocoding is implemented and unit-tested here, but its +// first consumer (the `/photos/search/unified` endpoint) lands in Phase 2. +// allow-until-wired (mirrors llm_client.rs); remove when the endpoint is added. +#[allow(dead_code)] +#[derive(Debug, Clone, PartialEq)] +pub struct GeoPlace { + /// Nominatim's canonical name for the match (e.g. "Italia"). + pub display_name: String, + /// Centroid latitude in decimal degrees. + pub lat: f64, + /// Centroid longitude in decimal degrees. + pub lon: f64, + /// Radius (km) of a circle centred on the centroid that covers the + /// matched area. Floored to [`MIN_PLACE_RADIUS_KM`] so a point result + /// (whose bounding box is microscopic) still yields a usable circle. + pub radius_km: f64, +} + +/// Floor for a geocoded place's radius. Point results (a street address) +/// come back with a near-zero bounding box; without a floor the circle +/// filter would match nothing. +#[allow(dead_code)] +pub const MIN_PLACE_RADIUS_KM: f64 = 0.5; + +/// Collapse a bounding box into the centroid + circumscribing radius. +/// +/// Input is Nominatim's `boundingbox` order: `(south_lat, north_lat, +/// west_lon, east_lon)`. The radius is the *largest* great-circle distance +/// from the centroid to any of the four corners, so the resulting circle +/// fully covers the box. (The corners aren't equidistant on a sphere — +/// longitude lines converge toward the poles, so the equator-facing edge's +/// corners are farthest; taking the max guarantees coverage in either +/// hemisphere.) +/// +/// Pure and exact (no flooring) so it can be unit-tested directly; callers +/// apply [`MIN_PLACE_RADIUS_KM`] when turning the result into a filter. +#[allow(dead_code)] +pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) { + let center_lat = (south + north) / 2.0; + let center_lon = (west + east) / 2.0; + let radius_km = [(south, west), (south, east), (north, west), (north, east)] + .iter() + .map(|(clat, clon)| haversine_distance(center_lat, center_lon, *clat, *clon)) + .fold(0.0_f64, f64::max); + (center_lat, center_lon, radius_km) +} + +/// Raw Nominatim `/search` result. `lat`/`lon` arrive as strings and +/// `boundingbox` as a 4-element string array `[south, north, west, east]`. +#[allow(dead_code)] +#[derive(Deserialize)] +struct NominatimSearchResult { + lat: String, + lon: String, + display_name: String, + boundingbox: Option<[String; 4]>, +} + +/// Forward-geocode a free-text place name to a [`GeoPlace`] via the public +/// OpenStreetMap Nominatim `/search` endpoint. +/// +/// Mirrors `InsightGenerator::reverse_geocode`'s error posture: any network, +/// HTTP, or parse failure returns `None` rather than propagating, so a flaky +/// geocoder degrades the query to "no location filter" instead of failing it. +/// +/// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1 +/// request/second; callers doing this interactively should cache results. +#[allow(dead_code)] +pub async fn forward_geocode(query: &str) -> Option { + let q = query.trim(); + if q.is_empty() { + return None; + } + + let client = reqwest::Client::new(); + let response = match client + .get("https://nominatim.openstreetmap.org/search") + .query(&[("format", "json"), ("limit", "1"), ("q", q)]) + .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent + .send() + .await + { + Ok(resp) => resp, + Err(e) => { + log::warn!("Forward geocoding network error for {q:?}: {e}"); + return None; + } + }; + + if !response.status().is_success() { + log::warn!( + "Forward geocoding HTTP error for {q:?}: {}", + response.status() + ); + return None; + } + + let results: Vec = match response.json().await { + Ok(r) => r, + Err(e) => { + log::warn!("Forward geocoding JSON parse error for {q:?}: {e}"); + return None; + } + }; + + let top = results.into_iter().next()?; + let lat: f64 = top.lat.parse().ok()?; + let lon: f64 = top.lon.parse().ok()?; + + // Prefer the bounding box (handles large places); fall back to a + // point + floor radius when Nominatim omits it. + let (center_lat, center_lon, radius_km) = match &top.boundingbox { + Some([s, n, w, e]) => match (s.parse(), n.parse(), w.parse(), e.parse()) { + (Ok(s), Ok(n), Ok(w), Ok(e)) => bbox_to_circle(s, n, w, e), + _ => (lat, lon, 0.0), + }, + None => (lat, lon, 0.0), + }; + + let place = GeoPlace { + display_name: top.display_name, + lat: center_lat, + lon: center_lon, + radius_km: radius_km.max(MIN_PLACE_RADIUS_KM), + }; + log::info!( + "Forward geocoded {q:?} -> {} ({:.4}, {:.4}, r={:.1}km)", + place.display_name, + place.lat, + place.lon, + place.radius_km + ); + Some(place) +} + #[cfg(test)] mod tests { use super::*; @@ -118,4 +261,41 @@ mod tests { distance ); } + + #[test] + fn test_bbox_to_circle_centroid() { + // Symmetric box around (10, 20): centroid should land dead centre. + let (lat, lon, radius) = bbox_to_circle(9.0, 11.0, 19.0, 21.0); + assert!((lat - 10.0).abs() < 1e-9, "centroid lat, got {lat}"); + assert!((lon - 20.0).abs() < 1e-9, "centroid lon, got {lon}"); + assert!(radius > 0.0, "radius should be positive, got {radius}"); + } + + #[test] + fn test_bbox_to_circle_covers_corner() { + // The radius must reach every corner of the box. Verify the + // centroid-to-corner distance equals the returned radius for all + // four corners (they're symmetric, so all equal). + let (south, north, west, east) = (40.0, 42.0, -74.0, -72.0); + let (lat, lon, radius) = bbox_to_circle(south, north, west, east); + for (clat, clon) in [(south, west), (south, east), (north, west), (north, east)] { + let d = haversine_distance(lat, lon, clat, clon); + assert!( + d <= radius + 1e-6, + "corner ({clat},{clon}) at {d}km should be within radius {radius}km" + ); + } + } + + #[test] + fn test_bbox_to_circle_country_vs_city_scale() { + // A country-sized box yields a far larger radius than a city-sized + // one — confirming the bbox approach scales with place size. + let (_, _, country) = bbox_to_circle(35.5, 47.1, 6.6, 18.5); // ~Italy + let (_, _, city) = bbox_to_circle(45.4, 45.6, -122.8, -122.5); // ~Portland + assert!( + country > city * 10.0, + "country radius {country}km should dwarf city radius {city}km" + ); + } } From fd00dc936a8e291cac9dd633e06aa3a865211a8c Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 01:03:43 -0400 Subject: [PATCH 2/9] Unified NL search Phase 2: /photos/search/unified endpoint Composes the two existing engines (Path A orchestration): - Translate NL -> StructuredQuery via local LLM, respecting LLM_BACKEND (resolve_backend(Local) -> ollama or llama-swap; no hybrid). - Forward-geocode the place name into a gps circle. - Structured filters (tags/EXIF/geo/date/media) build a candidate set of EXIF rows; CLIP ranks within it, joined by content_hash. Degenerate cases match existing behavior: semantic-only -> plain CLIP; filters-only -> date-sorted. - Echoes the interpreted query (incl. resolved place) for editable client chips. Refactor: extracted reusable cores from clip_search (score_photos, resolve_hits, parse_library_scope, score_error_response) shared by both endpoints. Removed the Phase 1 allow-until-wired attributes now that nl_query + geo are consumed. fmt + clippy clean; 23 backend tests pass (7 geo, 12 nl_query, 4 unified). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ai/nl_query.rs | 6 - src/clip_search.rs | 398 ++++++++++++++++++++----------------- src/geo.rs | 8 - src/lib.rs | 1 + src/main.rs | 8 + src/unified_search.rs | 452 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 675 insertions(+), 198 deletions(-) create mode 100644 src/unified_search.rs diff --git a/src/ai/nl_query.rs b/src/ai/nl_query.rs index a94fc06..d709322 100644 --- a/src/ai/nl_query.rs +++ b/src/ai/nl_query.rs @@ -21,12 +21,6 @@ //! `geo::forward_geocode`), and person filtering is deferred until a //! person→photos resolver exists. -// Phase 1: this module is fully implemented and unit-tested, but its first -// consumer (the `/photos/search/unified` endpoint) lands in Phase 2. Mirrors -// llm_client.rs's allow-until-wired pattern so the bin target stays -// clippy-clean in the interim; remove when the endpoint is added. -#![allow(dead_code)] - use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks}; use anyhow::{Result, anyhow}; use serde::{Deserialize, Serialize}; diff --git a/src/clip_search.rs b/src/clip_search.rs index 98ea96e..7b4510e 100644 --- a/src/clip_search.rs +++ b/src/clip_search.rs @@ -124,65 +124,161 @@ fn dot(a: &[f32], b: &[f32]) -> f32 { a.iter().zip(b.iter()).map(|(x, y)| x * y).sum() } -pub async fn search_photos( - state: web::Data, - exif_dao: web::Data>>, - query: web::Query, -) -> ActixResult { - let q_text = query.q.trim().to_string(); - if q_text.is_empty() { - return Ok(HttpResponse::BadRequest().json(SearchError { - error: "query parameter `q` is required".into(), - })); - } +/// Failure modes of [`score_photos`]. Carries enough to let each caller pick +/// an appropriate HTTP status (the CLIP service being down is a 502, a +/// disabled feature is a 503, a rejected query is a 400, a DB failure 500). +pub enum ScoreError { + /// CLIP search isn't configured at all (no Apollo endpoint). + Disabled, + /// The query was rejected by the encoder (client error). + Rejected(String), + /// The CLIP service is transiently unavailable (upstream error). + Unavailable(String), + /// The encoder returned an embedding we couldn't decode. + MalformedEmbedding, + /// A database / index load failure. + Internal(String), +} + +/// Result of scoring the whole library against a query embedding: the +/// resolved model version, how many embeddings were considered, and every +/// `(score, content_hash)` above threshold, sorted by descending score. +/// Pagination and path resolution are the caller's job (see [`resolve_hits`]) +/// so this core can be reused for both the plain search endpoint and the +/// unified endpoint (which filters by hash before paginating). +pub struct ScoredPhotos { + pub model_version: String, + pub considered: usize, + /// `(cosine_score, content_hash)` pairs, descending by score. + pub hits: Vec<(f32, String)>, +} + +/// Encode `q_text` via CLIP and score it against every stored embedding in +/// the given library scope. Returns all matches above `threshold`, sorted by +/// descending similarity. Pure of HTTP concerns so it's shared by +/// `search_photos` and the unified search endpoint. +pub async fn score_photos( + state: &AppState, + exif_dao: &Mutex>, + q_text: &str, + library_ids: &[i32], + threshold: f32, + model_version: Option<&str>, +) -> Result { if !state.clip_client.is_enabled() { - return Ok(HttpResponse::ServiceUnavailable().json(SearchError { - error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(), - })); + return Err(ScoreError::Disabled); } - let limit = query.limit.clamp(1, 200); - let offset = query.offset; - let threshold = query.threshold.clamp(-1.0, 1.0); - - // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms - // on CPU. Bail with a clear error message if Apollo's down so the - // user sees "service unavailable" rather than empty results. - let query_resp = match state.clip_client.encode_text(&q_text).await { + // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms on CPU. + let query_resp = match state.clip_client.encode_text(q_text).await { Ok(r) => r, - Err(ClipError::Permanent(e)) => { - return Ok(HttpResponse::BadRequest().json(SearchError { - error: format!("query rejected: {e}"), - })); - } - Err(ClipError::Transient(e)) => { - return Ok(HttpResponse::BadGateway().json(SearchError { - error: format!("CLIP service unavailable: {e}"), - })); - } - Err(ClipError::Disabled) => { - return Ok(HttpResponse::ServiceUnavailable().json(SearchError { - error: "CLIP service disabled".into(), - })); - } + Err(ClipError::Permanent(e)) => return Err(ScoreError::Rejected(e.to_string())), + Err(ClipError::Transient(e)) => return Err(ScoreError::Unavailable(e.to_string())), + Err(ClipError::Disabled) => return Err(ScoreError::Disabled), }; // decode_embedding works on raw bytes; the wire format is b64. let query_bytes = base64::engine::general_purpose::STANDARD .decode(query_resp.embedding.as_bytes()) .unwrap_or_default(); - let query_vec = match decode_embedding(&query_bytes) { - Some(v) => v, - None => { - return Ok(HttpResponse::BadGateway().json(SearchError { - error: "CLIP service returned a malformed query embedding".into(), - })); - } - }; + let query_vec = decode_embedding(&query_bytes).ok_or(ScoreError::MalformedEmbedding)?; - // 2. Decide which library scope to search. `library_ids` (multi) - // wins over the legacy `library` (single) when both are present; - // either / both empty falls back to "every enabled library". - let library_ids: Vec = if let Some(raw) = query.library_ids.as_deref() { + // 2. Pull the (hash, embedding) matrix under the dao lock, release + // before scoring. The caller-supplied `model_version` (or the live + // engine's) forces a strict join so a mid-flight model swap can't mix + // geometries. + let ctx = opentelemetry::Context::current(); + let rows: Vec<(String, Vec)> = { + let mut dao = exif_dao.lock().expect("exif dao"); + dao.list_clip_index( + &ctx, + library_ids, + model_version.or(Some(&query_resp.model_version)), + ) + .map_err(|e| { + log::warn!("clip_search: list_clip_index failed: {:?}", e); + ScoreError::Internal("failed to load search index".into()) + })? + }; + let considered = rows.len(); + + // 3. Score. Keep all matches and sort at the end (~microseconds at 14k). + let mut hits: Vec<(f32, String)> = Vec::with_capacity(considered); + for (hash, blob) in rows { + let Some(emb) = decode_embedding(&blob) else { + continue; + }; + if emb.len() != query_vec.len() { + continue; + } + let sim = dot(&emb, &query_vec); + if sim < threshold { + continue; + } + hits.push((sim, hash)); + } + hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); + + Ok(ScoredPhotos { + model_version: query_resp.model_version, + considered, + hits, + }) +} + +/// Resolve a page of `(score, content_hash)` pairs back to [`SearchHit`]s +/// (each carrying `library_id` + `rel_path`). Hashes that no longer resolve +/// to a row are skipped. Shared by both endpoints. +pub fn resolve_hits( + exif_dao: &Mutex>, + scored: &[(f32, String)], +) -> Vec { + if scored.is_empty() { + return Vec::new(); + } + let ctx = opentelemetry::Context::current(); + let hashes: Vec = scored.iter().map(|(_, h)| h.clone()).collect(); + let mut dao = exif_dao.lock().expect("exif dao"); + let path_map = dao + .get_rel_paths_for_hashes(&ctx, &hashes) + .unwrap_or_else(|e| { + log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e); + std::collections::HashMap::new() + }); + + let mut results = Vec::with_capacity(scored.len()); + for (score, hash) in scored { + let row = match dao.find_by_content_hash(&ctx, hash) { + Ok(Some(r)) => r, + Ok(None) => continue, + Err(e) => { + log::warn!("clip_search: find_by_content_hash failed for {hash}: {e:?}"); + continue; + } + }; + // Prefer get_rel_paths_for_hashes's first entry (shares image_exif's + // natural order), falling back to the ImageExif row. + let rel_path = path_map + .get(hash) + .and_then(|paths| paths.first().cloned()) + .unwrap_or(row.file_path); + results.push(SearchHit { + library_id: row.library_id, + rel_path, + content_hash: hash.clone(), + score: *score, + }); + } + results +} + +/// Parse the `library_ids` (multi) / `library` (single) scope params into a +/// deduped id list. Empty = "every enabled library". Shared so the unified +/// endpoint scopes CLIP identically. +pub fn parse_library_scope( + library_ids: Option<&str>, + library: Option, +) -> Result, String> { + if let Some(raw) = library_ids { let mut out: Vec = Vec::new(); for piece in raw.split(',') { let trimmed = piece.trim(); @@ -195,158 +291,92 @@ pub async fn search_photos( out.push(id); } } - Err(_) => { - return Ok(HttpResponse::BadRequest().json(SearchError { - error: format!("invalid library_ids entry: {trimmed:?}"), - })); - } + Err(_) => return Err(format!("invalid library_ids entry: {trimmed:?}")), } } - out - } else if let Some(id) = query.library { - vec![id] + Ok(out) + } else if let Some(id) = library { + Ok(vec![id]) } else { - Vec::new() - }; + Ok(Vec::new()) + } +} - // 3. Pull the (hash, embedding) matrix. Lock contention here is - // bounded — one big SELECT under a mutex Arc> - // and then we release before scoring. If this becomes a hotspot - // we'll cache the decoded matrix in AppState with TTL. - let ctx = opentelemetry::Context::current(); - let rows: Vec<(String, Vec)> = { - let mut dao = exif_dao.lock().expect("exif dao"); - match dao.list_clip_index( - &ctx, - &library_ids, - query - .model_version - .as_deref() - .or(Some(&query_resp.model_version)), - ) { - Ok(r) => r, - Err(e) => { - log::warn!("clip_search: list_clip_index failed: {:?}", e); - return Ok(HttpResponse::InternalServerError().json(SearchError { - error: "failed to load search index".into(), - })); - } - } - }; - let considered = rows.len(); - if considered == 0 { - return Ok(HttpResponse::Ok().json(SearchResponse { - query: q_text, - model_version: query_resp.model_version, - threshold, - considered, - total_matching: 0, - offset, - results: Vec::new(), +pub async fn search_photos( + state: web::Data, + exif_dao: web::Data>>, + query: web::Query, +) -> ActixResult { + let q_text = query.q.trim().to_string(); + if q_text.is_empty() { + return Ok(HttpResponse::BadRequest().json(SearchError { + error: "query parameter `q` is required".into(), })); } - // 4. Score. Cap the loop's transient allocation; we keep all scores - // and sort at the end. With ~14k entries the sort is microseconds. - let mut scored: Vec<(f32, String)> = Vec::with_capacity(considered); - for (hash, blob) in rows { - let Some(emb) = decode_embedding(&blob) else { - continue; - }; - if emb.len() != query_vec.len() { - continue; - } - let sim = dot(&emb, &query_vec); - if sim < threshold { - continue; - } - scored.push((sim, hash)); - } - scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); - let total_matching = scored.len(); - // Pagination — slice the sorted list at `[offset, offset+limit)`. - // Offsets past the end produce empty pages rather than an error so - // the client can stop fetching naturally on "load more" past the end. - let scored: Vec<(f32, String)> = if offset >= total_matching { + let limit = query.limit.clamp(1, 200); + let offset = query.offset; + let threshold = query.threshold.clamp(-1.0, 1.0); + + let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) { + Ok(ids) => ids, + Err(msg) => return Ok(HttpResponse::BadRequest().json(SearchError { error: msg })), + }; + + let scored = match score_photos( + &state, + &exif_dao, + &q_text, + &library_ids, + threshold, + query.model_version.as_deref(), + ) + .await + { + Ok(s) => s, + Err(e) => return Ok(score_error_response(e)), + }; + + let total_matching = scored.hits.len(); + // Pagination — slice the sorted list at `[offset, offset+limit)`. Offsets + // past the end produce empty pages so "load more" stops naturally. + let page: Vec<(f32, String)> = if offset >= total_matching { Vec::new() } else { let end = (offset + limit).min(total_matching); - scored[offset..end].to_vec() + scored.hits[offset..end].to_vec() }; - - if scored.is_empty() { - return Ok(HttpResponse::Ok().json(SearchResponse { - query: q_text, - model_version: query_resp.model_version, - threshold, - considered, - total_matching, - offset, - results: Vec::new(), - })); - } - - // 5. Resolve each surviving hash back to a `(library_id, rel_path)`. - // `get_rel_paths_by_hash` returns every rel_path; we pick the first - // one for the result. Apollo / the UI can fetch alternatives via - // /image/metadata when needed. - let hashes: Vec = scored.iter().map(|(_, h)| h.clone()).collect(); - let path_map = { - let mut dao = exif_dao.lock().expect("exif dao"); - match dao.get_rel_paths_for_hashes(&ctx, &hashes) { - Ok(m) => m, - Err(e) => { - log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e); - return Ok(HttpResponse::InternalServerError().json(SearchError { - error: "failed to resolve photo paths".into(), - })); - } - } - }; - - // We need (library_id, rel_path) — get_rel_paths_for_hashes only - // returns rel_paths. Cross-reference via find_by_content_hash to - // pick the library too. Single call per surviving hash; cheap at - // top-20. - let mut results = Vec::with_capacity(scored.len()); - { - let mut dao = exif_dao.lock().expect("exif dao"); - for (score, hash) in scored { - let row = match dao.find_by_content_hash(&ctx, &hash) { - Ok(Some(r)) => r, - Ok(None) => continue, - Err(e) => { - log::warn!( - "clip_search: find_by_content_hash failed for {}: {:?}", - hash, - e - ); - continue; - } - }; - // Prefer get_rel_paths_for_hashes's first entry if it - // exists (it shares semantics with `image_exif`'s natural - // order), falling back to the ImageExif row. - let rel_path = path_map - .get(&hash) - .and_then(|paths| paths.first().cloned()) - .unwrap_or(row.file_path); - results.push(SearchHit { - library_id: row.library_id, - rel_path, - content_hash: hash, - score, - }); - } - } + let results = resolve_hits(&exif_dao, &page); Ok(HttpResponse::Ok().json(SearchResponse { query: q_text, - model_version: query_resp.model_version, + model_version: scored.model_version, threshold, - considered, + considered: scored.considered, total_matching, offset, results, })) } + +/// Map a [`ScoreError`] to the HTTP response `search_photos` historically +/// returned for each failure mode. Reused by the unified endpoint. +pub fn score_error_response(e: ScoreError) -> HttpResponse { + match e { + ScoreError::Disabled => HttpResponse::ServiceUnavailable().json(SearchError { + error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(), + }), + ScoreError::Rejected(msg) => HttpResponse::BadRequest().json(SearchError { + error: format!("query rejected: {msg}"), + }), + ScoreError::Unavailable(msg) => HttpResponse::BadGateway().json(SearchError { + error: format!("CLIP service unavailable: {msg}"), + }), + ScoreError::MalformedEmbedding => HttpResponse::BadGateway().json(SearchError { + error: "CLIP service returned a malformed query embedding".into(), + }), + ScoreError::Internal(msg) => { + HttpResponse::InternalServerError().json(SearchError { error: msg }) + } + } +} diff --git a/src/geo.rs b/src/geo.rs index b7ef9d1..b54f609 100644 --- a/src/geo.rs +++ b/src/geo.rs @@ -69,10 +69,6 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f /// a whole country. We collapse Nominatim's bounding box into the smallest /// circle that circumscribes it (see [`bbox_to_circle`]) so "Portland" and /// "Italy" both map onto the existing circle filter without a schema change. -// Phase 1: forward geocoding is implemented and unit-tested here, but its -// first consumer (the `/photos/search/unified` endpoint) lands in Phase 2. -// allow-until-wired (mirrors llm_client.rs); remove when the endpoint is added. -#[allow(dead_code)] #[derive(Debug, Clone, PartialEq)] pub struct GeoPlace { /// Nominatim's canonical name for the match (e.g. "Italia"). @@ -90,7 +86,6 @@ pub struct GeoPlace { /// Floor for a geocoded place's radius. Point results (a street address) /// come back with a near-zero bounding box; without a floor the circle /// filter would match nothing. -#[allow(dead_code)] pub const MIN_PLACE_RADIUS_KM: f64 = 0.5; /// Collapse a bounding box into the centroid + circumscribing radius. @@ -105,7 +100,6 @@ pub const MIN_PLACE_RADIUS_KM: f64 = 0.5; /// /// Pure and exact (no flooring) so it can be unit-tested directly; callers /// apply [`MIN_PLACE_RADIUS_KM`] when turning the result into a filter. -#[allow(dead_code)] pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) { let center_lat = (south + north) / 2.0; let center_lon = (west + east) / 2.0; @@ -118,7 +112,6 @@ pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64 /// Raw Nominatim `/search` result. `lat`/`lon` arrive as strings and /// `boundingbox` as a 4-element string array `[south, north, west, east]`. -#[allow(dead_code)] #[derive(Deserialize)] struct NominatimSearchResult { lat: String, @@ -136,7 +129,6 @@ struct NominatimSearchResult { /// /// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1 /// request/second; callers doing this interactively should cache results. -#[allow(dead_code)] pub async fn forward_geocode(query: &str) -> Option { let q = query.trim(); if q.is_empty() { diff --git a/src/lib.rs b/src/lib.rs index 0ea7ddb..a228472 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,7 @@ pub mod tags; #[cfg(test)] pub mod testhelpers; pub mod thumbnails; +pub mod unified_search; pub mod utils; pub mod video; diff --git a/src/main.rs b/src/main.rs index e420d8b..7faa959 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,6 +54,7 @@ mod perceptual_hash; mod state; mod tags; mod thumbnails; +mod unified_search; mod utils; mod video; mod watcher; @@ -333,6 +334,13 @@ fn main() -> std::io::Result<()> { web::resource("/photos/search") .route(web::get().to(clip_search::search_photos)), ) + .service( + // Unified natural-language search: LLM translates the + // query into structured filters + a semantic term, then + // filters constrain and CLIP ranks. See src/unified_search.rs. + web::resource("/photos/search/unified") + .route(web::get().to(unified_search::unified_search::)), + ) .service(web::resource("/file/move").post(move_file::)) .service(handlers::image::get_image) .service(handlers::image::upload_image) diff --git a/src/unified_search.rs b/src/unified_search.rs new file mode 100644 index 0000000..a3187a4 --- /dev/null +++ b/src/unified_search.rs @@ -0,0 +1,452 @@ +//! `/photos/search/unified?q=` — unified NL photo search. +//! +//! One free-text box that composes the two existing engines instead of making +//! the user pick between them: +//! 1. A grounded local-LLM call ([`crate::ai::nl_query`]) translates the +//! query into a structured filter + a semantic term. +//! 2. Structured filters (tags / EXIF / geo / date / media-type) define the +//! candidate set; the semantic term ranks within it via CLIP. +//! +//! Path A (orchestration): we reuse `clip_search`'s scoring core and the +//! existing `ExifDao` / `TagDao` queries, joining on `content_hash`. EXIF rows +//! are the universal candidate carrier — each has `(library_id, file_path, +//! content_hash, date_taken)` — so the structured filter is just a predicate +//! over them, and the CLIP hits (which key on `content_hash`) intersect by +//! hash. No new schema, no surgery on `list_photos`. +//! +//! Degenerate cases collapse to the existing behavior: semantic-only → plain +//! CLIP search; filters-only → a date-sorted filtered listing. +//! +//! Person filtering is intentionally deferred (no person→photos resolver yet). + +use crate::AppState; +use crate::ai::backend::{BackendKind, SamplingOverrides}; +use crate::ai::nl_query::{StructuredQuery, translate_nl_query}; +use crate::clip_search::{ + SearchHit, parse_library_scope, resolve_hits, score_error_response, score_photos, +}; +use crate::data::Claims; +use crate::database::ExifDao; +use crate::file_types::{is_image_file, is_video_file}; +use crate::geo::{forward_geocode, gps_bounding_box, haversine_distance}; +use crate::tags::TagDao; +use actix_web::HttpResponse; +use actix_web::web::{Data, Query}; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; +use std::path::Path; +use std::sync::Mutex; + +#[derive(Debug, Deserialize)] +pub struct UnifiedQuery { + /// Natural-language query. Required; empty triggers 400. + pub q: String, + #[serde(default = "default_limit")] + pub limit: usize, + #[serde(default)] + pub offset: usize, + /// CLIP cosine floor for the semantic ranking stage. Same default as the + /// plain search endpoint. + #[serde(default = "default_threshold")] + pub threshold: f32, + /// Legacy single-library scope (see clip_search). + pub library: Option, + /// Multi-library scope, comma-separated ids. + pub library_ids: Option, +} + +fn default_limit() -> usize { + 20 +} +fn default_threshold() -> f32 { + 0.20 +} + +/// A geocoded place echoed back so the client can show / edit the location +/// filter it actually searched. +#[derive(Debug, Serialize)] +struct ResolvedPlace { + display_name: String, + lat: f64, + lon: f64, + radius_km: f64, +} + +/// How the server interpreted the NL query — echoed to the client to render +/// editable filter chips. tag ids map to the client's existing tag list. +#[derive(Debug, Serialize)] +struct Interpreted { + semantic: Option, + tag_ids: Vec, + exclude_tag_ids: Vec, + /// Words the model treated as tags that don't exist in the vocab; folded + /// into the semantic term and surfaced here so the UI can explain it. + unmatched_tags: Vec, + camera_make: Option, + camera_model: Option, + lens_model: Option, + date_from: Option, + date_to: Option, + media_type: Option, + place: Option, +} + +#[derive(Debug, Serialize)] +struct UnifiedResponse { + query: String, + interpreted: Interpreted, + /// CLIP model version used for ranking; `None` when the query had no + /// semantic term (filters-only). + model_version: Option, + /// Embeddings scored by CLIP (0 when filters-only). + considered: usize, + /// Matches before pagination. + total_matching: usize, + offset: usize, + results: Vec, +} + +#[derive(Debug, Serialize)] +struct ErrorBody { + error: String, +} + +fn bad_request(msg: impl Into) -> HttpResponse { + HttpResponse::BadRequest().json(ErrorBody { error: msg.into() }) +} + +/// Combine the model's semantic term with any tag words that didn't match the +/// vocab, so a hallucinated/non-vocab tag becomes a soft semantic signal +/// rather than being dropped. +fn effective_semantic(sq: &StructuredQuery) -> Option { + let mut parts: Vec = Vec::new(); + if let Some(s) = sq.semantic.as_deref() { + parts.push(s.to_string()); + } + parts.extend(sq.unmatched_tags.iter().cloned()); + if parts.is_empty() { + None + } else { + Some(parts.join(" ")) + } +} + +pub async fn unified_search( + _: Claims, + state: Data, + exif_dao: Data>>, + tag_dao: Data>, + query: Query, +) -> HttpResponse { + let nl = query.q.trim().to_string(); + if nl.is_empty() { + return bad_request("query parameter `q` is required"); + } + + let limit = query.limit.clamp(1, 200); + let offset = query.offset; + let threshold = query.threshold.clamp(-1.0, 1.0); + + let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) { + Ok(ids) => ids, + Err(msg) => return bad_request(msg), + }; + + let ctx = opentelemetry::Context::current(); + + // ── 1. Translate the NL query, grounded on the real tag vocabulary ── + let tag_vocab: Vec<(i32, String)> = { + let mut dao = tag_dao.lock().expect("tag dao"); + match dao.get_all_tags(&ctx, None) { + Ok(tags) => tags.into_iter().map(|(_, t)| (t.id, t.name)).collect(), + Err(e) => { + log::warn!("unified_search: get_all_tags failed: {e:?}"); + Vec::new() + } + } + }; + + // Respect env/config for the LLM backend (LLM_BACKEND → ollama or + // llama-swap); local only, no hybrid, per the feature's design. + let overrides = SamplingOverrides { + model: None, + num_ctx: None, + temperature: None, + top_p: None, + top_k: None, + min_p: None, + }; + let backend = match state + .insight_generator + .resolve_backend(BackendKind::Local, &overrides) + .await + { + Ok(b) => b, + Err(e) => { + log::warn!("unified_search: resolve_backend failed: {e:?}"); + return HttpResponse::ServiceUnavailable().json(ErrorBody { + error: "LLM backend unavailable".into(), + }); + } + }; + + let today = chrono::Utc::now().date_naive(); + let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await { + Ok(sq) => sq, + Err(e) => { + log::warn!("unified_search: translate_nl_query failed: {e:?}"); + return HttpResponse::BadGateway().json(ErrorBody { + error: "could not interpret the query".into(), + }); + } + }; + + // ── 2. Forward-geocode the place name into a gps circle ── + let resolved_place = match sq.place.as_deref() { + Some(p) => forward_geocode(p).await.map(|g| ResolvedPlace { + display_name: g.display_name, + lat: g.lat, + lon: g.lon, + radius_km: g.radius_km, + }), + None => None, + }; + let gps = resolved_place.as_ref().map(|p| (p.lat, p.lon, p.radius_km)); + + let semantic = effective_semantic(&sq); + + let has_exif_filter = sq.camera_make.is_some() + || sq.camera_model.is_some() + || sq.lens_model.is_some() + || sq.date_from.is_some() + || sq.date_to.is_some(); + let has_struct = + has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some(); + + // ── 3. Build the structured candidate set (EXIF rows passing every + // filter). Skipped entirely for a pure-semantic query. ── + let mut candidate: Vec = Vec::new(); + let mut allowed_hashes: HashSet = HashSet::new(); + if has_struct { + // Tag membership set (rel_path only — same cross-library imprecision + // as the existing /photos tag listing). ALL-mode: the photo must + // carry every named tag. + let tag_set: Option> = if sq.tag_ids.is_empty() { + None + } else { + let mut dao = tag_dao.lock().expect("tag dao"); + match dao.get_files_with_all_tag_ids( + sq.tag_ids.clone(), + sq.exclude_tag_ids.clone(), + &ctx, + ) { + Ok(files) => Some(files.into_iter().map(|f| f.file_name).collect()), + Err(e) => { + log::warn!("unified_search: tag filter failed: {e:?}"); + Some(HashSet::new()) + } + } + }; + + // EXIF query handles camera/lens/gps-box/date. With no EXIF filters + // it returns the whole table, which we then narrow by the predicates + // below (tags / media / scope). Fine at personal-library scale. + let gps_bounds = gps.map(|(lat, lon, r)| gps_bounding_box(lat, lon, r)); + let rows = { + let mut dao = exif_dao.lock().expect("exif dao"); + dao.query_by_exif( + &ctx, + None, // scope filtered in-Rust to support multi-library + sq.camera_make.as_deref(), + sq.camera_model.as_deref(), + sq.lens_model.as_deref(), + gps_bounds, + sq.date_from, + sq.date_to, + ) + .unwrap_or_else(|e| { + log::warn!("unified_search: query_by_exif failed: {e:?}"); + Vec::new() + }) + }; + + candidate = rows + .into_iter() + .filter(|row| { + // Library scope. + if !library_ids.is_empty() && !library_ids.contains(&row.library_id) { + return false; + } + // Precise GPS distance (the EXIF query only did a coarse box). + if let Some((lat, lon, radius_km)) = gps { + match (row.gps_latitude, row.gps_longitude) { + (Some(plat), Some(plon)) => { + if haversine_distance(lat, lon, plat as f64, plon as f64) > radius_km { + return false; + } + } + _ => return false, + } + } + // Media type. + if let Some(mt) = sq.media_type.as_deref() { + let p = Path::new(&row.file_path); + let ok = if mt == "video" { + is_video_file(p) + } else { + is_image_file(p) + }; + if !ok { + return false; + } + } + // Tag membership. + if let Some(ts) = &tag_set + && !ts.contains(&row.file_path) + { + return false; + } + true + }) + .collect(); + + allowed_hashes = candidate + .iter() + .filter_map(|r| r.content_hash.clone()) + .collect(); + } + + // ── 4. Rank ── + match semantic { + Some(ref sem) => { + // Semantic term present: CLIP-rank, then keep only hits that pass + // the structured filters (by content_hash). + let scored = + match score_photos(&state, &exif_dao, sem, &library_ids, threshold, None).await { + Ok(s) => s, + Err(e) => return score_error_response(e), + }; + let hits: Vec<(f32, String)> = if has_struct { + scored + .hits + .into_iter() + .filter(|(_, h)| allowed_hashes.contains(h)) + .collect() + } else { + scored.hits + }; + let total_matching = hits.len(); + let page = paginate(&hits, offset, limit); + let results = resolve_hits(&exif_dao, &page); + HttpResponse::Ok().json(UnifiedResponse { + query: nl, + interpreted: interpreted(&sq, resolved_place), + model_version: Some(scored.model_version), + considered: scored.considered, + total_matching, + offset, + results, + }) + } + None => { + // Filters-only: no semantic term. Require at least one filter, + // then return the candidate set newest-first. + if !has_struct { + return bad_request("query had no searchable terms"); + } + candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken)); + let total_matching = candidate.len(); + let end = (offset + limit).min(total_matching); + let results: Vec = if offset >= total_matching { + Vec::new() + } else { + candidate[offset..end] + .iter() + .map(|r| SearchHit { + library_id: r.library_id, + rel_path: r.file_path.clone(), + content_hash: r.content_hash.clone().unwrap_or_default(), + score: 0.0, + }) + .collect() + }; + HttpResponse::Ok().json(UnifiedResponse { + query: nl, + interpreted: interpreted(&sq, resolved_place), + model_version: None, + considered: 0, + total_matching, + offset, + results, + }) + } + } +} + +/// Slice a sorted hit list at `[offset, offset+limit)`, tolerating +/// out-of-range offsets (empty page). +fn paginate(hits: &[(f32, String)], offset: usize, limit: usize) -> Vec<(f32, String)> { + if offset >= hits.len() { + return Vec::new(); + } + let end = (offset + limit).min(hits.len()); + hits[offset..end].to_vec() +} + +fn interpreted(sq: &StructuredQuery, place: Option) -> Interpreted { + Interpreted { + semantic: sq.semantic.clone(), + tag_ids: sq.tag_ids.clone(), + exclude_tag_ids: sq.exclude_tag_ids.clone(), + unmatched_tags: sq.unmatched_tags.clone(), + camera_make: sq.camera_make.clone(), + camera_model: sq.camera_model.clone(), + lens_model: sq.lens_model.clone(), + date_from: sq.date_from, + date_to: sq.date_to, + media_type: sq.media_type.clone(), + place, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ai::nl_query::StructuredQuery; + + #[test] + fn effective_semantic_combines_semantic_and_unmatched() { + let sq = StructuredQuery { + semantic: Some("sunset".into()), + unmatched_tags: vec!["golden hour".into()], + ..Default::default() + }; + assert_eq!( + effective_semantic(&sq).as_deref(), + Some("sunset golden hour") + ); + } + + #[test] + fn effective_semantic_none_when_empty() { + let sq = StructuredQuery::default(); + assert_eq!(effective_semantic(&sq), None); + } + + #[test] + fn effective_semantic_unmatched_only() { + let sq = StructuredQuery { + unmatched_tags: vec!["disco".into()], + ..Default::default() + }; + assert_eq!(effective_semantic(&sq).as_deref(), Some("disco")); + } + + #[test] + fn paginate_handles_out_of_range_offset() { + let hits = vec![(0.9, "a".to_string()), (0.8, "b".to_string())]; + assert_eq!(paginate(&hits, 5, 10).len(), 0); + assert_eq!(paginate(&hits, 0, 1).len(), 1); + assert_eq!(paginate(&hits, 1, 10).len(), 1); + } +} From 8dce536f385ac086791283add6b1e4004c481992 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 01:19:53 -0400 Subject: [PATCH 3/9] Unified search: accept client model override (avoid model swapping) Add an optional `model` query param to /photos/search/unified, passed into resolve_backend's overrides. The client sends the user's currently-selected local model so the translation step reuses an already-loaded model instead of forcing a llama-swap eviction + cold start. Falls back to the configured default when absent. Still local only (no hybrid). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/unified_search.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/unified_search.rs b/src/unified_search.rs index a3187a4..7bb78dd 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -53,6 +53,11 @@ pub struct UnifiedQuery { pub library: Option, /// Multi-library scope, comma-separated ids. pub library_ids: Option, + /// Optional model override. The client passes the user's currently-selected + /// local model so the translation step reuses a model that's already loaded + /// (avoids a llama-swap eviction / cold start). Falls back to the configured + /// default local model when absent. Local only — no hybrid here. + pub model: Option, } fn default_limit() -> usize { @@ -167,9 +172,12 @@ pub async fn unified_search( }; // Respect env/config for the LLM backend (LLM_BACKEND → ollama or - // llama-swap); local only, no hybrid, per the feature's design. + // llama-swap); local only, no hybrid, per the feature's design. The + // client-supplied model (the user's current selection) routes translation + // to an already-loaded model when possible; otherwise resolve_backend + // falls back to the configured default. let overrides = SamplingOverrides { - model: None, + model: query.model.clone().filter(|m| !m.is_empty()), num_ctx: None, temperature: None, top_p: None, From 2cde88d73472981db8082d5c38cb62fa81498711 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 01:29:21 -0400 Subject: [PATCH 4/9] Unified search: stage-by-stage logging to debug empty results Log the translated query (semantic/tags/place/date/media + has_struct), the tag-filter file count, candidate-row + allowed-hash counts, and the CLIP considered/hits/after-filter counts. Pinpoints which stage drops results to zero (over-extracted filter, tag path mismatch, Any/All over-constraint, or CLIP threshold). info-level for now while debugging. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/unified_search.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/unified_search.rs b/src/unified_search.rs index 7bb78dd..d80feec 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -231,6 +231,22 @@ pub async fn unified_search( let has_struct = has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some(); + // Stage trace: what the model extracted + whether a structured filter is + // active. The chips show this to the user too, but logging it makes the + // "why no results" path debuggable from the server side. + log::info!( + "unified_search: q={nl:?} semantic={:?} tag_ids={:?} exclude={:?} place={:?} gps={:?} date=({:?},{:?}) media={:?} unmatched={:?} has_struct={has_struct}", + sq.semantic, + sq.tag_ids, + sq.exclude_tag_ids, + resolved_place.as_ref().map(|p| p.display_name.as_str()), + gps, + sq.date_from, + sq.date_to, + sq.media_type, + sq.unmatched_tags, + ); + // ── 3. Build the structured candidate set (EXIF rows passing every // filter). Skipped entirely for a pure-semantic query. ── let mut candidate: Vec = Vec::new(); @@ -255,6 +271,11 @@ pub async fn unified_search( } } }; + log::info!( + "unified_search: tag_ids={:?} -> tag_set_files={:?}", + sq.tag_ids, + tag_set.as_ref().map(|s| s.len()) + ); // EXIF query handles camera/lens/gps-box/date. With no EXIF filters // it returns the whole table, which we then narrow by the predicates @@ -322,6 +343,11 @@ pub async fn unified_search( .iter() .filter_map(|r| r.content_hash.clone()) .collect(); + log::info!( + "unified_search: candidate_rows={} allowed_hashes={}", + candidate.len(), + allowed_hashes.len() + ); } // ── 4. Rank ── @@ -334,6 +360,8 @@ pub async fn unified_search( Ok(s) => s, Err(e) => return score_error_response(e), }; + let considered = scored.considered; + let clip_hits = scored.hits.len(); let hits: Vec<(f32, String)> = if has_struct { scored .hits @@ -343,6 +371,10 @@ pub async fn unified_search( } else { scored.hits }; + log::info!( + "unified_search: clip considered={considered} hits={clip_hits} after_struct_filter={}", + hits.len() + ); let total_matching = hits.len(); let page = paginate(&hits, offset, limit); let results = resolve_hits(&exif_dao, &page); @@ -364,6 +396,7 @@ pub async fn unified_search( } candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken)); let total_matching = candidate.len(); + log::info!("unified_search: filters-only matches={total_matching}"); let end = (offset + limit).min(total_matching); let results: Vec = if offset >= total_matching { Vec::new() From b18ab6ed99fb662e87de659a8669734365b0c4c9 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 01:58:48 -0400 Subject: [PATCH 5/9] Unified search: UNIFIED_SEARCH_MODEL env override for the translation step Pin the NL->structured translation to a small, fast model that can stay co-resident with CLIP (and the chat model) so it never evicts them on a tight VRAM budget. Precedence: UNIFIED_SEARCH_MODEL env > client-selected model > configured default. Logs the effective model (backend.model()) so model A/B tests are visible. Documented in .env.example. Co-Authored-By: Claude Opus 4.8 (1M context) --- .env.example | 10 ++++++++++ src/unified_search.rs | 21 ++++++++++++++++----- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index 2e431bc..64c31d3 100644 --- a/.env.example +++ b/.env.example @@ -80,6 +80,16 @@ AGENTIC_CHAT_MAX_ITERATIONS=6 # LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed # LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180 +# ── Unified search translation model (optional) ───────────────────────── +# /photos/search/unified runs one small LLM call to translate a natural- +# language query into structured filters + a semantic term, then CLIP-ranks. +# That step needs an LLM AND CLIP available at once. On a tight VRAM budget a +# large chat model can't co-reside with CLIP, so pin a small, fast model here +# (it can stay loaded alongside CLIP and the chat model). Precedence: +# UNIFIED_SEARCH_MODEL > the client's selected model > the configured default. +# Use the configured backend (LLM_BACKEND); local only — no hybrid. +# UNIFIED_SEARCH_MODEL=qwen3-0.6b + # ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ─────────────────── # TTS routes through the same llama-swap proxy (a Chatterbox model id), so it # only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp. diff --git a/src/unified_search.rs b/src/unified_search.rs index d80feec..bb6344c 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -172,12 +172,22 @@ pub async fn unified_search( }; // Respect env/config for the LLM backend (LLM_BACKEND → ollama or - // llama-swap); local only, no hybrid, per the feature's design. The - // client-supplied model (the user's current selection) routes translation - // to an already-loaded model when possible; otherwise resolve_backend - // falls back to the configured default. + // llama-swap); local only, no hybrid, per the feature's design. + // + // Translation-model precedence: + // 1. UNIFIED_SEARCH_MODEL env — pin a small, fast model that can stay + // co-resident with CLIP (and the chat model) so translation never + // evicts them. This is the recommended setup on a tight VRAM budget. + // 2. the client-selected model — routes translation to whatever the user + // already has loaded (no swap) when no dedicated model is pinned. + // 3. None → resolve_backend uses the configured default local model. + let translation_model = std::env::var("UNIFIED_SEARCH_MODEL") + .ok() + .filter(|m| !m.trim().is_empty()) + .or_else(|| query.model.clone()) + .filter(|m| !m.trim().is_empty()); let overrides = SamplingOverrides { - model: query.model.clone().filter(|m| !m.is_empty()), + model: translation_model, num_ctx: None, temperature: None, top_p: None, @@ -197,6 +207,7 @@ pub async fn unified_search( }); } }; + log::info!("unified_search: translating with model={}", backend.model()); let today = chrono::Utc::now().date_naive(); let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await { From e7f6dd56dacba4d146dec195b5b703343b706721 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 02:02:57 -0400 Subject: [PATCH 6/9] clip_client: log encode_text failures (URL + status/body or network error) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CLIP encode failure reason was only ever returned in the HTTP response body, never logged server-side, making 502s from /photos/search opaque. Log the underlying cause — network error to the URL, or the Apollo HTTP status + response body — so CLIP-service problems are diagnosable from the ImageApi log. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ai/clip_client.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ai/clip_client.rs b/src/ai/clip_client.rs index 85c66a7..3519e8b 100644 --- a/src/ai/clip_client.rs +++ b/src/ai/clip_client.rs @@ -191,11 +191,13 @@ impl ClipClient { let resp = match self.client.post(&url).json(&body).send().await { Ok(r) => r, Err(e) if e.is_timeout() || e.is_connect() => { + log::warn!("clip encode_text network error to {url}: {e}"); return Err(ClipError::Transient(anyhow::anyhow!( "clip client network: {e}" ))); } Err(e) => { + log::warn!("clip encode_text request error to {url}: {e}"); return Err(ClipError::Transient(anyhow::anyhow!( "clip client request: {e}" ))); @@ -210,6 +212,7 @@ impl ClipClient { return Ok(body); } let body_text = resp.text().await.unwrap_or_default(); + log::warn!("clip encode_text HTTP {status} from {url}: {body_text}"); Err(classify_error_response(status.as_u16(), &body_text)) } From b594acc4bd5ba66c8ac844fc07748f8c133a21f4 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 02:20:06 -0400 Subject: [PATCH 7/9] Unified search: rank within filtered set instead of pre-thresholding CLIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When structured filters are present they're the constraint and CLIP only ranks within the candidate set, so drop the global similarity threshold for that case. Previously the 0.2 whole-library threshold ran BEFORE intersecting with the filters, discarding filter-matching photos that scored just under it (e.g. a 2022 beach photo at 0.18) — producing after_struct_filter=0 even when matches existed. Plain semantic (no filters) keeps the user's threshold. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/unified_search.rs | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/unified_search.rs b/src/unified_search.rs index bb6344c..9eea405 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -364,13 +364,27 @@ pub async fn unified_search( // ── 4. Rank ── match semantic { Some(ref sem) => { - // Semantic term present: CLIP-rank, then keep only hits that pass - // the structured filters (by content_hash). - let scored = - match score_photos(&state, &exif_dao, sem, &library_ids, threshold, None).await { - Ok(s) => s, - Err(e) => return score_error_response(e), - }; + // When structured filters are present they ARE the constraint — + // CLIP only ranks within the candidate set. So drop the global + // similarity threshold (it's tuned for whole-library search and + // would pre-discard filter-matching photos that scored just under + // it — e.g. a 2022 beach photo at 0.18 — before the intersection + // ever runs). With no filters, keep the user's threshold for the + // plain semantic case. + let clip_threshold = if has_struct { -1.0 } else { threshold }; + let scored = match score_photos( + &state, + &exif_dao, + sem, + &library_ids, + clip_threshold, + None, + ) + .await + { + Ok(s) => s, + Err(e) => return score_error_response(e), + }; let considered = scored.considered; let clip_hits = scored.hits.len(); let hits: Vec<(f32, String)> = if has_struct { From 7684220f52d664006f41940b7987245854b8c3de Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 14 Jun 2026 02:25:24 -0400 Subject: [PATCH 8/9] Unified search: use ANY-mode tag matching, not ALL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ALL-mode over-constrains NL queries — the model maps several query words to tags and few photos carry every one, zeroing the candidate set. Switch to ANY (a photo matches if it has any named tag); the semantic CLIP ranking provides precision within that pool. Exclude tags still filter out. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/unified_search.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/unified_search.rs b/src/unified_search.rs index 9eea405..555773c 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -264,13 +264,15 @@ pub async fn unified_search( let mut allowed_hashes: HashSet = HashSet::new(); if has_struct { // Tag membership set (rel_path only — same cross-library imprecision - // as the existing /photos tag listing). ALL-mode: the photo must - // carry every named tag. + // as the existing /photos tag listing). ANY-mode: a photo matches if + // it carries any of the named tags. ALL-mode over-constrains NL + // queries (the model maps several words to tags and few photos carry + // them all); the semantic term does the precision work instead. let tag_set: Option> = if sq.tag_ids.is_empty() { None } else { let mut dao = tag_dao.lock().expect("tag dao"); - match dao.get_files_with_all_tag_ids( + match dao.get_files_with_any_tag_ids( sq.tag_ids.clone(), sq.exclude_tag_ids.clone(), &ctx, From 475072810e1c604cf151a7d4e4a01c6346421b6d Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Wed, 17 Jun 2026 18:14:44 -0400 Subject: [PATCH 9/9] AI: add enable_thinking reasoning toggle plumbed to llama.cpp New optional SamplingOverride forwarded to llama-server as chat_template_kwargs.enable_thinking (gates Qwen3-style reasoning blocks). None leaves the template default; other backends ignore it. Wired through the agentic-insight and chat-turn request bodies/handlers. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/ai/backend.rs | 6 ++++++ src/ai/handlers.rs | 15 +++++++++++++++ src/ai/insight_chat.rs | 9 +++++++++ src/ai/insight_generator.rs | 3 +++ src/ai/llamacpp.rs | 19 +++++++++++++++++++ src/bin/populate_knowledge.rs | 1 + src/reels/script.rs | 1 + src/unified_search.rs | 1 + 8 files changed, 55 insertions(+) diff --git a/src/ai/backend.rs b/src/ai/backend.rs index 0515f1c..dfcdd03 100644 --- a/src/ai/backend.rs +++ b/src/ai/backend.rs @@ -41,6 +41,10 @@ pub struct SamplingOverrides { pub top_p: Option, pub top_k: Option, pub min_p: Option, + /// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as + /// `chat_template_kwargs.enable_thinking`); other backends ignore it. + /// `None` leaves the model/template default in place. + pub enable_thinking: Option, } impl SamplingOverrides { @@ -124,6 +128,7 @@ mod tests { top_p: None, top_k: None, min_p: None, + enable_thinking: None, }; assert!(!empty.has_sampling()); @@ -134,6 +139,7 @@ mod tests { top_p: None, top_k: None, min_p: None, + enable_thinking: None, }; assert!(with_temp.has_sampling()); } diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index c6bc212..ae9f300 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -40,6 +40,12 @@ pub struct GeneratePhotoInsightRequest { pub top_k: Option, #[serde(default)] pub min_p: Option, + /// Reasoning toggle for thinking-capable models. Forwarded to the + /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored + /// by other backends and the non-agentic (Ollama) path. Only the agentic + /// endpoint routes through llama.cpp. None defers to the template default. + #[serde(default)] + pub enable_thinking: Option, /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision + /// OpenRouter chat). Only respected by the agentic endpoint. #[serde(default)] @@ -868,6 +874,7 @@ pub async fn generate_agentic_insight_handler( request.top_p, request.top_k, request.min_p, + request.enable_thinking, max_iterations, request.backend.clone(), fewshot_examples, @@ -1169,6 +1176,11 @@ pub struct ChatTurnHttpRequest { pub top_k: Option, #[serde(default)] pub min_p: Option, + /// Reasoning toggle for thinking-capable models. Forwarded to the + /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored + /// by other backends. None defers to the model/template default. + #[serde(default)] + pub enable_thinking: Option, #[serde(default)] pub max_iterations: Option, /// Per-turn system-prompt override. Ephemeral in append mode, @@ -1247,6 +1259,7 @@ pub async fn chat_turn_handler( top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, + enable_thinking: request.enable_thinking, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), @@ -1473,6 +1486,7 @@ pub async fn chat_stream_handler( top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, + enable_thinking: request.enable_thinking, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), @@ -1618,6 +1632,7 @@ pub async fn turn_async_handler( top_p: request.top_p, top_k: request.top_k, min_p: request.min_p, + enable_thinking: request.enable_thinking, max_iterations: request.max_iterations, system_prompt: request.system_prompt.clone(), persona_id: request.persona_id.clone(), diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index 84f2b32..af00731 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -70,6 +70,10 @@ pub struct ChatTurnRequest { pub top_p: Option, pub top_k: Option, pub min_p: Option, + /// Reasoning toggle for thinking-capable models. Forwarded to the + /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored + /// by other backends. None defers to the model/template default. + pub enable_thinking: Option, pub max_iterations: Option, /// Per-turn system-prompt override. In append mode (default), applied /// ephemerally — original system message restored before persistence. @@ -344,6 +348,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -847,6 +852,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -1017,6 +1023,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -1425,6 +1432,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); @@ -1607,6 +1615,7 @@ impl InsightChatService { top_p: req.top_p, top_k: req.top_k, min_p: req.min_p, + enable_thinking: req.enable_thinking, }; let backend = self.generator.resolve_backend(kind, &overrides).await?; let model_used = backend.model().to_string(); diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 4ff8494..d45fa55 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -3933,6 +3933,7 @@ Return ONLY the summary, nothing else."#, if let Some(ctx) = overrides.num_ctx { c.set_num_ctx(Some(ctx)); } + c.set_enable_thinking(overrides.enable_thinking); Box::new(c) } else { // Pure Ollama local. @@ -4064,6 +4065,7 @@ Return ONLY the summary, nothing else."#, top_p: Option, top_k: Option, min_p: Option, + enable_thinking: Option, max_iterations: usize, backend: Option, fewshot_examples: Vec>, @@ -4091,6 +4093,7 @@ Return ONLY the summary, nothing else."#, top_p, top_k, min_p, + enable_thinking, }; let backend = self.resolve_backend(kind, &overrides).await?; span.set_attribute(KeyValue::new("model", backend.model().to_string())); diff --git a/src/ai/llamacpp.rs b/src/ai/llamacpp.rs index 8a7c898..77e7f63 100644 --- a/src/ai/llamacpp.rs +++ b/src/ai/llamacpp.rs @@ -64,6 +64,12 @@ pub struct LlamaCppClient { top_p: Option, top_k: Option, min_p: Option, + /// When `Some`, forwarded to llama-server as + /// `chat_template_kwargs: {"enable_thinking": }`. The Jinja chat + /// template (e.g. Qwen3) reads this to gate its reasoning block. `None` + /// omits the key entirely, leaving the template's own default. Templates + /// that don't reference the key ignore it, so sending it is harmless. + enable_thinking: Option, } impl LlamaCppClient { @@ -89,6 +95,7 @@ impl LlamaCppClient { top_p: None, top_k: None, min_p: None, + enable_thinking: None, } } @@ -104,6 +111,12 @@ impl LlamaCppClient { self.num_ctx = num_ctx; } + /// Set the reasoning toggle forwarded as `chat_template_kwargs.enable_thinking`. + /// `None` leaves the chat template's own default in place. + pub fn set_enable_thinking(&mut self, enable_thinking: Option) { + self.enable_thinking = enable_thinking; + } + pub fn set_sampling_params( &mut self, temperature: Option, @@ -458,6 +471,12 @@ impl LlamaCppClient { // via -c, so we silently drop the override here. The config.yaml // entry is the source of truth for context size. let _ = self.num_ctx; + // Reasoning toggle for thinking-capable templates (Qwen3 et al.). + // llama-server forwards chat_template_kwargs into the Jinja render + // (requires --jinja); templates that ignore the key are unaffected. + if let Some(think) = self.enable_thinking { + v.push(("chat_template_kwargs", json!({ "enable_thinking": think }))); + } v } diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs index 71f2f8a..396eddc 100644 --- a/src/bin/populate_knowledge.rs +++ b/src/bin/populate_knowledge.rs @@ -336,6 +336,7 @@ async fn main() -> anyhow::Result<()> { args.top_p, args.top_k, args.min_p, + None, // enable_thinking: leave model/template default args.max_iterations, None, Vec::new(), diff --git a/src/reels/script.rs b/src/reels/script.rs index 858efd1..38ef9cc 100644 --- a/src/reels/script.rs +++ b/src/reels/script.rs @@ -309,6 +309,7 @@ pub async fn generate_script_agentic( top_p: None, top_k: None, min_p: None, + enable_thinking: None, }, ) .await diff --git a/src/unified_search.rs b/src/unified_search.rs index 555773c..0940a92 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -193,6 +193,7 @@ pub async fn unified_search( top_p: None, top_k: None, min_p: None, + enable_thinking: None, }; let backend = match state .insight_generator