From 09ccb347c7be5c5177c7bbe580f6c3268a85d970 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Sun, 14 Jun 2026 00:44:16 -0400
Subject: [PATCH 1/9] =?UTF-8?q?Unified=20NL=20search=20Phase=201:=20NL?=
 =?UTF-8?q?=E2=86=92structured-query=20translator=20+=20forward=20geocodin?=
 =?UTF-8?q?g?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foundation for the /photos/search/unified endpoint (Phase 2). Two new,
fully unit-tested pieces, not yet wired into a route (allow-until-wired,
mirroring llm_client.rs):

- ai/nl_query.rs: translate a free-text query into a StructuredQuery via one
  grounded LLM call. Two-stage — the model emits names/ISO dates, then a pure
  resolve step maps tag names against the real vocab and converts dates to
  unix seconds. Hallucinated (non-vocab) tags are surfaced in unmatched_tags
  rather than silently used as hard filters — the anti-noise guard. 12 tests.

- geo::forward_geocode + bbox_to_circle: resolve a place name to a circle via
  Nominatim /search, collapsing the bounding box to centroid + circumscribing
  radius so "Portland" and "Italy" both map onto the existing gps circle
  filter with no schema change. Radius is the max centroid-to-corner distance
  (corners aren't equidistant on a sphere). 4 tests.

fmt + clippy clean; 19 new tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/ai/mod.rs      |   1 +
 src/ai/nl_query.rs | 414 +++++++++++++++++++++++++++++++++++++++++++++
 src/geo.rs         | 180 ++++++++++++++++++++
 3 files changed, 595 insertions(+)
 create mode 100644 src/ai/nl_query.rs

diff --git a/src/ai/mod.rs b/src/ai/mod.rs
index c5302fb..7d0802e 100644
--- a/src/ai/mod.rs
+++ b/src/ai/mod.rs
@@ -10,6 +10,7 @@ pub mod insight_generator;
 pub mod llamacpp;
 pub mod llm_client;
 pub mod local_llm;
+pub mod nl_query;
 pub mod ollama;
 pub mod openrouter;
 pub mod pronunciation;
diff --git a/src/ai/nl_query.rs b/src/ai/nl_query.rs
new file mode 100644
index 0000000..a94fc06
--- /dev/null
+++ b/src/ai/nl_query.rs
@@ -0,0 +1,414 @@
+//! Natural-language → structured-query translation for unified photo search.
+//!
+//! The unified search endpoint (`/photos/search/unified`, Phase 2) needs to
+//! turn a free-text query like *"sunset photos in Italy from last summer"*
+//! into the structured filter the existing `/photos` engine understands plus
+//! a semantic term for CLIP ranking. That translation is a single grounded
+//! LLM call, isolated here so it can be unit-tested without a network or the
+//! full `InsightGenerator`.
+//!
+//! Two-stage design:
+//!  1. The LLM emits a [`RawNlQuery`] — references are by *name* (tags) and
+//!     dates as ISO strings, never numeric ids it could hallucinate.
+//!  2. [`resolve_raw_query`] maps names against the real tag vocabulary and
+//!     converts ISO dates to unix seconds, producing a [`StructuredQuery`].
+//!     A tag the model invents that isn't in the vocab is surfaced in
+//!     `unmatched_tags` (the caller folds it back into the semantic term)
+//!     rather than silently dropped — this is the anti-noise guard.
+//!
+//! Geocoding of `place` and person filtering are intentionally *not* handled
+//! here: `place` stays as text for the caller to forward-geocode (async, see
+//! `geo::forward_geocode`), and person filtering is deferred until a
+//! person→photos resolver exists.
+
+// Phase 1: this module is fully implemented and unit-tested, but its first
+// consumer (the `/photos/search/unified` endpoint) lands in Phase 2. Mirrors
+// llm_client.rs's allow-until-wired pattern so the bin target stays
+// clippy-clean in the interim; remove when the endpoint is added.
+#![allow(dead_code)]
+
+use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks};
+use anyhow::{Result, anyhow};
+use serde::{Deserialize, Serialize};
+
+/// Raw query object as emitted by the LLM. Tag references are by name
+/// (resolved against the real vocab in Rust); dates are ISO `YYYY-MM-DD`.
+/// Every field is optional so a partial / minimal model response still
+/// deserializes.
+#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
+pub struct RawNlQuery {
+    /// Visual/scene description handed to CLIP for ranking. The descriptive
+    /// remainder after structured filters are peeled off.
+    #[serde(default)]
+    pub semantic: Option<String>,
+    /// Tag names the photos must have. Matched case-insensitively against
+    /// the supplied vocabulary; non-matches land in `unmatched_tags`.
+    #[serde(default)]
+    pub tags: Vec<String>,
+    /// Tag names the photos must NOT have.
+    #[serde(default)]
+    pub exclude_tags: Vec<String>,
+    #[serde(default)]
+    pub camera_make: Option<String>,
+    #[serde(default)]
+    pub camera_model: Option<String>,
+    #[serde(default)]
+    pub lens_model: Option<String>,
+    /// Free-text place/location name to forward-geocode (e.g. "Italy").
+    #[serde(default)]
+    pub place: Option<String>,
+    /// Inclusive start date, ISO `YYYY-MM-DD`.
+    #[serde(default)]
+    pub date_from: Option<String>,
+    /// Inclusive end date, ISO `YYYY-MM-DD`.
+    #[serde(default)]
+    pub date_to: Option<String>,
+    /// "photo" | "video" — normalized in [`resolve_raw_query`].
+    #[serde(default)]
+    pub media_type: Option<String>,
+}
+
+/// Resolved structured query: tag names mapped to ids against the real
+/// vocab, ISO dates converted to unix seconds. `place` stays as text for the
+/// caller to forward-geocode into a gps circle. Serializable so the endpoint
+/// can echo it back to the client as "this is how I read your query"
+/// (editable filter chips).
+#[derive(Debug, Clone, Default, PartialEq, Serialize)]
+pub struct StructuredQuery {
+    pub semantic: Option<String>,
+    pub tag_ids: Vec<i32>,
+    pub exclude_tag_ids: Vec<i32>,
+    /// Tag names the model produced that don't exist in the vocabulary.
+    /// The caller folds these back into the semantic term so the concept
+    /// isn't lost — and surfacing them keeps a hallucinated tag from
+    /// silently filtering the whole library to nothing.
+    pub unmatched_tags: Vec<String>,
+    pub camera_make: Option<String>,
+    pub camera_model: Option<String>,
+    pub lens_model: Option<String>,
+    /// Raw place name awaiting forward-geocoding by the caller.
+    pub place: Option<String>,
+    pub date_from: Option<i64>,
+    pub date_to: Option<i64>,
+    /// Normalized to "photo" | "video"; `None` means no media-type filter.
+    pub media_type: Option<String>,
+}
+
+/// Convert an ISO `YYYY-MM-DD` date to a unix timestamp (seconds). With
+/// `end_of_day`, returns 23:59:59 of that day so a `date_to` filter is
+/// inclusive of the whole day; otherwise 00:00:00. Returns `None` for any
+/// unparseable input (the filter is simply omitted rather than erroring).
+pub fn iso_to_unix(date: &str, end_of_day: bool) -> Option<i64> {
+    let d = chrono::NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok()?;
+    let time = if end_of_day {
+        chrono::NaiveTime::from_hms_opt(23, 59, 59)?
+    } else {
+        chrono::NaiveTime::from_hms_opt(0, 0, 0)?
+    };
+    Some(d.and_time(time).and_utc().timestamp())
+}
+
+/// Normalize a free-form media-type string to the engine's vocabulary.
+/// Anything that isn't clearly photo or video (including "all") yields
+/// `None` — no filter.
+fn normalize_media_type(raw: &str) -> Option<String> {
+    match raw.trim().to_lowercase().as_str() {
+        "photo" | "photos" | "image" | "images" | "picture" | "pictures" => {
+            Some("photo".to_string())
+        }
+        "video" | "videos" | "movie" | "movies" | "clip" | "clips" => Some("video".to_string()),
+        _ => None,
+    }
+}
+
+/// Resolve a raw LLM query against the real tag vocabulary, producing the
+/// structured filter. Pure — no network, no LLM — so it carries the
+/// correctness-critical mapping logic under unit test.
+///
+/// `tag_vocab` is `(tag_id, tag_name)` pairs (the shape `TagDao::get_all_tags`
+/// yields once the count is dropped). Matching is case-insensitive and exact
+/// on the trimmed name.
+pub fn resolve_raw_query(raw: RawNlQuery, tag_vocab: &[(i32, String)]) -> StructuredQuery {
+    // Case-insensitive name → id lookup. Built once per call.
+    let lookup: std::collections::HashMap<String, i32> = tag_vocab
+        .iter()
+        .map(|(id, name)| (name.trim().to_lowercase(), *id))
+        .collect();
+
+    let resolve_names = |names: &[String], ids: &mut Vec<i32>, unmatched: &mut Vec<String>| {
+        for name in names {
+            let key = name.trim().to_lowercase();
+            if key.is_empty() {
+                continue;
+            }
+            match lookup.get(&key) {
+                Some(id) if !ids.contains(id) => ids.push(*id),
+                Some(_) => {} // duplicate, already collected
+                None => {
+                    if !unmatched.iter().any(|u| u.eq_ignore_ascii_case(name)) {
+                        unmatched.push(name.trim().to_string());
+                    }
+                }
+            }
+        }
+    };
+
+    let mut tag_ids = Vec::new();
+    let mut unmatched_tags = Vec::new();
+    resolve_names(&raw.tags, &mut tag_ids, &mut unmatched_tags);
+
+    // Excluded tags that don't match a real tag are simply ignored — you
+    // can't exclude a tag that doesn't exist, and folding them into
+    // `semantic` would make no sense.
+    let mut exclude_tag_ids = Vec::new();
+    let mut exclude_unmatched = Vec::new();
+    resolve_names(
+        &raw.exclude_tags,
+        &mut exclude_tag_ids,
+        &mut exclude_unmatched,
+    );
+
+    let clean = |s: Option<String>| s.map(|v| v.trim().to_string()).filter(|v| !v.is_empty());
+
+    StructuredQuery {
+        semantic: clean(raw.semantic),
+        tag_ids,
+        exclude_tag_ids,
+        unmatched_tags,
+        camera_make: clean(raw.camera_make),
+        camera_model: clean(raw.camera_model),
+        lens_model: clean(raw.lens_model),
+        place: clean(raw.place),
+        date_from: raw.date_from.as_deref().and_then(|d| iso_to_unix(d, false)),
+        date_to: raw.date_to.as_deref().and_then(|d| iso_to_unix(d, true)),
+        media_type: raw.media_type.as_deref().and_then(normalize_media_type),
+    }
+}
+
+/// Build the grounded system prompt. The model is told the current date (so
+/// "last summer" resolves) and the exact tag vocabulary (so it uses real
+/// tags or routes the concept to `semantic` instead of inventing one).
+fn build_system_prompt(tag_vocab: &[(i32, String)], today: chrono::NaiveDate) -> String {
+    // Cap the vocab dump so a huge library doesn't blow the context window;
+    // the most-used tags are the ones a query is likely to reference.
+    const MAX_TAGS: usize = 400;
+    let mut names: Vec<&str> = tag_vocab.iter().map(|(_, n)| n.as_str()).collect();
+    names.sort_unstable();
+    names.dedup();
+    let shown = names.len().min(MAX_TAGS);
+    let vocab = names[..shown].join(", ");
+    let truncation = if names.len() > MAX_TAGS {
+        format!(" (showing {MAX_TAGS} of {} tags)", names.len())
+    } else {
+        String::new()
+    };
+
+    format!(
+        "You translate a user's natural-language photo-search request into a JSON \
+filter. Today's date is {today}. Respond with ONLY a JSON object, no prose, no \
+code fences.\n\n\
+Schema (all fields optional):\n\
+{{\n  \
+\"semantic\": string|null,        // visual scene/subject for image similarity search\n  \
+\"tags\": string[],               // ONLY names from the tag list below\n  \
+\"exclude_tags\": string[],       // ONLY names from the tag list below\n  \
+\"camera_make\": string|null,\n  \
+\"camera_model\": string|null,\n  \
+\"lens_model\": string|null,\n  \
+\"place\": string|null,           // a location name to look up (city, country, landmark)\n  \
+\"date_from\": \"YYYY-MM-DD\"|null,  // inclusive\n  \
+\"date_to\": \"YYYY-MM-DD\"|null,    // inclusive\n  \
+\"media_type\": \"photo\"|\"video\"|null\n\
+}}\n\n\
+Rules:\n\
+- Put descriptive/visual concepts (\"sunset\", \"crowded beach\", \"red car\") in \"semantic\".\n\
+- Only use \"tags\"/\"exclude_tags\" values that appear EXACTLY in the tag list. If a \
+concept isn't a listed tag, put it in \"semantic\" instead — never invent a tag.\n\
+- Resolve relative dates against today's date (\"last summer\", \"2023\", \"last month\").\n\
+- Put place/location names in \"place\" (not \"semantic\").\n\
+- Omit (use null / empty array) anything the request doesn't mention.\n\n\
+Available tags{truncation}: {vocab}"
+    )
+}
+
+/// Extract the JSON object from a model response that may include a leading
+/// `<think>` block, code fences, or trailing prose. Strips the think block
+/// first (so reasoning that mentions braces can't fool the scan), then
+/// returns the substring from the first `{` to the last `}` inclusive — or
+/// the trimmed text if no braces are found (which then fails to parse with a
+/// clear error).
+fn extract_json(raw: &str) -> String {
+    let s = strip_think_blocks(raw);
+    let start = s.find('{');
+    let end = s.rfind('}');
+    match (start, end) {
+        (Some(a), Some(b)) if b >= a => s[a..=b].to_string(),
+        _ => s.trim().to_string(),
+    }
+}
+
+/// Parse a model response string into a [`StructuredQuery`], resolving names
+/// against the vocab. Separated from the LLM call so it's unit-testable.
+pub fn parse_response(response: &str, tag_vocab: &[(i32, String)]) -> Result<StructuredQuery> {
+    let json = extract_json(response);
+    let raw: RawNlQuery = serde_json::from_str(&json)
+        .map_err(|e| anyhow!("failed to parse NL query JSON: {e}; raw response: {response:?}"))?;
+    Ok(resolve_raw_query(raw, tag_vocab))
+}
+
+/// Translate a natural-language query into a [`StructuredQuery`] via one
+/// grounded LLM call. The `client` is any configured backend (the unified
+/// endpoint passes the resolved chat backend); `tag_vocab` grounds the tag
+/// mapping; `today` anchors relative-date resolution.
+pub async fn translate_nl_query(
+    client: &dyn LlmClient,
+    nl: &str,
+    tag_vocab: &[(i32, String)],
+    today: chrono::NaiveDate,
+) -> Result<StructuredQuery> {
+    let system = build_system_prompt(tag_vocab, today);
+    let messages = vec![ChatMessage::system(system), ChatMessage::user(nl)];
+    let (msg, _, _) = client.chat_with_tools(messages, Vec::<Tool>::new()).await?;
+    parse_response(&msg.content, tag_vocab)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn vocab() -> Vec<(i32, String)> {
+        vec![
+            (1, "beach".to_string()),
+            (2, "Sunset".to_string()), // mixed case to exercise case-insensitivity
+            (3, "family".to_string()),
+        ]
+    }
+
+    #[test]
+    fn iso_to_unix_start_and_end_of_day() {
+        // 2023-01-01 UTC midnight = 1672531200.
+        assert_eq!(iso_to_unix("2023-01-01", false), Some(1_672_531_200));
+        // End of that day is 86399 seconds later.
+        assert_eq!(
+            iso_to_unix("2023-01-01", true),
+            Some(1_672_531_200 + 86_399)
+        );
+    }
+
+    #[test]
+    fn iso_to_unix_rejects_garbage() {
+        assert_eq!(iso_to_unix("last summer", false), None);
+        assert_eq!(iso_to_unix("2023-13-99", false), None);
+        assert_eq!(iso_to_unix("", false), None);
+    }
+
+    #[test]
+    fn resolve_matches_tags_case_insensitively() {
+        let raw = RawNlQuery {
+            tags: vec!["BEACH".to_string(), "sunset".to_string()],
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.tag_ids, vec![1, 2]);
+        assert!(q.unmatched_tags.is_empty());
+    }
+
+    #[test]
+    fn resolve_surfaces_unmatched_tags_not_silently_dropped() {
+        // A hallucinated / non-vocab tag must be surfaced so the caller can
+        // fold it into semantic — never silently used as a hard filter.
+        let raw = RawNlQuery {
+            tags: vec!["beach".to_string(), "golden hour".to_string()],
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.tag_ids, vec![1]);
+        assert_eq!(q.unmatched_tags, vec!["golden hour".to_string()]);
+    }
+
+    #[test]
+    fn resolve_dedups_repeated_tags() {
+        let raw = RawNlQuery {
+            tags: vec![
+                "beach".to_string(),
+                "Beach".to_string(),
+                "beach".to_string(),
+            ],
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.tag_ids, vec![1]);
+    }
+
+    #[test]
+    fn resolve_normalizes_media_type_and_dates() {
+        let raw = RawNlQuery {
+            media_type: Some("Videos".to_string()),
+            date_from: Some("2023-06-01".to_string()),
+            date_to: Some("2023-06-30".to_string()),
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.media_type.as_deref(), Some("video"));
+        assert_eq!(q.date_from, iso_to_unix("2023-06-01", false));
+        assert_eq!(q.date_to, iso_to_unix("2023-06-30", true));
+    }
+
+    #[test]
+    fn resolve_media_type_all_is_no_filter() {
+        let raw = RawNlQuery {
+            media_type: Some("all".to_string()),
+            ..Default::default()
+        };
+        assert_eq!(resolve_raw_query(raw, &vocab()).media_type, None);
+    }
+
+    #[test]
+    fn resolve_trims_and_empties_to_none() {
+        let raw = RawNlQuery {
+            semantic: Some("   ".to_string()),
+            camera_make: Some("  Fujifilm  ".to_string()),
+            place: Some("".to_string()),
+            ..Default::default()
+        };
+        let q = resolve_raw_query(raw, &vocab());
+        assert_eq!(q.semantic, None);
+        assert_eq!(q.camera_make.as_deref(), Some("Fujifilm"));
+        assert_eq!(q.place, None);
+    }
+
+    #[test]
+    fn parse_response_handles_code_fences_and_prose() {
+        let resp = "Here is the filter:\n```json\n{\"semantic\":\"sunset\",\"tags\":[\"beach\"]}\n```\nDone.";
+        let q = parse_response(resp, &vocab()).expect("parse");
+        assert_eq!(q.semantic.as_deref(), Some("sunset"));
+        assert_eq!(q.tag_ids, vec![1]);
+    }
+
+    #[test]
+    fn parse_response_handles_think_block_then_json() {
+        let resp = "<think>user wants beach sunsets</think>{\"tags\":[\"beach\",\"sunset\"]}";
+        let q = parse_response(resp, &vocab()).expect("parse");
+        assert_eq!(q.tag_ids, vec![1, 2]);
+    }
+
+    #[test]
+    fn parse_response_errors_on_non_json() {
+        assert!(parse_response("I cannot help with that.", &vocab()).is_err());
+    }
+
+    #[test]
+    fn build_system_prompt_includes_date_and_vocab() {
+        let today = chrono::NaiveDate::from_ymd_opt(2026, 6, 14).unwrap();
+        let prompt = build_system_prompt(&vocab(), today);
+        assert!(
+            prompt.contains("2026-06-14"),
+            "prompt should state today's date"
+        );
+        assert!(prompt.contains("beach"), "prompt should list the vocab");
+        assert!(
+            prompt.contains("never invent a tag"),
+            "prompt should warn against inventing tags"
+        );
+    }
+}
diff --git a/src/geo.rs b/src/geo.rs
index 46cc1dc..b7ef9d1 100644
--- a/src/geo.rs
+++ b/src/geo.rs
@@ -1,4 +1,5 @@
 /// Geographic calculation utilities for GPS-based search
+use serde::Deserialize;
 use std::f64;
 
 /// Calculate distance between two GPS coordinates using the Haversine formula.
@@ -61,6 +62,148 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f
     )
 }
 
+/// A place resolved from a free-text query via forward geocoding.
+///
+/// The filter pipeline searches a *circle* (`gps_lat`/`gps_lon`/
+/// `gps_radius_km`), but a place can be anything from a single address to
+/// a whole country. We collapse Nominatim's bounding box into the smallest
+/// circle that circumscribes it (see [`bbox_to_circle`]) so "Portland" and
+/// "Italy" both map onto the existing circle filter without a schema change.
+// Phase 1: forward geocoding is implemented and unit-tested here, but its
+// first consumer (the `/photos/search/unified` endpoint) lands in Phase 2.
+// allow-until-wired (mirrors llm_client.rs); remove when the endpoint is added.
+#[allow(dead_code)]
+#[derive(Debug, Clone, PartialEq)]
+pub struct GeoPlace {
+    /// Nominatim's canonical name for the match (e.g. "Italia").
+    pub display_name: String,
+    /// Centroid latitude in decimal degrees.
+    pub lat: f64,
+    /// Centroid longitude in decimal degrees.
+    pub lon: f64,
+    /// Radius (km) of a circle centred on the centroid that covers the
+    /// matched area. Floored to [`MIN_PLACE_RADIUS_KM`] so a point result
+    /// (whose bounding box is microscopic) still yields a usable circle.
+    pub radius_km: f64,
+}
+
+/// Floor for a geocoded place's radius. Point results (a street address)
+/// come back with a near-zero bounding box; without a floor the circle
+/// filter would match nothing.
+#[allow(dead_code)]
+pub const MIN_PLACE_RADIUS_KM: f64 = 0.5;
+
+/// Collapse a bounding box into the centroid + circumscribing radius.
+///
+/// Input is Nominatim's `boundingbox` order: `(south_lat, north_lat,
+/// west_lon, east_lon)`. The radius is the *largest* great-circle distance
+/// from the centroid to any of the four corners, so the resulting circle
+/// fully covers the box. (The corners aren't equidistant on a sphere —
+/// longitude lines converge toward the poles, so the equator-facing edge's
+/// corners are farthest; taking the max guarantees coverage in either
+/// hemisphere.)
+///
+/// Pure and exact (no flooring) so it can be unit-tested directly; callers
+/// apply [`MIN_PLACE_RADIUS_KM`] when turning the result into a filter.
+#[allow(dead_code)]
+pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) {
+    let center_lat = (south + north) / 2.0;
+    let center_lon = (west + east) / 2.0;
+    let radius_km = [(south, west), (south, east), (north, west), (north, east)]
+        .iter()
+        .map(|(clat, clon)| haversine_distance(center_lat, center_lon, *clat, *clon))
+        .fold(0.0_f64, f64::max);
+    (center_lat, center_lon, radius_km)
+}
+
+/// Raw Nominatim `/search` result. `lat`/`lon` arrive as strings and
+/// `boundingbox` as a 4-element string array `[south, north, west, east]`.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+struct NominatimSearchResult {
+    lat: String,
+    lon: String,
+    display_name: String,
+    boundingbox: Option<[String; 4]>,
+}
+
+/// Forward-geocode a free-text place name to a [`GeoPlace`] via the public
+/// OpenStreetMap Nominatim `/search` endpoint.
+///
+/// Mirrors `InsightGenerator::reverse_geocode`'s error posture: any network,
+/// HTTP, or parse failure returns `None` rather than propagating, so a flaky
+/// geocoder degrades the query to "no location filter" instead of failing it.
+///
+/// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1
+/// request/second; callers doing this interactively should cache results.
+#[allow(dead_code)]
+pub async fn forward_geocode(query: &str) -> Option<GeoPlace> {
+    let q = query.trim();
+    if q.is_empty() {
+        return None;
+    }
+
+    let client = reqwest::Client::new();
+    let response = match client
+        .get("https://nominatim.openstreetmap.org/search")
+        .query(&[("format", "json"), ("limit", "1"), ("q", q)])
+        .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent
+        .send()
+        .await
+    {
+        Ok(resp) => resp,
+        Err(e) => {
+            log::warn!("Forward geocoding network error for {q:?}: {e}");
+            return None;
+        }
+    };
+
+    if !response.status().is_success() {
+        log::warn!(
+            "Forward geocoding HTTP error for {q:?}: {}",
+            response.status()
+        );
+        return None;
+    }
+
+    let results: Vec<NominatimSearchResult> = match response.json().await {
+        Ok(r) => r,
+        Err(e) => {
+            log::warn!("Forward geocoding JSON parse error for {q:?}: {e}");
+            return None;
+        }
+    };
+
+    let top = results.into_iter().next()?;
+    let lat: f64 = top.lat.parse().ok()?;
+    let lon: f64 = top.lon.parse().ok()?;
+
+    // Prefer the bounding box (handles large places); fall back to a
+    // point + floor radius when Nominatim omits it.
+    let (center_lat, center_lon, radius_km) = match &top.boundingbox {
+        Some([s, n, w, e]) => match (s.parse(), n.parse(), w.parse(), e.parse()) {
+            (Ok(s), Ok(n), Ok(w), Ok(e)) => bbox_to_circle(s, n, w, e),
+            _ => (lat, lon, 0.0),
+        },
+        None => (lat, lon, 0.0),
+    };
+
+    let place = GeoPlace {
+        display_name: top.display_name,
+        lat: center_lat,
+        lon: center_lon,
+        radius_km: radius_km.max(MIN_PLACE_RADIUS_KM),
+    };
+    log::info!(
+        "Forward geocoded {q:?} -> {} ({:.4}, {:.4}, r={:.1}km)",
+        place.display_name,
+        place.lat,
+        place.lon,
+        place.radius_km
+    );
+    Some(place)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -118,4 +261,41 @@ mod tests {
             distance
         );
     }
+
+    #[test]
+    fn test_bbox_to_circle_centroid() {
+        // Symmetric box around (10, 20): centroid should land dead centre.
+        let (lat, lon, radius) = bbox_to_circle(9.0, 11.0, 19.0, 21.0);
+        assert!((lat - 10.0).abs() < 1e-9, "centroid lat, got {lat}");
+        assert!((lon - 20.0).abs() < 1e-9, "centroid lon, got {lon}");
+        assert!(radius > 0.0, "radius should be positive, got {radius}");
+    }
+
+    #[test]
+    fn test_bbox_to_circle_covers_corner() {
+        // The radius must reach every corner of the box. Verify the
+        // centroid-to-corner distance equals the returned radius for all
+        // four corners (they're symmetric, so all equal).
+        let (south, north, west, east) = (40.0, 42.0, -74.0, -72.0);
+        let (lat, lon, radius) = bbox_to_circle(south, north, west, east);
+        for (clat, clon) in [(south, west), (south, east), (north, west), (north, east)] {
+            let d = haversine_distance(lat, lon, clat, clon);
+            assert!(
+                d <= radius + 1e-6,
+                "corner ({clat},{clon}) at {d}km should be within radius {radius}km"
+            );
+        }
+    }
+
+    #[test]
+    fn test_bbox_to_circle_country_vs_city_scale() {
+        // A country-sized box yields a far larger radius than a city-sized
+        // one — confirming the bbox approach scales with place size.
+        let (_, _, country) = bbox_to_circle(35.5, 47.1, 6.6, 18.5); // ~Italy
+        let (_, _, city) = bbox_to_circle(45.4, 45.6, -122.8, -122.5); // ~Portland
+        assert!(
+            country > city * 10.0,
+            "country radius {country}km should dwarf city radius {city}km"
+        );
+    }
 }

From fd00dc936a8e291cac9dd633e06aa3a865211a8c Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Sun, 14 Jun 2026 01:03:43 -0400
Subject: [PATCH 2/9] Unified NL search Phase 2: /photos/search/unified
 endpoint

Composes the two existing engines (Path A orchestration):
- Translate NL -> StructuredQuery via local LLM, respecting LLM_BACKEND
  (resolve_backend(Local) -> ollama or llama-swap; no hybrid).
- Forward-geocode the place name into a gps circle.
- Structured filters (tags/EXIF/geo/date/media) build a candidate set of EXIF
  rows; CLIP ranks within it, joined by content_hash. Degenerate cases match
  existing behavior: semantic-only -> plain CLIP; filters-only -> date-sorted.
- Echoes the interpreted query (incl. resolved place) for editable client chips.

Refactor: extracted reusable cores from clip_search (score_photos, resolve_hits,
parse_library_scope, score_error_response) shared by both endpoints. Removed the
Phase 1 allow-until-wired attributes now that nl_query + geo are consumed.

fmt + clippy clean; 23 backend tests pass (7 geo, 12 nl_query, 4 unified).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/ai/nl_query.rs    |   6 -
 src/clip_search.rs    | 398 ++++++++++++++++++++-----------------
 src/geo.rs            |   8 -
 src/lib.rs            |   1 +
 src/main.rs           |   8 +
 src/unified_search.rs | 452 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 675 insertions(+), 198 deletions(-)
 create mode 100644 src/unified_search.rs

diff --git a/src/ai/nl_query.rs b/src/ai/nl_query.rs
index a94fc06..d709322 100644
--- a/src/ai/nl_query.rs
+++ b/src/ai/nl_query.rs
@@ -21,12 +21,6 @@
 //! `geo::forward_geocode`), and person filtering is deferred until a
 //! person→photos resolver exists.
 
-// Phase 1: this module is fully implemented and unit-tested, but its first
-// consumer (the `/photos/search/unified` endpoint) lands in Phase 2. Mirrors
-// llm_client.rs's allow-until-wired pattern so the bin target stays
-// clippy-clean in the interim; remove when the endpoint is added.
-#![allow(dead_code)]
-
 use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks};
 use anyhow::{Result, anyhow};
 use serde::{Deserialize, Serialize};
diff --git a/src/clip_search.rs b/src/clip_search.rs
index 98ea96e..7b4510e 100644
--- a/src/clip_search.rs
+++ b/src/clip_search.rs
@@ -124,65 +124,161 @@ fn dot(a: &[f32], b: &[f32]) -> f32 {
     a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
 }
 
-pub async fn search_photos(
-    state: web::Data<AppState>,
-    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
-    query: web::Query<SearchQuery>,
-) -> ActixResult<HttpResponse> {
-    let q_text = query.q.trim().to_string();
-    if q_text.is_empty() {
-        return Ok(HttpResponse::BadRequest().json(SearchError {
-            error: "query parameter `q` is required".into(),
-        }));
-    }
+/// Failure modes of [`score_photos`]. Carries enough to let each caller pick
+/// an appropriate HTTP status (the CLIP service being down is a 502, a
+/// disabled feature is a 503, a rejected query is a 400, a DB failure 500).
+pub enum ScoreError {
+    /// CLIP search isn't configured at all (no Apollo endpoint).
+    Disabled,
+    /// The query was rejected by the encoder (client error).
+    Rejected(String),
+    /// The CLIP service is transiently unavailable (upstream error).
+    Unavailable(String),
+    /// The encoder returned an embedding we couldn't decode.
+    MalformedEmbedding,
+    /// A database / index load failure.
+    Internal(String),
+}
+
+/// Result of scoring the whole library against a query embedding: the
+/// resolved model version, how many embeddings were considered, and every
+/// `(score, content_hash)` above threshold, sorted by descending score.
+/// Pagination and path resolution are the caller's job (see [`resolve_hits`])
+/// so this core can be reused for both the plain search endpoint and the
+/// unified endpoint (which filters by hash before paginating).
+pub struct ScoredPhotos {
+    pub model_version: String,
+    pub considered: usize,
+    /// `(cosine_score, content_hash)` pairs, descending by score.
+    pub hits: Vec<(f32, String)>,
+}
+
+/// Encode `q_text` via CLIP and score it against every stored embedding in
+/// the given library scope. Returns all matches above `threshold`, sorted by
+/// descending similarity. Pure of HTTP concerns so it's shared by
+/// `search_photos` and the unified search endpoint.
+pub async fn score_photos(
+    state: &AppState,
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    q_text: &str,
+    library_ids: &[i32],
+    threshold: f32,
+    model_version: Option<&str>,
+) -> Result<ScoredPhotos, ScoreError> {
     if !state.clip_client.is_enabled() {
-        return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
-            error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(),
-        }));
+        return Err(ScoreError::Disabled);
     }
 
-    let limit = query.limit.clamp(1, 200);
-    let offset = query.offset;
-    let threshold = query.threshold.clamp(-1.0, 1.0);
-
-    // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms
-    // on CPU. Bail with a clear error message if Apollo's down so the
-    // user sees "service unavailable" rather than empty results.
-    let query_resp = match state.clip_client.encode_text(&q_text).await {
+    // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms on CPU.
+    let query_resp = match state.clip_client.encode_text(q_text).await {
         Ok(r) => r,
-        Err(ClipError::Permanent(e)) => {
-            return Ok(HttpResponse::BadRequest().json(SearchError {
-                error: format!("query rejected: {e}"),
-            }));
-        }
-        Err(ClipError::Transient(e)) => {
-            return Ok(HttpResponse::BadGateway().json(SearchError {
-                error: format!("CLIP service unavailable: {e}"),
-            }));
-        }
-        Err(ClipError::Disabled) => {
-            return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
-                error: "CLIP service disabled".into(),
-            }));
-        }
+        Err(ClipError::Permanent(e)) => return Err(ScoreError::Rejected(e.to_string())),
+        Err(ClipError::Transient(e)) => return Err(ScoreError::Unavailable(e.to_string())),
+        Err(ClipError::Disabled) => return Err(ScoreError::Disabled),
     };
     // decode_embedding works on raw bytes; the wire format is b64.
     let query_bytes = base64::engine::general_purpose::STANDARD
         .decode(query_resp.embedding.as_bytes())
         .unwrap_or_default();
-    let query_vec = match decode_embedding(&query_bytes) {
-        Some(v) => v,
-        None => {
-            return Ok(HttpResponse::BadGateway().json(SearchError {
-                error: "CLIP service returned a malformed query embedding".into(),
-            }));
-        }
-    };
+    let query_vec = decode_embedding(&query_bytes).ok_or(ScoreError::MalformedEmbedding)?;
 
-    // 2. Decide which library scope to search. `library_ids` (multi)
-    // wins over the legacy `library` (single) when both are present;
-    // either / both empty falls back to "every enabled library".
-    let library_ids: Vec<i32> = if let Some(raw) = query.library_ids.as_deref() {
+    // 2. Pull the (hash, embedding) matrix under the dao lock, release
+    // before scoring. The caller-supplied `model_version` (or the live
+    // engine's) forces a strict join so a mid-flight model swap can't mix
+    // geometries.
+    let ctx = opentelemetry::Context::current();
+    let rows: Vec<(String, Vec<u8>)> = {
+        let mut dao = exif_dao.lock().expect("exif dao");
+        dao.list_clip_index(
+            &ctx,
+            library_ids,
+            model_version.or(Some(&query_resp.model_version)),
+        )
+        .map_err(|e| {
+            log::warn!("clip_search: list_clip_index failed: {:?}", e);
+            ScoreError::Internal("failed to load search index".into())
+        })?
+    };
+    let considered = rows.len();
+
+    // 3. Score. Keep all matches and sort at the end (~microseconds at 14k).
+    let mut hits: Vec<(f32, String)> = Vec::with_capacity(considered);
+    for (hash, blob) in rows {
+        let Some(emb) = decode_embedding(&blob) else {
+            continue;
+        };
+        if emb.len() != query_vec.len() {
+            continue;
+        }
+        let sim = dot(&emb, &query_vec);
+        if sim < threshold {
+            continue;
+        }
+        hits.push((sim, hash));
+    }
+    hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
+
+    Ok(ScoredPhotos {
+        model_version: query_resp.model_version,
+        considered,
+        hits,
+    })
+}
+
+/// Resolve a page of `(score, content_hash)` pairs back to [`SearchHit`]s
+/// (each carrying `library_id` + `rel_path`). Hashes that no longer resolve
+/// to a row are skipped. Shared by both endpoints.
+pub fn resolve_hits(
+    exif_dao: &Mutex<Box<dyn ExifDao>>,
+    scored: &[(f32, String)],
+) -> Vec<SearchHit> {
+    if scored.is_empty() {
+        return Vec::new();
+    }
+    let ctx = opentelemetry::Context::current();
+    let hashes: Vec<String> = scored.iter().map(|(_, h)| h.clone()).collect();
+    let mut dao = exif_dao.lock().expect("exif dao");
+    let path_map = dao
+        .get_rel_paths_for_hashes(&ctx, &hashes)
+        .unwrap_or_else(|e| {
+            log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
+            std::collections::HashMap::new()
+        });
+
+    let mut results = Vec::with_capacity(scored.len());
+    for (score, hash) in scored {
+        let row = match dao.find_by_content_hash(&ctx, hash) {
+            Ok(Some(r)) => r,
+            Ok(None) => continue,
+            Err(e) => {
+                log::warn!("clip_search: find_by_content_hash failed for {hash}: {e:?}");
+                continue;
+            }
+        };
+        // Prefer get_rel_paths_for_hashes's first entry (shares image_exif's
+        // natural order), falling back to the ImageExif row.
+        let rel_path = path_map
+            .get(hash)
+            .and_then(|paths| paths.first().cloned())
+            .unwrap_or(row.file_path);
+        results.push(SearchHit {
+            library_id: row.library_id,
+            rel_path,
+            content_hash: hash.clone(),
+            score: *score,
+        });
+    }
+    results
+}
+
+/// Parse the `library_ids` (multi) / `library` (single) scope params into a
+/// deduped id list. Empty = "every enabled library". Shared so the unified
+/// endpoint scopes CLIP identically.
+pub fn parse_library_scope(
+    library_ids: Option<&str>,
+    library: Option<i32>,
+) -> Result<Vec<i32>, String> {
+    if let Some(raw) = library_ids {
         let mut out: Vec<i32> = Vec::new();
         for piece in raw.split(',') {
             let trimmed = piece.trim();
@@ -195,158 +291,92 @@ pub async fn search_photos(
                         out.push(id);
                     }
                 }
-                Err(_) => {
-                    return Ok(HttpResponse::BadRequest().json(SearchError {
-                        error: format!("invalid library_ids entry: {trimmed:?}"),
-                    }));
-                }
+                Err(_) => return Err(format!("invalid library_ids entry: {trimmed:?}")),
             }
         }
-        out
-    } else if let Some(id) = query.library {
-        vec![id]
+        Ok(out)
+    } else if let Some(id) = library {
+        Ok(vec![id])
     } else {
-        Vec::new()
-    };
+        Ok(Vec::new())
+    }
+}
 
-    // 3. Pull the (hash, embedding) matrix. Lock contention here is
-    // bounded — one big SELECT under a mutex Arc<Mutex<dyn ExifDao>>
-    // and then we release before scoring. If this becomes a hotspot
-    // we'll cache the decoded matrix in AppState with TTL.
-    let ctx = opentelemetry::Context::current();
-    let rows: Vec<(String, Vec<u8>)> = {
-        let mut dao = exif_dao.lock().expect("exif dao");
-        match dao.list_clip_index(
-            &ctx,
-            &library_ids,
-            query
-                .model_version
-                .as_deref()
-                .or(Some(&query_resp.model_version)),
-        ) {
-            Ok(r) => r,
-            Err(e) => {
-                log::warn!("clip_search: list_clip_index failed: {:?}", e);
-                return Ok(HttpResponse::InternalServerError().json(SearchError {
-                    error: "failed to load search index".into(),
-                }));
-            }
-        }
-    };
-    let considered = rows.len();
-    if considered == 0 {
-        return Ok(HttpResponse::Ok().json(SearchResponse {
-            query: q_text,
-            model_version: query_resp.model_version,
-            threshold,
-            considered,
-            total_matching: 0,
-            offset,
-            results: Vec::new(),
+pub async fn search_photos(
+    state: web::Data<AppState>,
+    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
+    query: web::Query<SearchQuery>,
+) -> ActixResult<HttpResponse> {
+    let q_text = query.q.trim().to_string();
+    if q_text.is_empty() {
+        return Ok(HttpResponse::BadRequest().json(SearchError {
+            error: "query parameter `q` is required".into(),
         }));
     }
 
-    // 4. Score. Cap the loop's transient allocation; we keep all scores
-    // and sort at the end. With ~14k entries the sort is microseconds.
-    let mut scored: Vec<(f32, String)> = Vec::with_capacity(considered);
-    for (hash, blob) in rows {
-        let Some(emb) = decode_embedding(&blob) else {
-            continue;
-        };
-        if emb.len() != query_vec.len() {
-            continue;
-        }
-        let sim = dot(&emb, &query_vec);
-        if sim < threshold {
-            continue;
-        }
-        scored.push((sim, hash));
-    }
-    scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
-    let total_matching = scored.len();
-    // Pagination — slice the sorted list at `[offset, offset+limit)`.
-    // Offsets past the end produce empty pages rather than an error so
-    // the client can stop fetching naturally on "load more" past the end.
-    let scored: Vec<(f32, String)> = if offset >= total_matching {
+    let limit = query.limit.clamp(1, 200);
+    let offset = query.offset;
+    let threshold = query.threshold.clamp(-1.0, 1.0);
+
+    let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
+        Ok(ids) => ids,
+        Err(msg) => return Ok(HttpResponse::BadRequest().json(SearchError { error: msg })),
+    };
+
+    let scored = match score_photos(
+        &state,
+        &exif_dao,
+        &q_text,
+        &library_ids,
+        threshold,
+        query.model_version.as_deref(),
+    )
+    .await
+    {
+        Ok(s) => s,
+        Err(e) => return Ok(score_error_response(e)),
+    };
+
+    let total_matching = scored.hits.len();
+    // Pagination — slice the sorted list at `[offset, offset+limit)`. Offsets
+    // past the end produce empty pages so "load more" stops naturally.
+    let page: Vec<(f32, String)> = if offset >= total_matching {
         Vec::new()
     } else {
         let end = (offset + limit).min(total_matching);
-        scored[offset..end].to_vec()
+        scored.hits[offset..end].to_vec()
     };
-
-    if scored.is_empty() {
-        return Ok(HttpResponse::Ok().json(SearchResponse {
-            query: q_text,
-            model_version: query_resp.model_version,
-            threshold,
-            considered,
-            total_matching,
-            offset,
-            results: Vec::new(),
-        }));
-    }
-
-    // 5. Resolve each surviving hash back to a `(library_id, rel_path)`.
-    // `get_rel_paths_by_hash` returns every rel_path; we pick the first
-    // one for the result. Apollo / the UI can fetch alternatives via
-    // /image/metadata when needed.
-    let hashes: Vec<String> = scored.iter().map(|(_, h)| h.clone()).collect();
-    let path_map = {
-        let mut dao = exif_dao.lock().expect("exif dao");
-        match dao.get_rel_paths_for_hashes(&ctx, &hashes) {
-            Ok(m) => m,
-            Err(e) => {
-                log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
-                return Ok(HttpResponse::InternalServerError().json(SearchError {
-                    error: "failed to resolve photo paths".into(),
-                }));
-            }
-        }
-    };
-
-    // We need (library_id, rel_path) — get_rel_paths_for_hashes only
-    // returns rel_paths. Cross-reference via find_by_content_hash to
-    // pick the library too. Single call per surviving hash; cheap at
-    // top-20.
-    let mut results = Vec::with_capacity(scored.len());
-    {
-        let mut dao = exif_dao.lock().expect("exif dao");
-        for (score, hash) in scored {
-            let row = match dao.find_by_content_hash(&ctx, &hash) {
-                Ok(Some(r)) => r,
-                Ok(None) => continue,
-                Err(e) => {
-                    log::warn!(
-                        "clip_search: find_by_content_hash failed for {}: {:?}",
-                        hash,
-                        e
-                    );
-                    continue;
-                }
-            };
-            // Prefer get_rel_paths_for_hashes's first entry if it
-            // exists (it shares semantics with `image_exif`'s natural
-            // order), falling back to the ImageExif row.
-            let rel_path = path_map
-                .get(&hash)
-                .and_then(|paths| paths.first().cloned())
-                .unwrap_or(row.file_path);
-            results.push(SearchHit {
-                library_id: row.library_id,
-                rel_path,
-                content_hash: hash,
-                score,
-            });
-        }
-    }
+    let results = resolve_hits(&exif_dao, &page);
 
     Ok(HttpResponse::Ok().json(SearchResponse {
         query: q_text,
-        model_version: query_resp.model_version,
+        model_version: scored.model_version,
         threshold,
-        considered,
+        considered: scored.considered,
         total_matching,
         offset,
         results,
     }))
 }
+
+/// Map a [`ScoreError`] to the HTTP response `search_photos` historically
+/// returned for each failure mode. Reused by the unified endpoint.
+pub fn score_error_response(e: ScoreError) -> HttpResponse {
+    match e {
+        ScoreError::Disabled => HttpResponse::ServiceUnavailable().json(SearchError {
+            error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(),
+        }),
+        ScoreError::Rejected(msg) => HttpResponse::BadRequest().json(SearchError {
+            error: format!("query rejected: {msg}"),
+        }),
+        ScoreError::Unavailable(msg) => HttpResponse::BadGateway().json(SearchError {
+            error: format!("CLIP service unavailable: {msg}"),
+        }),
+        ScoreError::MalformedEmbedding => HttpResponse::BadGateway().json(SearchError {
+            error: "CLIP service returned a malformed query embedding".into(),
+        }),
+        ScoreError::Internal(msg) => {
+            HttpResponse::InternalServerError().json(SearchError { error: msg })
+        }
+    }
+}
diff --git a/src/geo.rs b/src/geo.rs
index b7ef9d1..b54f609 100644
--- a/src/geo.rs
+++ b/src/geo.rs
@@ -69,10 +69,6 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f
 /// a whole country. We collapse Nominatim's bounding box into the smallest
 /// circle that circumscribes it (see [`bbox_to_circle`]) so "Portland" and
 /// "Italy" both map onto the existing circle filter without a schema change.
-// Phase 1: forward geocoding is implemented and unit-tested here, but its
-// first consumer (the `/photos/search/unified` endpoint) lands in Phase 2.
-// allow-until-wired (mirrors llm_client.rs); remove when the endpoint is added.
-#[allow(dead_code)]
 #[derive(Debug, Clone, PartialEq)]
 pub struct GeoPlace {
     /// Nominatim's canonical name for the match (e.g. "Italia").
@@ -90,7 +86,6 @@ pub struct GeoPlace {
 /// Floor for a geocoded place's radius. Point results (a street address)
 /// come back with a near-zero bounding box; without a floor the circle
 /// filter would match nothing.
-#[allow(dead_code)]
 pub const MIN_PLACE_RADIUS_KM: f64 = 0.5;
 
 /// Collapse a bounding box into the centroid + circumscribing radius.
@@ -105,7 +100,6 @@ pub const MIN_PLACE_RADIUS_KM: f64 = 0.5;
 ///
 /// Pure and exact (no flooring) so it can be unit-tested directly; callers
 /// apply [`MIN_PLACE_RADIUS_KM`] when turning the result into a filter.
-#[allow(dead_code)]
 pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) {
     let center_lat = (south + north) / 2.0;
     let center_lon = (west + east) / 2.0;
@@ -118,7 +112,6 @@ pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64
 
 /// Raw Nominatim `/search` result. `lat`/`lon` arrive as strings and
 /// `boundingbox` as a 4-element string array `[south, north, west, east]`.
-#[allow(dead_code)]
 #[derive(Deserialize)]
 struct NominatimSearchResult {
     lat: String,
@@ -136,7 +129,6 @@ struct NominatimSearchResult {
 ///
 /// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1
 /// request/second; callers doing this interactively should cache results.
-#[allow(dead_code)]
 pub async fn forward_geocode(query: &str) -> Option<GeoPlace> {
     let q = query.trim();
     if q.is_empty() {
diff --git a/src/lib.rs b/src/lib.rs
index 0ea7ddb..a228472 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -35,6 +35,7 @@ pub mod tags;
 #[cfg(test)]
 pub mod testhelpers;
 pub mod thumbnails;
+pub mod unified_search;
 pub mod utils;
 pub mod video;
 
diff --git a/src/main.rs b/src/main.rs
index e420d8b..7faa959 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -54,6 +54,7 @@ mod perceptual_hash;
 mod state;
 mod tags;
 mod thumbnails;
+mod unified_search;
 mod utils;
 mod video;
 mod watcher;
@@ -333,6 +334,13 @@ fn main() -> std::io::Result<()> {
                     web::resource("/photos/search")
                         .route(web::get().to(clip_search::search_photos)),
                 )
+                .service(
+                    // Unified natural-language search: LLM translates the
+                    // query into structured filters + a semantic term, then
+                    // filters constrain and CLIP ranks. See src/unified_search.rs.
+                    web::resource("/photos/search/unified")
+                        .route(web::get().to(unified_search::unified_search::<SqliteTagDao>)),
+                )
                 .service(web::resource("/file/move").post(move_file::<RealFileSystem>))
                 .service(handlers::image::get_image)
                 .service(handlers::image::upload_image)
diff --git a/src/unified_search.rs b/src/unified_search.rs
new file mode 100644
index 0000000..a3187a4
--- /dev/null
+++ b/src/unified_search.rs
@@ -0,0 +1,452 @@
+//! `/photos/search/unified?q=<natural language>` — unified NL photo search.
+//!
+//! One free-text box that composes the two existing engines instead of making
+//! the user pick between them:
+//!  1. A grounded local-LLM call ([`crate::ai::nl_query`]) translates the
+//!     query into a structured filter + a semantic term.
+//!  2. Structured filters (tags / EXIF / geo / date / media-type) define the
+//!     candidate set; the semantic term ranks within it via CLIP.
+//!
+//! Path A (orchestration): we reuse `clip_search`'s scoring core and the
+//! existing `ExifDao` / `TagDao` queries, joining on `content_hash`. EXIF rows
+//! are the universal candidate carrier — each has `(library_id, file_path,
+//! content_hash, date_taken)` — so the structured filter is just a predicate
+//! over them, and the CLIP hits (which key on `content_hash`) intersect by
+//! hash. No new schema, no surgery on `list_photos`.
+//!
+//! Degenerate cases collapse to the existing behavior: semantic-only → plain
+//! CLIP search; filters-only → a date-sorted filtered listing.
+//!
+//! Person filtering is intentionally deferred (no person→photos resolver yet).
+
+use crate::AppState;
+use crate::ai::backend::{BackendKind, SamplingOverrides};
+use crate::ai::nl_query::{StructuredQuery, translate_nl_query};
+use crate::clip_search::{
+    SearchHit, parse_library_scope, resolve_hits, score_error_response, score_photos,
+};
+use crate::data::Claims;
+use crate::database::ExifDao;
+use crate::file_types::{is_image_file, is_video_file};
+use crate::geo::{forward_geocode, gps_bounding_box, haversine_distance};
+use crate::tags::TagDao;
+use actix_web::HttpResponse;
+use actix_web::web::{Data, Query};
+use serde::{Deserialize, Serialize};
+use std::collections::HashSet;
+use std::path::Path;
+use std::sync::Mutex;
+
+#[derive(Debug, Deserialize)]
+pub struct UnifiedQuery {
+    /// Natural-language query. Required; empty triggers 400.
+    pub q: String,
+    #[serde(default = "default_limit")]
+    pub limit: usize,
+    #[serde(default)]
+    pub offset: usize,
+    /// CLIP cosine floor for the semantic ranking stage. Same default as the
+    /// plain search endpoint.
+    #[serde(default = "default_threshold")]
+    pub threshold: f32,
+    /// Legacy single-library scope (see clip_search).
+    pub library: Option<i32>,
+    /// Multi-library scope, comma-separated ids.
+    pub library_ids: Option<String>,
+}
+
+fn default_limit() -> usize {
+    20
+}
+fn default_threshold() -> f32 {
+    0.20
+}
+
+/// A geocoded place echoed back so the client can show / edit the location
+/// filter it actually searched.
+#[derive(Debug, Serialize)]
+struct ResolvedPlace {
+    display_name: String,
+    lat: f64,
+    lon: f64,
+    radius_km: f64,
+}
+
+/// How the server interpreted the NL query — echoed to the client to render
+/// editable filter chips. tag ids map to the client's existing tag list.
+#[derive(Debug, Serialize)]
+struct Interpreted {
+    semantic: Option<String>,
+    tag_ids: Vec<i32>,
+    exclude_tag_ids: Vec<i32>,
+    /// Words the model treated as tags that don't exist in the vocab; folded
+    /// into the semantic term and surfaced here so the UI can explain it.
+    unmatched_tags: Vec<String>,
+    camera_make: Option<String>,
+    camera_model: Option<String>,
+    lens_model: Option<String>,
+    date_from: Option<i64>,
+    date_to: Option<i64>,
+    media_type: Option<String>,
+    place: Option<ResolvedPlace>,
+}
+
+#[derive(Debug, Serialize)]
+struct UnifiedResponse {
+    query: String,
+    interpreted: Interpreted,
+    /// CLIP model version used for ranking; `None` when the query had no
+    /// semantic term (filters-only).
+    model_version: Option<String>,
+    /// Embeddings scored by CLIP (0 when filters-only).
+    considered: usize,
+    /// Matches before pagination.
+    total_matching: usize,
+    offset: usize,
+    results: Vec<SearchHit>,
+}
+
+#[derive(Debug, Serialize)]
+struct ErrorBody {
+    error: String,
+}
+
+fn bad_request(msg: impl Into<String>) -> HttpResponse {
+    HttpResponse::BadRequest().json(ErrorBody { error: msg.into() })
+}
+
+/// Combine the model's semantic term with any tag words that didn't match the
+/// vocab, so a hallucinated/non-vocab tag becomes a soft semantic signal
+/// rather than being dropped.
+fn effective_semantic(sq: &StructuredQuery) -> Option<String> {
+    let mut parts: Vec<String> = Vec::new();
+    if let Some(s) = sq.semantic.as_deref() {
+        parts.push(s.to_string());
+    }
+    parts.extend(sq.unmatched_tags.iter().cloned());
+    if parts.is_empty() {
+        None
+    } else {
+        Some(parts.join(" "))
+    }
+}
+
+pub async fn unified_search<TagD: TagDao>(
+    _: Claims,
+    state: Data<AppState>,
+    exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
+    tag_dao: Data<Mutex<TagD>>,
+    query: Query<UnifiedQuery>,
+) -> HttpResponse {
+    let nl = query.q.trim().to_string();
+    if nl.is_empty() {
+        return bad_request("query parameter `q` is required");
+    }
+
+    let limit = query.limit.clamp(1, 200);
+    let offset = query.offset;
+    let threshold = query.threshold.clamp(-1.0, 1.0);
+
+    let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
+        Ok(ids) => ids,
+        Err(msg) => return bad_request(msg),
+    };
+
+    let ctx = opentelemetry::Context::current();
+
+    // ── 1. Translate the NL query, grounded on the real tag vocabulary ──
+    let tag_vocab: Vec<(i32, String)> = {
+        let mut dao = tag_dao.lock().expect("tag dao");
+        match dao.get_all_tags(&ctx, None) {
+            Ok(tags) => tags.into_iter().map(|(_, t)| (t.id, t.name)).collect(),
+            Err(e) => {
+                log::warn!("unified_search: get_all_tags failed: {e:?}");
+                Vec::new()
+            }
+        }
+    };
+
+    // Respect env/config for the LLM backend (LLM_BACKEND → ollama or
+    // llama-swap); local only, no hybrid, per the feature's design.
+    let overrides = SamplingOverrides {
+        model: None,
+        num_ctx: None,
+        temperature: None,
+        top_p: None,
+        top_k: None,
+        min_p: None,
+    };
+    let backend = match state
+        .insight_generator
+        .resolve_backend(BackendKind::Local, &overrides)
+        .await
+    {
+        Ok(b) => b,
+        Err(e) => {
+            log::warn!("unified_search: resolve_backend failed: {e:?}");
+            return HttpResponse::ServiceUnavailable().json(ErrorBody {
+                error: "LLM backend unavailable".into(),
+            });
+        }
+    };
+
+    let today = chrono::Utc::now().date_naive();
+    let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await {
+        Ok(sq) => sq,
+        Err(e) => {
+            log::warn!("unified_search: translate_nl_query failed: {e:?}");
+            return HttpResponse::BadGateway().json(ErrorBody {
+                error: "could not interpret the query".into(),
+            });
+        }
+    };
+
+    // ── 2. Forward-geocode the place name into a gps circle ──
+    let resolved_place = match sq.place.as_deref() {
+        Some(p) => forward_geocode(p).await.map(|g| ResolvedPlace {
+            display_name: g.display_name,
+            lat: g.lat,
+            lon: g.lon,
+            radius_km: g.radius_km,
+        }),
+        None => None,
+    };
+    let gps = resolved_place.as_ref().map(|p| (p.lat, p.lon, p.radius_km));
+
+    let semantic = effective_semantic(&sq);
+
+    let has_exif_filter = sq.camera_make.is_some()
+        || sq.camera_model.is_some()
+        || sq.lens_model.is_some()
+        || sq.date_from.is_some()
+        || sq.date_to.is_some();
+    let has_struct =
+        has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some();
+
+    // ── 3. Build the structured candidate set (EXIF rows passing every
+    // filter). Skipped entirely for a pure-semantic query. ──
+    let mut candidate: Vec<crate::database::models::ImageExif> = Vec::new();
+    let mut allowed_hashes: HashSet<String> = HashSet::new();
+    if has_struct {
+        // Tag membership set (rel_path only — same cross-library imprecision
+        // as the existing /photos tag listing). ALL-mode: the photo must
+        // carry every named tag.
+        let tag_set: Option<HashSet<String>> = if sq.tag_ids.is_empty() {
+            None
+        } else {
+            let mut dao = tag_dao.lock().expect("tag dao");
+            match dao.get_files_with_all_tag_ids(
+                sq.tag_ids.clone(),
+                sq.exclude_tag_ids.clone(),
+                &ctx,
+            ) {
+                Ok(files) => Some(files.into_iter().map(|f| f.file_name).collect()),
+                Err(e) => {
+                    log::warn!("unified_search: tag filter failed: {e:?}");
+                    Some(HashSet::new())
+                }
+            }
+        };
+
+        // EXIF query handles camera/lens/gps-box/date. With no EXIF filters
+        // it returns the whole table, which we then narrow by the predicates
+        // below (tags / media / scope). Fine at personal-library scale.
+        let gps_bounds = gps.map(|(lat, lon, r)| gps_bounding_box(lat, lon, r));
+        let rows = {
+            let mut dao = exif_dao.lock().expect("exif dao");
+            dao.query_by_exif(
+                &ctx,
+                None, // scope filtered in-Rust to support multi-library
+                sq.camera_make.as_deref(),
+                sq.camera_model.as_deref(),
+                sq.lens_model.as_deref(),
+                gps_bounds,
+                sq.date_from,
+                sq.date_to,
+            )
+            .unwrap_or_else(|e| {
+                log::warn!("unified_search: query_by_exif failed: {e:?}");
+                Vec::new()
+            })
+        };
+
+        candidate = rows
+            .into_iter()
+            .filter(|row| {
+                // Library scope.
+                if !library_ids.is_empty() && !library_ids.contains(&row.library_id) {
+                    return false;
+                }
+                // Precise GPS distance (the EXIF query only did a coarse box).
+                if let Some((lat, lon, radius_km)) = gps {
+                    match (row.gps_latitude, row.gps_longitude) {
+                        (Some(plat), Some(plon)) => {
+                            if haversine_distance(lat, lon, plat as f64, plon as f64) > radius_km {
+                                return false;
+                            }
+                        }
+                        _ => return false,
+                    }
+                }
+                // Media type.
+                if let Some(mt) = sq.media_type.as_deref() {
+                    let p = Path::new(&row.file_path);
+                    let ok = if mt == "video" {
+                        is_video_file(p)
+                    } else {
+                        is_image_file(p)
+                    };
+                    if !ok {
+                        return false;
+                    }
+                }
+                // Tag membership.
+                if let Some(ts) = &tag_set
+                    && !ts.contains(&row.file_path)
+                {
+                    return false;
+                }
+                true
+            })
+            .collect();
+
+        allowed_hashes = candidate
+            .iter()
+            .filter_map(|r| r.content_hash.clone())
+            .collect();
+    }
+
+    // ── 4. Rank ──
+    match semantic {
+        Some(ref sem) => {
+            // Semantic term present: CLIP-rank, then keep only hits that pass
+            // the structured filters (by content_hash).
+            let scored =
+                match score_photos(&state, &exif_dao, sem, &library_ids, threshold, None).await {
+                    Ok(s) => s,
+                    Err(e) => return score_error_response(e),
+                };
+            let hits: Vec<(f32, String)> = if has_struct {
+                scored
+                    .hits
+                    .into_iter()
+                    .filter(|(_, h)| allowed_hashes.contains(h))
+                    .collect()
+            } else {
+                scored.hits
+            };
+            let total_matching = hits.len();
+            let page = paginate(&hits, offset, limit);
+            let results = resolve_hits(&exif_dao, &page);
+            HttpResponse::Ok().json(UnifiedResponse {
+                query: nl,
+                interpreted: interpreted(&sq, resolved_place),
+                model_version: Some(scored.model_version),
+                considered: scored.considered,
+                total_matching,
+                offset,
+                results,
+            })
+        }
+        None => {
+            // Filters-only: no semantic term. Require at least one filter,
+            // then return the candidate set newest-first.
+            if !has_struct {
+                return bad_request("query had no searchable terms");
+            }
+            candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken));
+            let total_matching = candidate.len();
+            let end = (offset + limit).min(total_matching);
+            let results: Vec<SearchHit> = if offset >= total_matching {
+                Vec::new()
+            } else {
+                candidate[offset..end]
+                    .iter()
+                    .map(|r| SearchHit {
+                        library_id: r.library_id,
+                        rel_path: r.file_path.clone(),
+                        content_hash: r.content_hash.clone().unwrap_or_default(),
+                        score: 0.0,
+                    })
+                    .collect()
+            };
+            HttpResponse::Ok().json(UnifiedResponse {
+                query: nl,
+                interpreted: interpreted(&sq, resolved_place),
+                model_version: None,
+                considered: 0,
+                total_matching,
+                offset,
+                results,
+            })
+        }
+    }
+}
+
+/// Slice a sorted hit list at `[offset, offset+limit)`, tolerating
+/// out-of-range offsets (empty page).
+fn paginate(hits: &[(f32, String)], offset: usize, limit: usize) -> Vec<(f32, String)> {
+    if offset >= hits.len() {
+        return Vec::new();
+    }
+    let end = (offset + limit).min(hits.len());
+    hits[offset..end].to_vec()
+}
+
+fn interpreted(sq: &StructuredQuery, place: Option<ResolvedPlace>) -> Interpreted {
+    Interpreted {
+        semantic: sq.semantic.clone(),
+        tag_ids: sq.tag_ids.clone(),
+        exclude_tag_ids: sq.exclude_tag_ids.clone(),
+        unmatched_tags: sq.unmatched_tags.clone(),
+        camera_make: sq.camera_make.clone(),
+        camera_model: sq.camera_model.clone(),
+        lens_model: sq.lens_model.clone(),
+        date_from: sq.date_from,
+        date_to: sq.date_to,
+        media_type: sq.media_type.clone(),
+        place,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ai::nl_query::StructuredQuery;
+
+    #[test]
+    fn effective_semantic_combines_semantic_and_unmatched() {
+        let sq = StructuredQuery {
+            semantic: Some("sunset".into()),
+            unmatched_tags: vec!["golden hour".into()],
+            ..Default::default()
+        };
+        assert_eq!(
+            effective_semantic(&sq).as_deref(),
+            Some("sunset golden hour")
+        );
+    }
+
+    #[test]
+    fn effective_semantic_none_when_empty() {
+        let sq = StructuredQuery::default();
+        assert_eq!(effective_semantic(&sq), None);
+    }
+
+    #[test]
+    fn effective_semantic_unmatched_only() {
+        let sq = StructuredQuery {
+            unmatched_tags: vec!["disco".into()],
+            ..Default::default()
+        };
+        assert_eq!(effective_semantic(&sq).as_deref(), Some("disco"));
+    }
+
+    #[test]
+    fn paginate_handles_out_of_range_offset() {
+        let hits = vec![(0.9, "a".to_string()), (0.8, "b".to_string())];
+        assert_eq!(paginate(&hits, 5, 10).len(), 0);
+        assert_eq!(paginate(&hits, 0, 1).len(), 1);
+        assert_eq!(paginate(&hits, 1, 10).len(), 1);
+    }
+}

From 8dce536f385ac086791283add6b1e4004c481992 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Sun, 14 Jun 2026 01:19:53 -0400
Subject: [PATCH 3/9] Unified search: accept client model override (avoid model
 swapping)

Add an optional `model` query param to /photos/search/unified, passed into
resolve_backend's overrides. The client sends the user's currently-selected
local model so the translation step reuses an already-loaded model instead of
forcing a llama-swap eviction + cold start. Falls back to the configured
default when absent. Still local only (no hybrid).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/unified_search.rs | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/unified_search.rs b/src/unified_search.rs
index a3187a4..7bb78dd 100644
--- a/src/unified_search.rs
+++ b/src/unified_search.rs
@@ -53,6 +53,11 @@ pub struct UnifiedQuery {
     pub library: Option<i32>,
     /// Multi-library scope, comma-separated ids.
     pub library_ids: Option<String>,
+    /// Optional model override. The client passes the user's currently-selected
+    /// local model so the translation step reuses a model that's already loaded
+    /// (avoids a llama-swap eviction / cold start). Falls back to the configured
+    /// default local model when absent. Local only — no hybrid here.
+    pub model: Option<String>,
 }
 
 fn default_limit() -> usize {
@@ -167,9 +172,12 @@ pub async fn unified_search<TagD: TagDao>(
     };
 
     // Respect env/config for the LLM backend (LLM_BACKEND → ollama or
-    // llama-swap); local only, no hybrid, per the feature's design.
+    // llama-swap); local only, no hybrid, per the feature's design. The
+    // client-supplied model (the user's current selection) routes translation
+    // to an already-loaded model when possible; otherwise resolve_backend
+    // falls back to the configured default.
     let overrides = SamplingOverrides {
-        model: None,
+        model: query.model.clone().filter(|m| !m.is_empty()),
         num_ctx: None,
         temperature: None,
         top_p: None,

From 2cde88d73472981db8082d5c38cb62fa81498711 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Sun, 14 Jun 2026 01:29:21 -0400
Subject: [PATCH 4/9] Unified search: stage-by-stage logging to debug empty
 results

Log the translated query (semantic/tags/place/date/media + has_struct), the
tag-filter file count, candidate-row + allowed-hash counts, and the CLIP
considered/hits/after-filter counts. Pinpoints which stage drops results to
zero (over-extracted filter, tag path mismatch, Any/All over-constraint, or
CLIP threshold). info-level for now while debugging.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/unified_search.rs | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/src/unified_search.rs b/src/unified_search.rs
index 7bb78dd..d80feec 100644
--- a/src/unified_search.rs
+++ b/src/unified_search.rs
@@ -231,6 +231,22 @@ pub async fn unified_search<TagD: TagDao>(
     let has_struct =
         has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some();
 
+    // Stage trace: what the model extracted + whether a structured filter is
+    // active. The chips show this to the user too, but logging it makes the
+    // "why no results" path debuggable from the server side.
+    log::info!(
+        "unified_search: q={nl:?} semantic={:?} tag_ids={:?} exclude={:?} place={:?} gps={:?} date=({:?},{:?}) media={:?} unmatched={:?} has_struct={has_struct}",
+        sq.semantic,
+        sq.tag_ids,
+        sq.exclude_tag_ids,
+        resolved_place.as_ref().map(|p| p.display_name.as_str()),
+        gps,
+        sq.date_from,
+        sq.date_to,
+        sq.media_type,
+        sq.unmatched_tags,
+    );
+
     // ── 3. Build the structured candidate set (EXIF rows passing every
     // filter). Skipped entirely for a pure-semantic query. ──
     let mut candidate: Vec<crate::database::models::ImageExif> = Vec::new();
@@ -255,6 +271,11 @@ pub async fn unified_search<TagD: TagDao>(
                 }
             }
         };
+        log::info!(
+            "unified_search: tag_ids={:?} -> tag_set_files={:?}",
+            sq.tag_ids,
+            tag_set.as_ref().map(|s| s.len())
+        );
 
         // EXIF query handles camera/lens/gps-box/date. With no EXIF filters
         // it returns the whole table, which we then narrow by the predicates
@@ -322,6 +343,11 @@ pub async fn unified_search<TagD: TagDao>(
             .iter()
             .filter_map(|r| r.content_hash.clone())
             .collect();
+        log::info!(
+            "unified_search: candidate_rows={} allowed_hashes={}",
+            candidate.len(),
+            allowed_hashes.len()
+        );
     }
 
     // ── 4. Rank ──
@@ -334,6 +360,8 @@ pub async fn unified_search<TagD: TagDao>(
                     Ok(s) => s,
                     Err(e) => return score_error_response(e),
                 };
+            let considered = scored.considered;
+            let clip_hits = scored.hits.len();
             let hits: Vec<(f32, String)> = if has_struct {
                 scored
                     .hits
@@ -343,6 +371,10 @@ pub async fn unified_search<TagD: TagDao>(
             } else {
                 scored.hits
             };
+            log::info!(
+                "unified_search: clip considered={considered} hits={clip_hits} after_struct_filter={}",
+                hits.len()
+            );
             let total_matching = hits.len();
             let page = paginate(&hits, offset, limit);
             let results = resolve_hits(&exif_dao, &page);
@@ -364,6 +396,7 @@ pub async fn unified_search<TagD: TagDao>(
             }
             candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken));
             let total_matching = candidate.len();
+            log::info!("unified_search: filters-only matches={total_matching}");
             let end = (offset + limit).min(total_matching);
             let results: Vec<SearchHit> = if offset >= total_matching {
                 Vec::new()

From b18ab6ed99fb662e87de659a8669734365b0c4c9 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Sun, 14 Jun 2026 01:58:48 -0400
Subject: [PATCH 5/9] Unified search: UNIFIED_SEARCH_MODEL env override for the
 translation step

Pin the NL->structured translation to a small, fast model that can stay
co-resident with CLIP (and the chat model) so it never evicts them on a tight
VRAM budget. Precedence: UNIFIED_SEARCH_MODEL env > client-selected model >
configured default. Logs the effective model (backend.model()) so model A/B
tests are visible. Documented in .env.example.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .env.example          | 10 ++++++++++
 src/unified_search.rs | 21 ++++++++++++++++-----
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/.env.example b/.env.example
index 2e431bc..64c31d3 100644
--- a/.env.example
+++ b/.env.example
@@ -80,6 +80,16 @@ AGENTIC_CHAT_MAX_ITERATIONS=6
 # LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
 # LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180
 
+# ── Unified search translation model (optional) ─────────────────────────
+# /photos/search/unified runs one small LLM call to translate a natural-
+# language query into structured filters + a semantic term, then CLIP-ranks.
+# That step needs an LLM AND CLIP available at once. On a tight VRAM budget a
+# large chat model can't co-reside with CLIP, so pin a small, fast model here
+# (it can stay loaded alongside CLIP and the chat model). Precedence:
+# UNIFIED_SEARCH_MODEL > the client's selected model > the configured default.
+# Use the configured backend (LLM_BACKEND); local only — no hybrid.
+# UNIFIED_SEARCH_MODEL=qwen3-0.6b
+
 # ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ───────────────────
 # TTS routes through the same llama-swap proxy (a Chatterbox model id), so it
 # only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp.
diff --git a/src/unified_search.rs b/src/unified_search.rs
index d80feec..bb6344c 100644
--- a/src/unified_search.rs
+++ b/src/unified_search.rs
@@ -172,12 +172,22 @@ pub async fn unified_search<TagD: TagDao>(
     };
 
     // Respect env/config for the LLM backend (LLM_BACKEND → ollama or
-    // llama-swap); local only, no hybrid, per the feature's design. The
-    // client-supplied model (the user's current selection) routes translation
-    // to an already-loaded model when possible; otherwise resolve_backend
-    // falls back to the configured default.
+    // llama-swap); local only, no hybrid, per the feature's design.
+    //
+    // Translation-model precedence:
+    //   1. UNIFIED_SEARCH_MODEL env — pin a small, fast model that can stay
+    //      co-resident with CLIP (and the chat model) so translation never
+    //      evicts them. This is the recommended setup on a tight VRAM budget.
+    //   2. the client-selected model — routes translation to whatever the user
+    //      already has loaded (no swap) when no dedicated model is pinned.
+    //   3. None → resolve_backend uses the configured default local model.
+    let translation_model = std::env::var("UNIFIED_SEARCH_MODEL")
+        .ok()
+        .filter(|m| !m.trim().is_empty())
+        .or_else(|| query.model.clone())
+        .filter(|m| !m.trim().is_empty());
     let overrides = SamplingOverrides {
-        model: query.model.clone().filter(|m| !m.is_empty()),
+        model: translation_model,
         num_ctx: None,
         temperature: None,
         top_p: None,
@@ -197,6 +207,7 @@ pub async fn unified_search<TagD: TagDao>(
             });
         }
     };
+    log::info!("unified_search: translating with model={}", backend.model());
 
     let today = chrono::Utc::now().date_naive();
     let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await {

From e7f6dd56dacba4d146dec195b5b703343b706721 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Sun, 14 Jun 2026 02:02:57 -0400
Subject: [PATCH 6/9] clip_client: log encode_text failures (URL + status/body
 or network error)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CLIP encode failure reason was only ever returned in the HTTP response
body, never logged server-side, making 502s from /photos/search opaque. Log
the underlying cause — network error to the URL, or the Apollo HTTP status +
response body — so CLIP-service problems are diagnosable from the ImageApi log.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/ai/clip_client.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/ai/clip_client.rs b/src/ai/clip_client.rs
index 85c66a7..3519e8b 100644
--- a/src/ai/clip_client.rs
+++ b/src/ai/clip_client.rs
@@ -191,11 +191,13 @@ impl ClipClient {
         let resp = match self.client.post(&url).json(&body).send().await {
             Ok(r) => r,
             Err(e) if e.is_timeout() || e.is_connect() => {
+                log::warn!("clip encode_text network error to {url}: {e}");
                 return Err(ClipError::Transient(anyhow::anyhow!(
                     "clip client network: {e}"
                 )));
             }
             Err(e) => {
+                log::warn!("clip encode_text request error to {url}: {e}");
                 return Err(ClipError::Transient(anyhow::anyhow!(
                     "clip client request: {e}"
                 )));
@@ -210,6 +212,7 @@ impl ClipClient {
             return Ok(body);
         }
         let body_text = resp.text().await.unwrap_or_default();
+        log::warn!("clip encode_text HTTP {status} from {url}: {body_text}");
         Err(classify_error_response(status.as_u16(), &body_text))
     }
 

From b594acc4bd5ba66c8ac844fc07748f8c133a21f4 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Sun, 14 Jun 2026 02:20:06 -0400
Subject: [PATCH 7/9] Unified search: rank within filtered set instead of
 pre-thresholding CLIP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When structured filters are present they're the constraint and CLIP only ranks
within the candidate set, so drop the global similarity threshold for that
case. Previously the 0.2 whole-library threshold ran BEFORE intersecting with
the filters, discarding filter-matching photos that scored just under it (e.g.
a 2022 beach photo at 0.18) — producing after_struct_filter=0 even when matches
existed. Plain semantic (no filters) keeps the user's threshold.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/unified_search.rs | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/unified_search.rs b/src/unified_search.rs
index bb6344c..9eea405 100644
--- a/src/unified_search.rs
+++ b/src/unified_search.rs
@@ -364,13 +364,27 @@ pub async fn unified_search<TagD: TagDao>(
     // ── 4. Rank ──
     match semantic {
         Some(ref sem) => {
-            // Semantic term present: CLIP-rank, then keep only hits that pass
-            // the structured filters (by content_hash).
-            let scored =
-                match score_photos(&state, &exif_dao, sem, &library_ids, threshold, None).await {
-                    Ok(s) => s,
-                    Err(e) => return score_error_response(e),
-                };
+            // When structured filters are present they ARE the constraint —
+            // CLIP only ranks within the candidate set. So drop the global
+            // similarity threshold (it's tuned for whole-library search and
+            // would pre-discard filter-matching photos that scored just under
+            // it — e.g. a 2022 beach photo at 0.18 — before the intersection
+            // ever runs). With no filters, keep the user's threshold for the
+            // plain semantic case.
+            let clip_threshold = if has_struct { -1.0 } else { threshold };
+            let scored = match score_photos(
+                &state,
+                &exif_dao,
+                sem,
+                &library_ids,
+                clip_threshold,
+                None,
+            )
+            .await
+            {
+                Ok(s) => s,
+                Err(e) => return score_error_response(e),
+            };
             let considered = scored.considered;
             let clip_hits = scored.hits.len();
             let hits: Vec<(f32, String)> = if has_struct {

From 7684220f52d664006f41940b7987245854b8c3de Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Sun, 14 Jun 2026 02:25:24 -0400
Subject: [PATCH 8/9] Unified search: use ANY-mode tag matching, not ALL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ALL-mode over-constrains NL queries — the model maps several query words to
tags and few photos carry every one, zeroing the candidate set. Switch to
ANY (a photo matches if it has any named tag); the semantic CLIP ranking
provides precision within that pool. Exclude tags still filter out.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/unified_search.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/unified_search.rs b/src/unified_search.rs
index 9eea405..555773c 100644
--- a/src/unified_search.rs
+++ b/src/unified_search.rs
@@ -264,13 +264,15 @@ pub async fn unified_search<TagD: TagDao>(
     let mut allowed_hashes: HashSet<String> = HashSet::new();
     if has_struct {
         // Tag membership set (rel_path only — same cross-library imprecision
-        // as the existing /photos tag listing). ALL-mode: the photo must
-        // carry every named tag.
+        // as the existing /photos tag listing). ANY-mode: a photo matches if
+        // it carries any of the named tags. ALL-mode over-constrains NL
+        // queries (the model maps several words to tags and few photos carry
+        // them all); the semantic term does the precision work instead.
         let tag_set: Option<HashSet<String>> = if sq.tag_ids.is_empty() {
             None
         } else {
             let mut dao = tag_dao.lock().expect("tag dao");
-            match dao.get_files_with_all_tag_ids(
+            match dao.get_files_with_any_tag_ids(
                 sq.tag_ids.clone(),
                 sq.exclude_tag_ids.clone(),
                 &ctx,

From 475072810e1c604cf151a7d4e4a01c6346421b6d Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Wed, 17 Jun 2026 18:14:44 -0400
Subject: [PATCH 9/9] AI: add enable_thinking reasoning toggle plumbed to
 llama.cpp

New optional SamplingOverride forwarded to llama-server as
chat_template_kwargs.enable_thinking (gates Qwen3-style reasoning
blocks). None leaves the template default; other backends ignore it.
Wired through the agentic-insight and chat-turn request bodies/handlers.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/ai/backend.rs             |  6 ++++++
 src/ai/handlers.rs            | 15 +++++++++++++++
 src/ai/insight_chat.rs        |  9 +++++++++
 src/ai/insight_generator.rs   |  3 +++
 src/ai/llamacpp.rs            | 19 +++++++++++++++++++
 src/bin/populate_knowledge.rs |  1 +
 src/reels/script.rs           |  1 +
 src/unified_search.rs         |  1 +
 8 files changed, 55 insertions(+)

diff --git a/src/ai/backend.rs b/src/ai/backend.rs
index 0515f1c..dfcdd03 100644
--- a/src/ai/backend.rs
+++ b/src/ai/backend.rs
@@ -41,6 +41,10 @@ pub struct SamplingOverrides {
     pub top_p: Option<f32>,
     pub top_k: Option<i32>,
     pub min_p: Option<f32>,
+    /// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as
+    /// `chat_template_kwargs.enable_thinking`); other backends ignore it.
+    /// `None` leaves the model/template default in place.
+    pub enable_thinking: Option<bool>,
 }
 
 impl SamplingOverrides {
@@ -124,6 +128,7 @@ mod tests {
             top_p: None,
             top_k: None,
             min_p: None,
+            enable_thinking: None,
         };
         assert!(!empty.has_sampling());
 
@@ -134,6 +139,7 @@ mod tests {
             top_p: None,
             top_k: None,
             min_p: None,
+            enable_thinking: None,
         };
         assert!(with_temp.has_sampling());
     }
diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs
index c6bc212..ae9f300 100644
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -40,6 +40,12 @@ pub struct GeneratePhotoInsightRequest {
     pub top_k: Option<i32>,
     #[serde(default)]
     pub min_p: Option<f32>,
+    /// Reasoning toggle for thinking-capable models. Forwarded to the
+    /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored
+    /// by other backends and the non-agentic (Ollama) path. Only the agentic
+    /// endpoint routes through llama.cpp. None defers to the template default.
+    #[serde(default)]
+    pub enable_thinking: Option<bool>,
     /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
     /// OpenRouter chat). Only respected by the agentic endpoint.
     #[serde(default)]
@@ -868,6 +874,7 @@ pub async fn generate_agentic_insight_handler(
                     request.top_p,
                     request.top_k,
                     request.min_p,
+                    request.enable_thinking,
                     max_iterations,
                     request.backend.clone(),
                     fewshot_examples,
@@ -1169,6 +1176,11 @@ pub struct ChatTurnHttpRequest {
     pub top_k: Option<i32>,
     #[serde(default)]
     pub min_p: Option<f32>,
+    /// Reasoning toggle for thinking-capable models. Forwarded to the
+    /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored
+    /// by other backends. None defers to the model/template default.
+    #[serde(default)]
+    pub enable_thinking: Option<bool>,
     #[serde(default)]
     pub max_iterations: Option<usize>,
     /// Per-turn system-prompt override. Ephemeral in append mode,
@@ -1247,6 +1259,7 @@ pub async fn chat_turn_handler(
         top_p: request.top_p,
         top_k: request.top_k,
         min_p: request.min_p,
+        enable_thinking: request.enable_thinking,
         max_iterations: request.max_iterations,
         system_prompt: request.system_prompt.clone(),
         persona_id: request.persona_id.clone(),
@@ -1473,6 +1486,7 @@ pub async fn chat_stream_handler(
         top_p: request.top_p,
         top_k: request.top_k,
         min_p: request.min_p,
+        enable_thinking: request.enable_thinking,
         max_iterations: request.max_iterations,
         system_prompt: request.system_prompt.clone(),
         persona_id: request.persona_id.clone(),
@@ -1618,6 +1632,7 @@ pub async fn turn_async_handler(
         top_p: request.top_p,
         top_k: request.top_k,
         min_p: request.min_p,
+        enable_thinking: request.enable_thinking,
         max_iterations: request.max_iterations,
         system_prompt: request.system_prompt.clone(),
         persona_id: request.persona_id.clone(),
diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index 84f2b32..af00731 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -70,6 +70,10 @@ pub struct ChatTurnRequest {
     pub top_p: Option<f32>,
     pub top_k: Option<i32>,
     pub min_p: Option<f32>,
+    /// Reasoning toggle for thinking-capable models. Forwarded to the
+    /// llama.cpp backend as `chat_template_kwargs.enable_thinking`; ignored
+    /// by other backends. None defers to the model/template default.
+    pub enable_thinking: Option<bool>,
     pub max_iterations: Option<usize>,
     /// Per-turn system-prompt override. In append mode (default), applied
     /// ephemerally — original system message restored before persistence.
@@ -344,6 +348,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
@@ -847,6 +852,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
@@ -1017,6 +1023,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
@@ -1425,6 +1432,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
@@ -1607,6 +1615,7 @@ impl InsightChatService {
             top_p: req.top_p,
             top_k: req.top_k,
             min_p: req.min_p,
+            enable_thinking: req.enable_thinking,
         };
         let backend = self.generator.resolve_backend(kind, &overrides).await?;
         let model_used = backend.model().to_string();
diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs
index 4ff8494..d45fa55 100644
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -3933,6 +3933,7 @@ Return ONLY the summary, nothing else."#,
             if let Some(ctx) = overrides.num_ctx {
                 c.set_num_ctx(Some(ctx));
             }
+            c.set_enable_thinking(overrides.enable_thinking);
             Box::new(c)
         } else {
             // Pure Ollama local.
@@ -4064,6 +4065,7 @@ Return ONLY the summary, nothing else."#,
         top_p: Option<f32>,
         top_k: Option<i32>,
         min_p: Option<f32>,
+        enable_thinking: Option<bool>,
         max_iterations: usize,
         backend: Option<String>,
         fewshot_examples: Vec<Vec<ChatMessage>>,
@@ -4091,6 +4093,7 @@ Return ONLY the summary, nothing else."#,
             top_p,
             top_k,
             min_p,
+            enable_thinking,
         };
         let backend = self.resolve_backend(kind, &overrides).await?;
         span.set_attribute(KeyValue::new("model", backend.model().to_string()));
diff --git a/src/ai/llamacpp.rs b/src/ai/llamacpp.rs
index 8a7c898..77e7f63 100644
--- a/src/ai/llamacpp.rs
+++ b/src/ai/llamacpp.rs
@@ -64,6 +64,12 @@ pub struct LlamaCppClient {
     top_p: Option<f32>,
     top_k: Option<i32>,
     min_p: Option<f32>,
+    /// When `Some`, forwarded to llama-server as
+    /// `chat_template_kwargs: {"enable_thinking": <bool>}`. The Jinja chat
+    /// template (e.g. Qwen3) reads this to gate its reasoning block. `None`
+    /// omits the key entirely, leaving the template's own default. Templates
+    /// that don't reference the key ignore it, so sending it is harmless.
+    enable_thinking: Option<bool>,
 }
 
 impl LlamaCppClient {
@@ -89,6 +95,7 @@ impl LlamaCppClient {
             top_p: None,
             top_k: None,
             min_p: None,
+            enable_thinking: None,
         }
     }
 
@@ -104,6 +111,12 @@ impl LlamaCppClient {
         self.num_ctx = num_ctx;
     }
 
+    /// Set the reasoning toggle forwarded as `chat_template_kwargs.enable_thinking`.
+    /// `None` leaves the chat template's own default in place.
+    pub fn set_enable_thinking(&mut self, enable_thinking: Option<bool>) {
+        self.enable_thinking = enable_thinking;
+    }
+
     pub fn set_sampling_params(
         &mut self,
         temperature: Option<f32>,
@@ -458,6 +471,12 @@ impl LlamaCppClient {
         // via -c, so we silently drop the override here. The config.yaml
         // entry is the source of truth for context size.
         let _ = self.num_ctx;
+        // Reasoning toggle for thinking-capable templates (Qwen3 et al.).
+        // llama-server forwards chat_template_kwargs into the Jinja render
+        // (requires --jinja); templates that ignore the key are unaffected.
+        if let Some(think) = self.enable_thinking {
+            v.push(("chat_template_kwargs", json!({ "enable_thinking": think })));
+        }
         v
     }
 
diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs
index 71f2f8a..396eddc 100644
--- a/src/bin/populate_knowledge.rs
+++ b/src/bin/populate_knowledge.rs
@@ -336,6 +336,7 @@ async fn main() -> anyhow::Result<()> {
                 args.top_p,
                 args.top_k,
                 args.min_p,
+                None, // enable_thinking: leave model/template default
                 args.max_iterations,
                 None,
                 Vec::new(),
diff --git a/src/reels/script.rs b/src/reels/script.rs
index 858efd1..38ef9cc 100644
--- a/src/reels/script.rs
+++ b/src/reels/script.rs
@@ -309,6 +309,7 @@ pub async fn generate_script_agentic(
                 top_p: None,
                 top_k: None,
                 min_p: None,
+                enable_thinking: None,
             },
         )
         .await
diff --git a/src/unified_search.rs b/src/unified_search.rs
index 555773c..0940a92 100644
--- a/src/unified_search.rs
+++ b/src/unified_search.rs
@@ -193,6 +193,7 @@ pub async fn unified_search<TagD: TagDao>(
         top_p: None,
         top_k: None,
         min_p: None,
+        enable_thinking: None,
     };
     let backend = match state
         .insight_generator