Previously the endpoint only accepted `library=<id>` (single id) — multi-select scopes had to be filtered upstream by Apollo, which kept the filter logic out of FileViewer-React's reach (it calls ImageApi directly and got no scoping for 2+ active libraries). Adds `library_ids` (comma-separated id list, e.g. `?library_ids=1,3`). Parsed inside the existing scope decision: `library_ids` wins when both are supplied; if either or both are empty, the scope falls back to "every enabled library" (historical default). Malformed entries return 400. Dedupes ids while preserving order so a stray `library_ids=1,1,3` doesn't double-pass to the DAO. The single-id path still works unchanged for older clients. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
353 lines
13 KiB
Rust
353 lines
13 KiB
Rust
//! `/photos/search?q=<text>` — CLIP semantic photo search.
|
||
//!
|
||
//! The route lives outside `files.rs` to keep that 1500+ line module
|
||
//! focused on EXIF / tag listing. The flow is:
|
||
//!
|
||
//! 1. Parse query params (`q`, `limit`, `offset`, `threshold`, optional `library` / `library_ids` / `model_version`).
|
||
//! 2. Call Apollo's `/api/internal/clip/encode_text` to get the query
|
||
//! vector (L2-normalized 768-d f32 for ViT-L/14).
|
||
//! 3. Load every `(content_hash, clip_embedding)` for the scope from
|
||
//! `image_exif` via `ExifDao::list_clip_index`. ~28–43 MB for a 14k
|
||
//! library at ViT-L/14; loaded fresh per request — fast enough for
|
||
//! v1, optimize via an AppState cache later if needed.
|
||
//! 4. Dot product (= cosine since both sides are L2-normalized), filter
|
||
//! above `threshold`, top-K by score.
|
||
//! 5. Resolve each surviving hash back to a `(library_id, rel_path)` so
|
||
//! the frontend can render the photo / hand off to the carousel.
|
||
//!
|
||
//! Response shape is intentionally minimal — paths + score — so the
|
||
//! frontend can reuse existing PhotoGrid rendering by joining against
|
||
//! `/api/photos/match` (or calling `/image/metadata` lazily). Don't
|
||
//! bake camera/EXIF metadata into this route; it would force a fan-out
|
||
//! per result and balloon the response.
|
||
|
||
use crate::AppState;
|
||
use crate::ai::clip_client::ClipError;
|
||
use crate::database::ExifDao;
|
||
use actix_web::{HttpResponse, Result as ActixResult, web};
|
||
use base64::Engine;
|
||
use serde::{Deserialize, Serialize};
|
||
use std::sync::Mutex;
|
||
|
||
#[derive(Debug, Deserialize)]
|
||
pub struct SearchQuery {
|
||
/// Natural-language query. Required; empty triggers 400.
|
||
pub q: String,
|
||
/// Max results to return in this page. Capped to 200 server-side.
|
||
/// Defaults to 20. Pair with `offset` for pagination.
|
||
#[serde(default = "default_limit")]
|
||
pub limit: usize,
|
||
/// Zero-based offset into the sorted-and-filtered result set. The
|
||
/// scoring loop still runs over the full embedding matrix on every
|
||
/// page (cheap at personal-library scale — sub-100ms — and avoids
|
||
/// stateful pagination cursors). Defaults to 0.
|
||
#[serde(default)]
|
||
pub offset: usize,
|
||
/// Cosine-similarity floor below which results are dropped.
|
||
/// 0.20 is the rough "this is plausibly relevant" line for OpenAI
|
||
/// CLIP; tunable per call when sweeping. Defaults to 0.20.
|
||
#[serde(default = "default_threshold")]
|
||
pub threshold: f32,
|
||
/// Optional single-library scope. Legacy param — new clients pass
|
||
/// `library_ids` instead so multi-select scopes (Apollo's HUD library
|
||
/// chips, FileViewer-React's library picker) actually filter. Kept
|
||
/// for back-compat; `library_ids` wins when both are supplied.
|
||
pub library: Option<i32>,
|
||
/// Optional multi-library scope, comma-separated id list
|
||
/// (`?library_ids=1,3`). Empty / omitted = every enabled library
|
||
/// (the historical default). Apollo and FileViewer-React both send
|
||
/// this when 2+ libraries are selected; the single-library case
|
||
/// works through either param interchangeably.
|
||
pub library_ids: Option<String>,
|
||
/// Optional model-version filter. Defaults to the live engine's
|
||
/// version (queried lazily). Forces a strict join so mid-flight
|
||
/// model swaps can't mix geometries in a single response.
|
||
#[serde(default)]
|
||
pub model_version: Option<String>,
|
||
}
|
||
|
||
/// Serde fallback for `SearchQuery::limit` when the caller omits it.
fn default_limit() -> usize {
    const DEFAULT_PAGE_SIZE: usize = 20;
    DEFAULT_PAGE_SIZE
}
|
||
|
||
/// Serde fallback for `SearchQuery::threshold` when the caller omits it.
fn default_threshold() -> f32 {
    const DEFAULT_SIMILARITY_FLOOR: f32 = 0.20;
    DEFAULT_SIMILARITY_FLOOR
}
|
||
|
||
#[derive(Debug, Serialize)]
|
||
pub struct SearchHit {
|
||
pub library_id: i32,
|
||
pub rel_path: String,
|
||
pub content_hash: String,
|
||
/// Cosine similarity in [-1, 1]. In practice OpenAI CLIP returns
|
||
/// 0.10–0.40 for the typical photo library.
|
||
pub score: f32,
|
||
}
|
||
|
||
#[derive(Debug, Serialize)]
|
||
pub struct SearchResponse {
|
||
pub query: String,
|
||
pub model_version: String,
|
||
pub threshold: f32,
|
||
/// Total embeddings scored (= every photo in scope with a stored
|
||
/// embedding). Same value across pages of the same query.
|
||
pub considered: usize,
|
||
/// Count of results above threshold, before pagination. Lets the
|
||
/// client decide whether a "Load more" button is meaningful and
|
||
/// stop fetching when ``offset + results.len() >= total_matching``.
|
||
pub total_matching: usize,
|
||
pub offset: usize,
|
||
pub results: Vec<SearchHit>,
|
||
}
|
||
|
||
#[derive(Debug, Serialize)]
|
||
struct SearchError {
|
||
error: String,
|
||
}
|
||
|
||
/// Reinterpret a `clip_embedding` BLOB as little-endian `f32` values.
///
/// Yields `None` for an empty buffer or one whose length is not a
/// multiple of four bytes, so callers can skip the row instead of
/// aborting the whole search.
fn decode_embedding(bytes: &[u8]) -> Option<Vec<f32>> {
    const F32_BYTES: usize = 4;

    let well_formed = !bytes.is_empty() && bytes.len() % F32_BYTES == 0;
    if !well_formed {
        return None;
    }

    let floats = bytes
        .chunks_exact(F32_BYTES)
        .map(|word| f32::from_le_bytes([word[0], word[1], word[2], word[3]]))
        .collect();
    Some(floats)
}
|
||
|
||
/// Dot product of two slices. Pairs are formed with `zip`, so any
/// elements beyond the shorter slice's length are ignored.
#[inline]
fn dot(a: &[f32], b: &[f32]) -> f32 {
    a.iter().zip(b).map(|(&lhs, &rhs)| lhs * rhs).sum()
}
|
||
|
||
pub async fn search_photos(
|
||
state: web::Data<AppState>,
|
||
exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
|
||
query: web::Query<SearchQuery>,
|
||
) -> ActixResult<HttpResponse> {
|
||
let q_text = query.q.trim().to_string();
|
||
if q_text.is_empty() {
|
||
return Ok(HttpResponse::BadRequest().json(SearchError {
|
||
error: "query parameter `q` is required".into(),
|
||
}));
|
||
}
|
||
if !state.clip_client.is_enabled() {
|
||
return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
|
||
error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(),
|
||
}));
|
||
}
|
||
|
||
let limit = query.limit.clamp(1, 200);
|
||
let offset = query.offset;
|
||
let threshold = query.threshold.clamp(-1.0, 1.0);
|
||
|
||
// 1. Encode the query text. Fast — Apollo's text encoder is ~50ms
|
||
// on CPU. Bail with a clear error message if Apollo's down so the
|
||
// user sees "service unavailable" rather than empty results.
|
||
let query_resp = match state.clip_client.encode_text(&q_text).await {
|
||
Ok(r) => r,
|
||
Err(ClipError::Permanent(e)) => {
|
||
return Ok(HttpResponse::BadRequest().json(SearchError {
|
||
error: format!("query rejected: {e}"),
|
||
}));
|
||
}
|
||
Err(ClipError::Transient(e)) => {
|
||
return Ok(HttpResponse::BadGateway().json(SearchError {
|
||
error: format!("CLIP service unavailable: {e}"),
|
||
}));
|
||
}
|
||
Err(ClipError::Disabled) => {
|
||
return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
|
||
error: "CLIP service disabled".into(),
|
||
}));
|
||
}
|
||
};
|
||
// decode_embedding works on raw bytes; the wire format is b64.
|
||
let query_bytes = base64::engine::general_purpose::STANDARD
|
||
.decode(query_resp.embedding.as_bytes())
|
||
.unwrap_or_default();
|
||
let query_vec = match decode_embedding(&query_bytes) {
|
||
Some(v) => v,
|
||
None => {
|
||
return Ok(HttpResponse::BadGateway().json(SearchError {
|
||
error: "CLIP service returned a malformed query embedding".into(),
|
||
}));
|
||
}
|
||
};
|
||
|
||
// 2. Decide which library scope to search. `library_ids` (multi)
|
||
// wins over the legacy `library` (single) when both are present;
|
||
// either / both empty falls back to "every enabled library".
|
||
let library_ids: Vec<i32> = if let Some(raw) = query.library_ids.as_deref() {
|
||
let mut out: Vec<i32> = Vec::new();
|
||
for piece in raw.split(',') {
|
||
let trimmed = piece.trim();
|
||
if trimmed.is_empty() {
|
||
continue;
|
||
}
|
||
match trimmed.parse::<i32>() {
|
||
Ok(id) => {
|
||
if !out.contains(&id) {
|
||
out.push(id);
|
||
}
|
||
}
|
||
Err(_) => {
|
||
return Ok(HttpResponse::BadRequest().json(SearchError {
|
||
error: format!("invalid library_ids entry: {trimmed:?}"),
|
||
}));
|
||
}
|
||
}
|
||
}
|
||
out
|
||
} else if let Some(id) = query.library {
|
||
vec![id]
|
||
} else {
|
||
Vec::new()
|
||
};
|
||
|
||
// 3. Pull the (hash, embedding) matrix. Lock contention here is
|
||
// bounded — one big SELECT under a mutex Arc<Mutex<dyn ExifDao>>
|
||
// and then we release before scoring. If this becomes a hotspot
|
||
// we'll cache the decoded matrix in AppState with TTL.
|
||
let ctx = opentelemetry::Context::current();
|
||
let rows: Vec<(String, Vec<u8>)> = {
|
||
let mut dao = exif_dao.lock().expect("exif dao");
|
||
match dao.list_clip_index(
|
||
&ctx,
|
||
&library_ids,
|
||
query
|
||
.model_version
|
||
.as_deref()
|
||
.or(Some(&query_resp.model_version)),
|
||
) {
|
||
Ok(r) => r,
|
||
Err(e) => {
|
||
log::warn!("clip_search: list_clip_index failed: {:?}", e);
|
||
return Ok(HttpResponse::InternalServerError().json(SearchError {
|
||
error: "failed to load search index".into(),
|
||
}));
|
||
}
|
||
}
|
||
};
|
||
let considered = rows.len();
|
||
if considered == 0 {
|
||
return Ok(HttpResponse::Ok().json(SearchResponse {
|
||
query: q_text,
|
||
model_version: query_resp.model_version,
|
||
threshold,
|
||
considered,
|
||
total_matching: 0,
|
||
offset,
|
||
results: Vec::new(),
|
||
}));
|
||
}
|
||
|
||
// 4. Score. Cap the loop's transient allocation; we keep all scores
|
||
// and sort at the end. With ~14k entries the sort is microseconds.
|
||
let mut scored: Vec<(f32, String)> = Vec::with_capacity(considered);
|
||
for (hash, blob) in rows {
|
||
let Some(emb) = decode_embedding(&blob) else {
|
||
continue;
|
||
};
|
||
if emb.len() != query_vec.len() {
|
||
continue;
|
||
}
|
||
let sim = dot(&emb, &query_vec);
|
||
if sim < threshold {
|
||
continue;
|
||
}
|
||
scored.push((sim, hash));
|
||
}
|
||
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||
let total_matching = scored.len();
|
||
// Pagination — slice the sorted list at `[offset, offset+limit)`.
|
||
// Offsets past the end produce empty pages rather than an error so
|
||
// the client can stop fetching naturally on "load more" past the end.
|
||
let scored: Vec<(f32, String)> = if offset >= total_matching {
|
||
Vec::new()
|
||
} else {
|
||
let end = (offset + limit).min(total_matching);
|
||
scored[offset..end].to_vec()
|
||
};
|
||
|
||
if scored.is_empty() {
|
||
return Ok(HttpResponse::Ok().json(SearchResponse {
|
||
query: q_text,
|
||
model_version: query_resp.model_version,
|
||
threshold,
|
||
considered,
|
||
total_matching,
|
||
offset,
|
||
results: Vec::new(),
|
||
}));
|
||
}
|
||
|
||
// 5. Resolve each surviving hash back to a `(library_id, rel_path)`.
|
||
// `get_rel_paths_by_hash` returns every rel_path; we pick the first
|
||
// one for the result. Apollo / the UI can fetch alternatives via
|
||
// /image/metadata when needed.
|
||
let hashes: Vec<String> = scored.iter().map(|(_, h)| h.clone()).collect();
|
||
let path_map = {
|
||
let mut dao = exif_dao.lock().expect("exif dao");
|
||
match dao.get_rel_paths_for_hashes(&ctx, &hashes) {
|
||
Ok(m) => m,
|
||
Err(e) => {
|
||
log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
|
||
return Ok(HttpResponse::InternalServerError().json(SearchError {
|
||
error: "failed to resolve photo paths".into(),
|
||
}));
|
||
}
|
||
}
|
||
};
|
||
|
||
// We need (library_id, rel_path) — get_rel_paths_for_hashes only
|
||
// returns rel_paths. Cross-reference via find_by_content_hash to
|
||
// pick the library too. Single call per surviving hash; cheap at
|
||
// top-20.
|
||
let mut results = Vec::with_capacity(scored.len());
|
||
{
|
||
let mut dao = exif_dao.lock().expect("exif dao");
|
||
for (score, hash) in scored {
|
||
let row = match dao.find_by_content_hash(&ctx, &hash) {
|
||
Ok(Some(r)) => r,
|
||
Ok(None) => continue,
|
||
Err(e) => {
|
||
log::warn!(
|
||
"clip_search: find_by_content_hash failed for {}: {:?}",
|
||
hash,
|
||
e
|
||
);
|
||
continue;
|
||
}
|
||
};
|
||
// Prefer get_rel_paths_for_hashes's first entry if it
|
||
// exists (it shares semantics with `image_exif`'s natural
|
||
// order), falling back to the ImageExif row.
|
||
let rel_path = path_map
|
||
.get(&hash)
|
||
.and_then(|paths| paths.first().cloned())
|
||
.unwrap_or(row.file_path);
|
||
results.push(SearchHit {
|
||
library_id: row.library_id,
|
||
rel_path,
|
||
content_hash: hash,
|
||
score,
|
||
});
|
||
}
|
||
}
|
||
|
||
Ok(HttpResponse::Ok().json(SearchResponse {
|
||
query: q_text,
|
||
model_version: query_resp.model_version,
|
||
threshold,
|
||
considered,
|
||
total_matching,
|
||
offset,
|
||
results,
|
||
}))
|
||
}
|