clip-search: offset-based pagination on /photos/search
Adds an `offset` query param (default 0) and `total_matching` + `offset` response fields.

The backend already computes the full sorted list of above-threshold matches for each query; pagination simply slices that list at [offset, offset+limit) instead of always returning the top window. An offset past the end returns an empty page rather than an error, so the client can stop fetching naturally.

The query is re-scored on every page rather than caching the sorted list: at personal-library scale (~14k embeddings, 768d) the dot-product loop is sub-100ms, and keeping the endpoint stateless means there are no eviction or staleness concerns.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -33,10 +33,16 @@ use std::sync::Mutex;
|
|||||||
pub struct SearchQuery {
|
pub struct SearchQuery {
|
||||||
/// Natural-language query. Required; empty triggers 400.
|
/// Natural-language query. Required; empty triggers 400.
|
||||||
pub q: String,
|
pub q: String,
|
||||||
/// Max results to return. Capped to 200 server-side; the UI almost
|
/// Max results to return in this page. Capped to 200 server-side.
|
||||||
/// always wants ≤50. Defaults to 20.
|
/// Defaults to 20. Pair with `offset` for pagination.
|
||||||
#[serde(default = "default_limit")]
|
#[serde(default = "default_limit")]
|
||||||
pub limit: usize,
|
pub limit: usize,
|
||||||
|
/// Zero-based offset into the sorted-and-filtered result set. The
|
||||||
|
/// scoring loop still runs over the full embedding matrix on every
|
||||||
|
/// page (cheap at personal-library scale — sub-100ms — and avoids
|
||||||
|
/// stateful pagination cursors). Defaults to 0.
|
||||||
|
#[serde(default)]
|
||||||
|
pub offset: usize,
|
||||||
/// Cosine-similarity floor below which results are dropped.
|
/// Cosine-similarity floor below which results are dropped.
|
||||||
/// 0.20 is the rough "this is plausibly relevant" line for OpenAI
|
/// 0.20 is the rough "this is plausibly relevant" line for OpenAI
|
||||||
/// CLIP; tunable per call when sweeping. Defaults to 0.20.
|
/// CLIP; tunable per call when sweeping. Defaults to 0.20.
|
||||||
@@ -76,7 +82,14 @@ pub struct SearchResponse {
|
|||||||
pub query: String,
|
pub query: String,
|
||||||
pub model_version: String,
|
pub model_version: String,
|
||||||
pub threshold: f32,
|
pub threshold: f32,
|
||||||
|
/// Total embeddings scored (= every photo in scope with a stored
|
||||||
|
/// embedding). Same value across pages of the same query.
|
||||||
pub considered: usize,
|
pub considered: usize,
|
||||||
|
/// Count of results above threshold, before pagination. Lets the
|
||||||
|
/// client decide whether a "Load more" button is meaningful and
|
||||||
|
/// stop fetching when ``offset + results.len() >= total_matching``.
|
||||||
|
pub total_matching: usize,
|
||||||
|
pub offset: usize,
|
||||||
pub results: Vec<SearchHit>,
|
pub results: Vec<SearchHit>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -122,6 +135,7 @@ pub async fn search_photos(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let limit = query.limit.clamp(1, 200);
|
let limit = query.limit.clamp(1, 200);
|
||||||
|
let offset = query.offset;
|
||||||
let threshold = query.threshold.clamp(-1.0, 1.0);
|
let threshold = query.threshold.clamp(-1.0, 1.0);
|
||||||
|
|
||||||
// 1. Encode the query text. Fast — Apollo's text encoder is ~50ms
|
// 1. Encode the query text. Fast — Apollo's text encoder is ~50ms
|
||||||
@@ -195,6 +209,8 @@ pub async fn search_photos(
|
|||||||
model_version: query_resp.model_version,
|
model_version: query_resp.model_version,
|
||||||
threshold,
|
threshold,
|
||||||
considered,
|
considered,
|
||||||
|
total_matching: 0,
|
||||||
|
offset,
|
||||||
results: Vec::new(),
|
results: Vec::new(),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
@@ -216,7 +232,16 @@ pub async fn search_photos(
|
|||||||
scored.push((sim, hash));
|
scored.push((sim, hash));
|
||||||
}
|
}
|
||||||
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
scored.truncate(limit);
|
let total_matching = scored.len();
|
||||||
|
// Pagination — slice the sorted list at `[offset, offset+limit)`.
|
||||||
|
// Offsets past the end produce empty pages rather than an error so
|
||||||
|
// the client can stop fetching naturally on "load more" past the end.
|
||||||
|
let scored: Vec<(f32, String)> = if offset >= total_matching {
|
||||||
|
Vec::new()
|
||||||
|
} else {
|
||||||
|
let end = (offset + limit).min(total_matching);
|
||||||
|
scored[offset..end].to_vec()
|
||||||
|
};
|
||||||
|
|
||||||
if scored.is_empty() {
|
if scored.is_empty() {
|
||||||
return Ok(HttpResponse::Ok().json(SearchResponse {
|
return Ok(HttpResponse::Ok().json(SearchResponse {
|
||||||
@@ -224,6 +249,8 @@ pub async fn search_photos(
|
|||||||
model_version: query_resp.model_version,
|
model_version: query_resp.model_version,
|
||||||
threshold,
|
threshold,
|
||||||
considered,
|
considered,
|
||||||
|
total_matching,
|
||||||
|
offset,
|
||||||
results: Vec::new(),
|
results: Vec::new(),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
@@ -287,6 +314,8 @@ pub async fn search_photos(
|
|||||||
model_version: query_resp.model_version,
|
model_version: query_resp.model_version,
|
||||||
threshold,
|
threshold,
|
||||||
considered,
|
considered,
|
||||||
|
total_matching,
|
||||||
|
offset,
|
||||||
results,
|
results,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user