Unified search: accept client model override (avoid model swapping)
Add an optional `model` query param to /photos/search/unified and pass it into resolve_backend's overrides. The client sends the user's currently selected local model so that the translation step reuses an already-loaded model instead of forcing a llama-swap eviction and cold start. When the param is absent or empty, the configured default is used. Translation remains local only (no hybrid).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+10
-2
@@ -53,6 +53,11 @@ pub struct UnifiedQuery {
|
|||||||
pub library: Option<i32>,
|
pub library: Option<i32>,
|
||||||
/// Multi-library scope, comma-separated ids.
|
/// Multi-library scope, comma-separated ids.
|
||||||
pub library_ids: Option<String>,
|
pub library_ids: Option<String>,
|
||||||
|
/// Optional model override. The client passes the user's currently-selected
|
||||||
|
/// local model so the translation step reuses a model that's already loaded
|
||||||
|
/// (avoids a llama-swap eviction / cold start). Falls back to the configured
|
||||||
|
/// default local model when absent. Local only — no hybrid here.
|
||||||
|
pub model: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_limit() -> usize {
|
fn default_limit() -> usize {
|
||||||
@@ -167,9 +172,12 @@ pub async fn unified_search<TagD: TagDao>(
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Respect env/config for the LLM backend (LLM_BACKEND → ollama or
|
// Respect env/config for the LLM backend (LLM_BACKEND → ollama or
|
||||||
// llama-swap); local only, no hybrid, per the feature's design.
|
// llama-swap); local only, no hybrid, per the feature's design. The
|
||||||
|
// client-supplied model (the user's current selection) routes translation
|
||||||
|
// to an already-loaded model when possible; otherwise resolve_backend
|
||||||
|
// falls back to the configured default.
|
||||||
let overrides = SamplingOverrides {
|
let overrides = SamplingOverrides {
|
||||||
model: None,
|
model: query.model.clone().filter(|m| !m.is_empty()),
|
||||||
num_ctx: None,
|
num_ctx: None,
|
||||||
temperature: None,
|
temperature: None,
|
||||||
top_p: None,
|
top_p: None,
|
||||||
|
|||||||
Reference in New Issue
Block a user