diff --git a/src/unified_search.rs b/src/unified_search.rs index a3187a4..7bb78dd 100644 --- a/src/unified_search.rs +++ b/src/unified_search.rs @@ -53,6 +53,11 @@ pub struct UnifiedQuery { pub library: Option, /// Multi-library scope, comma-separated ids. pub library_ids: Option, + /// Optional model override. The client passes the user's currently-selected + /// local model so the translation step reuses a model that's already loaded + /// (avoids a llama-swap eviction / cold start). Falls back to the configured + /// default local model when absent. Local only — no hybrid here. + pub model: Option, } fn default_limit() -> usize { @@ -167,9 +172,12 @@ pub async fn unified_search( }; // Respect env/config for the LLM backend (LLM_BACKEND → ollama or - // llama-swap); local only, no hybrid, per the feature's design. + // llama-swap); local only, no hybrid, per the feature's design. The + // client-supplied model (the user's current selection) routes translation + // to an already-loaded model when possible; otherwise resolve_backend + // falls back to the configured default. let overrides = SamplingOverrides { - model: None, + model: query.model.clone().filter(|m| !m.is_empty()), num_ctx: None, temperature: None, top_p: None,