diff --git a/src/unified_search.rs b/src/unified_search.rs
index a3187a4..7bb78dd 100644
--- a/src/unified_search.rs
+++ b/src/unified_search.rs
@@ -53,6 +53,11 @@ pub struct UnifiedQuery {
     pub library: Option<i32>,
     /// Multi-library scope, comma-separated ids.
     pub library_ids: Option<String>,
+    /// Optional model override. The client passes the user's currently-selected
+    /// local model so the translation step reuses a model that's already loaded
+    /// (avoids a llama-swap eviction / cold start). Falls back to the configured
+    /// default local model when absent or empty. Local only — no hybrid here.
+    pub model: Option<String>,
 }
 
 fn default_limit() -> usize {
@@ -167,9 +172,12 @@ pub async fn unified_search<TagD: TagDao>(
     };
 
     // Respect env/config for the LLM backend (LLM_BACKEND → ollama or
-    // llama-swap); local only, no hybrid, per the feature's design.
+    // llama-swap); local only, no hybrid, per the feature's design. The
+    // client-supplied model (the user's current selection) routes translation
+    // to an already-loaded model when possible; if it is absent or empty,
+    // resolve_backend falls back to the configured default.
     let overrides = SamplingOverrides {
-        model: None,
+        model: query.model.clone().filter(|m| !m.is_empty()),
         num_ctx: None,
         temperature: None,
         top_p: None,