Feature/unified nl search #106
+10
-2
@@ -53,6 +53,11 @@ pub struct UnifiedQuery {
|
||||
pub library: Option<i32>,
|
||||
/// Multi-library scope, comma-separated ids.
|
||||
pub library_ids: Option<String>,
|
||||
/// Optional model override. The client passes the user's currently-selected
|
||||
/// local model so the translation step reuses a model that's already loaded
|
||||
/// (avoids a llama-swap eviction / cold start). Falls back to the configured
|
||||
/// default local model when absent. Local only — no hybrid here.
|
||||
pub model: Option<String>,
|
||||
}
|
||||
|
||||
fn default_limit() -> usize {
|
||||
@@ -167,9 +172,12 @@ pub async fn unified_search<TagD: TagDao>(
|
||||
};
|
||||
|
||||
// Respect env/config for the LLM backend (LLM_BACKEND → ollama or
|
||||
// llama-swap); local only, no hybrid, per the feature's design.
|
||||
// llama-swap); local only, no hybrid, per the feature's design. The
|
||||
// client-supplied model (the user's current selection) routes translation
|
||||
// to an already-loaded model when possible; otherwise resolve_backend
|
||||
// falls back to the configured default.
|
||||
let overrides = SamplingOverrides {
|
||||
model: None,
|
||||
model: query.model.clone().filter(|m| !m.is_empty()),
|
||||
num_ctx: None,
|
||||
temperature: None,
|
||||
top_p: None,
|
||||
|
||||
Reference in New Issue
Block a user