fix: prevent hybrid mode from leaking OpenRouter model to local llamacpp client
When backend=hybrid with LLM_BACKEND=llamacpp, the user-selected model (an OpenRouter ID such as "google/gemini-3-flash-preview") was being applied to the local LlamaCppClient's primary_model and vision_model. As a result, describe_image sent the OpenRouter model name to llama-swap, which returned 400 because it has no slot for that model.

Guard the local-client model override with !is_hybrid so that it applies only in local-only mode (where the user is selecting a different local model).

Bump to v1.2.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+30
-8
@@ -13,7 +13,10 @@ impl BackendKind {
|
||||
match s.trim().to_lowercase().as_str() {
|
||||
"local" | "" => Ok(Self::Local),
|
||||
"hybrid" => Ok(Self::Hybrid),
|
||||
other => Err(anyhow!("unknown backend '{}'; expected 'local' or 'hybrid'", other)),
|
||||
other => Err(anyhow!(
|
||||
"unknown backend '{}'; expected 'local' or 'hybrid'",
|
||||
other
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,7 +68,12 @@ impl ResolvedBackend {
|
||||
kind: BackendKind,
|
||||
images_inline: bool,
|
||||
) -> Self {
|
||||
Self { chat, local, kind, images_inline }
|
||||
Self {
|
||||
chat,
|
||||
local,
|
||||
kind,
|
||||
images_inline,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chat(&self) -> &dyn LlmClient {
|
||||
@@ -97,21 +105,35 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn backend_kind_as_str_roundtrips() {
|
||||
assert_eq!(BackendKind::parse(BackendKind::Local.as_str()).unwrap(), BackendKind::Local);
|
||||
assert_eq!(BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(), BackendKind::Hybrid);
|
||||
assert_eq!(
|
||||
BackendKind::parse(BackendKind::Local.as_str()).unwrap(),
|
||||
BackendKind::Local
|
||||
);
|
||||
assert_eq!(
|
||||
BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(),
|
||||
BackendKind::Hybrid
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sampling_overrides_has_sampling() {
|
||||
let empty = SamplingOverrides {
|
||||
model: None, num_ctx: None, temperature: None,
|
||||
top_p: None, top_k: None, min_p: None,
|
||||
model: None,
|
||||
num_ctx: None,
|
||||
temperature: None,
|
||||
top_p: None,
|
||||
top_k: None,
|
||||
min_p: None,
|
||||
};
|
||||
assert!(!empty.has_sampling());
|
||||
|
||||
let with_temp = SamplingOverrides {
|
||||
model: None, num_ctx: Some(4096), temperature: Some(0.7),
|
||||
top_p: None, top_k: None, min_p: None,
|
||||
model: None,
|
||||
num_ctx: Some(4096),
|
||||
temperature: Some(0.7),
|
||||
top_p: None,
|
||||
top_k: None,
|
||||
min_p: None,
|
||||
};
|
||||
assert!(with_temp.has_sampling());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user