fix: prevent hybrid mode from leaking OpenRouter model to local llamacpp client

When backend=hybrid with LLM_BACKEND=llamacpp, the user-selected model
(an OpenRouter id like "google/gemini-3-flash-preview") was being applied
to the local LlamaCppClient's primary_model and vision_model. This caused
describe_image to send the OpenRouter model name to llama-swap, which
returned HTTP 400 because it has no slot for that model.

Guard the local-client model override with !is_hybrid so it only applies
in local-only mode (where the user is selecting a different local model).
Bump to v1.2.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-26 09:55:16 -04:00
parent 0a627f4880
commit b03ee60342
7 changed files with 172 additions and 79 deletions
+30 -8
View File
@@ -13,7 +13,10 @@ impl BackendKind {
match s.trim().to_lowercase().as_str() {
"local" | "" => Ok(Self::Local),
"hybrid" => Ok(Self::Hybrid),
other => Err(anyhow!("unknown backend '{}'; expected 'local' or 'hybrid'", other)),
other => Err(anyhow!(
"unknown backend '{}'; expected 'local' or 'hybrid'",
other
)),
}
}
@@ -65,7 +68,12 @@ impl ResolvedBackend {
kind: BackendKind,
images_inline: bool,
) -> Self {
Self { chat, local, kind, images_inline }
Self {
chat,
local,
kind,
images_inline,
}
}
pub fn chat(&self) -> &dyn LlmClient {
@@ -97,21 +105,35 @@ mod tests {
#[test]
fn backend_kind_as_str_roundtrips() {
assert_eq!(BackendKind::parse(BackendKind::Local.as_str()).unwrap(), BackendKind::Local);
assert_eq!(BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(), BackendKind::Hybrid);
assert_eq!(
BackendKind::parse(BackendKind::Local.as_str()).unwrap(),
BackendKind::Local
);
assert_eq!(
BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(),
BackendKind::Hybrid
);
}
#[test]
fn sampling_overrides_has_sampling() {
let empty = SamplingOverrides {
model: None, num_ctx: None, temperature: None,
top_p: None, top_k: None, min_p: None,
model: None,
num_ctx: None,
temperature: None,
top_p: None,
top_k: None,
min_p: None,
};
assert!(!empty.has_sampling());
let with_temp = SamplingOverrides {
model: None, num_ctx: Some(4096), temperature: Some(0.7),
top_p: None, top_k: None, min_p: None,
model: None,
num_ctx: Some(4096),
temperature: Some(0.7),
top_p: None,
top_k: None,
min_p: None,
};
assert!(with_temp.has_sampling());
}