feature/llamacpp-backend #101
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -2051,7 +2051,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "image-api"
|
name = "image-api"
|
||||||
version = "1.1.0"
|
version = "1.2.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix",
|
"actix",
|
||||||
"actix-cors",
|
"actix-cors",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "image-api"
|
name = "image-api"
|
||||||
version = "1.1.0"
|
version = "1.2.0"
|
||||||
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
|
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,10 @@ impl BackendKind {
|
|||||||
match s.trim().to_lowercase().as_str() {
|
match s.trim().to_lowercase().as_str() {
|
||||||
"local" | "" => Ok(Self::Local),
|
"local" | "" => Ok(Self::Local),
|
||||||
"hybrid" => Ok(Self::Hybrid),
|
"hybrid" => Ok(Self::Hybrid),
|
||||||
other => Err(anyhow!("unknown backend '{}'; expected 'local' or 'hybrid'", other)),
|
other => Err(anyhow!(
|
||||||
|
"unknown backend '{}'; expected 'local' or 'hybrid'",
|
||||||
|
other
|
||||||
|
)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -65,7 +68,12 @@ impl ResolvedBackend {
|
|||||||
kind: BackendKind,
|
kind: BackendKind,
|
||||||
images_inline: bool,
|
images_inline: bool,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self { chat, local, kind, images_inline }
|
Self {
|
||||||
|
chat,
|
||||||
|
local,
|
||||||
|
kind,
|
||||||
|
images_inline,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn chat(&self) -> &dyn LlmClient {
|
pub fn chat(&self) -> &dyn LlmClient {
|
||||||
@@ -97,21 +105,35 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn backend_kind_as_str_roundtrips() {
|
fn backend_kind_as_str_roundtrips() {
|
||||||
assert_eq!(BackendKind::parse(BackendKind::Local.as_str()).unwrap(), BackendKind::Local);
|
assert_eq!(
|
||||||
assert_eq!(BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(), BackendKind::Hybrid);
|
BackendKind::parse(BackendKind::Local.as_str()).unwrap(),
|
||||||
|
BackendKind::Local
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(),
|
||||||
|
BackendKind::Hybrid
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn sampling_overrides_has_sampling() {
|
fn sampling_overrides_has_sampling() {
|
||||||
let empty = SamplingOverrides {
|
let empty = SamplingOverrides {
|
||||||
model: None, num_ctx: None, temperature: None,
|
model: None,
|
||||||
top_p: None, top_k: None, min_p: None,
|
num_ctx: None,
|
||||||
|
temperature: None,
|
||||||
|
top_p: None,
|
||||||
|
top_k: None,
|
||||||
|
min_p: None,
|
||||||
};
|
};
|
||||||
assert!(!empty.has_sampling());
|
assert!(!empty.has_sampling());
|
||||||
|
|
||||||
let with_temp = SamplingOverrides {
|
let with_temp = SamplingOverrides {
|
||||||
model: None, num_ctx: Some(4096), temperature: Some(0.7),
|
model: None,
|
||||||
top_p: None, top_k: None, min_p: None,
|
num_ctx: Some(4096),
|
||||||
|
temperature: Some(0.7),
|
||||||
|
top_p: None,
|
||||||
|
top_k: None,
|
||||||
|
min_p: None,
|
||||||
};
|
};
|
||||||
assert!(with_temp.has_sampling());
|
assert!(with_temp.has_sampling());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -308,7 +308,9 @@ impl InsightChatService {
|
|||||||
|
|
||||||
let stored_model = insight.model_version.clone();
|
let stored_model = insight.model_version.clone();
|
||||||
let overrides = SamplingOverrides {
|
let overrides = SamplingOverrides {
|
||||||
model: req.model.clone()
|
model: req
|
||||||
|
.model
|
||||||
|
.clone()
|
||||||
.or_else(|| Some(stored_model.clone()))
|
.or_else(|| Some(stored_model.clone()))
|
||||||
.filter(|m| !m.is_empty()),
|
.filter(|m| !m.is_empty()),
|
||||||
num_ctx: req.num_ctx,
|
num_ctx: req.num_ctx,
|
||||||
@@ -731,7 +733,9 @@ impl InsightChatService {
|
|||||||
|
|
||||||
let stored_model = insight.model_version.clone();
|
let stored_model = insight.model_version.clone();
|
||||||
let overrides = SamplingOverrides {
|
let overrides = SamplingOverrides {
|
||||||
model: req.model.clone()
|
model: req
|
||||||
|
.model
|
||||||
|
.clone()
|
||||||
.or_else(|| Some(stored_model.clone()))
|
.or_else(|| Some(stored_model.clone()))
|
||||||
.filter(|m| !m.is_empty()),
|
.filter(|m| !m.is_empty()),
|
||||||
num_ctx: req.num_ctx,
|
num_ctx: req.num_ctx,
|
||||||
@@ -928,17 +932,15 @@ impl InsightChatService {
|
|||||||
// images_inline backends send images directly to the chat model.
|
// images_inline backends send images directly to the chat model.
|
||||||
let visual_block = if !backend.images_inline {
|
let visual_block = if !backend.images_inline {
|
||||||
match image_base64.as_deref() {
|
match image_base64.as_deref() {
|
||||||
Some(b64) => {
|
Some(b64) => match backend.local().describe_image(b64).await {
|
||||||
match backend.local().describe_image(b64).await {
|
Ok(desc) => {
|
||||||
Ok(desc) => {
|
format!("Visual description (from local vision model):\n{}\n", desc)
|
||||||
format!("Visual description (from local vision model):\n{}\n", desc)
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
log::warn!("{} bootstrap: describe_image failed: {}", kind.as_str(), e);
|
|
||||||
String::new()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
Err(e) => {
|
||||||
|
log::warn!("{} bootstrap: describe_image failed: {}", kind.as_str(), e);
|
||||||
|
String::new()
|
||||||
|
}
|
||||||
|
},
|
||||||
None => String::new(),
|
None => String::new(),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -1328,10 +1330,7 @@ fn resolve_bootstrap_backend(supplied: Option<&str>) -> Result<String> {
|
|||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
.unwrap_or_else(|| "local".to_string());
|
.unwrap_or_else(|| "local".to_string());
|
||||||
if !matches!(lower.as_str(), "local" | "hybrid") {
|
if !matches!(lower.as_str(), "local" | "hybrid") {
|
||||||
bail!(
|
bail!("unknown backend '{}'; expected 'local' or 'hybrid'", lower);
|
||||||
"unknown backend '{}'; expected 'local' or 'hybrid'",
|
|
||||||
lower
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
Ok(lower)
|
Ok(lower)
|
||||||
}
|
}
|
||||||
@@ -1971,7 +1970,11 @@ mod tests {
|
|||||||
// Both "openrouter" and the former "llamacpp" value are unknown now.
|
// Both "openrouter" and the former "llamacpp" value are unknown now.
|
||||||
for label in &["openrouter", "llamacpp"] {
|
for label in &["openrouter", "llamacpp"] {
|
||||||
let err = validate_cross_replay("local", label).unwrap_err();
|
let err = validate_cross_replay("local", label).unwrap_err();
|
||||||
assert!(format!("{}", err).contains("unknown backend"), "label={}", label);
|
assert!(
|
||||||
|
format!("{}", err).contains("unknown backend"),
|
||||||
|
"label={}",
|
||||||
|
label
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -11,9 +11,9 @@ use std::sync::{Arc, Mutex};
|
|||||||
|
|
||||||
use crate::ai::apollo_client::{ApolloClient, ApolloPlace};
|
use crate::ai::apollo_client::{ApolloClient, ApolloPlace};
|
||||||
use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
|
use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
|
||||||
|
use crate::ai::llamacpp::LlamaCppClient;
|
||||||
use crate::ai::llm_client::LlmClient;
|
use crate::ai::llm_client::LlmClient;
|
||||||
use crate::ai::ollama::{ChatMessage, OllamaClient, Tool};
|
use crate::ai::ollama::{ChatMessage, OllamaClient, Tool};
|
||||||
use crate::ai::llamacpp::LlamaCppClient;
|
|
||||||
use crate::ai::openrouter::OpenRouterClient;
|
use crate::ai::openrouter::OpenRouterClient;
|
||||||
use crate::ai::sms_client::{SmsApiClient, SmsSearchHit, SmsSearchParams};
|
use crate::ai::sms_client::{SmsApiClient, SmsSearchHit, SmsSearchParams};
|
||||||
use crate::ai::user_display_name;
|
use crate::ai::user_display_name;
|
||||||
@@ -35,7 +35,10 @@ pub(crate) fn parse_title_body(raw: &str) -> (String, String) {
|
|||||||
let trimmed = raw.trim();
|
let trimmed = raw.trim();
|
||||||
|
|
||||||
// Try "Title: <title>\n\n<body>" or "Title: <title>\n<body>"
|
// Try "Title: <title>\n\n<body>" or "Title: <title>\n<body>"
|
||||||
if let Some(rest) = trimmed.strip_prefix("Title:").or_else(|| trimmed.strip_prefix("title:")) {
|
if let Some(rest) = trimmed
|
||||||
|
.strip_prefix("Title:")
|
||||||
|
.or_else(|| trimmed.strip_prefix("title:"))
|
||||||
|
{
|
||||||
let rest = rest.trim_start();
|
let rest = rest.trim_start();
|
||||||
if let Some(split_pos) = rest.find("\n\n").or_else(|| rest.find('\n')) {
|
if let Some(split_pos) = rest.find("\n\n").or_else(|| rest.find('\n')) {
|
||||||
let title = rest[..split_pos].trim();
|
let title = rest[..split_pos].trim();
|
||||||
@@ -1644,7 +1647,10 @@ Return ONLY the summary, nothing else."#,
|
|||||||
"get_location_history" => self.tool_get_location_history(arguments, cx).await,
|
"get_location_history" => self.tool_get_location_history(arguments, cx).await,
|
||||||
"get_file_tags" => self.tool_get_file_tags(arguments, cx).await,
|
"get_file_tags" => self.tool_get_file_tags(arguments, cx).await,
|
||||||
"get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await,
|
"get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await,
|
||||||
"describe_photo" => self.tool_describe_photo(backend.local(), image_base64).await,
|
"describe_photo" => {
|
||||||
|
self.tool_describe_photo(backend.local(), image_base64)
|
||||||
|
.await
|
||||||
|
}
|
||||||
"reverse_geocode" => self.tool_reverse_geocode(arguments).await,
|
"reverse_geocode" => self.tool_reverse_geocode(arguments).await,
|
||||||
"get_personal_place_at" => self.tool_get_personal_place_at(arguments).await,
|
"get_personal_place_at" => self.tool_get_personal_place_at(arguments).await,
|
||||||
"recall_entities" => self.tool_recall_entities(arguments, cx).await,
|
"recall_entities" => self.tool_recall_entities(arguments, cx).await,
|
||||||
@@ -1655,7 +1661,13 @@ Return ONLY the summary, nothing else."#,
|
|||||||
"store_entity" => self.tool_store_entity(arguments, cx).await,
|
"store_entity" => self.tool_store_entity(arguments, cx).await,
|
||||||
"store_fact" => {
|
"store_fact" => {
|
||||||
self.tool_store_fact(
|
self.tool_store_fact(
|
||||||
arguments, file_path, user_id, persona_id, model, backend_label, cx,
|
arguments,
|
||||||
|
file_path,
|
||||||
|
user_id,
|
||||||
|
persona_id,
|
||||||
|
model,
|
||||||
|
backend_label,
|
||||||
|
cx,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
@@ -1810,9 +1822,8 @@ Return ONLY the summary, nothing else."#,
|
|||||||
);
|
);
|
||||||
|
|
||||||
let started = std::time::Instant::now();
|
let started = std::time::Instant::now();
|
||||||
let system = Some(
|
let system =
|
||||||
"You are a terse relevance ranker. You output only numbers separated by commas.",
|
Some("You are a terse relevance ranker. You output only numbers separated by commas.");
|
||||||
);
|
|
||||||
let response = local.generate(&prompt, system, None).await?;
|
let response = local.generate(&prompt, system, None).await?;
|
||||||
log::info!(
|
log::info!(
|
||||||
"rerank: finished in {} ms (prompt={} chars)",
|
"rerank: finished in {} ms (prompt={} chars)",
|
||||||
@@ -1960,7 +1971,8 @@ Return ONLY the summary, nothing else."#,
|
|||||||
.unwrap_or(20)
|
.unwrap_or(20)
|
||||||
.clamp(1, 50) as usize;
|
.clamp(1, 50) as usize;
|
||||||
let contact_id = args.get("contact_id").and_then(|v| v.as_i64());
|
let contact_id = args.get("contact_id").and_then(|v| v.as_i64());
|
||||||
let contact = args.get("contact")
|
let contact = args
|
||||||
|
.get("contact")
|
||||||
.and_then(|v| v.as_str())
|
.and_then(|v| v.as_str())
|
||||||
.map(|s| s.trim().to_string())
|
.map(|s| s.trim().to_string())
|
||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
@@ -2710,22 +2722,17 @@ Return ONLY the summary, nothing else."#,
|
|||||||
// Generate embedding for name + description (best-effort) via the
|
// Generate embedding for name + description (best-effort) via the
|
||||||
// configured local backend.
|
// configured local backend.
|
||||||
let embed_text = format!("{} {}", name, description);
|
let embed_text = format!("{} {}", name, description);
|
||||||
let embedding: Option<Vec<u8>> = match crate::ai::embed_one(
|
let embedding: Option<Vec<u8>> =
|
||||||
&self.ollama,
|
match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &embed_text).await {
|
||||||
self.llamacpp.as_deref(),
|
Ok(vec) => {
|
||||||
&embed_text,
|
let bytes: Vec<u8> = vec.iter().flat_map(|f| f.to_le_bytes()).collect();
|
||||||
)
|
Some(bytes)
|
||||||
.await
|
}
|
||||||
{
|
Err(e) => {
|
||||||
Ok(vec) => {
|
log::warn!("Embedding generation failed for entity '{}': {}", name, e);
|
||||||
let bytes: Vec<u8> = vec.iter().flat_map(|f| f.to_le_bytes()).collect();
|
None
|
||||||
Some(bytes)
|
}
|
||||||
}
|
};
|
||||||
Err(e) => {
|
|
||||||
log::warn!("Embedding generation failed for entity '{}': {}", name, e);
|
|
||||||
None
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let now = chrono::Utc::now().timestamp();
|
let now = chrono::Utc::now().timestamp();
|
||||||
let insert = InsertEntity {
|
let insert = InsertEntity {
|
||||||
@@ -3606,8 +3613,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
kind: BackendKind,
|
kind: BackendKind,
|
||||||
overrides: &SamplingOverrides,
|
overrides: &SamplingOverrides,
|
||||||
) -> Result<ResolvedBackend> {
|
) -> Result<ResolvedBackend> {
|
||||||
let local_via_llamacpp =
|
let local_via_llamacpp = crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
||||||
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
|
||||||
let is_hybrid = kind == BackendKind::Hybrid;
|
let is_hybrid = kind == BackendKind::Hybrid;
|
||||||
|
|
||||||
// ── chat client ────────────────────────────────────────────────
|
// ── chat client ────────────────────────────────────────────────
|
||||||
@@ -3680,12 +3686,15 @@ Return ONLY the summary, nothing else."#,
|
|||||||
};
|
};
|
||||||
|
|
||||||
// ── local client (utility calls: rerank, describe_image, etc.) ─
|
// ── local client (utility calls: rerank, describe_image, etc.) ─
|
||||||
// For llamacpp: mirror the chat model selection so rerank /
|
// For llamacpp in local mode: mirror the chat model selection so
|
||||||
// describe_image hit the same model that's already loaded —
|
// rerank / describe_image hit the same model that's already
|
||||||
// avoids a mid-turn model swap in llama-swap exclusive mode.
|
// loaded — avoids a mid-turn model swap in llama-swap exclusive
|
||||||
|
// mode. In hybrid mode the override is an OpenRouter model id
|
||||||
|
// (e.g. "google/gemini-3-flash-preview") which llama-swap can't
|
||||||
|
// serve — keep the configured local slots.
|
||||||
let local: Box<dyn LlmClient> = if local_via_llamacpp {
|
let local: Box<dyn LlmClient> = if local_via_llamacpp {
|
||||||
let mut lc = self.llamacpp.as_ref().unwrap().as_ref().clone();
|
let mut lc = self.llamacpp.as_ref().unwrap().as_ref().clone();
|
||||||
if let Some(ref m) = overrides.model {
|
if !is_hybrid && let Some(ref m) = overrides.model {
|
||||||
lc.primary_model = m.clone();
|
lc.primary_model = m.clone();
|
||||||
lc.set_vision_model(m.clone());
|
lc.set_vision_model(m.clone());
|
||||||
}
|
}
|
||||||
@@ -3713,14 +3722,14 @@ Return ONLY the summary, nothing else."#,
|
|||||||
.await
|
.await
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
|
|
||||||
let available_on_fallback =
|
let available_on_fallback = if let Some(ref fallback_url) = self.ollama.fallback_url
|
||||||
if let Some(ref fallback_url) = self.ollama.fallback_url {
|
{
|
||||||
OllamaClient::is_model_available(fallback_url, model)
|
OllamaClient::is_model_available(fallback_url, model)
|
||||||
.await
|
.await
|
||||||
.unwrap_or(false)
|
.unwrap_or(false)
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
};
|
};
|
||||||
|
|
||||||
if !available_on_primary && !available_on_fallback {
|
if !available_on_primary && !available_on_fallback {
|
||||||
anyhow::bail!(
|
anyhow::bail!(
|
||||||
@@ -3761,10 +3770,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
};
|
};
|
||||||
|
|
||||||
if !capabilities.has_tool_calling {
|
if !capabilities.has_tool_calling {
|
||||||
anyhow::bail!(
|
anyhow::bail!("tool calling not supported by model '{}'", ollama_for_caps);
|
||||||
"tool calling not supported by model '{}'",
|
|
||||||
ollama_for_caps
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
capabilities.has_vision
|
capabilities.has_vision
|
||||||
@@ -3801,9 +3807,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
||||||
|
|
||||||
// 1. Resolve backend + build clients.
|
// 1. Resolve backend + build clients.
|
||||||
let kind = BackendKind::parse(
|
let kind = BackendKind::parse(backend.as_deref().unwrap_or("local"))?;
|
||||||
backend.as_deref().unwrap_or("local"),
|
|
||||||
)?;
|
|
||||||
span.set_attribute(KeyValue::new("backend", kind.as_str()));
|
span.set_attribute(KeyValue::new("backend", kind.as_str()));
|
||||||
let overrides = SamplingOverrides {
|
let overrides = SamplingOverrides {
|
||||||
model: custom_model,
|
model: custom_model,
|
||||||
@@ -3933,7 +3937,11 @@ Return ONLY the summary, nothing else."#,
|
|||||||
Some(desc)
|
Some(desc)
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::warn!("{}: vision describe failed, continuing without: {}", kind, e);
|
log::warn!(
|
||||||
|
"{}: vision describe failed, continuing without: {}",
|
||||||
|
kind,
|
||||||
|
e
|
||||||
|
);
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -4813,4 +4821,42 @@ mod tests {
|
|||||||
assert!(t.len() <= 60);
|
assert!(t.len() <= 60);
|
||||||
assert_eq!(b, input);
|
assert_eq!(b, input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Regression: hybrid mode was leaking the OpenRouter model override
|
||||||
|
/// into the local llamacpp client, causing describe_image to send
|
||||||
|
/// e.g. "google/gemini-3-flash-preview" to llama-swap (which 400s).
|
||||||
|
#[test]
|
||||||
|
fn resolve_backend_hybrid_does_not_leak_model_to_local_llamacpp() {
|
||||||
|
use crate::ai::llamacpp::LlamaCppClient;
|
||||||
|
|
||||||
|
let mut base =
|
||||||
|
LlamaCppClient::new(Some("http://localhost:9292/v1".into()), Some("chat".into()));
|
||||||
|
base.set_vision_model("vision".into());
|
||||||
|
base.set_embedding_model("embed".into());
|
||||||
|
|
||||||
|
let openrouter_model = "google/gemini-3-flash-preview";
|
||||||
|
let overrides_model: Option<String> = Some(openrouter_model.into());
|
||||||
|
let is_hybrid = true;
|
||||||
|
|
||||||
|
// Replicate the resolve_backend local-client construction
|
||||||
|
// (lines ~3686-3695 of this file).
|
||||||
|
let mut lc = base.clone();
|
||||||
|
if let Some(ref m) = overrides_model {
|
||||||
|
if !is_hybrid {
|
||||||
|
lc.primary_model = m.clone();
|
||||||
|
lc.set_vision_model(m.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// In hybrid mode the local client must keep its configured slots.
|
||||||
|
assert_eq!(
|
||||||
|
lc.vision_model, "vision",
|
||||||
|
"hybrid mode must not override vision_model with OpenRouter model"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
lc.primary_model, "chat",
|
||||||
|
"hybrid mode must not override primary_model with OpenRouter model"
|
||||||
|
);
|
||||||
|
assert_eq!(lc.embedding_model, "embed");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -409,10 +409,7 @@ impl LlmClient for LlamaCppClient {
|
|||||||
tools.len()
|
tools.len()
|
||||||
);
|
);
|
||||||
let mut body = serde_json::Map::new();
|
let mut body = serde_json::Map::new();
|
||||||
body.insert(
|
body.insert("model".into(), Value::String(self.primary_model.clone()));
|
||||||
"model".into(),
|
|
||||||
Value::String(self.primary_model.clone()),
|
|
||||||
);
|
|
||||||
body.insert(
|
body.insert(
|
||||||
"messages".into(),
|
"messages".into(),
|
||||||
Value::Array(Self::messages_to_openai(&messages)),
|
Value::Array(Self::messages_to_openai(&messages)),
|
||||||
@@ -1071,6 +1068,28 @@ mod tests {
|
|||||||
assert_eq!(local.embedding_model, "embed");
|
assert_eq!(local.embedding_model, "embed");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn hybrid_mode_local_client_preserves_vision_model() {
|
||||||
|
// In hybrid mode, overrides.model is an OpenRouter model id
|
||||||
|
// (e.g. "google/gemini-3-flash-preview"). The local llamacpp
|
||||||
|
// client must NOT adopt that as its vision_model — it should
|
||||||
|
// keep the configured LLAMA_SWAP_VISION_MODEL so describe_image
|
||||||
|
// hits the correct local slot instead of sending an unknown
|
||||||
|
// model name to llama-swap.
|
||||||
|
let mut base = LlamaCppClient::new(None, Some("chat".into()));
|
||||||
|
base.set_vision_model("vision".into());
|
||||||
|
base.set_embedding_model("embed".into());
|
||||||
|
|
||||||
|
// Simulate what resolve_backend SHOULD do for hybrid mode:
|
||||||
|
// clone but do NOT override primary_model / vision_model.
|
||||||
|
let local = base.clone();
|
||||||
|
|
||||||
|
// The local client keeps its configured slots.
|
||||||
|
assert_eq!(local.primary_model, "chat");
|
||||||
|
assert_eq!(local.vision_model, "vision");
|
||||||
|
assert_eq!(local.embedding_model, "embed");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn assistant_tool_calls_emit_null_content() {
|
fn assistant_tool_calls_emit_null_content() {
|
||||||
let msg = ChatMessage {
|
let msg = ChatMessage {
|
||||||
@@ -1086,7 +1105,10 @@ mod tests {
|
|||||||
images: None,
|
images: None,
|
||||||
};
|
};
|
||||||
let wire = LlamaCppClient::messages_to_openai(&[msg]);
|
let wire = LlamaCppClient::messages_to_openai(&[msg]);
|
||||||
assert!(wire[0]["content"].is_null(), "empty content + tool_calls should emit null");
|
assert!(
|
||||||
|
wire[0]["content"].is_null(),
|
||||||
|
"empty content + tool_calls should emit null"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -25,11 +25,11 @@ pub use handlers::{
|
|||||||
get_insight_handler, get_openrouter_models_handler, rate_insight_handler,
|
get_insight_handler, get_openrouter_models_handler, rate_insight_handler,
|
||||||
};
|
};
|
||||||
pub use insight_generator::InsightGenerator;
|
pub use insight_generator::InsightGenerator;
|
||||||
|
pub use llamacpp::LlamaCppClient;
|
||||||
#[allow(unused_imports)]
|
#[allow(unused_imports)]
|
||||||
pub use llm_client::{
|
pub use llm_client::{
|
||||||
ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
|
ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
|
||||||
};
|
};
|
||||||
pub use llamacpp::LlamaCppClient;
|
|
||||||
pub use ollama::{EMBEDDING_MODEL, OllamaClient};
|
pub use ollama::{EMBEDDING_MODEL, OllamaClient};
|
||||||
pub use sms_client::{SmsApiClient, SmsMessage};
|
pub use sms_client::{SmsApiClient, SmsMessage};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user