// LlamaCppClient — talks to a llama-swap proxy that fronts one or more // llama-server processes. llama-swap exposes an OpenAI-compatible HTTP // surface (`/v1/chat/completions`, `/v1/embeddings`, `/v1/models`), so the // wire translation mirrors `OpenRouterClient` almost exactly. // // Differences from OpenRouter: // - No bearer auth or attribution headers; llama-swap is LAN-only. // - Three model slots (`primary_model` = chat, `vision_model`, `embedding_model`) // each map to a model id in the llama-swap config. `describe_image` and // `generate_embeddings` issue requests with the appropriate slot id in the // `model` field, which is how llama-swap selects which backend process to // run. // - `/v1/models` returns only the configured slot ids — capabilities aren't // reported by the API, so `vision_models` is a config-time allowlist (env // `LLAMA_SWAP_VISION_MODELS`) used to set `has_vision` on responses. // `has_tool_calling` is assumed true for every slot, since llama-swap entries // default to launching llama-server with `--jinja`. // // First consumer lands alongside the three-way backend dispatch in // insight_generator / insight_chat. #![allow(dead_code)] use anyhow::{Context, Result, anyhow, bail}; use async_trait::async_trait; use reqwest::Client; use serde::Deserialize; use serde_json::{Value, json}; use std::time::Duration; use crate::ai::llm_client::{ ChatMessage, LlmClient, LlmStreamEvent, ModelCapabilities, Tool, ToolCall, ToolCallFunction, }; use futures::stream::{BoxStream, StreamExt}; const DEFAULT_BASE_URL: &str = "http://localhost:9292/v1"; const DEFAULT_PRIMARY_MODEL: &str = "chat"; const DEFAULT_VISION_MODEL: &str = "vision"; const DEFAULT_EMBEDDING_MODEL: &str = "embed"; const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 180; /// OpenAI-compatible client targeting a llama-swap proxy in front of one or /// more llama-server processes. See the module doc-comment for the slot model. #[derive(Clone)] pub struct LlamaCppClient { client: Client, pub base_url: String, /// Chat model slot id (e.g. `"chat"`). Used for `generate` / /// `chat_with_tools` / `chat_with_tools_stream`. pub primary_model: String, /// Embedding model slot id (e.g. `"embed"`). Used for /// `generate_embeddings`. pub embedding_model: String, /// Vision model slot id (e.g. `"vision"`). Used for `describe_image` and /// included in `vision_models` automatically so capability lookups for /// the default vision slot report `has_vision = true` even when the env /// allowlist is empty. pub vision_model: String, /// Operator-curated set of slot ids known to be multimodal. Drives the /// `has_vision` field in `list_models` / `model_capabilities`, since /// llama-swap's `/v1/models` doesn't report modality. Empty allowlist /// still marks `vision_model` as vision-capable. pub vision_models: Vec, num_ctx: Option, temperature: Option, top_p: Option, top_k: Option, min_p: Option, } impl LlamaCppClient { pub fn new(base_url: Option, primary_model: Option) -> Self { let timeout_secs = std::env::var("LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS") .ok() .and_then(|v| v.parse::().ok()) .unwrap_or(DEFAULT_REQUEST_TIMEOUT_SECS); Self { client: Client::builder() .connect_timeout(Duration::from_secs(10)) .timeout(Duration::from_secs(timeout_secs)) .build() .unwrap_or_else(|_| Client::new()), base_url: base_url.unwrap_or_else(|| DEFAULT_BASE_URL.to_string()), primary_model: primary_model.unwrap_or_else(|| DEFAULT_PRIMARY_MODEL.to_string()), embedding_model: DEFAULT_EMBEDDING_MODEL.to_string(), vision_model: DEFAULT_VISION_MODEL.to_string(), vision_models: Vec::new(), num_ctx: None, temperature: None, top_p: None, top_k: None, min_p: None, } } pub fn set_embedding_model(&mut self, model: String) { self.embedding_model = model; } pub fn set_vision_model(&mut self, model: String) { self.vision_model = model; } pub fn set_vision_models(&mut self, models: Vec) { self.vision_models = models; } pub fn set_num_ctx(&mut self, num_ctx: Option) { self.num_ctx = num_ctx; } pub fn set_sampling_params( &mut self, temperature: Option, top_p: Option, top_k: Option, min_p: Option, ) { self.temperature = temperature; self.top_p = top_p; self.top_k = top_k; self.min_p = min_p; } /// Translate canonical messages to the OpenAI-compatible wire shape. /// Behaviorally identical to `OpenRouterClient::messages_to_openai` — /// stringify tool-call arguments, rewrite images into content-parts, attach /// `tool_call_id` to `role=tool` messages based on the preceding assistant /// turn's tool calls. fn messages_to_openai(messages: &[ChatMessage]) -> Vec { let mut out = Vec::with_capacity(messages.len()); let mut last_tool_call_ids: Vec = Vec::new(); let mut next_tool_result_idx: usize = 0; for msg in messages { let mut obj = serde_json::Map::new(); obj.insert("role".into(), Value::String(msg.role.clone())); match &msg.images { Some(images) if !images.is_empty() => { let mut parts: Vec = Vec::new(); if !msg.content.is_empty() { parts.push(json!({"type": "text", "text": msg.content})); } for img in images { let url = image_to_data_url(img); parts.push(json!({ "type": "image_url", "image_url": { "url": url } })); } obj.insert("content".into(), Value::Array(parts)); } _ => { obj.insert("content".into(), Value::String(msg.content.clone())); } } if let Some(tcs) = &msg.tool_calls && msg.role == "assistant" { let converted: Vec = tcs .iter() .enumerate() .map(|(i, call)| { let id = call.id.clone().unwrap_or_else(|| format!("call_{}", i)); let args_str = serde_json::to_string(&call.function.arguments) .unwrap_or_else(|_| "{}".to_string()); json!({ "id": id, "type": "function", "function": { "name": call.function.name, "arguments": args_str, } }) }) .collect(); last_tool_call_ids = converted .iter() .filter_map(|v| v.get("id").and_then(|x| x.as_str()).map(String::from)) .collect(); next_tool_result_idx = 0; obj.insert("tool_calls".into(), Value::Array(converted)); } if msg.role == "tool" { let id = last_tool_call_ids .get(next_tool_result_idx) .cloned() .unwrap_or_else(|| "call_0".to_string()); obj.insert("tool_call_id".into(), Value::String(id)); next_tool_result_idx += 1; } out.push(Value::Object(obj)); } out } /// Parse an OpenAI-compatible assistant message back into canonical shape. /// llama.cpp emits `reasoning_content` on thinking models; we drop it for /// parity with OpenRouter (which also strips upstream reasoning fields). fn openai_message_to_chat(msg: &Value) -> Result { let obj = msg .as_object() .ok_or_else(|| anyhow!("response message is not an object"))?; let role = obj .get("role") .and_then(|v| v.as_str()) .unwrap_or("assistant") .to_string(); let content = obj .get("content") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); let tool_calls = if let Some(tcs) = obj.get("tool_calls").and_then(|v| v.as_array()) { let mut parsed = Vec::with_capacity(tcs.len()); for tc in tcs { let id = tc.get("id").and_then(|v| v.as_str()).map(String::from); let function = tc .get("function") .ok_or_else(|| anyhow!("tool_call missing function field"))?; let name = function .get("name") .and_then(|v| v.as_str()) .unwrap_or_default() .to_string(); let args_value = match function.get("arguments") { Some(Value::String(s)) => { serde_json::from_str::(s).unwrap_or_else(|_| json!({})) } Some(v @ Value::Object(_)) => v.clone(), _ => json!({}), }; parsed.push(ToolCall { id, function: ToolCallFunction { name, arguments: args_value, }, }); } Some(parsed) } else { None }; Ok(ChatMessage { role, content, tool_calls, images: None, }) } fn build_options(&self) -> Vec<(&'static str, Value)> { let mut v = Vec::new(); if let Some(t) = self.temperature { v.push(("temperature", json!(t))); } if let Some(p) = self.top_p { v.push(("top_p", json!(p))); } if let Some(k) = self.top_k { v.push(("top_k", json!(k))); } if let Some(m) = self.min_p { v.push(("min_p", json!(m))); } // num_ctx isn't an OpenAI param; llama-server bakes ctx in at launch // via -c, so we silently drop the override here. The config.yaml // entry is the source of truth for context size. let _ = self.num_ctx; v } /// Issue a chat request with an explicit model id override. Used by /// `describe_image` to route through the vision slot without mutating /// `self.primary_model`. async fn chat_completion_with_model( &self, model: &str, messages: Vec, tools: Vec, ) -> Result<(ChatMessage, Option, Option)> { let url = format!("{}/chat/completions", self.base_url); let mut body = serde_json::Map::new(); body.insert("model".into(), Value::String(model.to_string())); body.insert( "messages".into(), Value::Array(Self::messages_to_openai(&messages)), ); body.insert("stream".into(), Value::Bool(false)); if !tools.is_empty() { body.insert( "tools".into(), serde_json::to_value(&tools).context("serializing tools")?, ); } for (k, v) in self.build_options() { body.insert(k.into(), v); } let resp = self .client .post(&url) .json(&Value::Object(body)) .send() .await .with_context(|| format!("POST {} failed", url))?; if !resp.status().is_success() { let status = resp.status(); let body = resp.text().await.unwrap_or_default(); bail!("llama-swap chat request failed: {} — {}", status, body); } let parsed: Value = resp.json().await.context("parsing chat response")?; let choice = parsed .get("choices") .and_then(|v| v.as_array()) .and_then(|a| a.first()) .ok_or_else(|| { anyhow!( "response missing choices[0]: {}", extract_error_detail(&parsed) ) })?; let msg = choice.get("message").ok_or_else(|| { anyhow!( "choices[0] missing message: {}", extract_error_detail(&parsed) ) })?; let chat_msg = Self::openai_message_to_chat(msg)?; let usage = parsed.get("usage"); let prompt_tokens = usage .and_then(|u| u.get("prompt_tokens")) .and_then(|v| v.as_i64()) .map(|n| n as i32); let completion_tokens = usage .and_then(|u| u.get("completion_tokens")) .and_then(|v| v.as_i64()) .map(|n| n as i32); Ok((chat_msg, prompt_tokens, completion_tokens)) } } #[async_trait] impl LlmClient for LlamaCppClient { async fn generate( &self, prompt: &str, system: Option<&str>, images: Option>, ) -> Result { let mut messages: Vec = Vec::new(); if let Some(sys) = system { messages.push(ChatMessage::system(sys)); } let mut user = ChatMessage::user(prompt); user.images = images; messages.push(user); let (reply, _, _) = self.chat_with_tools(messages, Vec::new()).await?; Ok(reply.content) } async fn chat_with_tools( &self, messages: Vec, tools: Vec, ) -> Result<(ChatMessage, Option, Option)> { log::info!( "llama-swap chat_with_tools: model={} messages={} tools={}", self.primary_model, messages.len(), tools.len() ); self.chat_completion_with_model(&self.primary_model.clone(), messages, tools) .await } async fn chat_with_tools_stream( &self, messages: Vec, tools: Vec, ) -> Result>> { let url = format!("{}/chat/completions", self.base_url); let mut body = serde_json::Map::new(); body.insert( "model".into(), Value::String(self.primary_model.clone()), ); body.insert( "messages".into(), Value::Array(Self::messages_to_openai(&messages)), ); body.insert("stream".into(), Value::Bool(true)); body.insert( "stream_options".into(), serde_json::json!({ "include_usage": true }), ); if !tools.is_empty() { body.insert( "tools".into(), serde_json::to_value(&tools).context("serializing tools")?, ); } for (k, v) in self.build_options() { body.insert(k.into(), v); } let resp = self .client .post(&url) .json(&Value::Object(body)) .send() .await .with_context(|| format!("POST {} failed", url))?; if !resp.status().is_success() { let status = resp.status(); let body = resp.text().await.unwrap_or_default(); bail!("llama-swap stream request failed: {} — {}", status, body); } let byte_stream = resp.bytes_stream(); let stream = async_stream::stream! { let mut byte_stream = byte_stream; let mut buf: Vec = Vec::new(); let mut accumulated_content = String::new(); let mut tool_state: std::collections::BTreeMap< usize, (Option, Option, String), > = std::collections::BTreeMap::new(); let mut role = "assistant".to_string(); let mut prompt_tokens: Option = None; let mut completion_tokens: Option = None; let mut done_seen = false; while let Some(chunk) = byte_stream.next().await { let chunk = match chunk { Ok(b) => b, Err(e) => { yield Err(anyhow!("stream read failed: {}", e)); return; } }; buf.extend_from_slice(&chunk); while let Some(sep) = find_double_newline(&buf) { let frame = buf.drain(..sep + 2).collect::>(); let frame_str = match std::str::from_utf8(&frame) { Ok(s) => s, Err(_) => continue, }; for line in frame_str.lines() { let line = line.trim_end_matches('\r'); let payload = match line.strip_prefix("data: ") { Some(p) => p, None => continue, }; if payload == "[DONE]" { done_seen = true; break; } let v: Value = match serde_json::from_str(payload) { Ok(v) => v, Err(e) => { log::warn!( "malformed llama-swap SSE frame: {} ({})", payload, e ); continue; } }; if let Some(usage) = v.get("usage") { prompt_tokens = usage .get("prompt_tokens") .and_then(|n| n.as_i64()) .map(|n| n as i32); completion_tokens = usage .get("completion_tokens") .and_then(|n| n.as_i64()) .map(|n| n as i32); } let Some(choices) = v.get("choices").and_then(|c| c.as_array()) else { continue; }; let Some(choice) = choices.first() else { continue }; let delta = match choice.get("delta") { Some(d) => d, None => continue, }; if let Some(r) = delta.get("role").and_then(|v| v.as_str()) { role = r.to_string(); } if let Some(content) = delta.get("content").and_then(|v| v.as_str()) && !content.is_empty() { accumulated_content.push_str(content); yield Ok(LlmStreamEvent::TextDelta(content.to_string())); } if let Some(tcs) = delta.get("tool_calls").and_then(|v| v.as_array()) { for tc_delta in tcs { let idx = tc_delta .get("index") .and_then(|n| n.as_u64()) .unwrap_or(0) as usize; let entry = tool_state .entry(idx) .or_insert((None, None, String::new())); if let Some(id) = tc_delta.get("id").and_then(|v| v.as_str()) { entry.0 = Some(id.to_string()); } if let Some(func) = tc_delta.get("function") { if let Some(name) = func.get("name").and_then(|v| v.as_str()) { entry.1 = Some(name.to_string()); } if let Some(args) = func.get("arguments").and_then(|v| v.as_str()) { entry.2.push_str(args); } } } } } if done_seen { break; } } if done_seen { break; } } let tool_calls: Option> = if tool_state.is_empty() { None } else { let mut v = Vec::with_capacity(tool_state.len()); for (_idx, (id, name, args)) in tool_state { let arguments: Value = if args.trim().is_empty() { Value::Object(Default::default()) } else { serde_json::from_str(&args).unwrap_or_else(|_| { Value::Object(Default::default()) }) }; v.push(ToolCall { id, function: ToolCallFunction { name: name.unwrap_or_default(), arguments, }, }); } Some(v) }; let message = ChatMessage { role, content: accumulated_content, tool_calls, images: None, }; yield Ok(LlmStreamEvent::Done { message, prompt_eval_count: prompt_tokens, eval_count: completion_tokens, }); }; Ok(Box::pin(stream)) } async fn generate_embeddings(&self, texts: &[&str]) -> Result>> { let url = format!("{}/embeddings", self.base_url); let body = json!({ "model": self.embedding_model, "input": texts, }); let resp = self .client .post(&url) .json(&body) .send() .await .with_context(|| format!("POST {} failed", url))?; if !resp.status().is_success() { let status = resp.status(); let body = resp.text().await.unwrap_or_default(); bail!("llama-swap embedding request failed: {} — {}", status, body); } #[derive(Deserialize)] struct EmbedResponse { data: Vec, } #[derive(Deserialize)] struct EmbedItem { embedding: Vec, } let parsed: EmbedResponse = resp.json().await.context("parsing embed response")?; Ok(parsed.data.into_iter().map(|i| i.embedding).collect()) } async fn describe_image(&self, image_base64: &str) -> Result { let prompt = "Briefly describe what you see in this image in 1-2 sentences. \ Focus on the people, location, and activity."; let system = "You are a scene description assistant. Be concise and factual."; let messages = vec![ ChatMessage::system(system), ChatMessage { role: "user".to_string(), content: prompt.to_string(), tool_calls: None, images: Some(vec![image_base64.to_string()]), }, ]; let (reply, _, _) = self .chat_completion_with_model(&self.vision_model.clone(), messages, Vec::new()) .await?; Ok(reply.content) } async fn list_models(&self) -> Result> { let url = format!("{}/models", self.base_url); let resp = self .client .get(&url) .send() .await .with_context(|| format!("GET {} failed", url))?; if !resp.status().is_success() { let status = resp.status(); let body = resp.text().await.unwrap_or_default(); bail!("llama-swap list_models failed: {} — {}", status, body); } let parsed: Value = resp.json().await.context("parsing models response")?; let data = parsed .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| anyhow!("models response missing data[]"))?; let caps: Vec = data .iter() .map(|m| self.parse_model_capabilities(m)) .collect(); Ok(caps) } async fn model_capabilities(&self, model: &str) -> Result { let all = self.list_models().await?; all.into_iter() .find(|m| m.name == model) .ok_or_else(|| anyhow!("model '{}' not found on llama-swap", model)) } fn primary_model(&self) -> &str { &self.primary_model } } impl LlamaCppClient { fn parse_model_capabilities(&self, m: &Value) -> ModelCapabilities { let name = m .get("id") .and_then(|v| v.as_str()) .unwrap_or_default() .to_string(); let has_vision = name == self.vision_model || self.vision_models.iter().any(|v| v == &name); // Tool calling is the default for llama-swap entries we configure // (--jinja flag); no negative-list mechanism yet, so report true. ModelCapabilities { name, has_vision, has_tool_calling: true, } } } /// Extract a diagnostic fragment from a llama-swap / llama-server response /// that doesn't match the expected `{choices: [...]}` shape. llama-server /// returns errors as `{"error": {"message": "...", "code": N, "type": "..."}}`; /// llama-swap itself sometimes wraps subprocess failures with its own /// `{"error": "..."}` flat shape. Surface either when present, otherwise fall /// back to a truncated raw-JSON view. fn extract_error_detail(parsed: &Value) -> String { if let Some(err) = parsed.get("error") { match err { Value::Object(_) => { let message = err .get("message") .and_then(|v| v.as_str()) .unwrap_or("(no message)"); let code = err .get("code") .map(|v| match v { Value::String(s) => s.clone(), other => other.to_string(), }) .unwrap_or_else(|| "?".to_string()); let short_message: String = message.chars().take(240).collect(); return format!("error code={} message=\"{}\"", code, short_message); } Value::String(s) => { let short: String = s.chars().take(240).collect(); return format!("error=\"{}\"", short); } _ => {} } } let raw = parsed.to_string(); raw.chars().take(300).collect() } fn find_double_newline(buf: &[u8]) -> Option { for i in 0..buf.len().saturating_sub(1) { if buf[i] == b'\n' && buf[i + 1] == b'\n' { return Some(i); } if i + 3 < buf.len() && buf[i] == b'\r' && buf[i + 1] == b'\n' && buf[i + 2] == b'\r' && buf[i + 3] == b'\n' { return Some(i + 1); } } None } fn image_to_data_url(img: &str) -> String { if img.starts_with("data:") { img.to_string() } else { format!("data:image/jpeg;base64,{}", img) } } #[cfg(test)] mod tests { use super::*; #[test] fn tool_call_arguments_stringified_on_send() { let msg = ChatMessage { role: "assistant".into(), content: String::new(), tool_calls: Some(vec![ToolCall { id: Some("call_abc".into()), function: ToolCallFunction { name: "search_sms".into(), arguments: json!({"query": "hello", "limit": 5}), }, }]), images: None, }; let wire = LlamaCppClient::messages_to_openai(&[msg]); let tcs = wire[0] .get("tool_calls") .and_then(|v| v.as_array()) .expect("tool_calls present"); let args = tcs[0] .get("function") .and_then(|f| f.get("arguments")) .and_then(|a| a.as_str()) .expect("arguments stringified"); let parsed: Value = serde_json::from_str(args).unwrap(); assert_eq!(parsed["query"], "hello"); assert_eq!(parsed["limit"], 5); } #[test] fn tool_call_arguments_parsed_on_receive() { let response_msg = json!({ "role": "assistant", "content": "", "tool_calls": [{ "id": "call_xyz", "type": "function", "function": { "name": "get_weather", "arguments": "{\"city\":\"Boston\",\"units\":\"celsius\"}" } }] }); let parsed = LlamaCppClient::openai_message_to_chat(&response_msg).unwrap(); let tcs = parsed.tool_calls.unwrap(); assert_eq!(tcs.len(), 1); assert_eq!(tcs[0].function.name, "get_weather"); assert_eq!(tcs[0].function.arguments["city"], "Boston"); assert_eq!(tcs[0].function.arguments["units"], "celsius"); assert_eq!(tcs[0].id.as_deref(), Some("call_xyz")); } #[test] fn tool_call_arguments_accept_native_json_on_receive() { // Some llama.cpp builds emit arguments as a JSON object directly when // jinja's tool-output strict-string rule isn't applied — accept both. let response_msg = json!({ "role": "assistant", "content": "", "tool_calls": [{ "id": "call_1", "type": "function", "function": { "name": "foo", "arguments": {"nested": {"k": 1}} } }] }); let parsed = LlamaCppClient::openai_message_to_chat(&response_msg).unwrap(); let tc = &parsed.tool_calls.unwrap()[0]; assert_eq!(tc.function.arguments["nested"]["k"], 1); } #[test] fn images_become_content_parts() { let mut msg = ChatMessage::user("What is in this photo?"); msg.images = Some(vec!["BASE64DATA".into()]); let wire = LlamaCppClient::messages_to_openai(&[msg]); let content = wire[0].get("content").and_then(|v| v.as_array()).unwrap(); assert_eq!(content.len(), 2); assert_eq!(content[0]["type"], "text"); assert_eq!(content[0]["text"], "What is in this photo?"); assert_eq!(content[1]["type"], "image_url"); assert_eq!( content[1]["image_url"]["url"], "data:image/jpeg;base64,BASE64DATA" ); } #[test] fn data_url_images_pass_through_unchanged() { let mut msg = ChatMessage::user(""); msg.images = Some(vec!["data:image/png;base64,ABCDEF".into()]); let wire = LlamaCppClient::messages_to_openai(&[msg]); let content = wire[0].get("content").and_then(|v| v.as_array()).unwrap(); assert_eq!(content.len(), 1); assert_eq!( content[0]["image_url"]["url"], "data:image/png;base64,ABCDEF" ); } #[test] fn text_only_message_stays_string() { let msg = ChatMessage::user("hello"); let wire = LlamaCppClient::messages_to_openai(&[msg]); assert_eq!(wire[0]["content"], "hello"); assert!(wire[0]["content"].as_str().is_some()); } #[test] fn tool_result_inherits_tool_call_id_from_prior_assistant() { let assistant = ChatMessage { role: "assistant".into(), content: String::new(), tool_calls: Some(vec![ToolCall { id: Some("call_42".into()), function: ToolCallFunction { name: "lookup".into(), arguments: json!({}), }, }]), images: None, }; let tool_result = ChatMessage::tool_result("found it"); let wire = LlamaCppClient::messages_to_openai(&[assistant, tool_result]); assert_eq!(wire[1]["role"], "tool"); assert_eq!(wire[1]["tool_call_id"], "call_42"); } #[test] fn multiple_tool_results_map_to_sequential_call_ids() { let assistant = ChatMessage { role: "assistant".into(), content: String::new(), tool_calls: Some(vec![ ToolCall { id: Some("call_A".into()), function: ToolCallFunction { name: "a".into(), arguments: json!({}), }, }, ToolCall { id: Some("call_B".into()), function: ToolCallFunction { name: "b".into(), arguments: json!({}), }, }, ]), images: None, }; let r1 = ChatMessage::tool_result("a result"); let r2 = ChatMessage::tool_result("b result"); let wire = LlamaCppClient::messages_to_openai(&[assistant, r1, r2]); assert_eq!(wire[1]["tool_call_id"], "call_A"); assert_eq!(wire[2]["tool_call_id"], "call_B"); } #[test] fn missing_tool_call_id_gets_synthetic_fallback() { let assistant = ChatMessage { role: "assistant".into(), content: String::new(), tool_calls: Some(vec![ToolCall { id: None, function: ToolCallFunction { name: "noid".into(), arguments: json!({}), }, }]), images: None, }; let wire = LlamaCppClient::messages_to_openai(&[assistant]); let tcs = wire[0] .get("tool_calls") .and_then(|v| v.as_array()) .unwrap(); assert_eq!(tcs[0]["id"], "call_0"); } #[test] fn capability_inference_uses_vision_model_and_allowlist() { let mut c = LlamaCppClient::new(None, Some("chat".into())); c.set_vision_model("vision".into()); c.set_vision_models(vec!["qwen-vl".into()]); let m_chat = json!({ "id": "chat" }); let m_vision = json!({ "id": "vision" }); let m_qwen = json!({ "id": "qwen-vl" }); let m_other = json!({ "id": "embed" }); let chat = c.parse_model_capabilities(&m_chat); let vision = c.parse_model_capabilities(&m_vision); let qwen = c.parse_model_capabilities(&m_qwen); let other = c.parse_model_capabilities(&m_other); assert!(!chat.has_vision); assert!(chat.has_tool_calling); assert!(vision.has_vision); assert!(qwen.has_vision); assert!(!other.has_vision); } }