feat: add tool-calling types, chat_with_tools(), and has_tool_calling capability detection

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-18 22:55:20 -04:00
parent 8196ef94a0
commit 5e5a2a3167
1 changed files with 212 additions and 4 deletions
@@ -1,4 +1,4 @@
-use anyhow::Result;
+use anyhow::{Context, Result};
 use chrono::NaiveDate;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
@@ -176,10 +176,13 @@ impl OllamaClient {
        // Check if "vision" is in the capabilities array
        let has_vision = show_response.capabilities.iter().any(|cap| cap == "vision");
        // Check if "tools" is in the capabilities array
        let has_tool_calling = show_response.capabilities.iter().any(|cap| cap == "tools");
        Ok(ModelCapabilities {
            name: model_name.to_string(),
            has_vision,
            has_tool_calling,
        })
    }
@@ -206,10 +209,11 @@ impl OllamaClient {
                Ok(cap) => capabilities.push(cap),
                Err(e) => {
                    log::warn!("Failed to get capabilities for model {}: {}", model_name, e);
-                    // Fallback: assume no vision if we can't check
+                    // Fallback: assume no vision/tools if we can't check
                    capabilities.push(ModelCapabilities {
                        name: model_name,
                        has_vision: false,
                        has_tool_calling: false,
                    });
                }
            }
@@ -254,7 +258,7 @@ impl OllamaClient {
            prompt: prompt.to_string(),
            stream: false,
            system: system.map(|s| s.to_string()),
-            options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: ctx }),
+            options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) }),
            images,
        };
@@ -496,6 +500,119 @@ Analyze the image and use specific details from both the visual content and the
        Ok(description.trim().to_string())
    }
    /// Send a chat request with tool definitions to /api/chat.
    ///
    /// Returns the assistant's response message (may contain tool_calls or final content).
    /// The primary server is tried first with the primary model. If that attempt fails and
    /// a fallback URL is configured, the request is retried there with the fallback model
    /// (or with the primary model when no fallback model is configured).
    ///
    /// # Errors
    /// Returns the primary error when no fallback server is configured, or an error
    /// describing both failures when the fallback attempt fails as well.
    pub async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<ChatMessage> {
        log::info!(
            "Attempting chat_with_tools with primary server: {} (model: {})",
            self.primary_url,
            self.primary_model
        );

        // Both attempts borrow the history and tool list, so the (possibly large)
        // message history is never duplicated just to keep it around for a retry.
        let primary_err = match self
            .try_chat_with_tools(&self.primary_url, &self.primary_model, &messages, &tools)
            .await
        {
            Ok(response) => {
                log::info!("Successfully got chat_with_tools response from primary server");
                return Ok(response);
            }
            Err(e) => e,
        };
        // `{:#}` prints the full anyhow context chain, so the underlying cause
        // (not only the outermost context message) ends up in the log.
        log::warn!("Primary server chat_with_tools failed: {:#}", primary_err);

        let fallback_url = match self.fallback_url.as_deref() {
            Some(url) => url,
            None => {
                log::error!("No fallback server configured");
                return Err(primary_err);
            }
        };
        let fallback_model = self
            .fallback_model
            .as_deref()
            .unwrap_or(&self.primary_model);
        log::info!(
            "Attempting chat_with_tools with fallback server: {} (model: {})",
            fallback_url,
            fallback_model
        );

        match self
            .try_chat_with_tools(fallback_url, fallback_model, &messages, &tools)
            .await
        {
            Ok(response) => {
                log::info!("Successfully got chat_with_tools response from fallback server");
                Ok(response)
            }
            Err(fallback_err) => {
                log::error!(
                    "Fallback server chat_with_tools also failed: {:#}",
                    fallback_err
                );
                Err(anyhow::anyhow!(
                    "Both primary and fallback servers failed. Primary: {:#}, Fallback: {:#}",
                    primary_err,
                    fallback_err
                ))
            }
        }
    }
    /// Perform a single non-streaming POST to `{base_url}/api/chat` using `model`.
    ///
    /// The model is passed explicitly by the caller instead of being inferred from the
    /// URL, so the fallback model is honored even when the primary and fallback servers
    /// share the same URL.
    ///
    /// # Errors
    /// Fails when the request cannot be sent, when the server answers with a non-success
    /// status (the response body is included in the error message), or when the response
    /// body cannot be parsed as a chat response.
    async fn try_chat_with_tools(
        &self,
        base_url: &str,
        model: &str,
        messages: &[ChatMessage],
        tools: &[Tool],
    ) -> Result<ChatMessage> {
        let url = format!("{}/api/chat", base_url);
        let options = self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) });
        let request_body = OllamaChatRequest {
            model,
            messages,
            stream: false,
            // The request struct owns its tool list, so copy it for this attempt.
            tools: tools.to_vec(),
            options,
        };
        let response = self
            .client
            .post(&url)
            .json(&request_body)
            .send()
            .await
            .with_context(|| format!("Failed to connect to Ollama at {}", url))?;
        if !response.status().is_success() {
            let status = response.status();
            // Best effort: an unreadable error body is reported as an empty string.
            let body = response.text().await.unwrap_or_default();
            anyhow::bail!("Ollama chat request failed with status {}: {}", status, body);
        }
        let chat_response: OllamaChatResponse = response
            .json()
            .await
            .context("Failed to parse Ollama chat response")?;
        Ok(chat_response.message)
    }
    /// Generate an embedding vector for text using nomic-embed-text:v1.5
    /// Returns a 768-dimensional vector as Vec<f32>
    pub async fn generate_embedding(&self, text: &str) -> Result<Vec<f32>> {
@@ -640,7 +757,97 @@ struct OllamaRequest {
 #[derive(Serialize)]
 struct OllamaOptions {
-    num_ctx: i32,
+    num_ctx: Option<i32>,
 }
 /// Tool definition sent in /api/chat requests (OpenAI-compatible format)
 ///
 /// Serializes as an object with a `type` key and a `function` key.
 #[derive(Serialize, Clone, Debug)]
 pub struct Tool {
    /// Kind of tool; written to JSON under the key `type` instead of `tool_type`.
    #[serde(rename = "type")]
    pub tool_type: String, // always "function"
    /// Name, description and parameters of the function this tool exposes.
    pub function: ToolFunction,
 }
 /// Function description nested under the `function` key of a [`Tool`].
 #[derive(Serialize, Clone, Debug)]
 pub struct ToolFunction {
    /// Name of the function.
    pub name: String,
    /// Free-text description of the function.
    pub description: String,
    /// Arbitrary JSON value describing the function's parameters
    /// (presumably a JSON Schema object — confirm against callers).
    pub parameters: serde_json::Value,
 }
 impl Tool {
    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
        Self {
            tool_type: "function".to_string(),
            function: ToolFunction {
                name: name.to_string(),
                description: description.to_string(),
                parameters,
            },
        }
    }
 }
 /// A message in the chat conversation history
 ///
 /// The same type is serialized as part of the outgoing chat request and
 /// deserialized from the `message` field of the chat response.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ChatMessage {
    pub role: String, // "system" | "user" | "assistant" | "tool"
    /// Empty string (not null) when tool_calls is present — Ollama quirk.
    /// Falls back to an empty string when the field is missing on deserialization.
    #[serde(default)]
    pub content: String,
    /// Tool calls attached to the message; left out of the serialized JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Base64 images — only on user messages to vision-capable models.
    /// Left out of the serialized JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
 }
 impl ChatMessage {
    /// Shared constructor: a message with the given role and text, carrying
    /// neither tool calls nor images.
    fn text_only(role: &str, content: String) -> Self {
        Self {
            role: role.to_string(),
            content,
            tool_calls: None,
            images: None,
        }
    }
    /// Create a message with role `"system"`.
    pub fn system(content: impl Into<String>) -> Self {
        Self::text_only("system", content.into())
    }
    /// Create a message with role `"user"`.
    pub fn user(content: impl Into<String>) -> Self {
        Self::text_only("user", content.into())
    }
    /// Create a message with role `"tool"` holding a tool's result text.
    pub fn tool_result(content: impl Into<String>) -> Self {
        Self::text_only("tool", content.into())
    }
 }
 /// Tool call returned by the model in an assistant message
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCall {
    /// Name and arguments of the function the call refers to.
    pub function: ToolCallFunction,
    /// Optional call identifier; left out of the serialized JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
 }
 /// Function name and arguments carried by a [`ToolCall`].
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCallFunction {
    /// Name of the function being called.
    pub name: String,
    /// Native JSON object (NOT a JSON-encoded string like OpenAI)
    pub arguments: serde_json::Value,
 }
 /// Request body serialized for the `/api/chat` endpoint.
 ///
 /// The model name and message history are borrowed for `'a`; the tool list
 /// and options are owned.
 #[derive(Serialize)]
 struct OllamaChatRequest<'a> {
    /// Name of the model to run.
    model: &'a str,
    /// Conversation history sent with the request.
    messages: &'a [ChatMessage],
    /// Streaming flag passed through to the server.
    stream: bool,
    /// Tool definitions; the key is left out of the JSON when the list is empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    tools: Vec<Tool>,
    /// Extra generation options; the key is left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    options: Option<OllamaOptions>,
 }
 /// Response body deserialized from a chat request.
 #[derive(Deserialize, Debug)]
 struct OllamaChatResponse {
    /// The assistant message returned by the server.
    message: ChatMessage,
    /// Required in the response (no serde default); parsed but marked as unused.
    #[allow(dead_code)]
    done: bool,
    /// Falls back to an empty string when missing; parsed but marked as unused.
    #[serde(default)]
    #[allow(dead_code)]
    done_reason: String,
 }
 #[derive(Deserialize)]
@@ -668,6 +875,7 @@ struct OllamaShowResponse {
 pub struct ModelCapabilities {
    /// Model name the capabilities belong to.
    pub name: String,
    /// True when the model's reported capabilities include "vision";
    /// false is also used as the fallback when capabilities cannot be fetched.
    pub has_vision: bool,
    /// True when the model's reported capabilities include "tools";
    /// false is also used as the fallback when capabilities cannot be fetched.
    pub has_tool_calling: bool,
 }
 #[derive(Serialize)]