refactor: introduce LlmClient trait (no-op)

Preparation for a second LLM backend (OpenRouter) and a hybrid
vision-local / chat-remote mode. Shared wire types (ChatMessage, Tool,
ToolCall, etc.) move into a new src/ai/llm_client.rs and are re-exported
from ollama.rs so existing imports keep working. OllamaClient now
implements LlmClient.
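
For reference, a minimal sketch of the trait as it plausibly lands in
src/ai/llm_client.rs, reconstructed from the OllamaClient impl in the
diff below (the Send + Sync supertraits are an assumption, anticipating
the Arc<dyn LlmClient> migration):

    #[async_trait]
    pub trait LlmClient: Send + Sync {
        async fn generate(
            &self,
            prompt: &str,
            system: Option<&str>,
            images: Option<Vec<String>>,
        ) -> Result<String>;

        async fn chat_with_tools(
            &self,
            messages: Vec<ChatMessage>,
            tools: Vec<Tool>,
        ) -> Result<(ChatMessage, Option<i32>, Option<i32>)>;

        async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;
        async fn describe_image(&self, image_base64: &str) -> Result<String>;
        async fn list_models(&self) -> Result<Vec<ModelCapabilities>>;
        async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities>;
        fn primary_model(&self) -> &str;
    }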

No behavior change; callers still hold the concrete OllamaClient. Caller
migration to Arc<dyn LlmClient> is deferred to the PR that wires hybrid
backend routing.
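
A minimal sketch of the deferred caller shape, assuming an already
constructed `ollama_client` (illustrative; no call sites change here):

    use std::sync::Arc;

    let llm: Arc<dyn LlmClient> = Arc::new(ollama_client);
    let reply = llm.generate("ping", None, None).await?;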

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
commit 0073409b3d (parent 702aa8078c)
Author: Cameron
Date:   2026-04-20 22:11:05 -04:00
5 changed files with 197 additions and 92 deletions

src/ai/ollama.rs

@@ -1,4 +1,5 @@
use anyhow::{Context, Result};
use async_trait::async_trait;
use chrono::NaiveDate;
use reqwest::Client;
use serde::{Deserialize, Serialize};
@@ -6,6 +7,14 @@ use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use crate::ai::llm_client::LlmClient;
// Re-export shared types so existing `crate::ai::ollama::{...}` imports
// continue to resolve.
pub use crate::ai::llm_client::{ChatMessage, ModelCapabilities, Tool};
#[allow(unused_imports)]
pub use crate::ai::llm_client::{ToolCall, ToolCallFunction, ToolFunction};
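// Existing call sites such as `use crate::ai::ollama::ChatMessage;` keep
// compiling unchanged; they now resolve through these re-exports.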
// Cache duration: 15 minutes
const CACHE_DURATION_SECS: u64 = 15 * 60;
@@ -818,6 +827,46 @@ Analyze the image and use specific details from both the visual content and the
    }
}

#[async_trait]
impl LlmClient for OllamaClient {
    async fn generate(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String> {
        self.generate_with_images(prompt, system, images).await
    }

    async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
        OllamaClient::chat_with_tools(self, messages, tools).await
    }

    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
        OllamaClient::generate_embeddings(self, texts).await
    }

    async fn describe_image(&self, image_base64: &str) -> Result<String> {
        self.generate_photo_description(image_base64).await
    }

    async fn list_models(&self) -> Result<Vec<ModelCapabilities>> {
        Self::list_models_with_capabilities(&self.primary_url).await
    }

    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities> {
        Self::check_model_capabilities(&self.primary_url, model).await
    }

    fn primary_model(&self) -> &str {
        &self.primary_model
    }
}
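
// Usage sketch (illustrative, not part of this change): code generic over
// the backend can accept the trait, so OpenRouter can slot in later:
//
//     async fn summarize(llm: &dyn LlmClient, text: &str) -> Result<String> {
//         llm.generate(text, Some("Summarize in one sentence."), None).await
//     }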
#[derive(Serialize)]
struct OllamaRequest {
    model: String,
@@ -845,90 +894,6 @@ struct OllamaOptions {
    min_p: Option<f32>,
}
/// Tool definition sent in /api/chat requests (OpenAI-compatible format)
#[derive(Serialize, Clone, Debug)]
pub struct Tool {
    #[serde(rename = "type")]
    pub tool_type: String, // always "function"
    pub function: ToolFunction,
}

#[derive(Serialize, Clone, Debug)]
pub struct ToolFunction {
    pub name: String,
    pub description: String,
    pub parameters: serde_json::Value,
}

impl Tool {
    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
        Self {
            tool_type: "function".to_string(),
            function: ToolFunction {
                name: name.to_string(),
                description: description.to_string(),
                parameters,
            },
        }
    }
}
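
// Example (illustrative): declaring a tool with a JSON Schema parameter spec.
//
//     let tool = Tool::function(
//         "get_weather",
//         "Current weather for a city",
//         serde_json::json!({
//             "type": "object",
//             "properties": { "city": { "type": "string" } },
//             "required": ["city"]
//         }),
//     );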
/// A message in the chat conversation history
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ChatMessage {
    pub role: String, // "system" | "user" | "assistant" | "tool"
    /// Empty string (not null) when tool_calls is present — Ollama quirk
    #[serde(default)]
    pub content: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Base64 images — only on user messages to vision-capable models
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
}

impl ChatMessage {
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: "system".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }

    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: "user".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }

    pub fn tool_result(content: impl Into<String>) -> Self {
        Self {
            role: "tool".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
}
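
// Example (illustrative): building a conversation with the helpers above.
//
//     let messages = vec![
//         ChatMessage::system("You are a helpful assistant."),
//         ChatMessage::user("What's the weather in Boston?"),
//     ];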
/// Tool call returned by the model in an assistant message
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ToolCall {
    pub function: ToolCallFunction,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}

#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ToolCallFunction {
    pub name: String,
    /// Native JSON object (NOT a JSON-encoded string like OpenAI)
    pub arguments: serde_json::Value,
}
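
// Wire-format contrast for `arguments` (illustrative):
//     Ollama returns:  "arguments": {"city": "Boston"}
//     OpenAI returns:  "arguments": "{\"city\": \"Boston\"}"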
#[derive(Serialize)]
struct OllamaChatRequest<'a> {
model: &'a str,
@@ -975,13 +940,6 @@ struct OllamaShowResponse {
capabilities: Vec<String>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ModelCapabilities {
    pub name: String,
    pub has_vision: bool,
    pub has_tool_calling: bool,
}
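
// Illustrative gate on per-model features (hypothetical `caps` / `msg`):
//     if caps.has_vision { msg.images = Some(vec![photo_b64]); }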
#[derive(Serialize)]
struct OllamaBatchEmbedRequest {
    model: String,