refactor: introduce LlmClient trait (no-op)

Preparation for a second LLM backend (OpenRouter) and a hybrid
vision-local / chat-remote mode. Shared wire types (ChatMessage, Tool,
ToolCall, etc.) move into a new src/ai/llm_client.rs and are re-exported
from ollama.rs so existing imports keep working. OllamaClient now
implements LlmClient.
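
For reference, a minimal sketch of the trait as it plausibly lands in
src/ai/llm_client.rs, reconstructed from the OllamaClient impl in the
diff below (the Send + Sync supertraits are an assumption, anticipating
the Arc<dyn LlmClient> migration):

    #[async_trait]
    pub trait LlmClient: Send + Sync {
        async fn generate(
            &self,
            prompt: &str,
            system: Option<&str>,
            images: Option<Vec<String>>,
        ) -> Result<String>;

        async fn chat_with_tools(
            &self,
            messages: Vec<ChatMessage>,
            tools: Vec<Tool>,
        ) -> Result<(ChatMessage, Option<i32>, Option<i32>)>;

        async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;
        async fn describe_image(&self, image_base64: &str) -> Result<String>;
        async fn list_models(&self) -> Result<Vec<ModelCapabilities>>;
        async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities>;
        fn primary_model(&self) -> &str;
    }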

No behavior change; callers still hold the concrete OllamaClient. Caller
migration to Arc<dyn LlmClient> is deferred to the PR that wires hybrid
backend routing.
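
A minimal sketch of the deferred caller shape, assuming an already
constructed `ollama_client` (illustrative; no call sites change here):

    use std::sync::Arc;

    let llm: Arc<dyn LlmClient> = Arc::new(ollama_client);
    let reply = llm.generate("ping", None, None).await?;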

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
commit 0073409b3d (parent 702aa8078c)
Author: Cameron
Date:   2026-04-20 22:11:05 -04:00
5 changed files with 197 additions and 92 deletions

src/ai/ollama.rs

@@ -1,4 +1,5 @@
use anyhow::{Context, Result};
use async_trait::async_trait;
use chrono::NaiveDate;
use reqwest::Client;
use serde::{Deserialize, Serialize};
@@ -6,6 +7,14 @@ use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use crate::ai::llm_client::LlmClient;
// Re-export shared types so existing `crate::ai::ollama::{...}` imports
// continue to resolve.
pub use crate::ai::llm_client::{ChatMessage, ModelCapabilities, Tool};
#[allow(unused_imports)]
pub use crate::ai::llm_client::{ToolCall, ToolCallFunction, ToolFunction};
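// Existing call sites such as `use crate::ai::ollama::ChatMessage;` keep
// compiling unchanged; they now resolve through these re-exports.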
// Cache duration: 15 minutes
const CACHE_DURATION_SECS: u64 = 15 * 60;
@@ -818,6 +827,46 @@ Analyze the image and use specific details from both the visual content and the
    }
}

#[async_trait]
impl LlmClient for OllamaClient {
    async fn generate(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String> {
        self.generate_with_images(prompt, system, images).await
    }

    async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
        OllamaClient::chat_with_tools(self, messages, tools).await
    }

    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
        OllamaClient::generate_embeddings(self, texts).await
    }

    async fn describe_image(&self, image_base64: &str) -> Result<String> {
        self.generate_photo_description(image_base64).await
    }

    async fn list_models(&self) -> Result<Vec<ModelCapabilities>> {
        Self::list_models_with_capabilities(&self.primary_url).await
    }

    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities> {
        Self::check_model_capabilities(&self.primary_url, model).await
    }

    fn primary_model(&self) -> &str {
        &self.primary_model
    }
}
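
// Usage sketch (illustrative, not part of this change): code generic over
// the backend can accept the trait, so OpenRouter can slot in later:
//
//     async fn summarize(llm: &dyn LlmClient, text: &str) -> Result<String> {
//         llm.generate(text, Some("Summarize in one sentence."), None).await
//     }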
#[derive(Serialize)]
struct OllamaRequest {
    model: String,
@@ -845,90 +894,6 @@ struct OllamaOptions {
    min_p: Option<f32>,
}
/// Tool definition sent in /api/chat requests (OpenAI-compatible format)
#[derive(Serialize, Clone, Debug)]
pub struct Tool {
    #[serde(rename = "type")]
    pub tool_type: String, // always "function"
    pub function: ToolFunction,
}

#[derive(Serialize, Clone, Debug)]
pub struct ToolFunction {
    pub name: String,
    pub description: String,
    pub parameters: serde_json::Value,
}

impl Tool {
    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
        Self {
            tool_type: "function".to_string(),
            function: ToolFunction {
                name: name.to_string(),
                description: description.to_string(),
                parameters,
            },
        }
    }
}
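
// Example (illustrative): declaring a tool with a JSON Schema parameter spec.
//
//     let tool = Tool::function(
//         "get_weather",
//         "Current weather for a city",
//         serde_json::json!({
//             "type": "object",
//             "properties": { "city": { "type": "string" } },
//             "required": ["city"]
//         }),
//     );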
/// A message in the chat conversation history
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ChatMessage {
    pub role: String, // "system" | "user" | "assistant" | "tool"
    /// Empty string (not null) when tool_calls is present — Ollama quirk
    #[serde(default)]
    pub content: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Base64 images — only on user messages to vision-capable models
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
}

impl ChatMessage {
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: "system".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }

    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: "user".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }

    pub fn tool_result(content: impl Into<String>) -> Self {
        Self {
            role: "tool".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
}
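
// Example (illustrative): building a conversation with the helpers above.
//
//     let messages = vec![
//         ChatMessage::system("You are a helpful assistant."),
//         ChatMessage::user("What's the weather in Boston?"),
//     ];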
/// Tool call returned by the model in an assistant message
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ToolCall {
    pub function: ToolCallFunction,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}

#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ToolCallFunction {
    pub name: String,
    /// Native JSON object (NOT a JSON-encoded string like OpenAI)
    pub arguments: serde_json::Value,
}
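
// Wire-format contrast for `arguments` (illustrative):
//     Ollama returns:  "arguments": {"city": "Boston"}
//     OpenAI returns:  "arguments": "{\"city\": \"Boston\"}"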
#[derive(Serialize)]
struct OllamaChatRequest<'a> {
model: &'a str,
@@ -975,13 +940,6 @@ struct OllamaShowResponse {
capabilities: Vec<String>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ModelCapabilities {
    pub name: String,
    pub has_vision: bool,
    pub has_tool_calling: bool,
}
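
// Illustrative gate on per-model features (hypothetical `caps` / `msg`):
//     if caps.has_vision { msg.images = Some(vec![photo_b64]); }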
#[derive(Serialize)]
struct OllamaBatchEmbedRequest {
    model: String,