refactor: introduce LlmClient trait (no-op)

Preparation for a second LLM backend (OpenRouter) and hybrid vision-local / chat-remote mode.

Shared wire types (ChatMessage, Tool, ToolCall, etc.) move into a new src/ai/llm_client.rs and are re-exported from ollama.rs so existing imports keep working. OllamaClient now implements LlmClient. No behavior change; callers still hold the concrete OllamaClient. Caller migration to Arc<dyn LlmClient> is deferred to the PR that wires hybrid backend routing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 22:11:05 -04:00
parent 702aa8078c
commit 0073409b3d
5 changed files with 197 additions and 92 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1843,6 +1843,7 @@ dependencies = [
 "actix-web",
 "actix-web-prom",
 "anyhow",
 "async-trait",
 "base64",
 "bcrypt",
 "blake3",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -56,3 +56,4 @@ ical = "0.11"
 scraper = "0.20"
 base64 = "0.22"
 blake3 = "1.5"
 async-trait = "0.1"
--- a/src/ai/llm_client.rs
+++ b/src/ai/llm_client.rs
@@ -0,0 +1,140 @@
 use anyhow::Result;
 use async_trait::async_trait;
 use serde::{Deserialize, Serialize};
 /// Provider-agnostic surface for LLM backends (Ollama, OpenRouter, …).
 ///
 /// Impls translate these canonical shapes at the wire boundary: tool-call
 /// arguments stay as `serde_json::Value` in memory and are stringified only
 /// when a provider requires it (OpenAI-compatible APIs do), and `images`
 /// stays as base64 strings here and is rewritten into content-parts where
 /// needed.
 ///
 /// The `Send + Sync` supertraits let an implementation be shared across
 /// threads (for example behind an `Arc<dyn LlmClient>`). Every fallible
 /// method returns `anyhow::Result`; the concrete failure conditions are
 /// defined by each implementation.
 // First consumer lands in a later PR (OpenRouter impl + hybrid mode routing).
 #[allow(dead_code)]
 #[async_trait]
 pub trait LlmClient: Send + Sync {
    /// Single-shot text generation. Optional system prompt and optional
    /// base64 images (ignored by providers without vision support).
    ///
    /// * `prompt` – the prompt text.
    /// * `system` – optional system prompt.
    /// * `images` – optional list of base64-encoded images, taken by value.
    ///
    /// Returns the generated text.
    async fn generate(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String>;
    /// Multi-turn chat with tool definitions. Returns the assistant message
    /// (which may contain tool_calls) plus optional prompt/eval token counts.
    ///
    /// * `messages` – conversation history, taken by value.
    /// * `tools` – tool definitions, taken by value.
    ///
    /// NOTE(review): the two `Option<i32>` slots are presumably
    /// (prompt tokens, eval tokens) in that order — confirm against the impls.
    async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)>;
    /// Batch embedding generation. Dimensionality is provider/model specific.
    ///
    /// NOTE(review): presumably yields one embedding per entry of `texts`, in
    /// input order — confirm against the impls.
    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;
    /// One-shot vision description of an image. Used to convert images into
    /// plain text for the hybrid-mode conversation flow.
    ///
    /// * `image_base64` – the image, base64-encoded.
    async fn describe_image(&self, image_base64: &str) -> Result<String>;
    /// Enumerate available models with their capabilities.
    async fn list_models(&self) -> Result<Vec<ModelCapabilities>>;
    /// Look up capabilities for a single model.
    ///
    /// * `model` – identifier of the model to inspect.
    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities>;
    /// Primary model identifier this client was constructed with.
    ///
    /// Synchronous and infallible: it hands back a borrowed string.
    fn primary_model(&self) -> &str;
 }
 /// Tool definition sent to the model (OpenAI-compatible function schema).
 ///
 /// Serialize-only: the type derives `Serialize` but not `Deserialize`.
 #[derive(Serialize, Clone, Debug)]
 pub struct Tool {
    /// Tool kind discriminator; serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub tool_type: String, // always "function"
    /// Name, description, and parameters of the function being offered.
    pub function: ToolFunction,
 }
 /// Function payload carried inside a `Tool`. Serialize-only.
 #[derive(Serialize, Clone, Debug)]
 pub struct ToolFunction {
    /// Function name.
    pub name: String,
    /// Description text for the function.
    pub description: String,
    /// Parameters as free-form JSON; nothing in this type validates it.
    /// NOTE(review): presumably a JSON Schema object, as OpenAI-compatible
    /// tool definitions use — confirm against callers.
    pub parameters: serde_json::Value,
 }
 impl Tool {
    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
        Self {
            tool_type: "function".to_string(),
            function: ToolFunction {
                name: name.to_string(),
                description: description.to_string(),
                parameters,
            },
        }
    }
 }
 /// A message in the chat conversation history.
 ///
 /// Derives both `Serialize` and `Deserialize`. `tool_calls` and `images` are
 /// left out of the serialized form when they are `None`, and `content` falls
 /// back to an empty string when it is absent from deserialized input.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ChatMessage {
    /// Speaker role of this message.
    pub role: String, // "system" | "user" | "assistant" | "tool"
    /// Empty string (not null) when tool_calls is present — Ollama quirk.
    #[serde(default)]
    pub content: String,
    /// Tool invocations attached to this message, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Base64 images — only on user messages to vision-capable models.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
 }
 impl ChatMessage {
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: "system".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: "user".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
    pub fn tool_result(content: impl Into<String>) -> Self {
        Self {
            role: "tool".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
 }
 /// Tool call returned by the model in an assistant message.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCall {
    /// Function name and arguments of the call.
    pub function: ToolCallFunction,
    /// Optional call identifier; omitted from serialized output when `None`.
    /// NOTE(review): presumably assigned by the provider and used to match a
    /// tool result to its call — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
 }
 /// Name and arguments of a single tool invocation inside a `ToolCall`.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCallFunction {
    /// Name of the function being called.
    pub name: String,
    /// Canonical shape: native JSON. Providers that use JSON-encoded-string
    /// arguments (OpenAI-compatible) translate at their wire boundary.
    pub arguments: serde_json::Value,
 }
 /// Capability flags for a single model, as returned by
 /// `LlmClient::list_models` and `LlmClient::model_capabilities`.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ModelCapabilities {
    /// Model identifier.
    pub name: String,
    /// Vision flag. NOTE(review): presumably "accepts image input" — confirm.
    pub has_vision: bool,
    /// Tool-calling flag. NOTE(review): presumably "supports tool/function
    /// calls" — confirm.
    pub has_tool_calling: bool,
 }
--- a/src/ai/mod.rs
+++ b/src/ai/mod.rs
@@ -1,6 +1,7 @@
 pub mod daily_summary_job;
 pub mod handlers;
 pub mod insight_generator;
 pub mod llm_client;
 pub mod ollama;
 pub mod sms_client;
@@ -13,5 +14,9 @@ pub use handlers::{
    get_insight_handler, rate_insight_handler,
 };
 pub use insight_generator::InsightGenerator;
-pub use ollama::{ModelCapabilities, OllamaClient};
+#[allow(unused_imports)]
 pub use llm_client::{
    ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
 };
 pub use ollama::OllamaClient;
 pub use sms_client::{SmsApiClient, SmsMessage};
--- a/src/ai/ollama.rs
+++ b/src/ai/ollama.rs
@@ -1,4 +1,5 @@
 use anyhow::{Context, Result};
 use async_trait::async_trait;
 use chrono::NaiveDate;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
@@ -6,6 +7,14 @@ use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 use crate::ai::llm_client::LlmClient;
 // Re-export shared types so existing `crate::ai::ollama::{...}` imports
 // continue to resolve.
 pub use crate::ai::llm_client::{ChatMessage, ModelCapabilities, Tool};
 #[allow(unused_imports)]
 pub use crate::ai::llm_client::{ToolCall, ToolCallFunction, ToolFunction};
 // Cache duration: 15 minutes
 const CACHE_DURATION_SECS: u64 = 15 * 60;
@@ -818,6 +827,46 @@ Analyze the image and use specific details from both the visual content and the
    }
 }
 // `LlmClient` implementation for `OllamaClient`. Each method is a one-line
 // forward to a method or field of `OllamaClient`; no logic is added here.
 #[async_trait]
 impl LlmClient for OllamaClient {
    // Forwards all three arguments unchanged to `generate_with_images`.
    async fn generate(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String> {
        self.generate_with_images(prompt, system, images).await
    }
    // Path-qualified call rather than `self.chat_with_tools(..)`.
    // NOTE(review): presumably this targets an inherent
    // `OllamaClient::chat_with_tools` defined outside this chunk; if no such
    // inherent method exists the path would resolve back to this trait
    // method and recurse — confirm.
    async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
        OllamaClient::chat_with_tools(self, messages, tools).await
    }
    // Same path-qualified form as `chat_with_tools` above; the same
    // NOTE(review) about the inherent method applies.
    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
        OllamaClient::generate_embeddings(self, texts).await
    }
    // Forwards to `generate_photo_description`.
    async fn describe_image(&self, image_base64: &str) -> Result<String> {
        self.generate_photo_description(image_base64).await
    }
    // Calls the associated function `list_models_with_capabilities`, passing
    // a reference to `self.primary_url`.
    async fn list_models(&self) -> Result<Vec<ModelCapabilities>> {
        Self::list_models_with_capabilities(&self.primary_url).await
    }
    // Calls the associated function `check_model_capabilities` with
    // `self.primary_url` and the requested model name.
    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities> {
        Self::check_model_capabilities(&self.primary_url, model).await
    }
    // Borrows the `primary_model` field.
    fn primary_model(&self) -> &str {
        &self.primary_model
    }
 }
 #[derive(Serialize)]
 struct OllamaRequest {
    model: String,
@@ -845,90 +894,6 @@ struct OllamaOptions {
    min_p: Option<f32>,
 }
 /// Tool definition sent in /api/chat requests (OpenAI-compatible format).
 ///
 /// Serialize-only: the type derives `Serialize` but not `Deserialize`.
 #[derive(Serialize, Clone, Debug)]
 pub struct Tool {
    /// Tool kind discriminator; serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub tool_type: String, // always "function"
    /// Name, description, and parameters of the function being offered.
    pub function: ToolFunction,
 }
 /// Function payload carried inside a `Tool`. Serialize-only.
 #[derive(Serialize, Clone, Debug)]
 pub struct ToolFunction {
    /// Function name.
    pub name: String,
    /// Description text for the function.
    pub description: String,
    /// Parameters as free-form JSON; nothing in this type validates it.
    /// NOTE(review): presumably a JSON Schema object — confirm against callers.
    pub parameters: serde_json::Value,
 }
 impl Tool {
    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
        Self {
            tool_type: "function".to_string(),
            function: ToolFunction {
                name: name.to_string(),
                description: description.to_string(),
                parameters,
            },
        }
    }
 }
 /// A message in the chat conversation history.
 ///
 /// Derives both `Serialize` and `Deserialize`. `tool_calls` and `images` are
 /// left out of the serialized form when they are `None`, and `content` falls
 /// back to an empty string when it is absent from deserialized input.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ChatMessage {
    /// Speaker role of this message.
    pub role: String, // "system" | "user" | "assistant" | "tool"
    /// Empty string (not null) when tool_calls is present — Ollama quirk.
    #[serde(default)]
    pub content: String,
    /// Tool invocations attached to this message, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Base64 images — only on user messages to vision-capable models.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
 }
 impl ChatMessage {
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: "system".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: "user".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
    pub fn tool_result(content: impl Into<String>) -> Self {
        Self {
            role: "tool".to_string(),
            content: content.into(),
            tool_calls: None,
            images: None,
        }
    }
 }
 /// Tool call returned by the model in an assistant message.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCall {
    /// Function name and arguments of the call.
    pub function: ToolCallFunction,
    /// Optional call identifier; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
 }
 /// Name and arguments of a single tool invocation inside a `ToolCall`.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ToolCallFunction {
    /// Name of the function being called.
    pub name: String,
    /// Native JSON object (NOT a JSON-encoded string like OpenAI).
    pub arguments: serde_json::Value,
 }
 #[derive(Serialize)]
 struct OllamaChatRequest<'a> {
    model: &'a str,
@@ -975,13 +940,6 @@ struct OllamaShowResponse {
    capabilities: Vec<String>,
 }
 /// Capability flags for a single model.
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct ModelCapabilities {
    /// Model identifier.
    pub name: String,
    /// Vision flag. NOTE(review): presumably "accepts image input" — confirm.
    pub has_vision: bool,
    /// Tool-calling flag. NOTE(review): presumably "supports tool/function
    /// calls" — confirm.
    pub has_tool_calling: bool,
 }
 #[derive(Serialize)]
 struct OllamaBatchEmbedRequest {
    model: String,