diff --git a/Cargo.lock b/Cargo.lock index 4f04521..2e210ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1843,6 +1843,7 @@ dependencies = [ "actix-web", "actix-web-prom", "anyhow", + "async-trait", "base64", "bcrypt", "blake3", diff --git a/Cargo.toml b/Cargo.toml index 1e606b0..be60128 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,3 +56,4 @@ ical = "0.11" scraper = "0.20" base64 = "0.22" blake3 = "1.5" +async-trait = "0.1" diff --git a/src/ai/llm_client.rs b/src/ai/llm_client.rs new file mode 100644 index 0000000..c1f1bca --- /dev/null +++ b/src/ai/llm_client.rs @@ -0,0 +1,140 @@ +use anyhow::Result; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +/// Provider-agnostic surface for LLM backends (Ollama, OpenRouter, …). +/// +/// Impls translate these canonical shapes at the wire boundary: tool-call +/// arguments stay as `serde_json::Value` in memory and are stringified only +/// when a provider requires it (OpenAI-compatible APIs do), and `images` +/// stays as base64 strings here and is rewritten into content-parts where +/// needed. +// First consumer lands in a later PR (OpenRouter impl + hybrid mode routing). +#[allow(dead_code)] +#[async_trait] +pub trait LlmClient: Send + Sync { + /// Single-shot text generation. Optional system prompt and optional + /// base64 images (ignored by providers without vision support). + async fn generate( + &self, + prompt: &str, + system: Option<&str>, + images: Option>, + ) -> Result; + + /// Multi-turn chat with tool definitions. Returns the assistant message + /// (which may contain tool_calls) plus optional prompt/eval token counts. + async fn chat_with_tools( + &self, + messages: Vec, + tools: Vec, + ) -> Result<(ChatMessage, Option, Option)>; + + /// Batch embedding generation. Dimensionality is provider/model specific. + async fn generate_embeddings(&self, texts: &[&str]) -> Result>>; + + /// One-shot vision description of an image. Used to convert images into + /// plain text for the hybrid-mode conversation flow. + async fn describe_image(&self, image_base64: &str) -> Result; + + /// Enumerate available models with their capabilities. + async fn list_models(&self) -> Result>; + + /// Look up capabilities for a single model. + async fn model_capabilities(&self, model: &str) -> Result; + + /// Primary model identifier this client was constructed with. + fn primary_model(&self) -> &str; +} + +/// Tool definition sent to the model (OpenAI-compatible function schema). +#[derive(Serialize, Clone, Debug)] +pub struct Tool { + #[serde(rename = "type")] + pub tool_type: String, // always "function" + pub function: ToolFunction, +} + +#[derive(Serialize, Clone, Debug)] +pub struct ToolFunction { + pub name: String, + pub description: String, + pub parameters: serde_json::Value, +} + +impl Tool { + pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self { + Self { + tool_type: "function".to_string(), + function: ToolFunction { + name: name.to_string(), + description: description.to_string(), + parameters, + }, + } + } +} + +/// A message in the chat conversation history. +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct ChatMessage { + pub role: String, // "system" | "user" | "assistant" | "tool" + /// Empty string (not null) when tool_calls is present — Ollama quirk. + #[serde(default)] + pub content: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_calls: Option>, + /// Base64 images — only on user messages to vision-capable models. + #[serde(skip_serializing_if = "Option::is_none")] + pub images: Option>, +} + +impl ChatMessage { + pub fn system(content: impl Into) -> Self { + Self { + role: "system".to_string(), + content: content.into(), + tool_calls: None, + images: None, + } + } + pub fn user(content: impl Into) -> Self { + Self { + role: "user".to_string(), + content: content.into(), + tool_calls: None, + images: None, + } + } + pub fn tool_result(content: impl Into) -> Self { + Self { + role: "tool".to_string(), + content: content.into(), + tool_calls: None, + images: None, + } + } +} + +/// Tool call returned by the model in an assistant message. +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct ToolCall { + pub function: ToolCallFunction, + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct ToolCallFunction { + pub name: String, + /// Canonical shape: native JSON. Providers that use JSON-encoded-string + /// arguments (OpenAI-compatible) translate at their wire boundary. + pub arguments: serde_json::Value, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct ModelCapabilities { + pub name: String, + pub has_vision: bool, + pub has_tool_calling: bool, +} diff --git a/src/ai/mod.rs b/src/ai/mod.rs index 4e682fb..f60e553 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -1,6 +1,7 @@ pub mod daily_summary_job; pub mod handlers; pub mod insight_generator; +pub mod llm_client; pub mod ollama; pub mod sms_client; @@ -13,5 +14,9 @@ pub use handlers::{ get_insight_handler, rate_insight_handler, }; pub use insight_generator::InsightGenerator; -pub use ollama::{ModelCapabilities, OllamaClient}; +#[allow(unused_imports)] +pub use llm_client::{ + ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction, +}; +pub use ollama::OllamaClient; pub use sms_client::{SmsApiClient, SmsMessage}; diff --git a/src/ai/ollama.rs b/src/ai/ollama.rs index 184bc61..2cc2cfa 100644 --- a/src/ai/ollama.rs +++ b/src/ai/ollama.rs @@ -1,4 +1,5 @@ use anyhow::{Context, Result}; +use async_trait::async_trait; use chrono::NaiveDate; use reqwest::Client; use serde::{Deserialize, Serialize}; @@ -6,6 +7,14 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; +use crate::ai::llm_client::LlmClient; + +// Re-export shared types so existing `crate::ai::ollama::{...}` imports +// continue to resolve. +pub use crate::ai::llm_client::{ChatMessage, ModelCapabilities, Tool}; +#[allow(unused_imports)] +pub use crate::ai::llm_client::{ToolCall, ToolCallFunction, ToolFunction}; + // Cache duration: 15 minutes const CACHE_DURATION_SECS: u64 = 15 * 60; @@ -818,6 +827,46 @@ Analyze the image and use specific details from both the visual content and the } } +#[async_trait] +impl LlmClient for OllamaClient { + async fn generate( + &self, + prompt: &str, + system: Option<&str>, + images: Option>, + ) -> Result { + self.generate_with_images(prompt, system, images).await + } + + async fn chat_with_tools( + &self, + messages: Vec, + tools: Vec, + ) -> Result<(ChatMessage, Option, Option)> { + OllamaClient::chat_with_tools(self, messages, tools).await + } + + async fn generate_embeddings(&self, texts: &[&str]) -> Result>> { + OllamaClient::generate_embeddings(self, texts).await + } + + async fn describe_image(&self, image_base64: &str) -> Result { + self.generate_photo_description(image_base64).await + } + + async fn list_models(&self) -> Result> { + Self::list_models_with_capabilities(&self.primary_url).await + } + + async fn model_capabilities(&self, model: &str) -> Result { + Self::check_model_capabilities(&self.primary_url, model).await + } + + fn primary_model(&self) -> &str { + &self.primary_model + } +} + #[derive(Serialize)] struct OllamaRequest { model: String, @@ -845,90 +894,6 @@ struct OllamaOptions { min_p: Option, } -/// Tool definition sent in /api/chat requests (OpenAI-compatible format) -#[derive(Serialize, Clone, Debug)] -pub struct Tool { - #[serde(rename = "type")] - pub tool_type: String, // always "function" - pub function: ToolFunction, -} - -#[derive(Serialize, Clone, Debug)] -pub struct ToolFunction { - pub name: String, - pub description: String, - pub parameters: serde_json::Value, -} - -impl Tool { - pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self { - Self { - tool_type: "function".to_string(), - function: ToolFunction { - name: name.to_string(), - description: description.to_string(), - parameters, - }, - } - } -} - -/// A message in the chat conversation history -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct ChatMessage { - pub role: String, // "system" | "user" | "assistant" | "tool" - /// Empty string (not null) when tool_calls is present — Ollama quirk - #[serde(default)] - pub content: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub tool_calls: Option>, - /// Base64 images — only on user messages to vision-capable models - #[serde(skip_serializing_if = "Option::is_none")] - pub images: Option>, -} - -impl ChatMessage { - pub fn system(content: impl Into) -> Self { - Self { - role: "system".to_string(), - content: content.into(), - tool_calls: None, - images: None, - } - } - pub fn user(content: impl Into) -> Self { - Self { - role: "user".to_string(), - content: content.into(), - tool_calls: None, - images: None, - } - } - pub fn tool_result(content: impl Into) -> Self { - Self { - role: "tool".to_string(), - content: content.into(), - tool_calls: None, - images: None, - } - } -} - -/// Tool call returned by the model in an assistant message -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct ToolCall { - pub function: ToolCallFunction, - #[serde(skip_serializing_if = "Option::is_none")] - pub id: Option, -} - -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct ToolCallFunction { - pub name: String, - /// Native JSON object (NOT a JSON-encoded string like OpenAI) - pub arguments: serde_json::Value, -} - #[derive(Serialize)] struct OllamaChatRequest<'a> { model: &'a str, @@ -975,13 +940,6 @@ struct OllamaShowResponse { capabilities: Vec, } -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct ModelCapabilities { - pub name: String, - pub has_vision: bool, - pub has_tool_calling: bool, -} - #[derive(Serialize)] struct OllamaBatchEmbedRequest { model: String,