OpenRouter Support, Insight Chat and User injection #56

Merged
cameron merged 24 commits from 005-llm-client-trait into master 2026-04-26 23:01:35 +00:00
5 changed files with 197 additions and 92 deletions
Showing only changes of commit 0073409b3d - Show all commits

1
Cargo.lock generated
View File

@@ -1843,6 +1843,7 @@ dependencies = [
"actix-web", "actix-web",
"actix-web-prom", "actix-web-prom",
"anyhow", "anyhow",
"async-trait",
"base64", "base64",
"bcrypt", "bcrypt",
"blake3", "blake3",

View File

@@ -56,3 +56,4 @@ ical = "0.11"
scraper = "0.20" scraper = "0.20"
base64 = "0.22" base64 = "0.22"
blake3 = "1.5" blake3 = "1.5"
async-trait = "0.1"

140
src/ai/llm_client.rs Normal file
View File

@@ -0,0 +1,140 @@
use anyhow::Result;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
/// Backend-neutral interface implemented by each LLM provider (Ollama,
/// OpenRouter, …).
///
/// The types used here are the canonical in-memory shapes; every impl converts
/// them at its own wire boundary. Tool-call arguments are held as
/// `serde_json::Value` and are only stringified for providers that require it
/// (OpenAI-compatible APIs do). `images` are carried as base64 strings and are
/// rewritten into content-parts where a provider needs that form.
// No consumer exists yet: the first one arrives in a later PR (OpenRouter impl
// + hybrid mode routing), which is why dead code is allowed here.
#[allow(dead_code)]
#[async_trait]
pub trait LlmClient: Send + Sync {
    /// Generates text for a single prompt.
    ///
    /// `system` is an optional system prompt. `images` is an optional list of
    /// base64 images; providers without vision support ignore it.
    async fn generate(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String>;

    /// Runs a multi-turn chat in which the model is offered `tools`.
    ///
    /// Yields the assistant message (which may carry `tool_calls`) followed by
    /// the optional prompt and eval token counts.
    async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)>;

    /// Produces embeddings for a batch of texts. The vector dimensionality
    /// depends on the provider and model.
    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;

    /// Describes a base64 image in plain text in one shot. The hybrid-mode
    /// conversation flow uses this to turn images into text.
    async fn describe_image(&self, image_base64: &str) -> Result<String>;

    /// Lists the available models together with their capabilities.
    async fn list_models(&self) -> Result<Vec<ModelCapabilities>>;

    /// Fetches the capabilities of one model, identified by `model`.
    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities>;

    /// Identifier of the primary model this client was constructed with.
    fn primary_model(&self) -> &str;
}
/// A tool offered to the model, expressed in the OpenAI-compatible function
/// schema.
#[derive(Clone, Debug, Serialize)]
pub struct Tool {
    /// Tool kind, serialized under the key `type`; always "function".
    #[serde(rename = "type")]
    pub tool_type: String,
    /// Name, description and parameters of the callable function.
    pub function: ToolFunction,
}
/// The function half of a [`Tool`] definition.
#[derive(Clone, Debug, Serialize)]
pub struct ToolFunction {
    /// Name of the function.
    pub name: String,
    /// Free-text description of the function.
    pub description: String,
    /// Parameter description as arbitrary JSON (presumably a JSON Schema
    /// object — confirm against the callers that build it).
    pub parameters: serde_json::Value,
}
impl Tool {
pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
Self {
tool_type: "function".to_string(),
function: ToolFunction {
name: name.to_string(),
description: description.to_string(),
parameters,
},
}
}
}
/// One entry in the chat conversation history.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ChatMessage {
    /// Speaker of the message: "system" | "user" | "assistant" | "tool".
    pub role: String,
    /// Message text. When `tool_calls` is present this is an empty string
    /// rather than null (Ollama quirk); a missing field deserializes to the
    /// empty string as well.
    #[serde(default)]
    pub content: String,
    /// Tool calls attached to the message; left out of the serialized form
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Base64 images, only on user messages sent to vision-capable models;
    /// left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
}
impl ChatMessage {
pub fn system(content: impl Into<String>) -> Self {
Self {
role: "system".to_string(),
content: content.into(),
tool_calls: None,
images: None,
}
}
pub fn user(content: impl Into<String>) -> Self {
Self {
role: "user".to_string(),
content: content.into(),
tool_calls: None,
images: None,
}
}
pub fn tool_result(content: impl Into<String>) -> Self {
Self {
role: "tool".to_string(),
content: content.into(),
tool_calls: None,
images: None,
}
}
}
/// A tool invocation the model returned inside an assistant message.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ToolCall {
    /// Which function is being called, and with what arguments.
    pub function: ToolCallFunction,
    /// Optional call id; left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}
/// Function name and arguments of a [`ToolCall`].
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ToolCallFunction {
    /// Name of the function being called.
    pub name: String,
    /// Arguments in the canonical shape: native JSON. Providers whose wire
    /// format uses a JSON-encoded string instead (OpenAI-compatible) convert
    /// at their wire boundary.
    pub arguments: serde_json::Value,
}
/// Capability flags recorded for a single model.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ModelCapabilities {
    /// Model identifier.
    pub name: String,
    /// Vision capability flag.
    pub has_vision: bool,
    /// Tool-calling capability flag.
    pub has_tool_calling: bool,
}

View File

@@ -1,6 +1,7 @@
pub mod daily_summary_job; pub mod daily_summary_job;
pub mod handlers; pub mod handlers;
pub mod insight_generator; pub mod insight_generator;
pub mod llm_client;
pub mod ollama; pub mod ollama;
pub mod sms_client; pub mod sms_client;
@@ -13,5 +14,9 @@ pub use handlers::{
get_insight_handler, rate_insight_handler, get_insight_handler, rate_insight_handler,
}; };
pub use insight_generator::InsightGenerator; pub use insight_generator::InsightGenerator;
pub use ollama::{ModelCapabilities, OllamaClient}; #[allow(unused_imports)]
pub use llm_client::{
ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
};
pub use ollama::OllamaClient;
pub use sms_client::{SmsApiClient, SmsMessage}; pub use sms_client::{SmsApiClient, SmsMessage};

View File

@@ -1,4 +1,5 @@
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use async_trait::async_trait;
use chrono::NaiveDate; use chrono::NaiveDate;
use reqwest::Client; use reqwest::Client;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -6,6 +7,14 @@ use std::collections::HashMap;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use crate::ai::llm_client::LlmClient;
// Re-export shared types so existing `crate::ai::ollama::{...}` imports
// continue to resolve.
pub use crate::ai::llm_client::{ChatMessage, ModelCapabilities, Tool};
#[allow(unused_imports)]
pub use crate::ai::llm_client::{ToolCall, ToolCallFunction, ToolFunction};
// Cache duration: 15 minutes // Cache duration: 15 minutes
const CACHE_DURATION_SECS: u64 = 15 * 60; const CACHE_DURATION_SECS: u64 = 15 * 60;
@@ -818,6 +827,46 @@ Analyze the image and use specific details from both the visual content and the
} }
} }
/// `LlmClient` for Ollama: every method forwards to an existing
/// `OllamaClient` method or field.
#[async_trait]
impl LlmClient for OllamaClient {
    /// Forwards to `generate_with_images`.
    async fn generate(
        &self,
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
    ) -> Result<String> {
        OllamaClient::generate_with_images(self, prompt, system, images).await
    }

    /// Forwards to the `OllamaClient::chat_with_tools` method. The explicit
    /// path form is kept so the call is spelled as the type's own method
    /// rather than as a method call on `self`.
    async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
        OllamaClient::chat_with_tools(self, messages, tools).await
    }

    /// Forwards to the `OllamaClient::generate_embeddings` method, again via
    /// the explicit path form.
    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
        OllamaClient::generate_embeddings(self, texts).await
    }

    /// Forwards to `generate_photo_description`.
    async fn describe_image(&self, image_base64: &str) -> Result<String> {
        OllamaClient::generate_photo_description(self, image_base64).await
    }

    /// Lists models with capabilities, passing this client's `primary_url`.
    async fn list_models(&self) -> Result<Vec<ModelCapabilities>> {
        let base_url = &self.primary_url;
        OllamaClient::list_models_with_capabilities(base_url).await
    }

    /// Checks the capabilities of `model`, passing this client's
    /// `primary_url`.
    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities> {
        let base_url = &self.primary_url;
        OllamaClient::check_model_capabilities(base_url, model).await
    }

    /// Borrows the `primary_model` field.
    fn primary_model(&self) -> &str {
        &self.primary_model
    }
}
#[derive(Serialize)] #[derive(Serialize)]
struct OllamaRequest { struct OllamaRequest {
model: String, model: String,
@@ -845,90 +894,6 @@ struct OllamaOptions {
min_p: Option<f32>, min_p: Option<f32>,
} }
/// A tool definition included in /api/chat requests, in the OpenAI-compatible
/// format.
#[derive(Clone, Debug, Serialize)]
pub struct Tool {
    /// Tool kind, serialized under the key `type`; always "function".
    #[serde(rename = "type")]
    pub tool_type: String,
    /// Name, description and parameters of the callable function.
    pub function: ToolFunction,
}
/// The function half of a [`Tool`] definition.
#[derive(Clone, Debug, Serialize)]
pub struct ToolFunction {
    /// Name of the function.
    pub name: String,
    /// Free-text description of the function.
    pub description: String,
    /// Parameter description as arbitrary JSON (presumably a JSON Schema
    /// object — confirm against the callers that build it).
    pub parameters: serde_json::Value,
}
impl Tool {
pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
Self {
tool_type: "function".to_string(),
function: ToolFunction {
name: name.to_string(),
description: description.to_string(),
parameters,
},
}
}
}
/// One entry in the chat conversation history.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ChatMessage {
    /// Speaker of the message: "system" | "user" | "assistant" | "tool".
    pub role: String,
    /// Message text. When `tool_calls` is present this is an empty string
    /// rather than null (Ollama quirk); a missing field deserializes to the
    /// empty string as well.
    #[serde(default)]
    pub content: String,
    /// Tool calls attached to the message; left out of the serialized form
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Base64 images, only on user messages sent to vision-capable models;
    /// left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
}
impl ChatMessage {
pub fn system(content: impl Into<String>) -> Self {
Self {
role: "system".to_string(),
content: content.into(),
tool_calls: None,
images: None,
}
}
pub fn user(content: impl Into<String>) -> Self {
Self {
role: "user".to_string(),
content: content.into(),
tool_calls: None,
images: None,
}
}
pub fn tool_result(content: impl Into<String>) -> Self {
Self {
role: "tool".to_string(),
content: content.into(),
tool_calls: None,
images: None,
}
}
}
/// A tool invocation the model returned inside an assistant message.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ToolCall {
    /// Which function is being called, and with what arguments.
    pub function: ToolCallFunction,
    /// Optional call id; left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}
/// Function name and arguments of a [`ToolCall`].
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ToolCallFunction {
    /// Name of the function being called.
    pub name: String,
    /// Arguments as a native JSON object (NOT a JSON-encoded string like
    /// OpenAI).
    pub arguments: serde_json::Value,
}
#[derive(Serialize)] #[derive(Serialize)]
struct OllamaChatRequest<'a> { struct OllamaChatRequest<'a> {
model: &'a str, model: &'a str,
@@ -975,13 +940,6 @@ struct OllamaShowResponse {
capabilities: Vec<String>, capabilities: Vec<String>,
} }
/// Capability flags recorded for a single model.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ModelCapabilities {
    /// Model identifier.
    pub name: String,
    /// Vision capability flag.
    pub has_vision: bool,
    /// Tool-calling capability flag.
    pub has_tool_calling: bool,
}
#[derive(Serialize)] #[derive(Serialize)]
struct OllamaBatchEmbedRequest { struct OllamaBatchEmbedRequest {
model: String, model: String,