diff --git a/Cargo.lock b/Cargo.lock
index 4f04521..2e210ea 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1843,6 +1843,7 @@ dependencies = [
  "actix-web",
  "actix-web-prom",
  "anyhow",
+ "async-trait",
  "base64",
  "bcrypt",
  "blake3",
diff --git a/Cargo.toml b/Cargo.toml
index 1e606b0..be60128 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -56,3 +56,4 @@ ical = "0.11"
 scraper = "0.20"
 base64 = "0.22"
 blake3 = "1.5"
+async-trait = "0.1"
diff --git a/src/ai/llm_client.rs b/src/ai/llm_client.rs
new file mode 100644
index 0000000..c1f1bca
--- /dev/null
+++ b/src/ai/llm_client.rs
@@ -0,0 +1,140 @@
+use anyhow::Result;
+use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
+
+/// Provider-agnostic surface for LLM backends (Ollama, OpenRouter, …).
+///
+/// Impls translate these canonical shapes at the wire boundary: tool-call
+/// arguments stay as `serde_json::Value` in memory and are stringified only
+/// when a provider requires it (OpenAI-compatible APIs do), and `images`
+/// stays as base64 strings here and is rewritten into content-parts where
+/// needed.
+// Unused so far: first consumer lands in a later PR (OpenRouter impl + hybrid mode routing).
+#[allow(dead_code)]
+#[async_trait]
+pub trait LlmClient: Send + Sync {
+    /// Single-shot text generation. Optional system prompt and optional
+    /// base64 images (ignored by providers without vision support).
+    async fn generate(
+        &self,
+        prompt: &str,
+        system: Option<&str>,
+        images: Option<Vec<String>>,
+    ) -> Result<String>;
+
+    /// Multi-turn chat with tool definitions. Returns `(assistant message, prompt
+    /// tokens, eval tokens)`; the message may contain tool_calls, counts are optional.
+    async fn chat_with_tools(
+        &self,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)>;
+
+    /// Batch embedding generation. Dimensionality is provider/model specific.
+    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;
+
+    /// One-shot vision description of an image. Used to convert images into
+    /// plain text for the hybrid-mode conversation flow.
+    async fn describe_image(&self, image_base64: &str) -> Result<String>;
+
+    /// Enumerate available models with their capabilities.
+    async fn list_models(&self) -> Result<Vec<ModelCapabilities>>;
+
+    /// Look up capabilities for a single model.
+    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities>;
+
+    /// Primary model identifier this client was constructed with.
+    fn primary_model(&self) -> &str;
+}
+
+/// Tool definition sent to the model (OpenAI-compatible function schema).
+#[derive(Serialize, Clone, Debug)]
+pub struct Tool {
+    #[serde(rename = "type")]
+    pub tool_type: String, // always "function"
+    pub function: ToolFunction, // name, description and parameters of the callable
+}
+
+#[derive(Serialize, Clone, Debug)]
+pub struct ToolFunction { // serialize-only: there is no Deserialize derive
+    pub name: String,
+    pub description: String,
+    pub parameters: serde_json::Value, // presumably a JSON Schema object — TODO confirm
+}
+
+impl Tool { // convenience constructor; copies name/description into owned Strings
+    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
+        Self {
+            tool_type: "function".to_string(), // the fixed `type` discriminator
+            function: ToolFunction {
+                name: name.to_string(),
+                description: description.to_string(),
+                parameters,
+            },
+        }
+    }
+}
+
+/// A message in the chat conversation history.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct ChatMessage {
+    pub role: String, // "system" | "user" | "assistant" | "tool"
+    /// Empty string (not null) when tool_calls is present — Ollama quirk.
+    #[serde(default)] // a missing `content` key deserializes to ""
+    pub content: String,
+    #[serde(skip_serializing_if = "Option::is_none")] // key omitted from output when None
+    pub tool_calls: Option<Vec<ToolCall>>,
+    /// Base64 images — only on user messages to vision-capable models.
+    #[serde(skip_serializing_if = "Option::is_none")] // key omitted from output when None
+    pub images: Option<Vec<String>>,
+}
+
+impl ChatMessage { // text-only constructors: tool_calls and images always start as None
+    pub fn system(content: impl Into<String>) -> Self {
+        Self {
+            role: "system".to_string(),
+            content: content.into(),
+            tool_calls: None,
+            images: None,
+        }
+    }
+    pub fn user(content: impl Into<String>) -> Self {
+        Self {
+            role: "user".to_string(),
+            content: content.into(),
+            tool_calls: None,
+            images: None,
+        }
+    }
+    pub fn tool_result(content: impl Into<String>) -> Self { // NOTE(review): no call id carried
+        Self {
+            role: "tool".to_string(), // OpenAI-style APIs expect a tool_call_id here — confirm
+            content: content.into(),
+            tool_calls: None,
+            images: None,
+        }
+    }
+}
+
+/// Tool call returned by the model in an assistant message.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct ToolCall {
+    pub function: ToolCallFunction,
+    #[serde(skip_serializing_if = "Option::is_none")] // `id` key omitted from output when None
+    pub id: Option<String>,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct ToolCallFunction { // the `function` payload of a `ToolCall`
+    pub name: String,
+    /// Canonical shape: native JSON. Providers that use JSON-encoded-string
+    /// arguments (OpenAI-compatible) translate at their wire boundary.
+    pub arguments: serde_json::Value,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct ModelCapabilities { // returned by LlmClient::list_models / model_capabilities
+    pub name: String,
+    pub has_vision: bool,
+    pub has_tool_calling: bool,
+}
diff --git a/src/ai/mod.rs b/src/ai/mod.rs
index 4e682fb..f60e553 100644
--- a/src/ai/mod.rs
+++ b/src/ai/mod.rs
@@ -1,6 +1,7 @@
 pub mod daily_summary_job;
 pub mod handlers;
 pub mod insight_generator;
+pub mod llm_client;
 pub mod ollama;
 pub mod sms_client;
 
@@ -13,5 +14,9 @@ pub use handlers::{
     get_insight_handler, rate_insight_handler,
 };
 pub use insight_generator::InsightGenerator;
-pub use ollama::{ModelCapabilities, OllamaClient};
+#[allow(unused_imports)] // presumably: re-exported ahead of their first consumer — confirm
+pub use llm_client::{
+    ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
+};
+pub use ollama::OllamaClient;
 pub use sms_client::{SmsApiClient, SmsMessage};
diff --git a/src/ai/ollama.rs b/src/ai/ollama.rs
index 184bc61..2cc2cfa 100644
--- a/src/ai/ollama.rs
+++ b/src/ai/ollama.rs
@@ -1,4 +1,5 @@
 use anyhow::{Context, Result};
+use async_trait::async_trait;
 use chrono::NaiveDate;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
@@ -6,6 +7,14 @@ use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 
+use crate::ai::llm_client::LlmClient;
+
+// Re-export shared types so existing `crate::ai::ollama::{...}` imports
+// continue to resolve.
+pub use crate::ai::llm_client::{ChatMessage, ModelCapabilities, Tool};
+#[allow(unused_imports)]
+pub use crate::ai::llm_client::{ToolCall, ToolCallFunction, ToolFunction};
+
 // Cache duration: 15 minutes
 const CACHE_DURATION_SECS: u64 = 15 * 60;
 
@@ -818,6 +827,46 @@ Analyze the image and use specific details from both the visual content and the
     }
 }
 
+#[async_trait]
+impl LlmClient for OllamaClient { // thin adapter: every method forwards to an OllamaClient fn
+    async fn generate(
+        &self,
+        prompt: &str,
+        system: Option<&str>,
+        images: Option<Vec<String>>,
+    ) -> Result<String> {
+        self.generate_with_images(prompt, system, images).await
+    }
+
+    async fn chat_with_tools(
+        &self,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
+        OllamaClient::chat_with_tools(self, messages, tools).await // presumably the inherent fn
+    }
+
+    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
+        OllamaClient::generate_embeddings(self, texts).await // likewise: same-named inherent fn
+    }
+
+    async fn describe_image(&self, image_base64: &str) -> Result<String> {
+        self.generate_photo_description(image_base64).await
+    }
+
+    async fn list_models(&self) -> Result<Vec<ModelCapabilities>> {
+        Self::list_models_with_capabilities(&self.primary_url).await // primary_url only
+    }
+
+    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities> {
+        Self::check_model_capabilities(&self.primary_url, model).await // primary_url only
+    }
+
+    fn primary_model(&self) -> &str {
+        &self.primary_model
+    }
+}
+
 #[derive(Serialize)]
 struct OllamaRequest {
     model: String,
@@ -845,90 +894,6 @@ struct OllamaOptions {
     min_p: Option<f32>,
 }
 
-/// Tool definition sent in /api/chat requests (OpenAI-compatible format)
-#[derive(Serialize, Clone, Debug)]
-pub struct Tool {
-    #[serde(rename = "type")]
-    pub tool_type: String, // always "function"
-    pub function: ToolFunction,
-}
-
-#[derive(Serialize, Clone, Debug)]
-pub struct ToolFunction {
-    pub name: String,
-    pub description: String,
-    pub parameters: serde_json::Value,
-}
-
-impl Tool {
-    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
-        Self {
-            tool_type: "function".to_string(),
-            function: ToolFunction {
-                name: name.to_string(),
-                description: description.to_string(),
-                parameters,
-            },
-        }
-    }
-}
-
-/// A message in the chat conversation history
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct ChatMessage {
-    pub role: String, // "system" | "user" | "assistant" | "tool"
-    /// Empty string (not null) when tool_calls is present — Ollama quirk
-    #[serde(default)]
-    pub content: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tool_calls: Option<Vec<ToolCall>>,
-    /// Base64 images — only on user messages to vision-capable models
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub images: Option<Vec<String>>,
-}
-
-impl ChatMessage {
-    pub fn system(content: impl Into<String>) -> Self {
-        Self {
-            role: "system".to_string(),
-            content: content.into(),
-            tool_calls: None,
-            images: None,
-        }
-    }
-    pub fn user(content: impl Into<String>) -> Self {
-        Self {
-            role: "user".to_string(),
-            content: content.into(),
-            tool_calls: None,
-            images: None,
-        }
-    }
-    pub fn tool_result(content: impl Into<String>) -> Self {
-        Self {
-            role: "tool".to_string(),
-            content: content.into(),
-            tool_calls: None,
-            images: None,
-        }
-    }
-}
-
-/// Tool call returned by the model in an assistant message
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct ToolCall {
-    pub function: ToolCallFunction,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub id: Option<String>,
-}
-
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct ToolCallFunction {
-    pub name: String,
-    /// Native JSON object (NOT a JSON-encoded string like OpenAI)
-    pub arguments: serde_json::Value,
-}
-
 #[derive(Serialize)]
 struct OllamaChatRequest<'a> {
     model: &'a str,
@@ -975,13 +940,6 @@ struct OllamaShowResponse {
     capabilities: Vec<String>,
 }
 
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct ModelCapabilities {
-    pub name: String,
-    pub has_vision: bool,
-    pub has_tool_calling: bool,
-}
-
 #[derive(Serialize)]
 struct OllamaBatchEmbedRequest {
     model: String,