From 3ac0cd62ebe01e98d24eb21074207a30f4e7076e Mon Sep 17 00:00:00 2001
From: Cameron <ccordes12@mail.bw.edu>
Date: Mon, 20 Apr 2026 22:30:40 -0400
Subject: [PATCH] feat(ai): hybrid backend mode for agentic insights
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a `backend` column to photo_insights (default 'local', migration
2026-04-20-000000) and a corresponding optional `backend` field on the
agentic request. When a request sets backend=hybrid:

- The local Ollama vision model is called once via describe_image to
  produce a text description.
- The description is inlined into the first user message as text —
  no base64 image is ever sent to the chat model.
- The agentic tool-calling loop and title generation route through an
  OpenRouterClient (dispatched via &dyn LlmClient), letting the user
  pick any tool-capable model from OpenRouter per request.
- describe_photo is removed from the offered tools since the description
  is already present.

Embeddings and vision stay on local Ollama regardless of backend.
Hybrid mode requires OPENROUTER_API_KEY; handlers return a clear error
when hybrid is requested without it, and also when the selected
OpenRouter model lacks tool-calling support.

AppState gains an optional OpenRouter client built from the
OPENROUTER_API_KEY, OPENROUTER_BASE_URL, OPENROUTER_DEFAULT_MODEL and
OPENROUTER_EMBEDDING_MODEL environment variables, plus attribution
headers taken from OPENROUTER_HTTP_REFERER / OPENROUTER_APP_TITLE. The
default model is anthropic/claude-sonnet-4.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../down.sql                                  |  23 ++
 .../up.sql                                    |   1 +
 src/ai/handlers.rs                            |  13 +
 src/ai/insight_generator.rs                   | 363 +++++++++++++-----
 src/bin/populate_knowledge.rs                 |   2 +
 src/database/models.rs                        |   4 +
 src/database/schema.rs                        |   1 +
 src/state.rs                                  |  35 ++
 8 files changed, 342 insertions(+), 100 deletions(-)
 create mode 100644 migrations/2026-04-20-000000_add_backend_to_insights/down.sql
 create mode 100644 migrations/2026-04-20-000000_add_backend_to_insights/up.sql
diff --git a/migrations/2026-04-20-000000_add_backend_to_insights/down.sql b/migrations/2026-04-20-000000_add_backend_to_insights/down.sql
new file mode 100644
index 0000000..cb8864d
--- /dev/null
+++ b/migrations/2026-04-20-000000_add_backend_to_insights/down.sql
@@ -0,0 +1,23 @@
+-- SQLite before 3.35.0 lacks ALTER TABLE ... DROP COLUMN, so rebuild photo_insights without `backend`.
+CREATE TABLE photo_insights_backup AS  -- 1. snapshot every column except `backend`
+    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
+           is_current, training_messages, approved
+    FROM photo_insights;
+DROP TABLE photo_insights;  -- 2. NOTE(review): this also drops the table's indexes/triggers; confirm none need re-creating
+CREATE TABLE photo_insights (  -- 3. recreate the table without the `backend` column
+    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+    library_id INTEGER NOT NULL REFERENCES libraries(id),
+    rel_path TEXT NOT NULL,
+    title TEXT NOT NULL,
+    summary TEXT NOT NULL,
+    generated_at BIGINT NOT NULL,
+    model_version TEXT NOT NULL,
+    is_current BOOLEAN NOT NULL DEFAULT TRUE,
+    training_messages TEXT,
+    approved BOOLEAN
+);
+INSERT INTO photo_insights  -- 4. restore rows; the SELECT column order matches the CREATE TABLE above
+    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
+           is_current, training_messages, approved
+    FROM photo_insights_backup;
+DROP TABLE photo_insights_backup;  -- 5. remove the temporary snapshot
diff --git a/migrations/2026-04-20-000000_add_backend_to_insights/up.sql b/migrations/2026-04-20-000000_add_backend_to_insights/up.sql
new file mode 100644
index 0000000..520c209
--- /dev/null
+++ b/migrations/2026-04-20-000000_add_backend_to_insights/up.sql
@@ -0,0 +1 @@
+ALTER TABLE photo_insights ADD COLUMN backend TEXT NOT NULL DEFAULT 'local';  -- rows that already exist read back as 'local'
diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs
index abf2369..038470d 100644
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -28,6 +28,10 @@ pub struct GeneratePhotoInsightRequest {
     pub top_k: Option<i32>,
     #[serde(default)]
     pub min_p: Option<f32>,
+    /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
+    /// OpenRouter chat). Only respected by the agentic endpoint.
+    #[serde(default)]
+    pub backend: Option<String>,
 }
 
 #[derive(Debug, Deserialize)]
@@ -65,6 +69,7 @@ pub struct PhotoInsightResponse {
     pub eval_count: Option<i32>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub approved: Option<bool>,
+    pub backend: String,
 }
 
 #[derive(Debug, Serialize)]
@@ -187,6 +192,7 @@ pub async fn get_insight_handler(
                 prompt_eval_count: None,
                 eval_count: None,
                 approved: insight.approved,
+                backend: insight.backend,
             };
             HttpResponse::Ok().json(response)
         }
@@ -254,6 +260,7 @@ pub async fn get_all_insights_handler(
                     prompt_eval_count: None,
                     eval_count: None,
                     approved: insight.approved,
+                    backend: insight.backend,
                 })
                 .collect();
 
@@ -309,6 +316,10 @@ pub async fn generate_agentic_insight_handler(
         max_iterations
     );
 
+    if let Some(ref b) = request.backend {
+        span.set_attribute(KeyValue::new("backend", b.clone()));
+    }
+
     let result = insight_generator
         .generate_agentic_insight_for_photo(
             &normalized_path,
@@ -320,6 +331,7 @@ pub async fn generate_agentic_insight_handler(
             request.top_k,
             request.min_p,
             max_iterations,
+            request.backend.clone(),
         )
         .await;
 
@@ -341,6 +353,7 @@ pub async fn generate_agentic_insight_handler(
                         prompt_eval_count,
                         eval_count,
                         approved: insight.approved,
+                        backend: insight.backend,
                     };
                     HttpResponse::Ok().json(response)
                 }
diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs
index 18e50c7..292324c 100644
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -9,7 +9,9 @@ use std::fs::File;
 use std::io::Cursor;
 use std::sync::{Arc, Mutex};
 
+use crate::ai::llm_client::LlmClient;
 use crate::ai::ollama::{ChatMessage, OllamaClient, Tool};
+use crate::ai::openrouter::OpenRouterClient;
 use crate::ai::sms_client::SmsApiClient;
 use crate::database::models::InsertPhotoInsight;
 use crate::database::{
@@ -39,6 +41,9 @@ struct NominatimAddress {
 #[derive(Clone)]
 pub struct InsightGenerator {
     ollama: OllamaClient,
+    /// Optional OpenRouter client, used when `backend=hybrid` is requested.
+    /// `None` when `OPENROUTER_API_KEY` is not configured.
+    openrouter: Option<Arc<OpenRouterClient>>,
     sms_client: SmsApiClient,
     insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
     exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
@@ -59,6 +64,7 @@ pub struct InsightGenerator {
 impl InsightGenerator {
     pub fn new(
         ollama: OllamaClient,
+        openrouter: Option<Arc<OpenRouterClient>>,
         sms_client: SmsApiClient,
         insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
         exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
@@ -72,6 +78,7 @@ impl InsightGenerator {
     ) -> Self {
         Self {
             ollama,
+            openrouter,
             sms_client,
             insight_dao,
             exif_dao,
@@ -1218,6 +1225,7 @@ impl InsightGenerator {
             model_version: ollama_client.primary_model.clone(),
             is_current: true,
             training_messages: None,
+            backend: "local".to_string(),
         };
 
         let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
@@ -2376,6 +2384,14 @@ Return ONLY the summary, nothing else."#,
 
     /// Generate an AI insight for a photo using an agentic tool-calling loop.
     /// The model decides which tools to call to gather context before writing the final insight.
+    ///
+    /// `backend` selects the chat provider: `"local"` (default) routes the
+    /// agentic loop through the configured Ollama server with the image
+    /// attached to the first user message; `"hybrid"` asks the local Ollama
+    /// vision model to describe the image once, inlines the description as
+    /// text, and runs the loop through OpenRouter (chat only — embeddings
+    /// and describe calls stay local in either mode).
+    #[allow(clippy::too_many_arguments)]
     pub async fn generate_agentic_insight_for_photo(
         &self,
         file_path: &str,
@@ -2387,6 +2403,7 @@ Return ONLY the summary, nothing else."#,
         top_k: Option<i32>,
         min_p: Option<f32>,
         max_iterations: usize,
+        backend: Option<String>,
     ) -> Result<(Option<i32>, Option<i32>)> {
         let tracer = global_tracer();
         let current_cx = opentelemetry::Context::current();
@@ -2398,8 +2415,30 @@ Return ONLY the summary, nothing else."#,
         span.set_attribute(KeyValue::new("file_path", file_path.clone()));
         span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
 
-        // 1. Create OllamaClient
-        let mut ollama_client = if let Some(ref model) = custom_model {
+        // 1a. Resolve backend label (defaults to "local").
+        let backend_label = backend
+            .as_deref()
+            .map(|s| s.trim().to_lowercase())
+            .filter(|s| !s.is_empty())
+            .unwrap_or_else(|| "local".to_string());
+        if !matches!(backend_label.as_str(), "local" | "hybrid") {
+            return Err(anyhow::anyhow!(
+                "unknown backend '{}'; expected 'local' or 'hybrid'",
+                backend_label
+            ));
+        }
+        span.set_attribute(KeyValue::new("backend", backend_label.clone()));
+        let is_hybrid = backend_label == "hybrid";
+
+        // 1b. Always build an Ollama client. In local mode it owns the chat
+        //     loop; in hybrid mode it still handles describe_image + any
+        //     tool-local calls (e.g. if a future tool needs embeddings).
+        //     Sampling overrides only apply in local mode — in hybrid the
+        //     user's params belong to the OpenRouter chat client.
+        let apply_sampling_to_ollama = !is_hybrid;
+        let mut ollama_client = if let Some(ref model) = custom_model
+            && !is_hybrid
+        {
             log::info!("Using custom model for agentic: {}", model);
             span.set_attribute(KeyValue::new("custom_model", model.clone()));
             OllamaClient::new(
@@ -2409,108 +2448,179 @@ Return ONLY the summary, nothing else."#,
                 Some(model.clone()),
             )
         } else {
-            span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone()));
+            if !is_hybrid {
+                span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone()));
+            }
             self.ollama.clone()
         };
 
-        if let Some(ctx) = num_ctx {
-            log::info!("Using custom context size: {}", ctx);
-            span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
-            ollama_client.set_num_ctx(Some(ctx));
+        if apply_sampling_to_ollama {
+            if let Some(ctx) = num_ctx {
+                log::info!("Using custom context size: {}", ctx);
+                span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
+                ollama_client.set_num_ctx(Some(ctx));
+            }
+
+            if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
+                log::info!(
+                    "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
+                    temperature,
+                    top_p,
+                    top_k,
+                    min_p
+                );
+                if let Some(t) = temperature {
+                    span.set_attribute(KeyValue::new("temperature", t as f64));
+                }
+                if let Some(p) = top_p {
+                    span.set_attribute(KeyValue::new("top_p", p as f64));
+                }
+                if let Some(k) = top_k {
+                    span.set_attribute(KeyValue::new("top_k", k as i64));
+                }
+                if let Some(m) = min_p {
+                    span.set_attribute(KeyValue::new("min_p", m as f64));
+                }
+                ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
+            }
         }
 
-        if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
-            log::info!(
-                "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
-                temperature,
-                top_p,
-                top_k,
-                min_p
-            );
-            if let Some(t) = temperature {
-                span.set_attribute(KeyValue::new("temperature", t as f64));
+        // 1c. In hybrid mode, clone the configured OpenRouter client and
+        //     apply per-request overrides.
+        let openrouter_client: Option<OpenRouterClient> = if is_hybrid {
+            let arc = self.openrouter.as_ref().ok_or_else(|| {
+                anyhow::anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
+            })?;
+            let mut c: OpenRouterClient = (**arc).clone();
+            if let Some(ref m) = custom_model {
+                c.primary_model = m.clone();
+                span.set_attribute(KeyValue::new("custom_model", m.clone()));
             }
-            if let Some(p) = top_p {
-                span.set_attribute(KeyValue::new("top_p", p as f64));
+            span.set_attribute(KeyValue::new("openrouter_model", c.primary_model.clone()));
+            if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
+                if let Some(t) = temperature {
+                    span.set_attribute(KeyValue::new("temperature", t as f64));
+                }
+                if let Some(p) = top_p {
+                    span.set_attribute(KeyValue::new("top_p", p as f64));
+                }
+                if let Some(k) = top_k {
+                    span.set_attribute(KeyValue::new("top_k", k as i64));
+                }
+                if let Some(m) = min_p {
+                    span.set_attribute(KeyValue::new("min_p", m as f64));
+                }
+                c.set_sampling_params(temperature, top_p, top_k, min_p);
             }
-            if let Some(k) = top_k {
-                span.set_attribute(KeyValue::new("top_k", k as i64));
+            if let Some(ctx) = num_ctx {
+                span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
+                c.set_num_ctx(Some(ctx));
             }
-            if let Some(m) = min_p {
-                span.set_attribute(KeyValue::new("min_p", m as f64));
-            }
-            ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
-        }
+            Some(c)
+        } else {
+            None
+        };
 
         let insight_cx = current_cx.with_span(span);
 
-        // 2a. Verify the model exists on at least one server before checking capabilities
-        if let Some(ref model_name) = custom_model {
-            let available_on_primary =
-                OllamaClient::is_model_available(&ollama_client.primary_url, model_name)
-                    .await
-                    .unwrap_or(false);
-
-            let available_on_fallback = if let Some(ref fallback_url) = ollama_client.fallback_url {
-                OllamaClient::is_model_available(fallback_url, model_name)
-                    .await
-                    .unwrap_or(false)
-            } else {
-                false
-            };
-
-            if !available_on_primary && !available_on_fallback {
-                anyhow::bail!(
-                    "model not available: '{}' not found on any configured server",
-                    model_name
-                );
-            }
-        }
-
-        // 2b. Check tool calling capability — try primary, fall back to fallback URL
-        let model_name_for_caps = &ollama_client.primary_model;
-        let capabilities = match OllamaClient::check_model_capabilities(
-            &ollama_client.primary_url,
-            model_name_for_caps,
-        )
-        .await
-        {
-            Ok(caps) => caps,
-            Err(_) => {
-                // Model may only be on the fallback server
-                let fallback_url = ollama_client.fallback_url.as_deref().ok_or_else(|| {
+        // 2. Verify chat model supports tool calling.
+        //    - local: existing Ollama model availability + capability check.
+        //    - hybrid: query OpenRouter's /models for the chosen model.
+        let has_vision = if is_hybrid {
+            let or_client = openrouter_client
+                .as_ref()
+                .expect("openrouter_client constructed when is_hybrid");
+            let caps = or_client
+                .model_capabilities(&or_client.primary_model)
+                .await
+                .map_err(|e| {
                     anyhow::anyhow!(
-                        "Failed to check model capabilities for '{}': model not found on primary server and no fallback configured",
-                        model_name_for_caps
+                        "OpenRouter capability lookup failed for '{}': {}",
+                        or_client.primary_model,
+                        e
                     )
                 })?;
-                OllamaClient::check_model_capabilities(fallback_url, model_name_for_caps)
-                    .await
-                    .map_err(|e| {
-                        anyhow::anyhow!(
-                            "Failed to check model capabilities for '{}': {}",
-                            model_name_for_caps,
-                            e
-                        )
-                    })?
+            if !caps.has_tool_calling {
+                return Err(anyhow::anyhow!(
+                    "tool calling not supported by OpenRouter model '{}'",
+                    or_client.primary_model
+                ));
             }
+            insight_cx
+                .span()
+                .set_attribute(KeyValue::new("model_has_tool_calling", true));
+            // In hybrid mode the chat model never sees images directly — we
+            // describe-then-inject, so `has_vision` drives only whether we
+            // bother loading the image to describe it, which we always do.
+            true
+        } else {
+            if let Some(ref model_name) = custom_model {
+                let available_on_primary =
+                    OllamaClient::is_model_available(&ollama_client.primary_url, model_name)
+                        .await
+                        .unwrap_or(false);
+
+                let available_on_fallback =
+                    if let Some(ref fallback_url) = ollama_client.fallback_url {
+                        OllamaClient::is_model_available(fallback_url, model_name)
+                            .await
+                            .unwrap_or(false)
+                    } else {
+                        false
+                    };
+
+                if !available_on_primary && !available_on_fallback {
+                    anyhow::bail!(
+                        "model not available: '{}' not found on any configured server",
+                        model_name
+                    );
+                }
+            }
+
+            let model_name_for_caps = &ollama_client.primary_model;
+            let capabilities = match OllamaClient::check_model_capabilities(
+                &ollama_client.primary_url,
+                model_name_for_caps,
+            )
+            .await
+            {
+                Ok(caps) => caps,
+                Err(_) => {
+                    let fallback_url = ollama_client.fallback_url.as_deref().ok_or_else(|| {
+                        anyhow::anyhow!(
+                            "Failed to check model capabilities for '{}': model not found on primary server and no fallback configured",
+                            model_name_for_caps
+                        )
+                    })?;
+                    OllamaClient::check_model_capabilities(fallback_url, model_name_for_caps)
+                        .await
+                        .map_err(|e| {
+                            anyhow::anyhow!(
+                                "Failed to check model capabilities for '{}': {}",
+                                model_name_for_caps,
+                                e
+                            )
+                        })?
+                }
+            };
+
+            if !capabilities.has_tool_calling {
+                return Err(anyhow::anyhow!(
+                    "tool calling not supported by model '{}'",
+                    ollama_client.primary_model
+                ));
+            }
+
+            insight_cx
+                .span()
+                .set_attribute(KeyValue::new("model_has_vision", capabilities.has_vision));
+            insight_cx
+                .span()
+                .set_attribute(KeyValue::new("model_has_tool_calling", true));
+
+            capabilities.has_vision
         };
 
-        if !capabilities.has_tool_calling {
-            return Err(anyhow::anyhow!(
-                "tool calling not supported by model '{}'",
-                ollama_client.primary_model
-            ));
-        }
-
-        let has_vision = capabilities.has_vision;
-        insight_cx
-            .span()
-            .set_attribute(KeyValue::new("model_has_vision", has_vision));
-        insight_cx
-            .span()
-            .set_attribute(KeyValue::new("model_has_tool_calling", true));
-
         // 3. Fetch EXIF
         let exif = {
             let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
@@ -2603,7 +2713,10 @@ Return ONLY the summary, nothing else."#,
             }
         };
 
-        // 7. Load image if vision capable
+        // 7. Load image if vision capable.
+        //    In hybrid mode we ALSO describe it locally now so the
+        //    description can be inlined as text — the OpenRouter chat model
+        //    never receives the base64 image directly.
         let image_base64 = if has_vision {
             match self.load_image_as_base64(&file_path) {
                 Ok(b64) => {
@@ -2619,6 +2732,30 @@ Return ONLY the summary, nothing else."#,
             None
         };
 
+        let hybrid_visual_description: Option<String> = if is_hybrid {
+            match image_base64.as_deref() {
+                Some(b64) => match self.ollama.describe_image(b64).await {
+                    Ok(desc) => {
+                        log::info!(
+                            "Hybrid: local vision describe succeeded ({} chars)",
+                            desc.len()
+                        );
+                        Some(desc)
+                    }
+                    Err(e) => {
+                        log::warn!(
+                            "Hybrid: local vision describe failed, continuing without: {}",
+                            e
+                        );
+                        None
+                    }
+                },
+                None => None,
+            }
+        } else {
+            None
+        };
+
         // 8. Build system message
         let cameron_id_note = match cameron_entity_id {
             Some(id) => format!(
@@ -2672,8 +2809,13 @@ Return ONLY the summary, nothing else."#,
             .map(|c| format!("Contact/Person: {}", c))
             .unwrap_or_else(|| "Contact/Person: unknown".to_string());
 
+        let visual_block = hybrid_visual_description
+            .as_deref()
+            .map(|d| format!("Visual description (from local vision model):\n{}\n\n", d))
+            .unwrap_or_default();
+
         let user_content = format!(
-            "Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\
+            "{visual_block}Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\
              Photo file path: {}\n\
              Date taken: {}\n\
              {}\n\
@@ -2686,21 +2828,32 @@ Return ONLY the summary, nothing else."#,
             contact_info,
             gps_info,
             tags_info,
+            visual_block = visual_block,
         );
 
-        // 10. Define tools
-        let tools = Self::build_tool_definitions(has_vision);
+        // 10. Define tools. Hybrid mode omits `describe_photo` since the
+        //     chat model receives the visual description inline.
+        let offer_describe_tool = has_vision && !is_hybrid;
+        let tools = Self::build_tool_definitions(offer_describe_tool);
 
-        // 11. Build initial messages
+        // 11. Build initial messages. In hybrid mode images are never
+        //     attached to the wire message — the description is part of
+        //     `user_content`.
         let system_msg = ChatMessage::system(system_content);
         let mut user_msg = ChatMessage::user(user_content);
-        if let Some(ref img) = image_base64 {
+        if !is_hybrid && let Some(ref img) = image_base64 {
             user_msg.images = Some(vec![img.clone()]);
         }
 
         let mut messages = vec![system_msg, user_msg];
 
-        // 12. Agentic loop
+        // 12. Agentic loop — dispatch through the selected backend.
+        let chat_backend: &dyn LlmClient = if let Some(ref or_c) = openrouter_client {
+            or_c
+        } else {
+            &ollama_client
+        };
+
         let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx);
         let loop_cx = insight_cx.with_span(loop_span);
 
@@ -2713,7 +2866,7 @@ Return ONLY the summary, nothing else."#,
             iterations_used = iteration + 1;
             log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations);
 
-            let (response, prompt_tokens, eval_tokens) = ollama_client
+            let (response, prompt_tokens, eval_tokens) = chat_backend
                 .chat_with_tools(messages.clone(), tools.clone())
                 .await?;
 
@@ -2778,7 +2931,7 @@ Return ONLY the summary, nothing else."#,
             messages.push(ChatMessage::user(
                 "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.",
             ));
-            let (final_response, prompt_tokens, eval_tokens) = ollama_client
+            let (final_response, prompt_tokens, eval_tokens) = chat_backend
                 .chat_with_tools(messages.clone(), vec![])
                 .await?;
             last_prompt_eval_count = prompt_tokens;
@@ -2792,10 +2945,18 @@ Return ONLY the summary, nothing else."#,
             .set_attribute(KeyValue::new("iterations_used", iterations_used as i64));
         loop_cx.span().set_status(Status::Ok);
 
-        // 13. Generate title
-        let title = ollama_client
-            .generate_photo_title(&final_content, custom_system_prompt.as_deref())
+        // 13. Generate title via the same backend so voice stays consistent.
+        let title_prompt = format!(
+            "Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\nCapture the key moment or theme. Return ONLY the title, nothing else.",
+            final_content
+        );
+        let title_system = custom_system_prompt.as_deref().unwrap_or(
+            "You are my long term memory assistant. Use only the information provided. Do not invent details.",
+        );
+        let title_raw = chat_backend
+            .generate(&title_prompt, Some(title_system), None)
             .await?;
+        let title = title_raw.trim().trim_matches('"').to_string();
 
         log::info!("Agentic generated title: {}", title);
         log::info!(
@@ -2814,15 +2975,17 @@ Return ONLY the summary, nothing else."#,
         };
 
         // 15. Store insight (returns the persisted row including its new id)
+        let model_version = chat_backend.primary_model().to_string();
         let insight = InsertPhotoInsight {
             library_id: crate::libraries::PRIMARY_LIBRARY_ID,
             file_path: file_path.to_string(),
             title,
             summary: final_content,
             generated_at: Utc::now().timestamp(),
-            model_version: ollama_client.primary_model.clone(),
+            model_version,
             is_current: true,
             training_messages,
+            backend: backend_label.clone(),
         };
 
         let stored = {
diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs
index bc37960..f70c5a2 100644
--- a/src/bin/populate_knowledge.rs
+++ b/src/bin/populate_knowledge.rs
@@ -134,6 +134,7 @@ async fn main() -> anyhow::Result<()> {
 
     let generator = InsightGenerator::new(
         ollama,
+        None,
         sms_client,
         insight_dao.clone(),
         exif_dao,
@@ -249,6 +250,7 @@ async fn main() -> anyhow::Result<()> {
                 args.top_k,
                 args.min_p,
                 args.max_iterations,
+                None,
             )
             .await
         {
diff --git a/src/database/models.rs b/src/database/models.rs
index d95876b..3d63f1a 100644
--- a/src/database/models.rs
+++ b/src/database/models.rs
@@ -100,6 +100,8 @@ pub struct InsertPhotoInsight {
     pub model_version: String,
     pub is_current: bool,
     pub training_messages: Option<String>,
+    /// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
+    pub backend: String,
 }
 
 #[derive(Serialize, Queryable, Clone, Debug)]
@@ -115,6 +117,8 @@ pub struct PhotoInsight {
     pub is_current: bool,
     pub training_messages: Option<String>,
     pub approved: Option<bool>,
+    /// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
+    pub backend: String,
 }
 
 // --- Libraries ---
diff --git a/src/database/schema.rs b/src/database/schema.rs
index 3352ca6..200cb15 100644
--- a/src/database/schema.rs
+++ b/src/database/schema.rs
@@ -142,6 +142,7 @@ diesel::table! {
         is_current -> Bool,
         training_messages -> Nullable<Text>,
         approved -> Nullable<Bool>,
+        backend -> Text,
     }
 }
 
diff --git a/src/state.rs b/src/state.rs
index 78b98ad..72a509e 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -1,3 +1,4 @@
+use crate::ai::openrouter::OpenRouterClient;
 use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
 use crate::database::{
     CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
@@ -31,6 +32,13 @@ pub struct AppState {
     pub preview_clips_path: String,
     pub excluded_dirs: Vec<String>,
     pub ollama: OllamaClient,
+    /// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
+    /// when a request explicitly opts into `backend=hybrid`. Currently
+    /// reached via `insight_generator`; kept here so future handlers
+    /// (insight_chat) can route to it without threading it through the
+    /// generator.
+    #[allow(dead_code)]
+    pub openrouter: Option<Arc<OpenRouterClient>>,
     pub sms_client: SmsApiClient,
     pub insight_generator: InsightGenerator,
 }
@@ -61,6 +69,7 @@ impl AppState {
         preview_clips_path: String,
         excluded_dirs: Vec<String>,
         ollama: OllamaClient,
+        openrouter: Option<Arc<OpenRouterClient>>,
         sms_client: SmsApiClient,
         insight_generator: InsightGenerator,
         preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
@@ -92,6 +101,7 @@ impl AppState {
             preview_clips_path,
             excluded_dirs,
             ollama,
+            openrouter,
             sms_client,
             insight_generator,
         }
@@ -127,6 +137,8 @@ impl Default for AppState {
             ollama_fallback_model,
         );
 
+        let openrouter = build_openrouter_from_env();
+
         let sms_api_url =
             env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
         let sms_api_token = env::var("SMS_API_TOKEN").ok();
@@ -168,6 +180,7 @@ impl Default for AppState {
         // Initialize InsightGenerator with all data sources
         let insight_generator = InsightGenerator::new(
             ollama.clone(),
+            openrouter.clone(),
             sms_client.clone(),
             insight_dao.clone(),
             exif_dao.clone(),
@@ -195,6 +208,7 @@ impl Default for AppState {
             preview_clips_path,
             Self::parse_excluded_dirs(),
             ollama,
+            openrouter,
             sms_client,
             insight_generator,
             preview_dao,
@@ -202,6 +216,25 @@ impl Default for AppState {
     }
 }
 
+/// Build an `OpenRouterClient` from environment variables. Returns `None` when
+/// `OPENROUTER_API_KEY` is unset or blank (hybrid requests then get a clear error).
+fn build_openrouter_from_env() -> Option<Arc<OpenRouterClient>> {
+    let api_key = env::var("OPENROUTER_API_KEY").ok()?;
+    let api_key = (!api_key.trim().is_empty()).then_some(api_key)?;
+    let base_url = env::var("OPENROUTER_BASE_URL").ok();
+    let default_model = env::var("OPENROUTER_DEFAULT_MODEL")
+        .unwrap_or_else(|_| "anthropic/claude-sonnet-4".to_string());
+    let mut client = OpenRouterClient::new(api_key, base_url, default_model);
+    client.set_attribution(
+        env::var("OPENROUTER_HTTP_REFERER").ok(),
+        env::var("OPENROUTER_APP_TITLE").ok(),
+    );
+    if let Ok(model) = env::var("OPENROUTER_EMBEDDING_MODEL") {
+        client.set_embedding_model(model);
+    }
+    Some(Arc::new(client))
+}
+
 #[cfg(test)]
 impl AppState {
     /// Creates an AppState instance for testing with temporary directories
@@ -255,6 +288,7 @@ impl AppState {
         };
         let insight_generator = InsightGenerator::new(
             ollama.clone(),
+            None,
             sms_client.clone(),
             insight_dao.clone(),
             exif_dao.clone(),
@@ -286,6 +320,7 @@ impl AppState {
             preview_clips_path.to_string_lossy().to_string(),
             Vec::new(), // No excluded directories for test state
             ollama,
+            None,
             sms_client,
             insight_generator,
             preview_dao,