From 3ac0cd62ebe01e98d24eb21074207a30f4e7076e Mon Sep 17 00:00:00 2001
From: Cameron
Date: Mon, 20 Apr 2026 22:30:40 -0400
Subject: [PATCH] feat(ai): hybrid backend mode for agentic insights
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a `backend` column to photo_insights (default 'local', migration
2026-04-20-000000) and a corresponding optional `backend` field on the
agentic request. When a request sets backend=hybrid:

- The local Ollama vision model is called once via describe_image to
  produce a text description.
- The description is inlined into the first user message as text — no
  base64 image is ever sent to the chat model.
- The agentic tool-calling loop and title generation route through an
  OpenRouterClient (dispatched via &dyn LlmClient), letting the user
  pick any tool-capable model from OpenRouter per request.
- describe_photo is removed from the offered tools since the
  description is already present.

Embeddings and vision stay on local Ollama regardless of backend.
Hybrid mode requires OPENROUTER_API_KEY; handlers return a clear error
when hybrid is requested without it, and also when the selected
OpenRouter model lacks tool-calling support.

AppState gains an optional openrouter client built from
OPENROUTER_API_KEY / OPENROUTER_BASE_URL / OPENROUTER_DEFAULT_MODEL /
OPENROUTER_EMBEDDING_MODEL / attribution headers. Default model is
anthropic/claude-sonnet-4.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../down.sql | 23 ++ .../up.sql | 1 + src/ai/handlers.rs | 13 + src/ai/insight_generator.rs | 363 +++++++++++++----- src/bin/populate_knowledge.rs | 2 + src/database/models.rs | 4 + src/database/schema.rs | 1 + src/state.rs | 35 ++ 8 files changed, 342 insertions(+), 100 deletions(-) create mode 100644 migrations/2026-04-20-000000_add_backend_to_insights/down.sql create mode 100644 migrations/2026-04-20-000000_add_backend_to_insights/up.sql diff --git a/migrations/2026-04-20-000000_add_backend_to_insights/down.sql b/migrations/2026-04-20-000000_add_backend_to_insights/down.sql new file mode 100644 index 0000000..cb8864d --- /dev/null +++ b/migrations/2026-04-20-000000_add_backend_to_insights/down.sql @@ -0,0 +1,23 @@ +-- SQLite can't DROP COLUMN cleanly on older versions; rebuild the table. +CREATE TABLE photo_insights_backup AS + SELECT id, library_id, rel_path, title, summary, generated_at, model_version, + is_current, training_messages, approved + FROM photo_insights; +DROP TABLE photo_insights; +CREATE TABLE photo_insights ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + library_id INTEGER NOT NULL REFERENCES libraries(id), + rel_path TEXT NOT NULL, + title TEXT NOT NULL, + summary TEXT NOT NULL, + generated_at BIGINT NOT NULL, + model_version TEXT NOT NULL, + is_current BOOLEAN NOT NULL DEFAULT TRUE, + training_messages TEXT, + approved BOOLEAN +); +INSERT INTO photo_insights + SELECT id, library_id, rel_path, title, summary, generated_at, model_version, + is_current, training_messages, approved + FROM photo_insights_backup; +DROP TABLE photo_insights_backup; diff --git a/migrations/2026-04-20-000000_add_backend_to_insights/up.sql b/migrations/2026-04-20-000000_add_backend_to_insights/up.sql new file mode 100644 index 0000000..520c209 --- /dev/null +++ b/migrations/2026-04-20-000000_add_backend_to_insights/up.sql @@ -0,0 +1 @@ +ALTER TABLE photo_insights ADD COLUMN backend TEXT NOT NULL DEFAULT 'local'; diff 
--git a/src/ai/handlers.rs b/src/ai/handlers.rs index abf2369..038470d 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -28,6 +28,10 @@ pub struct GeneratePhotoInsightRequest { pub top_k: Option, #[serde(default)] pub min_p: Option, + /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision + + /// OpenRouter chat). Only respected by the agentic endpoint. + #[serde(default)] + pub backend: Option, } #[derive(Debug, Deserialize)] @@ -65,6 +69,7 @@ pub struct PhotoInsightResponse { pub eval_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub approved: Option, + pub backend: String, } #[derive(Debug, Serialize)] @@ -187,6 +192,7 @@ pub async fn get_insight_handler( prompt_eval_count: None, eval_count: None, approved: insight.approved, + backend: insight.backend, }; HttpResponse::Ok().json(response) } @@ -254,6 +260,7 @@ pub async fn get_all_insights_handler( prompt_eval_count: None, eval_count: None, approved: insight.approved, + backend: insight.backend, }) .collect(); @@ -309,6 +316,10 @@ pub async fn generate_agentic_insight_handler( max_iterations ); + if let Some(ref b) = request.backend { + span.set_attribute(KeyValue::new("backend", b.clone())); + } + let result = insight_generator .generate_agentic_insight_for_photo( &normalized_path, @@ -320,6 +331,7 @@ pub async fn generate_agentic_insight_handler( request.top_k, request.min_p, max_iterations, + request.backend.clone(), ) .await; @@ -341,6 +353,7 @@ pub async fn generate_agentic_insight_handler( prompt_eval_count, eval_count, approved: insight.approved, + backend: insight.backend, }; HttpResponse::Ok().json(response) } diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 18e50c7..292324c 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -9,7 +9,9 @@ use std::fs::File; use std::io::Cursor; use std::sync::{Arc, Mutex}; +use crate::ai::llm_client::LlmClient; use crate::ai::ollama::{ChatMessage, OllamaClient, Tool}; 
+use crate::ai::openrouter::OpenRouterClient; use crate::ai::sms_client::SmsApiClient; use crate::database::models::InsertPhotoInsight; use crate::database::{ @@ -39,6 +41,9 @@ struct NominatimAddress { #[derive(Clone)] pub struct InsightGenerator { ollama: OllamaClient, + /// Optional OpenRouter client, used when `backend=hybrid` is requested. + /// `None` when `OPENROUTER_API_KEY` is not configured. + openrouter: Option>, sms_client: SmsApiClient, insight_dao: Arc>>, exif_dao: Arc>>, @@ -59,6 +64,7 @@ pub struct InsightGenerator { impl InsightGenerator { pub fn new( ollama: OllamaClient, + openrouter: Option>, sms_client: SmsApiClient, insight_dao: Arc>>, exif_dao: Arc>>, @@ -72,6 +78,7 @@ impl InsightGenerator { ) -> Self { Self { ollama, + openrouter, sms_client, insight_dao, exif_dao, @@ -1218,6 +1225,7 @@ impl InsightGenerator { model_version: ollama_client.primary_model.clone(), is_current: true, training_messages: None, + backend: "local".to_string(), }; let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); @@ -2376,6 +2384,14 @@ Return ONLY the summary, nothing else."#, /// Generate an AI insight for a photo using an agentic tool-calling loop. /// The model decides which tools to call to gather context before writing the final insight. + /// + /// `backend` selects the chat provider: `"local"` (default) routes the + /// agentic loop through the configured Ollama server with the image + /// attached to the first user message; `"hybrid"` asks the local Ollama + /// vision model to describe the image once, inlines the description as + /// text, and runs the loop through OpenRouter (chat only — embeddings + /// and describe calls stay local in either mode). 
+ #[allow(clippy::too_many_arguments)] pub async fn generate_agentic_insight_for_photo( &self, file_path: &str, @@ -2387,6 +2403,7 @@ Return ONLY the summary, nothing else."#, top_k: Option, min_p: Option, max_iterations: usize, + backend: Option, ) -> Result<(Option, Option)> { let tracer = global_tracer(); let current_cx = opentelemetry::Context::current(); @@ -2398,8 +2415,30 @@ Return ONLY the summary, nothing else."#, span.set_attribute(KeyValue::new("file_path", file_path.clone())); span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64)); - // 1. Create OllamaClient - let mut ollama_client = if let Some(ref model) = custom_model { + // 1a. Resolve backend label (defaults to "local"). + let backend_label = backend + .as_deref() + .map(|s| s.trim().to_lowercase()) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "local".to_string()); + if !matches!(backend_label.as_str(), "local" | "hybrid") { + return Err(anyhow::anyhow!( + "unknown backend '{}'; expected 'local' or 'hybrid'", + backend_label + )); + } + span.set_attribute(KeyValue::new("backend", backend_label.clone())); + let is_hybrid = backend_label == "hybrid"; + + // 1b. Always build an Ollama client. In local mode it owns the chat + // loop; in hybrid mode it still handles describe_image + any + // tool-local calls (e.g. if a future tool needs embeddings). + // Sampling overrides only apply in local mode — in hybrid the + // user's params belong to the OpenRouter chat client. 
+ let apply_sampling_to_ollama = !is_hybrid; + let mut ollama_client = if let Some(ref model) = custom_model + && !is_hybrid + { log::info!("Using custom model for agentic: {}", model); span.set_attribute(KeyValue::new("custom_model", model.clone())); OllamaClient::new( @@ -2409,108 +2448,179 @@ Return ONLY the summary, nothing else."#, Some(model.clone()), ) } else { - span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone())); + if !is_hybrid { + span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone())); + } self.ollama.clone() }; - if let Some(ctx) = num_ctx { - log::info!("Using custom context size: {}", ctx); - span.set_attribute(KeyValue::new("num_ctx", ctx as i64)); - ollama_client.set_num_ctx(Some(ctx)); + if apply_sampling_to_ollama { + if let Some(ctx) = num_ctx { + log::info!("Using custom context size: {}", ctx); + span.set_attribute(KeyValue::new("num_ctx", ctx as i64)); + ollama_client.set_num_ctx(Some(ctx)); + } + + if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() { + log::info!( + "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}", + temperature, + top_p, + top_k, + min_p + ); + if let Some(t) = temperature { + span.set_attribute(KeyValue::new("temperature", t as f64)); + } + if let Some(p) = top_p { + span.set_attribute(KeyValue::new("top_p", p as f64)); + } + if let Some(k) = top_k { + span.set_attribute(KeyValue::new("top_k", k as i64)); + } + if let Some(m) = min_p { + span.set_attribute(KeyValue::new("min_p", m as f64)); + } + ollama_client.set_sampling_params(temperature, top_p, top_k, min_p); + } } - if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() { - log::info!( - "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}", - temperature, - top_p, - top_k, - min_p - ); - if let Some(t) = temperature { - span.set_attribute(KeyValue::new("temperature", t as f64)); + // 1c. 
In hybrid mode, clone the configured OpenRouter client and + // apply per-request overrides. + let openrouter_client: Option = if is_hybrid { + let arc = self.openrouter.as_ref().ok_or_else(|| { + anyhow::anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured") + })?; + let mut c: OpenRouterClient = (**arc).clone(); + if let Some(ref m) = custom_model { + c.primary_model = m.clone(); + span.set_attribute(KeyValue::new("custom_model", m.clone())); } - if let Some(p) = top_p { - span.set_attribute(KeyValue::new("top_p", p as f64)); + span.set_attribute(KeyValue::new("openrouter_model", c.primary_model.clone())); + if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() { + if let Some(t) = temperature { + span.set_attribute(KeyValue::new("temperature", t as f64)); + } + if let Some(p) = top_p { + span.set_attribute(KeyValue::new("top_p", p as f64)); + } + if let Some(k) = top_k { + span.set_attribute(KeyValue::new("top_k", k as i64)); + } + if let Some(m) = min_p { + span.set_attribute(KeyValue::new("min_p", m as f64)); + } + c.set_sampling_params(temperature, top_p, top_k, min_p); } - if let Some(k) = top_k { - span.set_attribute(KeyValue::new("top_k", k as i64)); + if let Some(ctx) = num_ctx { + span.set_attribute(KeyValue::new("num_ctx", ctx as i64)); + c.set_num_ctx(Some(ctx)); } - if let Some(m) = min_p { - span.set_attribute(KeyValue::new("min_p", m as f64)); - } - ollama_client.set_sampling_params(temperature, top_p, top_k, min_p); - } + Some(c) + } else { + None + }; let insight_cx = current_cx.with_span(span); - // 2a. 
Verify the model exists on at least one server before checking capabilities - if let Some(ref model_name) = custom_model { - let available_on_primary = - OllamaClient::is_model_available(&ollama_client.primary_url, model_name) - .await - .unwrap_or(false); - - let available_on_fallback = if let Some(ref fallback_url) = ollama_client.fallback_url { - OllamaClient::is_model_available(fallback_url, model_name) - .await - .unwrap_or(false) - } else { - false - }; - - if !available_on_primary && !available_on_fallback { - anyhow::bail!( - "model not available: '{}' not found on any configured server", - model_name - ); - } - } - - // 2b. Check tool calling capability — try primary, fall back to fallback URL - let model_name_for_caps = &ollama_client.primary_model; - let capabilities = match OllamaClient::check_model_capabilities( - &ollama_client.primary_url, - model_name_for_caps, - ) - .await - { - Ok(caps) => caps, - Err(_) => { - // Model may only be on the fallback server - let fallback_url = ollama_client.fallback_url.as_deref().ok_or_else(|| { + // 2. Verify chat model supports tool calling. + // - local: existing Ollama model availability + capability check. + // - hybrid: query OpenRouter's /models for the chosen model. + let has_vision = if is_hybrid { + let or_client = openrouter_client + .as_ref() + .expect("openrouter_client constructed when is_hybrid"); + let caps = or_client + .model_capabilities(&or_client.primary_model) + .await + .map_err(|e| { anyhow::anyhow!( - "Failed to check model capabilities for '{}': model not found on primary server and no fallback configured", - model_name_for_caps + "OpenRouter capability lookup failed for '{}': {}", + or_client.primary_model, + e ) })?; - OllamaClient::check_model_capabilities(fallback_url, model_name_for_caps) - .await - .map_err(|e| { - anyhow::anyhow!( - "Failed to check model capabilities for '{}': {}", - model_name_for_caps, - e - ) - })? 
+ if !caps.has_tool_calling { + return Err(anyhow::anyhow!( + "tool calling not supported by OpenRouter model '{}'", + or_client.primary_model + )); } + insight_cx + .span() + .set_attribute(KeyValue::new("model_has_tool_calling", true)); + // In hybrid mode the chat model never sees images directly — we + // describe-then-inject, so `has_vision` drives only whether we + // bother loading the image to describe it, which we always do. + true + } else { + if let Some(ref model_name) = custom_model { + let available_on_primary = + OllamaClient::is_model_available(&ollama_client.primary_url, model_name) + .await + .unwrap_or(false); + + let available_on_fallback = + if let Some(ref fallback_url) = ollama_client.fallback_url { + OllamaClient::is_model_available(fallback_url, model_name) + .await + .unwrap_or(false) + } else { + false + }; + + if !available_on_primary && !available_on_fallback { + anyhow::bail!( + "model not available: '{}' not found on any configured server", + model_name + ); + } + } + + let model_name_for_caps = &ollama_client.primary_model; + let capabilities = match OllamaClient::check_model_capabilities( + &ollama_client.primary_url, + model_name_for_caps, + ) + .await + { + Ok(caps) => caps, + Err(_) => { + let fallback_url = ollama_client.fallback_url.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "Failed to check model capabilities for '{}': model not found on primary server and no fallback configured", + model_name_for_caps + ) + })?; + OllamaClient::check_model_capabilities(fallback_url, model_name_for_caps) + .await + .map_err(|e| { + anyhow::anyhow!( + "Failed to check model capabilities for '{}': {}", + model_name_for_caps, + e + ) + })? 
+ } + }; + + if !capabilities.has_tool_calling { + return Err(anyhow::anyhow!( + "tool calling not supported by model '{}'", + ollama_client.primary_model + )); + } + + insight_cx + .span() + .set_attribute(KeyValue::new("model_has_vision", capabilities.has_vision)); + insight_cx + .span() + .set_attribute(KeyValue::new("model_has_tool_calling", true)); + + capabilities.has_vision }; - if !capabilities.has_tool_calling { - return Err(anyhow::anyhow!( - "tool calling not supported by model '{}'", - ollama_client.primary_model - )); - } - - let has_vision = capabilities.has_vision; - insight_cx - .span() - .set_attribute(KeyValue::new("model_has_vision", has_vision)); - insight_cx - .span() - .set_attribute(KeyValue::new("model_has_tool_calling", true)); - // 3. Fetch EXIF let exif = { let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao"); @@ -2603,7 +2713,10 @@ Return ONLY the summary, nothing else."#, } }; - // 7. Load image if vision capable + // 7. Load image if vision capable. + // In hybrid mode we ALSO describe it locally now so the + // description can be inlined as text — the OpenRouter chat model + // never receives the base64 image directly. let image_base64 = if has_vision { match self.load_image_as_base64(&file_path) { Ok(b64) => { @@ -2619,6 +2732,30 @@ Return ONLY the summary, nothing else."#, None }; + let hybrid_visual_description: Option = if is_hybrid { + match image_base64.as_deref() { + Some(b64) => match self.ollama.describe_image(b64).await { + Ok(desc) => { + log::info!( + "Hybrid: local vision describe succeeded ({} chars)", + desc.len() + ); + Some(desc) + } + Err(e) => { + log::warn!( + "Hybrid: local vision describe failed, continuing without: {}", + e + ); + None + } + }, + None => None, + } + } else { + None + }; + // 8. 
Build system message let cameron_id_note = match cameron_entity_id { Some(id) => format!( @@ -2672,8 +2809,13 @@ Return ONLY the summary, nothing else."#, .map(|c| format!("Contact/Person: {}", c)) .unwrap_or_else(|| "Contact/Person: unknown".to_string()); + let visual_block = hybrid_visual_description + .as_deref() + .map(|d| format!("Visual description (from local vision model):\n{}\n\n", d)) + .unwrap_or_default(); + let user_content = format!( - "Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\ + "{visual_block}Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\ Photo file path: {}\n\ Date taken: {}\n\ {}\n\ @@ -2686,21 +2828,32 @@ Return ONLY the summary, nothing else."#, contact_info, gps_info, tags_info, + visual_block = visual_block, ); - // 10. Define tools - let tools = Self::build_tool_definitions(has_vision); + // 10. Define tools. Hybrid mode omits `describe_photo` since the + // chat model receives the visual description inline. + let offer_describe_tool = has_vision && !is_hybrid; + let tools = Self::build_tool_definitions(offer_describe_tool); - // 11. Build initial messages + // 11. Build initial messages. In hybrid mode images are never + // attached to the wire message — the description is part of + // `user_content`. let system_msg = ChatMessage::system(system_content); let mut user_msg = ChatMessage::user(user_content); - if let Some(ref img) = image_base64 { + if !is_hybrid && let Some(ref img) = image_base64 { user_msg.images = Some(vec![img.clone()]); } let mut messages = vec![system_msg, user_msg]; - // 12. Agentic loop + // 12. Agentic loop — dispatch through the selected backend. 
+ let chat_backend: &dyn LlmClient = if let Some(ref or_c) = openrouter_client { + or_c + } else { + &ollama_client + }; + let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx); let loop_cx = insight_cx.with_span(loop_span); @@ -2713,7 +2866,7 @@ Return ONLY the summary, nothing else."#, iterations_used = iteration + 1; log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations); - let (response, prompt_tokens, eval_tokens) = ollama_client + let (response, prompt_tokens, eval_tokens) = chat_backend .chat_with_tools(messages.clone(), tools.clone()) .await?; @@ -2778,7 +2931,7 @@ Return ONLY the summary, nothing else."#, messages.push(ChatMessage::user( "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.", )); - let (final_response, prompt_tokens, eval_tokens) = ollama_client + let (final_response, prompt_tokens, eval_tokens) = chat_backend .chat_with_tools(messages.clone(), vec![]) .await?; last_prompt_eval_count = prompt_tokens; @@ -2792,10 +2945,18 @@ Return ONLY the summary, nothing else."#, .set_attribute(KeyValue::new("iterations_used", iterations_used as i64)); loop_cx.span().set_status(Status::Ok); - // 13. Generate title - let title = ollama_client - .generate_photo_title(&final_content, custom_system_prompt.as_deref()) + // 13. Generate title via the same backend so voice stays consistent. + let title_prompt = format!( + "Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\nCapture the key moment or theme. Return ONLY the title, nothing else.", + final_content + ); + let title_system = custom_system_prompt.as_deref().unwrap_or( + "You are my long term memory assistant. Use only the information provided. 
Do not invent details.", + ); + let title_raw = chat_backend + .generate(&title_prompt, Some(title_system), None) .await?; + let title = title_raw.trim().trim_matches('"').to_string(); log::info!("Agentic generated title: {}", title); log::info!( @@ -2814,15 +2975,17 @@ Return ONLY the summary, nothing else."#, }; // 15. Store insight (returns the persisted row including its new id) + let model_version = chat_backend.primary_model().to_string(); let insight = InsertPhotoInsight { library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), title, summary: final_content, generated_at: Utc::now().timestamp(), - model_version: ollama_client.primary_model.clone(), + model_version, is_current: true, training_messages, + backend: backend_label.clone(), }; let stored = { diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs index bc37960..f70c5a2 100644 --- a/src/bin/populate_knowledge.rs +++ b/src/bin/populate_knowledge.rs @@ -134,6 +134,7 @@ async fn main() -> anyhow::Result<()> { let generator = InsightGenerator::new( ollama, + None, sms_client, insight_dao.clone(), exif_dao, @@ -249,6 +250,7 @@ async fn main() -> anyhow::Result<()> { args.top_k, args.min_p, args.max_iterations, + None, ) .await { diff --git a/src/database/models.rs b/src/database/models.rs index d95876b..3d63f1a 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -100,6 +100,8 @@ pub struct InsertPhotoInsight { pub model_version: String, pub is_current: bool, pub training_messages: Option, + /// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat). + pub backend: String, } #[derive(Serialize, Queryable, Clone, Debug)] @@ -115,6 +117,8 @@ pub struct PhotoInsight { pub is_current: bool, pub training_messages: Option, pub approved: Option, + /// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat). 
+ pub backend: String, } // --- Libraries --- diff --git a/src/database/schema.rs b/src/database/schema.rs index 3352ca6..200cb15 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -142,6 +142,7 @@ diesel::table! { is_current -> Bool, training_messages -> Nullable, approved -> Nullable, + backend -> Text, } } diff --git a/src/state.rs b/src/state.rs index 78b98ad..72a509e 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,3 +1,4 @@ +use crate::ai::openrouter::OpenRouterClient; use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient}; use crate::database::{ CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao, @@ -31,6 +32,13 @@ pub struct AppState { pub preview_clips_path: String, pub excluded_dirs: Vec, pub ollama: OllamaClient, + /// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only + /// when a request explicitly opts into `backend=hybrid`. Currently + /// reached via `insight_generator`; kept here so future handlers + /// (insight_chat) can route to it without threading it through the + /// generator. 
+ #[allow(dead_code)] + pub openrouter: Option>, pub sms_client: SmsApiClient, pub insight_generator: InsightGenerator, } @@ -61,6 +69,7 @@ impl AppState { preview_clips_path: String, excluded_dirs: Vec, ollama: OllamaClient, + openrouter: Option>, sms_client: SmsApiClient, insight_generator: InsightGenerator, preview_dao: Arc>>, @@ -92,6 +101,7 @@ impl AppState { preview_clips_path, excluded_dirs, ollama, + openrouter, sms_client, insight_generator, } @@ -127,6 +137,8 @@ impl Default for AppState { ollama_fallback_model, ); + let openrouter = build_openrouter_from_env(); + let sms_api_url = env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string()); let sms_api_token = env::var("SMS_API_TOKEN").ok(); @@ -168,6 +180,7 @@ impl Default for AppState { // Initialize InsightGenerator with all data sources let insight_generator = InsightGenerator::new( ollama.clone(), + openrouter.clone(), sms_client.clone(), insight_dao.clone(), exif_dao.clone(), @@ -195,6 +208,7 @@ impl Default for AppState { preview_clips_path, Self::parse_excluded_dirs(), ollama, + openrouter, sms_client, insight_generator, preview_dao, @@ -202,6 +216,25 @@ impl Default for AppState { } } +/// Build an `OpenRouterClient` from environment variables. Returns `None` +/// when `OPENROUTER_API_KEY` is unset (the hybrid backend is then +/// unavailable and requests for it return a clear error). 
+fn build_openrouter_from_env() -> Option> { + let api_key = env::var("OPENROUTER_API_KEY").ok()?; + let base_url = env::var("OPENROUTER_BASE_URL").ok(); + let default_model = env::var("OPENROUTER_DEFAULT_MODEL") + .unwrap_or_else(|_| "anthropic/claude-sonnet-4".to_string()); + let mut client = OpenRouterClient::new(api_key, base_url, default_model); + client.set_attribution( + env::var("OPENROUTER_HTTP_REFERER").ok(), + env::var("OPENROUTER_APP_TITLE").ok(), + ); + if let Ok(model) = env::var("OPENROUTER_EMBEDDING_MODEL") { + client.set_embedding_model(model); + } + Some(Arc::new(client)) +} + #[cfg(test)] impl AppState { /// Creates an AppState instance for testing with temporary directories @@ -255,6 +288,7 @@ impl AppState { }; let insight_generator = InsightGenerator::new( ollama.clone(), + None, sms_client.clone(), insight_dao.clone(), exif_dao.clone(), @@ -286,6 +320,7 @@ impl AppState { preview_clips_path.to_string_lossy().to_string(), Vec::new(), // No excluded directories for test state ollama, + None, sms_client, insight_generator, preview_dao,