feat(ai): hybrid backend mode for agentic insights

Adds a `backend` column to photo_insights (default 'local', migration
2026-04-20-000000) and a corresponding optional `backend` field on the
agentic request. When a request sets backend=hybrid:

- The local Ollama vision model is called once via describe_image to
  produce a text description.
- The description is inlined into the first user message as text —
  no base64 image is ever sent to the chat model.
- The agentic tool-calling loop and title generation route through an
  OpenRouterClient (dispatched via &dyn LlmClient), letting the user
  pick any tool-capable model from OpenRouter per request.
- describe_photo is removed from the offered tools since the description
  is already present.

Embeddings and vision stay on local Ollama regardless of backend.
Hybrid mode requires OPENROUTER_API_KEY; handlers return a clear error
when hybrid is requested without it, and also when the selected
OpenRouter model lacks tool-calling support.

AppState gains an optional openrouter client built from
OPENROUTER_API_KEY / OPENROUTER_BASE_URL / OPENROUTER_DEFAULT_MODEL /
OPENROUTER_EMBEDDING_MODEL / attribution headers. Default model is
anthropic/claude-sonnet-4.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-20 22:30:40 -04:00
parent e799ba716c
commit 3ac0cd62eb
8 changed files with 342 additions and 100 deletions

View File

@@ -0,0 +1,23 @@
-- SQLite can't DROP COLUMN cleanly on older versions; rebuild the table.
-- NOTE(review): DROP TABLE also discards any indexes/triggers that existed on
-- photo_insights — if any were defined, recreate them after the rebuild.
-- TODO confirm none existed.
CREATE TABLE photo_insights_backup AS
SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
       is_current, training_messages, approved
FROM photo_insights;

DROP TABLE photo_insights;

CREATE TABLE photo_insights (
    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    library_id INTEGER NOT NULL REFERENCES libraries(id),
    rel_path TEXT NOT NULL,
    title TEXT NOT NULL,
    summary TEXT NOT NULL,
    generated_at BIGINT NOT NULL,
    model_version TEXT NOT NULL,
    is_current BOOLEAN NOT NULL DEFAULT TRUE,
    training_messages TEXT,
    approved BOOLEAN
);

-- Name the target columns explicitly so the copy does not silently depend on
-- the positional column order of the backup table matching the new schema.
INSERT INTO photo_insights
    (id, library_id, rel_path, title, summary, generated_at, model_version,
     is_current, training_messages, approved)
SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
       is_current, training_messages, approved
FROM photo_insights_backup;

DROP TABLE photo_insights_backup;

View File

@@ -0,0 +1 @@
-- Record which chat backend produced each insight row:
-- 'local' (Ollama with images) or 'hybrid' (local vision + OpenRouter chat).
-- Existing rows were all generated locally, hence the 'local' default.
ALTER TABLE photo_insights ADD COLUMN backend TEXT NOT NULL DEFAULT 'local';

View File

@@ -28,6 +28,10 @@ pub struct GeneratePhotoInsightRequest {
pub top_k: Option<i32>,
#[serde(default)]
pub min_p: Option<f32>,
/// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
/// OpenRouter chat). Only respected by the agentic endpoint.
#[serde(default)]
pub backend: Option<String>,
}
#[derive(Debug, Deserialize)]
@@ -65,6 +69,7 @@ pub struct PhotoInsightResponse {
pub eval_count: Option<i32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub approved: Option<bool>,
pub backend: String,
}
#[derive(Debug, Serialize)]
@@ -187,6 +192,7 @@ pub async fn get_insight_handler(
prompt_eval_count: None,
eval_count: None,
approved: insight.approved,
backend: insight.backend,
};
HttpResponse::Ok().json(response)
}
@@ -254,6 +260,7 @@ pub async fn get_all_insights_handler(
prompt_eval_count: None,
eval_count: None,
approved: insight.approved,
backend: insight.backend,
})
.collect();
@@ -309,6 +316,10 @@ pub async fn generate_agentic_insight_handler(
max_iterations
);
if let Some(ref b) = request.backend {
span.set_attribute(KeyValue::new("backend", b.clone()));
}
let result = insight_generator
.generate_agentic_insight_for_photo(
&normalized_path,
@@ -320,6 +331,7 @@ pub async fn generate_agentic_insight_handler(
request.top_k,
request.min_p,
max_iterations,
request.backend.clone(),
)
.await;
@@ -341,6 +353,7 @@ pub async fn generate_agentic_insight_handler(
prompt_eval_count,
eval_count,
approved: insight.approved,
backend: insight.backend,
};
HttpResponse::Ok().json(response)
}

View File

@@ -9,7 +9,9 @@ use std::fs::File;
use std::io::Cursor;
use std::sync::{Arc, Mutex};
use crate::ai::llm_client::LlmClient;
use crate::ai::ollama::{ChatMessage, OllamaClient, Tool};
use crate::ai::openrouter::OpenRouterClient;
use crate::ai::sms_client::SmsApiClient;
use crate::database::models::InsertPhotoInsight;
use crate::database::{
@@ -39,6 +41,9 @@ struct NominatimAddress {
#[derive(Clone)]
pub struct InsightGenerator {
ollama: OllamaClient,
/// Optional OpenRouter client, used when `backend=hybrid` is requested.
/// `None` when `OPENROUTER_API_KEY` is not configured.
openrouter: Option<Arc<OpenRouterClient>>,
sms_client: SmsApiClient,
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
@@ -59,6 +64,7 @@ pub struct InsightGenerator {
impl InsightGenerator {
pub fn new(
ollama: OllamaClient,
openrouter: Option<Arc<OpenRouterClient>>,
sms_client: SmsApiClient,
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
@@ -72,6 +78,7 @@ impl InsightGenerator {
) -> Self {
Self {
ollama,
openrouter,
sms_client,
insight_dao,
exif_dao,
@@ -1218,6 +1225,7 @@ impl InsightGenerator {
model_version: ollama_client.primary_model.clone(),
is_current: true,
training_messages: None,
backend: "local".to_string(),
};
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
@@ -2376,6 +2384,14 @@ Return ONLY the summary, nothing else."#,
/// Generate an AI insight for a photo using an agentic tool-calling loop.
/// The model decides which tools to call to gather context before writing the final insight.
///
/// `backend` selects the chat provider: `"local"` (default) routes the
/// agentic loop through the configured Ollama server with the image
/// attached to the first user message; `"hybrid"` asks the local Ollama
/// vision model to describe the image once, inlines the description as
/// text, and runs the loop through OpenRouter (chat only — embeddings
/// and describe calls stay local in either mode).
#[allow(clippy::too_many_arguments)]
pub async fn generate_agentic_insight_for_photo(
&self,
file_path: &str,
@@ -2387,6 +2403,7 @@ Return ONLY the summary, nothing else."#,
top_k: Option<i32>,
min_p: Option<f32>,
max_iterations: usize,
backend: Option<String>,
) -> Result<(Option<i32>, Option<i32>)> {
let tracer = global_tracer();
let current_cx = opentelemetry::Context::current();
@@ -2398,8 +2415,30 @@ Return ONLY the summary, nothing else."#,
span.set_attribute(KeyValue::new("file_path", file_path.clone()));
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
// 1. Create OllamaClient
let mut ollama_client = if let Some(ref model) = custom_model {
// 1a. Resolve backend label (defaults to "local").
let backend_label = backend
.as_deref()
.map(|s| s.trim().to_lowercase())
.filter(|s| !s.is_empty())
.unwrap_or_else(|| "local".to_string());
if !matches!(backend_label.as_str(), "local" | "hybrid") {
return Err(anyhow::anyhow!(
"unknown backend '{}'; expected 'local' or 'hybrid'",
backend_label
));
}
span.set_attribute(KeyValue::new("backend", backend_label.clone()));
let is_hybrid = backend_label == "hybrid";
// 1b. Always build an Ollama client. In local mode it owns the chat
// loop; in hybrid mode it still handles describe_image + any
// tool-local calls (e.g. if a future tool needs embeddings).
// Sampling overrides only apply in local mode — in hybrid the
// user's params belong to the OpenRouter chat client.
let apply_sampling_to_ollama = !is_hybrid;
let mut ollama_client = if let Some(ref model) = custom_model
&& !is_hybrid
{
log::info!("Using custom model for agentic: {}", model);
span.set_attribute(KeyValue::new("custom_model", model.clone()));
OllamaClient::new(
@@ -2409,10 +2448,13 @@ Return ONLY the summary, nothing else."#,
Some(model.clone()),
)
} else {
if !is_hybrid {
span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone()));
}
self.ollama.clone()
};
if apply_sampling_to_ollama {
if let Some(ctx) = num_ctx {
log::info!("Using custom context size: {}", ctx);
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
@@ -2441,17 +2483,85 @@ Return ONLY the summary, nothing else."#,
}
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
}
}
// 1c. In hybrid mode, clone the configured OpenRouter client and
// apply per-request overrides.
let openrouter_client: Option<OpenRouterClient> = if is_hybrid {
let arc = self.openrouter.as_ref().ok_or_else(|| {
anyhow::anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
})?;
let mut c: OpenRouterClient = (**arc).clone();
if let Some(ref m) = custom_model {
c.primary_model = m.clone();
span.set_attribute(KeyValue::new("custom_model", m.clone()));
}
span.set_attribute(KeyValue::new("openrouter_model", c.primary_model.clone()));
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
if let Some(t) = temperature {
span.set_attribute(KeyValue::new("temperature", t as f64));
}
if let Some(p) = top_p {
span.set_attribute(KeyValue::new("top_p", p as f64));
}
if let Some(k) = top_k {
span.set_attribute(KeyValue::new("top_k", k as i64));
}
if let Some(m) = min_p {
span.set_attribute(KeyValue::new("min_p", m as f64));
}
c.set_sampling_params(temperature, top_p, top_k, min_p);
}
if let Some(ctx) = num_ctx {
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
c.set_num_ctx(Some(ctx));
}
Some(c)
} else {
None
};
let insight_cx = current_cx.with_span(span);
// 2a. Verify the model exists on at least one server before checking capabilities
// 2. Verify chat model supports tool calling.
// - local: existing Ollama model availability + capability check.
// - hybrid: query OpenRouter's /models for the chosen model.
let has_vision = if is_hybrid {
let or_client = openrouter_client
.as_ref()
.expect("openrouter_client constructed when is_hybrid");
let caps = or_client
.model_capabilities(&or_client.primary_model)
.await
.map_err(|e| {
anyhow::anyhow!(
"OpenRouter capability lookup failed for '{}': {}",
or_client.primary_model,
e
)
})?;
if !caps.has_tool_calling {
return Err(anyhow::anyhow!(
"tool calling not supported by OpenRouter model '{}'",
or_client.primary_model
));
}
insight_cx
.span()
.set_attribute(KeyValue::new("model_has_tool_calling", true));
// In hybrid mode the chat model never sees images directly — we
// describe-then-inject, so `has_vision` drives only whether we
// bother loading the image to describe it, which we always do.
true
} else {
if let Some(ref model_name) = custom_model {
let available_on_primary =
OllamaClient::is_model_available(&ollama_client.primary_url, model_name)
.await
.unwrap_or(false);
let available_on_fallback = if let Some(ref fallback_url) = ollama_client.fallback_url {
let available_on_fallback =
if let Some(ref fallback_url) = ollama_client.fallback_url {
OllamaClient::is_model_available(fallback_url, model_name)
.await
.unwrap_or(false)
@@ -2467,7 +2577,6 @@ Return ONLY the summary, nothing else."#,
}
}
// 2b. Check tool calling capability — try primary, fall back to fallback URL
let model_name_for_caps = &ollama_client.primary_model;
let capabilities = match OllamaClient::check_model_capabilities(
&ollama_client.primary_url,
@@ -2477,7 +2586,6 @@ Return ONLY the summary, nothing else."#,
{
Ok(caps) => caps,
Err(_) => {
// Model may only be on the fallback server
let fallback_url = ollama_client.fallback_url.as_deref().ok_or_else(|| {
anyhow::anyhow!(
"Failed to check model capabilities for '{}': model not found on primary server and no fallback configured",
@@ -2503,14 +2611,16 @@ Return ONLY the summary, nothing else."#,
));
}
let has_vision = capabilities.has_vision;
insight_cx
.span()
.set_attribute(KeyValue::new("model_has_vision", has_vision));
.set_attribute(KeyValue::new("model_has_vision", capabilities.has_vision));
insight_cx
.span()
.set_attribute(KeyValue::new("model_has_tool_calling", true));
capabilities.has_vision
};
// 3. Fetch EXIF
let exif = {
let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
@@ -2603,7 +2713,10 @@ Return ONLY the summary, nothing else."#,
}
};
// 7. Load image if vision capable
// 7. Load image if vision capable.
// In hybrid mode we ALSO describe it locally now so the
// description can be inlined as text — the OpenRouter chat model
// never receives the base64 image directly.
let image_base64 = if has_vision {
match self.load_image_as_base64(&file_path) {
Ok(b64) => {
@@ -2619,6 +2732,30 @@ Return ONLY the summary, nothing else."#,
None
};
let hybrid_visual_description: Option<String> = if is_hybrid {
match image_base64.as_deref() {
Some(b64) => match self.ollama.describe_image(b64).await {
Ok(desc) => {
log::info!(
"Hybrid: local vision describe succeeded ({} chars)",
desc.len()
);
Some(desc)
}
Err(e) => {
log::warn!(
"Hybrid: local vision describe failed, continuing without: {}",
e
);
None
}
},
None => None,
}
} else {
None
};
// 8. Build system message
let cameron_id_note = match cameron_entity_id {
Some(id) => format!(
@@ -2672,8 +2809,13 @@ Return ONLY the summary, nothing else."#,
.map(|c| format!("Contact/Person: {}", c))
.unwrap_or_else(|| "Contact/Person: unknown".to_string());
let visual_block = hybrid_visual_description
.as_deref()
.map(|d| format!("Visual description (from local vision model):\n{}\n\n", d))
.unwrap_or_default();
let user_content = format!(
"Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\
"{visual_block}Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\
Photo file path: {}\n\
Date taken: {}\n\
{}\n\
@@ -2686,21 +2828,32 @@ Return ONLY the summary, nothing else."#,
contact_info,
gps_info,
tags_info,
visual_block = visual_block,
);
// 10. Define tools
let tools = Self::build_tool_definitions(has_vision);
// 10. Define tools. Hybrid mode omits `describe_photo` since the
// chat model receives the visual description inline.
let offer_describe_tool = has_vision && !is_hybrid;
let tools = Self::build_tool_definitions(offer_describe_tool);
// 11. Build initial messages
// 11. Build initial messages. In hybrid mode images are never
// attached to the wire message — the description is part of
// `user_content`.
let system_msg = ChatMessage::system(system_content);
let mut user_msg = ChatMessage::user(user_content);
if let Some(ref img) = image_base64 {
if !is_hybrid && let Some(ref img) = image_base64 {
user_msg.images = Some(vec![img.clone()]);
}
let mut messages = vec![system_msg, user_msg];
// 12. Agentic loop
// 12. Agentic loop — dispatch through the selected backend.
let chat_backend: &dyn LlmClient = if let Some(ref or_c) = openrouter_client {
or_c
} else {
&ollama_client
};
let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx);
let loop_cx = insight_cx.with_span(loop_span);
@@ -2713,7 +2866,7 @@ Return ONLY the summary, nothing else."#,
iterations_used = iteration + 1;
log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations);
let (response, prompt_tokens, eval_tokens) = ollama_client
let (response, prompt_tokens, eval_tokens) = chat_backend
.chat_with_tools(messages.clone(), tools.clone())
.await?;
@@ -2778,7 +2931,7 @@ Return ONLY the summary, nothing else."#,
messages.push(ChatMessage::user(
"Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.",
));
let (final_response, prompt_tokens, eval_tokens) = ollama_client
let (final_response, prompt_tokens, eval_tokens) = chat_backend
.chat_with_tools(messages.clone(), vec![])
.await?;
last_prompt_eval_count = prompt_tokens;
@@ -2792,10 +2945,18 @@ Return ONLY the summary, nothing else."#,
.set_attribute(KeyValue::new("iterations_used", iterations_used as i64));
loop_cx.span().set_status(Status::Ok);
// 13. Generate title
let title = ollama_client
.generate_photo_title(&final_content, custom_system_prompt.as_deref())
// 13. Generate title via the same backend so voice stays consistent.
let title_prompt = format!(
"Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\nCapture the key moment or theme. Return ONLY the title, nothing else.",
final_content
);
let title_system = custom_system_prompt.as_deref().unwrap_or(
"You are my long term memory assistant. Use only the information provided. Do not invent details.",
);
let title_raw = chat_backend
.generate(&title_prompt, Some(title_system), None)
.await?;
let title = title_raw.trim().trim_matches('"').to_string();
log::info!("Agentic generated title: {}", title);
log::info!(
@@ -2814,15 +2975,17 @@ Return ONLY the summary, nothing else."#,
};
// 15. Store insight (returns the persisted row including its new id)
let model_version = chat_backend.primary_model().to_string();
let insight = InsertPhotoInsight {
library_id: crate::libraries::PRIMARY_LIBRARY_ID,
file_path: file_path.to_string(),
title,
summary: final_content,
generated_at: Utc::now().timestamp(),
model_version: ollama_client.primary_model.clone(),
model_version,
is_current: true,
training_messages,
backend: backend_label.clone(),
};
let stored = {

View File

@@ -134,6 +134,7 @@ async fn main() -> anyhow::Result<()> {
let generator = InsightGenerator::new(
ollama,
None,
sms_client,
insight_dao.clone(),
exif_dao,
@@ -249,6 +250,7 @@ async fn main() -> anyhow::Result<()> {
args.top_k,
args.min_p,
args.max_iterations,
None,
)
.await
{

View File

@@ -100,6 +100,8 @@ pub struct InsertPhotoInsight {
pub model_version: String,
pub is_current: bool,
pub training_messages: Option<String>,
/// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
pub backend: String,
}
#[derive(Serialize, Queryable, Clone, Debug)]
@@ -115,6 +117,8 @@ pub struct PhotoInsight {
pub is_current: bool,
pub training_messages: Option<String>,
pub approved: Option<bool>,
/// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
pub backend: String,
}
// --- Libraries ---

View File

@@ -142,6 +142,7 @@ diesel::table! {
is_current -> Bool,
training_messages -> Nullable<Text>,
approved -> Nullable<Bool>,
backend -> Text,
}
}

View File

@@ -1,3 +1,4 @@
use crate::ai::openrouter::OpenRouterClient;
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
use crate::database::{
CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
@@ -31,6 +32,13 @@ pub struct AppState {
pub preview_clips_path: String,
pub excluded_dirs: Vec<String>,
pub ollama: OllamaClient,
/// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
/// when a request explicitly opts into `backend=hybrid`. Currently
/// reached via `insight_generator`; kept here so future handlers
/// (insight_chat) can route to it without threading it through the
/// generator.
#[allow(dead_code)]
pub openrouter: Option<Arc<OpenRouterClient>>,
pub sms_client: SmsApiClient,
pub insight_generator: InsightGenerator,
}
@@ -61,6 +69,7 @@ impl AppState {
preview_clips_path: String,
excluded_dirs: Vec<String>,
ollama: OllamaClient,
openrouter: Option<Arc<OpenRouterClient>>,
sms_client: SmsApiClient,
insight_generator: InsightGenerator,
preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
@@ -92,6 +101,7 @@ impl AppState {
preview_clips_path,
excluded_dirs,
ollama,
openrouter,
sms_client,
insight_generator,
}
@@ -127,6 +137,8 @@ impl Default for AppState {
ollama_fallback_model,
);
let openrouter = build_openrouter_from_env();
let sms_api_url =
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
let sms_api_token = env::var("SMS_API_TOKEN").ok();
@@ -168,6 +180,7 @@ impl Default for AppState {
// Initialize InsightGenerator with all data sources
let insight_generator = InsightGenerator::new(
ollama.clone(),
openrouter.clone(),
sms_client.clone(),
insight_dao.clone(),
exif_dao.clone(),
@@ -195,6 +208,7 @@ impl Default for AppState {
preview_clips_path,
Self::parse_excluded_dirs(),
ollama,
openrouter,
sms_client,
insight_generator,
preview_dao,
@@ -202,6 +216,25 @@ impl Default for AppState {
}
}
/// Build an `OpenRouterClient` from environment variables. Returns `None`
/// when `OPENROUTER_API_KEY` is unset or blank (the hybrid backend is then
/// unavailable and requests for it return a clear error).
///
/// Environment variables consulted:
/// - `OPENROUTER_API_KEY` — required; a present-but-blank value counts as unset
/// - `OPENROUTER_BASE_URL` — optional base-URL override
/// - `OPENROUTER_DEFAULT_MODEL` — defaults to `anthropic/claude-sonnet-4`
/// - `OPENROUTER_HTTP_REFERER` / `OPENROUTER_APP_TITLE` — attribution headers
/// - `OPENROUTER_EMBEDDING_MODEL` — optional embedding-model override
fn build_openrouter_from_env() -> Option<Arc<OpenRouterClient>> {
    // Treat an empty/whitespace key the same as unset: an empty bearer token
    // would only surface later as a confusing 401 from OpenRouter, whereas
    // `None` here yields the explicit "hybrid backend unavailable" error.
    let api_key = env::var("OPENROUTER_API_KEY")
        .ok()
        .filter(|k| !k.trim().is_empty())?;
    let base_url = env::var("OPENROUTER_BASE_URL").ok();
    // A blank model override would produce requests OpenRouter rejects;
    // fall back to the default in that case as well.
    let default_model = env::var("OPENROUTER_DEFAULT_MODEL")
        .ok()
        .filter(|m| !m.trim().is_empty())
        .unwrap_or_else(|| "anthropic/claude-sonnet-4".to_string());
    let mut client = OpenRouterClient::new(api_key, base_url, default_model);
    client.set_attribution(
        env::var("OPENROUTER_HTTP_REFERER").ok(),
        env::var("OPENROUTER_APP_TITLE").ok(),
    );
    if let Ok(model) = env::var("OPENROUTER_EMBEDDING_MODEL") {
        client.set_embedding_model(model);
    }
    Some(Arc::new(client))
}
#[cfg(test)]
impl AppState {
/// Creates an AppState instance for testing with temporary directories
@@ -255,6 +288,7 @@ impl AppState {
};
let insight_generator = InsightGenerator::new(
ollama.clone(),
None,
sms_client.clone(),
insight_dao.clone(),
exif_dao.clone(),
@@ -286,6 +320,7 @@ impl AppState {
preview_clips_path.to_string_lossy().to_string(),
Vec::new(), // No excluded directories for test state
ollama,
None,
sms_client,
insight_generator,
preview_dao,