feat(ai): hybrid backend mode for agentic insights
Adds a `backend` column to photo_insights (default 'local', migration 2026-04-20-000000) and a corresponding optional `backend` field on the agentic request. When a request sets backend=hybrid:

- The local Ollama vision model is called once via describe_image to produce a text description.
- The description is inlined into the first user message as text — no base64 image is ever sent to the chat model.
- The agentic tool-calling loop and title generation route through an OpenRouterClient (dispatched via &dyn LlmClient), letting the user pick any tool-capable model from OpenRouter per request.
- describe_photo is removed from the offered tools since the description is already present.

Embeddings and vision stay on local Ollama regardless of backend. Hybrid mode requires OPENROUTER_API_KEY; handlers return a clear error when hybrid is requested without it, and also when the selected OpenRouter model lacks tool-calling support. AppState gains an optional openrouter client built from OPENROUTER_API_KEY / OPENROUTER_BASE_URL / OPENROUTER_DEFAULT_MODEL / OPENROUTER_EMBEDDING_MODEL / attribution headers. Default model is anthropic/claude-sonnet-4.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,23 @@
|
||||
-- SQLite can't DROP COLUMN cleanly on older versions; rebuild the table.
-- Down migration: drop the `backend` column by copying every surviving
-- column to a backup table, recreating photo_insights without `backend`,
-- then copying the rows back.
CREATE TABLE photo_insights_backup AS
SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
       is_current, training_messages, approved
FROM photo_insights;

DROP TABLE photo_insights;

CREATE TABLE photo_insights (
    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    library_id INTEGER NOT NULL REFERENCES libraries(id),
    rel_path TEXT NOT NULL,
    title TEXT NOT NULL,
    summary TEXT NOT NULL,
    generated_at BIGINT NOT NULL,
    model_version TEXT NOT NULL,
    is_current BOOLEAN NOT NULL DEFAULT TRUE,
    training_messages TEXT,
    approved BOOLEAN
);

-- Name the target columns explicitly so the copy does not depend on the
-- ordinal column positions of the two tables happening to line up.
INSERT INTO photo_insights (id, library_id, rel_path, title, summary,
                            generated_at, model_version, is_current,
                            training_messages, approved)
SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
       is_current, training_messages, approved
FROM photo_insights_backup;

DROP TABLE photo_insights_backup;
|
||||
@@ -0,0 +1 @@
|
||||
-- Tag each insight with the chat backend that produced it:
-- 'local' (Ollama with images) or 'hybrid' (local vision + OpenRouter chat).
-- Existing rows were all generated locally, hence the 'local' default.
ALTER TABLE photo_insights ADD COLUMN backend TEXT NOT NULL DEFAULT 'local';
|
||||
@@ -28,6 +28,10 @@ pub struct GeneratePhotoInsightRequest {
|
||||
pub top_k: Option<i32>,
|
||||
#[serde(default)]
|
||||
pub min_p: Option<f32>,
|
||||
/// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
|
||||
/// OpenRouter chat). Only respected by the agentic endpoint.
|
||||
#[serde(default)]
|
||||
pub backend: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -65,6 +69,7 @@ pub struct PhotoInsightResponse {
|
||||
pub eval_count: Option<i32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub approved: Option<bool>,
|
||||
pub backend: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -187,6 +192,7 @@ pub async fn get_insight_handler(
|
||||
prompt_eval_count: None,
|
||||
eval_count: None,
|
||||
approved: insight.approved,
|
||||
backend: insight.backend,
|
||||
};
|
||||
HttpResponse::Ok().json(response)
|
||||
}
|
||||
@@ -254,6 +260,7 @@ pub async fn get_all_insights_handler(
|
||||
prompt_eval_count: None,
|
||||
eval_count: None,
|
||||
approved: insight.approved,
|
||||
backend: insight.backend,
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -309,6 +316,10 @@ pub async fn generate_agentic_insight_handler(
|
||||
max_iterations
|
||||
);
|
||||
|
||||
if let Some(ref b) = request.backend {
|
||||
span.set_attribute(KeyValue::new("backend", b.clone()));
|
||||
}
|
||||
|
||||
let result = insight_generator
|
||||
.generate_agentic_insight_for_photo(
|
||||
&normalized_path,
|
||||
@@ -320,6 +331,7 @@ pub async fn generate_agentic_insight_handler(
|
||||
request.top_k,
|
||||
request.min_p,
|
||||
max_iterations,
|
||||
request.backend.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -341,6 +353,7 @@ pub async fn generate_agentic_insight_handler(
|
||||
prompt_eval_count,
|
||||
eval_count,
|
||||
approved: insight.approved,
|
||||
backend: insight.backend,
|
||||
};
|
||||
HttpResponse::Ok().json(response)
|
||||
}
|
||||
|
||||
@@ -9,7 +9,9 @@ use std::fs::File;
|
||||
use std::io::Cursor;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::ai::llm_client::LlmClient;
|
||||
use crate::ai::ollama::{ChatMessage, OllamaClient, Tool};
|
||||
use crate::ai::openrouter::OpenRouterClient;
|
||||
use crate::ai::sms_client::SmsApiClient;
|
||||
use crate::database::models::InsertPhotoInsight;
|
||||
use crate::database::{
|
||||
@@ -39,6 +41,9 @@ struct NominatimAddress {
|
||||
#[derive(Clone)]
|
||||
pub struct InsightGenerator {
|
||||
ollama: OllamaClient,
|
||||
/// Optional OpenRouter client, used when `backend=hybrid` is requested.
|
||||
/// `None` when `OPENROUTER_API_KEY` is not configured.
|
||||
openrouter: Option<Arc<OpenRouterClient>>,
|
||||
sms_client: SmsApiClient,
|
||||
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
||||
exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
|
||||
@@ -59,6 +64,7 @@ pub struct InsightGenerator {
|
||||
impl InsightGenerator {
|
||||
pub fn new(
|
||||
ollama: OllamaClient,
|
||||
openrouter: Option<Arc<OpenRouterClient>>,
|
||||
sms_client: SmsApiClient,
|
||||
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
||||
exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
|
||||
@@ -72,6 +78,7 @@ impl InsightGenerator {
|
||||
) -> Self {
|
||||
Self {
|
||||
ollama,
|
||||
openrouter,
|
||||
sms_client,
|
||||
insight_dao,
|
||||
exif_dao,
|
||||
@@ -1218,6 +1225,7 @@ impl InsightGenerator {
|
||||
model_version: ollama_client.primary_model.clone(),
|
||||
is_current: true,
|
||||
training_messages: None,
|
||||
backend: "local".to_string(),
|
||||
};
|
||||
|
||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
@@ -2376,6 +2384,14 @@ Return ONLY the summary, nothing else."#,
|
||||
|
||||
/// Generate an AI insight for a photo using an agentic tool-calling loop.
|
||||
/// The model decides which tools to call to gather context before writing the final insight.
|
||||
///
|
||||
/// `backend` selects the chat provider: `"local"` (default) routes the
|
||||
/// agentic loop through the configured Ollama server with the image
|
||||
/// attached to the first user message; `"hybrid"` asks the local Ollama
|
||||
/// vision model to describe the image once, inlines the description as
|
||||
/// text, and runs the loop through OpenRouter (chat only — embeddings
|
||||
/// and describe calls stay local in either mode).
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn generate_agentic_insight_for_photo(
|
||||
&self,
|
||||
file_path: &str,
|
||||
@@ -2387,6 +2403,7 @@ Return ONLY the summary, nothing else."#,
|
||||
top_k: Option<i32>,
|
||||
min_p: Option<f32>,
|
||||
max_iterations: usize,
|
||||
backend: Option<String>,
|
||||
) -> Result<(Option<i32>, Option<i32>)> {
|
||||
let tracer = global_tracer();
|
||||
let current_cx = opentelemetry::Context::current();
|
||||
@@ -2398,8 +2415,30 @@ Return ONLY the summary, nothing else."#,
|
||||
span.set_attribute(KeyValue::new("file_path", file_path.clone()));
|
||||
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
||||
|
||||
// 1. Create OllamaClient
|
||||
let mut ollama_client = if let Some(ref model) = custom_model {
|
||||
// 1a. Resolve backend label (defaults to "local").
|
||||
let backend_label = backend
|
||||
.as_deref()
|
||||
.map(|s| s.trim().to_lowercase())
|
||||
.filter(|s| !s.is_empty())
|
||||
.unwrap_or_else(|| "local".to_string());
|
||||
if !matches!(backend_label.as_str(), "local" | "hybrid") {
|
||||
return Err(anyhow::anyhow!(
|
||||
"unknown backend '{}'; expected 'local' or 'hybrid'",
|
||||
backend_label
|
||||
));
|
||||
}
|
||||
span.set_attribute(KeyValue::new("backend", backend_label.clone()));
|
||||
let is_hybrid = backend_label == "hybrid";
|
||||
|
||||
// 1b. Always build an Ollama client. In local mode it owns the chat
|
||||
// loop; in hybrid mode it still handles describe_image + any
|
||||
// tool-local calls (e.g. if a future tool needs embeddings).
|
||||
// Sampling overrides only apply in local mode — in hybrid the
|
||||
// user's params belong to the OpenRouter chat client.
|
||||
let apply_sampling_to_ollama = !is_hybrid;
|
||||
let mut ollama_client = if let Some(ref model) = custom_model
|
||||
&& !is_hybrid
|
||||
{
|
||||
log::info!("Using custom model for agentic: {}", model);
|
||||
span.set_attribute(KeyValue::new("custom_model", model.clone()));
|
||||
OllamaClient::new(
|
||||
@@ -2409,108 +2448,179 @@ Return ONLY the summary, nothing else."#,
|
||||
Some(model.clone()),
|
||||
)
|
||||
} else {
|
||||
span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone()));
|
||||
if !is_hybrid {
|
||||
span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone()));
|
||||
}
|
||||
self.ollama.clone()
|
||||
};
|
||||
|
||||
if let Some(ctx) = num_ctx {
|
||||
log::info!("Using custom context size: {}", ctx);
|
||||
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
|
||||
ollama_client.set_num_ctx(Some(ctx));
|
||||
if apply_sampling_to_ollama {
|
||||
if let Some(ctx) = num_ctx {
|
||||
log::info!("Using custom context size: {}", ctx);
|
||||
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
|
||||
ollama_client.set_num_ctx(Some(ctx));
|
||||
}
|
||||
|
||||
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
||||
log::info!(
|
||||
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
|
||||
temperature,
|
||||
top_p,
|
||||
top_k,
|
||||
min_p
|
||||
);
|
||||
if let Some(t) = temperature {
|
||||
span.set_attribute(KeyValue::new("temperature", t as f64));
|
||||
}
|
||||
if let Some(p) = top_p {
|
||||
span.set_attribute(KeyValue::new("top_p", p as f64));
|
||||
}
|
||||
if let Some(k) = top_k {
|
||||
span.set_attribute(KeyValue::new("top_k", k as i64));
|
||||
}
|
||||
if let Some(m) = min_p {
|
||||
span.set_attribute(KeyValue::new("min_p", m as f64));
|
||||
}
|
||||
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
|
||||
}
|
||||
}
|
||||
|
||||
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
||||
log::info!(
|
||||
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
|
||||
temperature,
|
||||
top_p,
|
||||
top_k,
|
||||
min_p
|
||||
);
|
||||
if let Some(t) = temperature {
|
||||
span.set_attribute(KeyValue::new("temperature", t as f64));
|
||||
// 1c. In hybrid mode, clone the configured OpenRouter client and
|
||||
// apply per-request overrides.
|
||||
let openrouter_client: Option<OpenRouterClient> = if is_hybrid {
|
||||
let arc = self.openrouter.as_ref().ok_or_else(|| {
|
||||
anyhow::anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
|
||||
})?;
|
||||
let mut c: OpenRouterClient = (**arc).clone();
|
||||
if let Some(ref m) = custom_model {
|
||||
c.primary_model = m.clone();
|
||||
span.set_attribute(KeyValue::new("custom_model", m.clone()));
|
||||
}
|
||||
if let Some(p) = top_p {
|
||||
span.set_attribute(KeyValue::new("top_p", p as f64));
|
||||
span.set_attribute(KeyValue::new("openrouter_model", c.primary_model.clone()));
|
||||
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
||||
if let Some(t) = temperature {
|
||||
span.set_attribute(KeyValue::new("temperature", t as f64));
|
||||
}
|
||||
if let Some(p) = top_p {
|
||||
span.set_attribute(KeyValue::new("top_p", p as f64));
|
||||
}
|
||||
if let Some(k) = top_k {
|
||||
span.set_attribute(KeyValue::new("top_k", k as i64));
|
||||
}
|
||||
if let Some(m) = min_p {
|
||||
span.set_attribute(KeyValue::new("min_p", m as f64));
|
||||
}
|
||||
c.set_sampling_params(temperature, top_p, top_k, min_p);
|
||||
}
|
||||
if let Some(k) = top_k {
|
||||
span.set_attribute(KeyValue::new("top_k", k as i64));
|
||||
if let Some(ctx) = num_ctx {
|
||||
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
|
||||
c.set_num_ctx(Some(ctx));
|
||||
}
|
||||
if let Some(m) = min_p {
|
||||
span.set_attribute(KeyValue::new("min_p", m as f64));
|
||||
}
|
||||
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
|
||||
}
|
||||
Some(c)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let insight_cx = current_cx.with_span(span);
|
||||
|
||||
// 2a. Verify the model exists on at least one server before checking capabilities
|
||||
if let Some(ref model_name) = custom_model {
|
||||
let available_on_primary =
|
||||
OllamaClient::is_model_available(&ollama_client.primary_url, model_name)
|
||||
.await
|
||||
.unwrap_or(false);
|
||||
|
||||
let available_on_fallback = if let Some(ref fallback_url) = ollama_client.fallback_url {
|
||||
OllamaClient::is_model_available(fallback_url, model_name)
|
||||
.await
|
||||
.unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if !available_on_primary && !available_on_fallback {
|
||||
anyhow::bail!(
|
||||
"model not available: '{}' not found on any configured server",
|
||||
model_name
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// 2b. Check tool calling capability — try primary, fall back to fallback URL
|
||||
let model_name_for_caps = &ollama_client.primary_model;
|
||||
let capabilities = match OllamaClient::check_model_capabilities(
|
||||
&ollama_client.primary_url,
|
||||
model_name_for_caps,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(caps) => caps,
|
||||
Err(_) => {
|
||||
// Model may only be on the fallback server
|
||||
let fallback_url = ollama_client.fallback_url.as_deref().ok_or_else(|| {
|
||||
// 2. Verify chat model supports tool calling.
|
||||
// - local: existing Ollama model availability + capability check.
|
||||
// - hybrid: query OpenRouter's /models for the chosen model.
|
||||
let has_vision = if is_hybrid {
|
||||
let or_client = openrouter_client
|
||||
.as_ref()
|
||||
.expect("openrouter_client constructed when is_hybrid");
|
||||
let caps = or_client
|
||||
.model_capabilities(&or_client.primary_model)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
anyhow::anyhow!(
|
||||
"Failed to check model capabilities for '{}': model not found on primary server and no fallback configured",
|
||||
model_name_for_caps
|
||||
"OpenRouter capability lookup failed for '{}': {}",
|
||||
or_client.primary_model,
|
||||
e
|
||||
)
|
||||
})?;
|
||||
OllamaClient::check_model_capabilities(fallback_url, model_name_for_caps)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
anyhow::anyhow!(
|
||||
"Failed to check model capabilities for '{}': {}",
|
||||
model_name_for_caps,
|
||||
e
|
||||
)
|
||||
})?
|
||||
if !caps.has_tool_calling {
|
||||
return Err(anyhow::anyhow!(
|
||||
"tool calling not supported by OpenRouter model '{}'",
|
||||
or_client.primary_model
|
||||
));
|
||||
}
|
||||
insight_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("model_has_tool_calling", true));
|
||||
// In hybrid mode the chat model never sees images directly — we
|
||||
// describe-then-inject, so `has_vision` drives only whether we
|
||||
// bother loading the image to describe it, which we always do.
|
||||
true
|
||||
} else {
|
||||
if let Some(ref model_name) = custom_model {
|
||||
let available_on_primary =
|
||||
OllamaClient::is_model_available(&ollama_client.primary_url, model_name)
|
||||
.await
|
||||
.unwrap_or(false);
|
||||
|
||||
let available_on_fallback =
|
||||
if let Some(ref fallback_url) = ollama_client.fallback_url {
|
||||
OllamaClient::is_model_available(fallback_url, model_name)
|
||||
.await
|
||||
.unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if !available_on_primary && !available_on_fallback {
|
||||
anyhow::bail!(
|
||||
"model not available: '{}' not found on any configured server",
|
||||
model_name
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let model_name_for_caps = &ollama_client.primary_model;
|
||||
let capabilities = match OllamaClient::check_model_capabilities(
|
||||
&ollama_client.primary_url,
|
||||
model_name_for_caps,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(caps) => caps,
|
||||
Err(_) => {
|
||||
let fallback_url = ollama_client.fallback_url.as_deref().ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"Failed to check model capabilities for '{}': model not found on primary server and no fallback configured",
|
||||
model_name_for_caps
|
||||
)
|
||||
})?;
|
||||
OllamaClient::check_model_capabilities(fallback_url, model_name_for_caps)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
anyhow::anyhow!(
|
||||
"Failed to check model capabilities for '{}': {}",
|
||||
model_name_for_caps,
|
||||
e
|
||||
)
|
||||
})?
|
||||
}
|
||||
};
|
||||
|
||||
if !capabilities.has_tool_calling {
|
||||
return Err(anyhow::anyhow!(
|
||||
"tool calling not supported by model '{}'",
|
||||
ollama_client.primary_model
|
||||
));
|
||||
}
|
||||
|
||||
insight_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("model_has_vision", capabilities.has_vision));
|
||||
insight_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("model_has_tool_calling", true));
|
||||
|
||||
capabilities.has_vision
|
||||
};
|
||||
|
||||
if !capabilities.has_tool_calling {
|
||||
return Err(anyhow::anyhow!(
|
||||
"tool calling not supported by model '{}'",
|
||||
ollama_client.primary_model
|
||||
));
|
||||
}
|
||||
|
||||
let has_vision = capabilities.has_vision;
|
||||
insight_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("model_has_vision", has_vision));
|
||||
insight_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("model_has_tool_calling", true));
|
||||
|
||||
// 3. Fetch EXIF
|
||||
let exif = {
|
||||
let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
|
||||
@@ -2603,7 +2713,10 @@ Return ONLY the summary, nothing else."#,
|
||||
}
|
||||
};
|
||||
|
||||
// 7. Load image if vision capable
|
||||
// 7. Load image if vision capable.
|
||||
// In hybrid mode we ALSO describe it locally now so the
|
||||
// description can be inlined as text — the OpenRouter chat model
|
||||
// never receives the base64 image directly.
|
||||
let image_base64 = if has_vision {
|
||||
match self.load_image_as_base64(&file_path) {
|
||||
Ok(b64) => {
|
||||
@@ -2619,6 +2732,30 @@ Return ONLY the summary, nothing else."#,
|
||||
None
|
||||
};
|
||||
|
||||
let hybrid_visual_description: Option<String> = if is_hybrid {
|
||||
match image_base64.as_deref() {
|
||||
Some(b64) => match self.ollama.describe_image(b64).await {
|
||||
Ok(desc) => {
|
||||
log::info!(
|
||||
"Hybrid: local vision describe succeeded ({} chars)",
|
||||
desc.len()
|
||||
);
|
||||
Some(desc)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!(
|
||||
"Hybrid: local vision describe failed, continuing without: {}",
|
||||
e
|
||||
);
|
||||
None
|
||||
}
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// 8. Build system message
|
||||
let cameron_id_note = match cameron_entity_id {
|
||||
Some(id) => format!(
|
||||
@@ -2672,8 +2809,13 @@ Return ONLY the summary, nothing else."#,
|
||||
.map(|c| format!("Contact/Person: {}", c))
|
||||
.unwrap_or_else(|| "Contact/Person: unknown".to_string());
|
||||
|
||||
let visual_block = hybrid_visual_description
|
||||
.as_deref()
|
||||
.map(|d| format!("Visual description (from local vision model):\n{}\n\n", d))
|
||||
.unwrap_or_default();
|
||||
|
||||
let user_content = format!(
|
||||
"Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\
|
||||
"{visual_block}Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\
|
||||
Photo file path: {}\n\
|
||||
Date taken: {}\n\
|
||||
{}\n\
|
||||
@@ -2686,21 +2828,32 @@ Return ONLY the summary, nothing else."#,
|
||||
contact_info,
|
||||
gps_info,
|
||||
tags_info,
|
||||
visual_block = visual_block,
|
||||
);
|
||||
|
||||
// 10. Define tools
|
||||
let tools = Self::build_tool_definitions(has_vision);
|
||||
// 10. Define tools. Hybrid mode omits `describe_photo` since the
|
||||
// chat model receives the visual description inline.
|
||||
let offer_describe_tool = has_vision && !is_hybrid;
|
||||
let tools = Self::build_tool_definitions(offer_describe_tool);
|
||||
|
||||
// 11. Build initial messages
|
||||
// 11. Build initial messages. In hybrid mode images are never
|
||||
// attached to the wire message — the description is part of
|
||||
// `user_content`.
|
||||
let system_msg = ChatMessage::system(system_content);
|
||||
let mut user_msg = ChatMessage::user(user_content);
|
||||
if let Some(ref img) = image_base64 {
|
||||
if !is_hybrid && let Some(ref img) = image_base64 {
|
||||
user_msg.images = Some(vec![img.clone()]);
|
||||
}
|
||||
|
||||
let mut messages = vec![system_msg, user_msg];
|
||||
|
||||
// 12. Agentic loop
|
||||
// 12. Agentic loop — dispatch through the selected backend.
|
||||
let chat_backend: &dyn LlmClient = if let Some(ref or_c) = openrouter_client {
|
||||
or_c
|
||||
} else {
|
||||
&ollama_client
|
||||
};
|
||||
|
||||
let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx);
|
||||
let loop_cx = insight_cx.with_span(loop_span);
|
||||
|
||||
@@ -2713,7 +2866,7 @@ Return ONLY the summary, nothing else."#,
|
||||
iterations_used = iteration + 1;
|
||||
log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations);
|
||||
|
||||
let (response, prompt_tokens, eval_tokens) = ollama_client
|
||||
let (response, prompt_tokens, eval_tokens) = chat_backend
|
||||
.chat_with_tools(messages.clone(), tools.clone())
|
||||
.await?;
|
||||
|
||||
@@ -2778,7 +2931,7 @@ Return ONLY the summary, nothing else."#,
|
||||
messages.push(ChatMessage::user(
|
||||
"Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as Cameron.",
|
||||
));
|
||||
let (final_response, prompt_tokens, eval_tokens) = ollama_client
|
||||
let (final_response, prompt_tokens, eval_tokens) = chat_backend
|
||||
.chat_with_tools(messages.clone(), vec![])
|
||||
.await?;
|
||||
last_prompt_eval_count = prompt_tokens;
|
||||
@@ -2792,10 +2945,18 @@ Return ONLY the summary, nothing else."#,
|
||||
.set_attribute(KeyValue::new("iterations_used", iterations_used as i64));
|
||||
loop_cx.span().set_status(Status::Ok);
|
||||
|
||||
// 13. Generate title
|
||||
let title = ollama_client
|
||||
.generate_photo_title(&final_content, custom_system_prompt.as_deref())
|
||||
// 13. Generate title via the same backend so voice stays consistent.
|
||||
let title_prompt = format!(
|
||||
"Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\nCapture the key moment or theme. Return ONLY the title, nothing else.",
|
||||
final_content
|
||||
);
|
||||
let title_system = custom_system_prompt.as_deref().unwrap_or(
|
||||
"You are my long term memory assistant. Use only the information provided. Do not invent details.",
|
||||
);
|
||||
let title_raw = chat_backend
|
||||
.generate(&title_prompt, Some(title_system), None)
|
||||
.await?;
|
||||
let title = title_raw.trim().trim_matches('"').to_string();
|
||||
|
||||
log::info!("Agentic generated title: {}", title);
|
||||
log::info!(
|
||||
@@ -2814,15 +2975,17 @@ Return ONLY the summary, nothing else."#,
|
||||
};
|
||||
|
||||
// 15. Store insight (returns the persisted row including its new id)
|
||||
let model_version = chat_backend.primary_model().to_string();
|
||||
let insight = InsertPhotoInsight {
|
||||
library_id: crate::libraries::PRIMARY_LIBRARY_ID,
|
||||
file_path: file_path.to_string(),
|
||||
title,
|
||||
summary: final_content,
|
||||
generated_at: Utc::now().timestamp(),
|
||||
model_version: ollama_client.primary_model.clone(),
|
||||
model_version,
|
||||
is_current: true,
|
||||
training_messages,
|
||||
backend: backend_label.clone(),
|
||||
};
|
||||
|
||||
let stored = {
|
||||
|
||||
@@ -134,6 +134,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let generator = InsightGenerator::new(
|
||||
ollama,
|
||||
None,
|
||||
sms_client,
|
||||
insight_dao.clone(),
|
||||
exif_dao,
|
||||
@@ -249,6 +250,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
args.top_k,
|
||||
args.min_p,
|
||||
args.max_iterations,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -100,6 +100,8 @@ pub struct InsertPhotoInsight {
|
||||
pub model_version: String,
|
||||
pub is_current: bool,
|
||||
pub training_messages: Option<String>,
|
||||
/// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
|
||||
pub backend: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Queryable, Clone, Debug)]
|
||||
@@ -115,6 +117,8 @@ pub struct PhotoInsight {
|
||||
pub is_current: bool,
|
||||
pub training_messages: Option<String>,
|
||||
pub approved: Option<bool>,
|
||||
/// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
|
||||
pub backend: String,
|
||||
}
|
||||
|
||||
// --- Libraries ---
|
||||
|
||||
@@ -142,6 +142,7 @@ diesel::table! {
|
||||
is_current -> Bool,
|
||||
training_messages -> Nullable<Text>,
|
||||
approved -> Nullable<Bool>,
|
||||
backend -> Text,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
35
src/state.rs
35
src/state.rs
@@ -1,3 +1,4 @@
|
||||
use crate::ai::openrouter::OpenRouterClient;
|
||||
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
|
||||
use crate::database::{
|
||||
CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
|
||||
@@ -31,6 +32,13 @@ pub struct AppState {
|
||||
pub preview_clips_path: String,
|
||||
pub excluded_dirs: Vec<String>,
|
||||
pub ollama: OllamaClient,
|
||||
/// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
|
||||
/// when a request explicitly opts into `backend=hybrid`. Currently
|
||||
/// reached via `insight_generator`; kept here so future handlers
|
||||
/// (insight_chat) can route to it without threading it through the
|
||||
/// generator.
|
||||
#[allow(dead_code)]
|
||||
pub openrouter: Option<Arc<OpenRouterClient>>,
|
||||
pub sms_client: SmsApiClient,
|
||||
pub insight_generator: InsightGenerator,
|
||||
}
|
||||
@@ -61,6 +69,7 @@ impl AppState {
|
||||
preview_clips_path: String,
|
||||
excluded_dirs: Vec<String>,
|
||||
ollama: OllamaClient,
|
||||
openrouter: Option<Arc<OpenRouterClient>>,
|
||||
sms_client: SmsApiClient,
|
||||
insight_generator: InsightGenerator,
|
||||
preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
|
||||
@@ -92,6 +101,7 @@ impl AppState {
|
||||
preview_clips_path,
|
||||
excluded_dirs,
|
||||
ollama,
|
||||
openrouter,
|
||||
sms_client,
|
||||
insight_generator,
|
||||
}
|
||||
@@ -127,6 +137,8 @@ impl Default for AppState {
|
||||
ollama_fallback_model,
|
||||
);
|
||||
|
||||
let openrouter = build_openrouter_from_env();
|
||||
|
||||
let sms_api_url =
|
||||
env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
|
||||
let sms_api_token = env::var("SMS_API_TOKEN").ok();
|
||||
@@ -168,6 +180,7 @@ impl Default for AppState {
|
||||
// Initialize InsightGenerator with all data sources
|
||||
let insight_generator = InsightGenerator::new(
|
||||
ollama.clone(),
|
||||
openrouter.clone(),
|
||||
sms_client.clone(),
|
||||
insight_dao.clone(),
|
||||
exif_dao.clone(),
|
||||
@@ -195,6 +208,7 @@ impl Default for AppState {
|
||||
preview_clips_path,
|
||||
Self::parse_excluded_dirs(),
|
||||
ollama,
|
||||
openrouter,
|
||||
sms_client,
|
||||
insight_generator,
|
||||
preview_dao,
|
||||
@@ -202,6 +216,25 @@ impl Default for AppState {
|
||||
}
|
||||
}
|
||||
|
||||
/// Build an `OpenRouterClient` from environment variables. Returns `None`
|
||||
/// when `OPENROUTER_API_KEY` is unset (the hybrid backend is then
|
||||
/// unavailable and requests for it return a clear error).
|
||||
fn build_openrouter_from_env() -> Option<Arc<OpenRouterClient>> {
|
||||
let api_key = env::var("OPENROUTER_API_KEY").ok()?;
|
||||
let base_url = env::var("OPENROUTER_BASE_URL").ok();
|
||||
let default_model = env::var("OPENROUTER_DEFAULT_MODEL")
|
||||
.unwrap_or_else(|_| "anthropic/claude-sonnet-4".to_string());
|
||||
let mut client = OpenRouterClient::new(api_key, base_url, default_model);
|
||||
client.set_attribution(
|
||||
env::var("OPENROUTER_HTTP_REFERER").ok(),
|
||||
env::var("OPENROUTER_APP_TITLE").ok(),
|
||||
);
|
||||
if let Ok(model) = env::var("OPENROUTER_EMBEDDING_MODEL") {
|
||||
client.set_embedding_model(model);
|
||||
}
|
||||
Some(Arc::new(client))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl AppState {
|
||||
/// Creates an AppState instance for testing with temporary directories
|
||||
@@ -255,6 +288,7 @@ impl AppState {
|
||||
};
|
||||
let insight_generator = InsightGenerator::new(
|
||||
ollama.clone(),
|
||||
None,
|
||||
sms_client.clone(),
|
||||
insight_dao.clone(),
|
||||
exif_dao.clone(),
|
||||
@@ -286,6 +320,7 @@ impl AppState {
|
||||
preview_clips_path.to_string_lossy().to_string(),
|
||||
Vec::new(), // No excluded directories for test state
|
||||
ollama,
|
||||
None,
|
||||
sms_client,
|
||||
insight_generator,
|
||||
preview_dao,
|
||||
|
||||
Reference in New Issue
Block a user