ai: extract ResolvedBackend, remove ~480 lines of duplicated dispatch

Replace 5 copies of the ~80-line backend resolution pattern with a
single InsightGenerator::resolve_backend() builder that returns a
ResolvedBackend (chat + local clients, BackendKind enum, images_inline
flag). Tool dispatch now takes &ResolvedBackend instead of
&OllamaClient + model + backend strings.

Remove duplicated ollama/openrouter/llamacpp fields from
InsightChatService — InsightGenerator owns them and resolve_backend
uses them. Delete build_chat_clients (replaced by resolve_backend).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-24 15:00:50 -04:00
parent 0631820fbf
commit a8a661f70a
3 changed files with 158 additions and 640 deletions

View File

@@ -6,11 +6,9 @@ use std::collections::HashMap;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use tokio::sync::Mutex as TokioMutex; use tokio::sync::Mutex as TokioMutex;
use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
use crate::ai::insight_generator::InsightGenerator; use crate::ai::insight_generator::InsightGenerator;
use crate::ai::llm_client::{ChatMessage, LlmClient, LlmStreamEvent, Tool}; use crate::ai::llm_client::{ChatMessage, LlmStreamEvent, Tool};
use crate::ai::ollama::OllamaClient;
use crate::ai::llamacpp::LlamaCppClient;
use crate::ai::openrouter::OpenRouterClient;
use crate::database::InsightDao; use crate::database::InsightDao;
use crate::database::models::InsertPhotoInsight; use crate::database::models::InsertPhotoInsight;
use crate::otel::global_tracer; use crate::otel::global_tracer;
@@ -92,9 +90,6 @@ pub struct ChatTurnResult {
#[derive(Clone)] #[derive(Clone)]
pub struct InsightChatService { pub struct InsightChatService {
generator: Arc<InsightGenerator>, generator: Arc<InsightGenerator>,
ollama: OllamaClient,
openrouter: Option<Arc<OpenRouterClient>>,
llamacpp: Option<Arc<LlamaCppClient>>,
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>, insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
chat_locks: ChatLockMap, chat_locks: ChatLockMap,
} }
@@ -102,17 +97,11 @@ pub struct InsightChatService {
impl InsightChatService { impl InsightChatService {
pub fn new( pub fn new(
generator: Arc<InsightGenerator>, generator: Arc<InsightGenerator>,
ollama: OllamaClient,
openrouter: Option<Arc<OpenRouterClient>>,
llamacpp: Option<Arc<LlamaCppClient>>,
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>, insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
chat_locks: ChatLockMap, chat_locks: ChatLockMap,
) -> Self { ) -> Self {
Self { Self {
generator, generator,
ollama,
openrouter,
llamacpp,
insight_dao, insight_dao,
chat_locks, chat_locks,
} }
@@ -308,16 +297,9 @@ impl InsightChatService {
.filter(|s| !s.is_empty()) .filter(|s| !s.is_empty())
.unwrap_or_else(|| stored_backend.clone()); .unwrap_or_else(|| stored_backend.clone());
validate_cross_replay(&stored_backend, &effective_backend)?; validate_cross_replay(&stored_backend, &effective_backend)?;
let is_hybrid = effective_backend == "hybrid"; let kind = BackendKind::parse(&effective_backend)?;
let local_via_llamacpp = span.set_attribute(KeyValue::new("backend", kind.as_str()));
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
let describes_then_inlines = is_hybrid;
span.set_attribute(KeyValue::new("backend", effective_backend.clone()));
// 4. Build the chat backend client. Hybrid → OpenRouter; local with
// `LLM_BACKEND=llamacpp` → llama-swap; otherwise Ollama. Clones
// so per-request sampling/model overrides don't leak into shared
// state.
let max_iterations = req let max_iterations = req
.max_iterations .max_iterations
.unwrap_or(DEFAULT_MAX_ITERATIONS) .unwrap_or(DEFAULT_MAX_ITERATIONS)
@@ -325,113 +307,36 @@ impl InsightChatService {
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64)); span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
let stored_model = insight.model_version.clone(); let stored_model = insight.model_version.clone();
let custom_model = req let overrides = SamplingOverrides {
.model model: req.model.clone()
.clone() .or_else(|| Some(stored_model.clone()))
.or_else(|| Some(stored_model.clone())) .filter(|m| !m.is_empty()),
.filter(|m| !m.is_empty()); num_ctx: req.num_ctx,
temperature: req.temperature,
let mut ollama_client = self.ollama.clone(); top_p: req.top_p,
let mut openrouter_client: Option<OpenRouterClient> = None; top_k: req.top_k,
let mut llamacpp_client: Option<LlamaCppClient> = None; min_p: req.min_p,
if is_hybrid {
let arc = self.openrouter.as_ref().ok_or_else(|| {
anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
})?;
let mut c: OpenRouterClient = (**arc).clone();
if let Some(ref m) = custom_model {
c.primary_model = m.clone();
}
if req.temperature.is_some()
|| req.top_p.is_some()
|| req.top_k.is_some()
|| req.min_p.is_some()
{
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
}
if let Some(ctx) = req.num_ctx {
c.set_num_ctx(Some(ctx));
}
openrouter_client = Some(c);
} else if local_via_llamacpp {
let arc = self.llamacpp.as_ref().ok_or_else(|| {
anyhow!("LLM_BACKEND=llamacpp but LLAMA_SWAP_URL not configured")
})?;
let mut c: LlamaCppClient = (**arc).clone();
if let Some(ref m) = custom_model {
c.primary_model = m.clone();
}
if req.temperature.is_some()
|| req.top_p.is_some()
|| req.top_k.is_some()
|| req.min_p.is_some()
{
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
}
if let Some(ctx) = req.num_ctx {
c.set_num_ctx(Some(ctx));
}
llamacpp_client = Some(c);
} else {
// Pure local (Ollama): model swap. Build a new client when the
// chat model differs from the configured one.
if let Some(ref m) = custom_model
&& m != &self.ollama.primary_model
{
ollama_client = OllamaClient::new(
self.ollama.primary_url.clone(),
self.ollama.fallback_url.clone(),
m.clone(),
Some(m.clone()),
);
}
if req.temperature.is_some()
|| req.top_p.is_some()
|| req.top_k.is_some()
|| req.min_p.is_some()
{
ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
}
if let Some(ctx) = req.num_ctx {
ollama_client.set_num_ctx(Some(ctx));
}
}
let chat_backend: &dyn LlmClient = if let Some(ref c) = llamacpp_client {
c
} else if let Some(ref c) = openrouter_client {
c
} else {
&ollama_client
}; };
let model_used = chat_backend.primary_model().to_string(); let backend = self.generator.resolve_backend(kind, &overrides).await?;
let model_used = backend.model().to_string();
span.set_attribute(KeyValue::new("model", model_used.clone())); span.set_attribute(KeyValue::new("model", model_used.clone()));
// 5. Decide vision + tool set. In describe-then-inline mode // 5. Decide vision + tool set. In hybrid (describe-then-inline) mode
// (hybrid only) we omit `describe_photo`. In local and llamacpp // we omit `describe_photo`. Otherwise trust the stored history:
// we trust the stored history's first-user shape: if it carries // if the first user message carries images, describe_photo stays.
// `images`, the original model was vision-capable, and we keep
// `describe_photo` available.
let local_first_user_has_image = messages let local_first_user_has_image = messages
.iter() .iter()
.find(|m| m.role == "user") .find(|m| m.role == "user")
.and_then(|m| m.images.as_ref()) .and_then(|m| m.images.as_ref())
.map(|imgs| !imgs.is_empty()) .map(|imgs| !imgs.is_empty())
.unwrap_or(false); .unwrap_or(false);
let offer_describe_tool = !describes_then_inlines && local_first_user_has_image; let offer_describe_tool = backend.images_inline && local_first_user_has_image;
// current_gate_opts(has_vision) sets gate_opts.has_vision = has_vision
// and probes the per-table presence flags. Pass `offer_describe_tool`
// directly — the `!is_hybrid && local_first_user_has_image` decision
// is the chat-path's vision predicate.
let gate_opts = self.generator.current_gate_opts_for_persona( let gate_opts = self.generator.current_gate_opts_for_persona(
offer_describe_tool, offer_describe_tool,
Some((req.user_id, &active_persona)), Some((req.user_id, &active_persona)),
); );
let tools = InsightGenerator::build_tool_definitions(gate_opts); let tools = InsightGenerator::build_tool_definitions(gate_opts);
// Image base64 only needed when describe_photo is on the menu. Load
// lazily to avoid disk IO when the loop never invokes it.
let image_base64: Option<String> = if offer_describe_tool { let image_base64: Option<String> = if offer_describe_tool {
self.generator.load_image_as_base64(&normalized).ok() self.generator.load_image_as_base64(&normalized).ok()
} else { } else {
@@ -480,13 +385,13 @@ impl InsightChatService {
iterations_used = iteration + 1; iterations_used = iteration + 1;
log::info!("Chat iteration {}/{}", iterations_used, max_iterations); log::info!("Chat iteration {}/{}", iterations_used, max_iterations);
let (response, prompt_tokens, eval_tokens) = chat_backend let (response, prompt_tokens, eval_tokens) = backend
.chat()
.chat_with_tools(messages.clone(), tools.clone()) .chat_with_tools(messages.clone(), tools.clone())
.await?; .await?;
last_prompt_eval_count = prompt_tokens; last_prompt_eval_count = prompt_tokens;
last_eval_count = eval_tokens; last_eval_count = eval_tokens;
// Ollama rejects non-object tool-call arguments on replay.
let mut response = response; let mut response = response;
if let Some(ref mut tcs) = response.tool_calls { if let Some(ref mut tcs) = response.tool_calls {
for tc in tcs.iter_mut() { for tc in tcs.iter_mut() {
@@ -514,13 +419,11 @@ impl InsightChatService {
.execute_tool( .execute_tool(
&tool_call.function.name, &tool_call.function.name,
&tool_call.function.arguments, &tool_call.function.arguments,
&ollama_client, &backend,
&image_base64, &image_base64,
&normalized, &normalized,
req.user_id, req.user_id,
&active_persona, &active_persona,
&model_used,
&effective_backend,
&loop_cx, &loop_cx,
) )
.await; .await;
@@ -534,8 +437,6 @@ impl InsightChatService {
} }
if final_content.is_empty() { if final_content.is_empty() {
// The model never produced a final answer; ask once more without
// tools to force a textual reply.
log::info!( log::info!(
"Chat loop exhausted after {} iterations, requesting final answer", "Chat loop exhausted after {} iterations, requesting final answer",
iterations_used iterations_used
@@ -543,7 +444,8 @@ impl InsightChatService {
messages.push(ChatMessage::user( messages.push(ChatMessage::user(
"Please write your final answer now without calling any more tools.", "Please write your final answer now without calling any more tools.",
)); ));
let (final_response, prompt_tokens, eval_tokens) = chat_backend let (final_response, prompt_tokens, eval_tokens) = backend
.chat()
.chat_with_tools(messages.clone(), vec![]) .chat_with_tools(messages.clone(), vec![])
.await?; .await?;
last_prompt_eval_count = prompt_tokens; last_prompt_eval_count = prompt_tokens;
@@ -579,7 +481,8 @@ impl InsightChatService {
Capture the key moment or theme. Return ONLY the title, nothing else.", Capture the key moment or theme. Return ONLY the title, nothing else.",
final_content final_content
); );
let title_raw = chat_backend let title_raw = backend
.chat()
.generate( .generate(
&title_prompt, &title_prompt,
Some( Some(
@@ -604,7 +507,7 @@ impl InsightChatService {
model_version: model_used.clone(), model_version: model_used.clone(),
is_current: true, is_current: true,
training_messages: Some(json), training_messages: Some(json),
backend: effective_backend.clone(), backend: kind.as_str().to_string(),
fewshot_source_ids: None, fewshot_source_ids: None,
content_hash: None, content_hash: None,
}; };
@@ -629,7 +532,7 @@ impl InsightChatService {
prompt_eval_count: last_prompt_eval_count, prompt_eval_count: last_prompt_eval_count,
eval_count: last_eval_count, eval_count: last_eval_count,
amended_insight_id, amended_insight_id,
backend_used: effective_backend, backend_used: kind.as_str().to_string(),
model_used, model_used,
}) })
} }
@@ -818,9 +721,8 @@ impl InsightChatService {
.map(|s| s.trim().to_lowercase()) .map(|s| s.trim().to_lowercase())
.filter(|s| !s.is_empty()) .filter(|s| !s.is_empty())
.unwrap_or_else(|| stored_backend.clone()); .unwrap_or_else(|| stored_backend.clone());
validate_cross_replay(&stored_backend, &effective_backend)?; let kind = BackendKind::parse(&effective_backend)?;
let is_hybrid = effective_backend == "hybrid"; validate_cross_replay(&stored_backend, kind.as_str())?;
let describes_then_inlines = is_hybrid;
let max_iterations = req let max_iterations = req
.max_iterations .max_iterations
@@ -828,18 +730,20 @@ impl InsightChatService {
.clamp(1, env_max_iterations()); .clamp(1, env_max_iterations());
let stored_model = insight.model_version.clone(); let stored_model = insight.model_version.clone();
let custom_model = req let overrides = SamplingOverrides {
.model model: req.model.clone()
.clone() .or_else(|| Some(stored_model.clone()))
.or_else(|| Some(stored_model.clone())) .filter(|m| !m.is_empty()),
.filter(|m| !m.is_empty()); num_ctx: req.num_ctx,
temperature: req.temperature,
top_p: req.top_p,
top_k: req.top_k,
min_p: req.min_p,
};
let backend = self.generator.resolve_backend(kind, &overrides).await?;
let model_used = backend.model().to_string();
let (chat_backend_holder, ollama_client) = // Tool set — images_inline mode + first user turn carries an image →
self.build_chat_clients(&effective_backend, custom_model.as_deref(), &req)?;
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
let model_used = chat_backend.primary_model().to_string();
// Tool set — local/llamacpp mode + first user turn carries an image →
// offer describe_photo. Describe-then-inline mode (hybrid only): // offer describe_photo. Describe-then-inline mode (hybrid only):
// visual description was inlined at bootstrap, no describe tool needed. // visual description was inlined at bootstrap, no describe tool needed.
let local_first_user_has_image = messages let local_first_user_has_image = messages
@@ -848,7 +752,7 @@ impl InsightChatService {
.and_then(|m| m.images.as_ref()) .and_then(|m| m.images.as_ref())
.map(|imgs| !imgs.is_empty()) .map(|imgs| !imgs.is_empty())
.unwrap_or(false); .unwrap_or(false);
let offer_describe_tool = !describes_then_inlines && local_first_user_has_image; let offer_describe_tool = backend.images_inline && local_first_user_has_image;
let gate_opts = self.generator.current_gate_opts_for_persona( let gate_opts = self.generator.current_gate_opts_for_persona(
offer_describe_tool, offer_describe_tool,
Some((req.user_id, &active_persona)), Some((req.user_id, &active_persona)),
@@ -879,16 +783,13 @@ impl InsightChatService {
let outcome = self let outcome = self
.run_streaming_agentic_loop( .run_streaming_agentic_loop(
chat_backend, &backend,
&ollama_client,
&mut messages, &mut messages,
tools, tools,
&image_base64, &image_base64,
&normalized, &normalized,
req.user_id, req.user_id,
&active_persona, &active_persona,
&model_used,
&effective_backend,
max_iterations, max_iterations,
&tx, &tx,
) )
@@ -916,7 +817,7 @@ impl InsightChatService {
let mut amended_insight_id: Option<i32> = None; let mut amended_insight_id: Option<i32> = None;
if req.amend { if req.amend {
let title = self.generate_title(chat_backend, &final_content).await?; let title = self.generate_title(&backend, &final_content).await?;
// Amended rows intentionally do not inherit the parent's // Amended rows intentionally do not inherit the parent's
// `fewshot_source_ids`. The parent's few-shot influence is still // `fewshot_source_ids`. The parent's few-shot influence is still
@@ -932,7 +833,7 @@ impl InsightChatService {
model_version: model_used.clone(), model_version: model_used.clone(),
is_current: true, is_current: true,
training_messages: Some(json), training_messages: Some(json),
backend: effective_backend.clone(), backend: kind.as_str().to_string(),
fewshot_source_ids: None, fewshot_source_ids: None,
content_hash: None, content_hash: None,
}; };
@@ -958,7 +859,7 @@ impl InsightChatService {
eval_tokens: last_eval_count, eval_tokens: last_eval_count,
num_ctx: req.num_ctx, num_ctx: req.num_ctx,
amended_insight_id, amended_insight_id,
backend_used: effective_backend, backend_used: kind.as_str().to_string(),
model_used, model_used,
}) })
.await; .await;
@@ -984,21 +885,23 @@ impl InsightChatService {
.filter(|s| !s.trim().is_empty()) .filter(|s| !s.trim().is_empty())
.unwrap_or_else(|| "default".to_string()); .unwrap_or_else(|| "default".to_string());
let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?; let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?;
let is_hybrid = effective_backend == "hybrid"; let kind = BackendKind::parse(&effective_backend)?;
let local_via_llamacpp =
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
let describes_then_inlines = is_hybrid;
let max_iterations = req let max_iterations = req
.max_iterations .max_iterations
.unwrap_or(DEFAULT_MAX_ITERATIONS) .unwrap_or(DEFAULT_MAX_ITERATIONS)
.clamp(1, env_max_iterations()); .clamp(1, env_max_iterations());
let custom_model = req.model.clone().filter(|m| !m.is_empty()); let overrides = SamplingOverrides {
let (chat_backend_holder, ollama_client) = model: req.model.clone().filter(|m| !m.is_empty()),
self.build_chat_clients(&effective_backend, custom_model.as_deref(), &req)?; num_ctx: req.num_ctx,
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref(); temperature: req.temperature,
let model_used = chat_backend.primary_model().to_string(); top_p: req.top_p,
top_k: req.top_k,
min_p: req.min_p,
};
let backend = self.generator.resolve_backend(kind, &overrides).await?;
let model_used = backend.model().to_string();
// Load image bytes once. RAW preview fallback is handled inside // Load image bytes once. RAW preview fallback is handled inside
// load_image_as_base64. Errors degrade silently — a chat that // load_image_as_base64. Errors degrade silently — a chat that
@@ -1020,26 +923,17 @@ impl InsightChatService {
}); });
// Describe-then-inline (hybrid only): pre-describe the image so a // Describe-then-inline (hybrid only): pre-describe the image so a
// text-only chat model gets the visual description inline. llamacpp // text-only chat model gets the visual description inline.
// sends images directly to the chat model. // images_inline backends send images directly to the chat model.
let visual_block = if describes_then_inlines { let visual_block = if !backend.images_inline {
match image_base64.as_deref() { match image_base64.as_deref() {
Some(b64) => { Some(b64) => {
let described = if local_via_llamacpp { match backend.local().describe_image(b64).await {
self.llamacpp
.as_ref()
.expect("local_via_llamacpp guarantees Some")
.describe_image(b64)
.await
} else {
self.ollama.describe_image(b64).await
};
match described {
Ok(desc) => { Ok(desc) => {
format!("Visual description (from local vision model):\n{}\n", desc) format!("Visual description (from local vision model):\n{}\n", desc)
} }
Err(e) => { Err(e) => {
log::warn!("{} bootstrap: describe_image failed: {}", effective_backend, e); log::warn!("{} bootstrap: describe_image failed: {}", kind.as_str(), e);
String::new() String::new()
} }
} }
@@ -1050,10 +944,10 @@ impl InsightChatService {
String::new() String::new()
}; };
// Tool gates. Local + image present → expose describe_photo so // Tool gates. images_inline + image present → expose describe_photo so
// the chat model can re-look at the photo on demand. Hybrid: // the chat model can re-look at the photo on demand. Non-inline:
// already inlined, no tool needed. // already inlined, no tool needed.
let offer_describe_tool = !describes_then_inlines && image_base64.is_some(); let offer_describe_tool = backend.images_inline && image_base64.is_some();
let gate_opts = self.generator.current_gate_opts_for_persona( let gate_opts = self.generator.current_gate_opts_for_persona(
offer_describe_tool, offer_describe_tool,
Some((req.user_id, &active_persona)), Some((req.user_id, &active_persona)),
@@ -1079,23 +973,22 @@ impl InsightChatService {
); );
let system_msg = ChatMessage::system(system_content); let system_msg = ChatMessage::system(system_content);
let mut user_msg = ChatMessage::user(req.user_message.clone()); let mut user_msg = ChatMessage::user(req.user_message.clone());
if !describes_then_inlines && let Some(ref img) = image_base64 { if backend.images_inline {
user_msg.images = Some(vec![img.clone()]); if let Some(ref img) = image_base64 {
user_msg.images = Some(vec![img.clone()]);
}
} }
let mut messages = vec![system_msg, user_msg]; let mut messages = vec![system_msg, user_msg];
let outcome = self let outcome = self
.run_streaming_agentic_loop( .run_streaming_agentic_loop(
chat_backend, &backend,
&ollama_client,
&mut messages, &mut messages,
tools, tools,
&image_base64, &image_base64,
&normalized, &normalized,
req.user_id, req.user_id,
&active_persona, &active_persona,
&model_used,
&effective_backend,
max_iterations, max_iterations,
&tx, &tx,
) )
@@ -1108,7 +1001,7 @@ impl InsightChatService {
final_content, final_content,
} = outcome; } = outcome;
let title = self.generate_title(chat_backend, &final_content).await?; let title = self.generate_title(&backend, &final_content).await?;
let json = serde_json::to_string(&messages) let json = serde_json::to_string(&messages)
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?; .map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
@@ -1121,7 +1014,7 @@ impl InsightChatService {
model_version: model_used.clone(), model_version: model_used.clone(),
is_current: true, is_current: true,
training_messages: Some(json), training_messages: Some(json),
backend: effective_backend.clone(), backend: kind.as_str().to_string(),
fewshot_source_ids: None, fewshot_source_ids: None,
content_hash: None, content_hash: None,
}; };
@@ -1144,7 +1037,7 @@ impl InsightChatService {
eval_tokens: last_eval_count, eval_tokens: last_eval_count,
num_ctx: req.num_ctx, num_ctx: req.num_ctx,
amended_insight_id: Some(stored.id), amended_insight_id: Some(stored.id),
backend_used: effective_backend, backend_used: kind.as_str().to_string(),
model_used, model_used,
}) })
.await; .await;
@@ -1152,95 +1045,12 @@ impl InsightChatService {
Ok(()) Ok(())
} }
/// Set up chat clients (Ollama + optional OpenRouter / LlamaCpp) shared
/// by bootstrap and continuation. Returns the chat-side backend client
/// (boxed because each backend has a different concrete type) and the
/// Ollama client used for describe-image / local tool calls.
///
/// `effective_backend` must be one of `"local"` or `"hybrid"` (validated
/// upstream). Hybrid → OpenRouter; local with `LLM_BACKEND=llamacpp` →
/// llama-swap; pure local → Ollama. Returns the dispatched chat client
/// plus the (possibly per-request) Ollama client that the caller uses
/// for non-chat helpers (image describe in non-llamacpp mode, tool ops).
fn build_chat_clients(
&self,
effective_backend: &str,
custom_model: Option<&str>,
req: &ChatTurnRequest,
) -> Result<(Box<dyn LlmClient>, OllamaClient)> {
let mut ollama_client = self.ollama.clone();
if effective_backend == "hybrid" {
let arc = self.openrouter.as_ref().ok_or_else(|| {
anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
})?;
let mut c: OpenRouterClient = (**arc).clone();
if let Some(m) = custom_model {
c.primary_model = m.to_string();
}
if req.temperature.is_some()
|| req.top_p.is_some()
|| req.top_k.is_some()
|| req.min_p.is_some()
{
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
}
if let Some(ctx) = req.num_ctx {
c.set_num_ctx(Some(ctx));
}
return Ok((Box::new(c), ollama_client));
}
// Local mode — env switch decides between Ollama and llama-swap.
if crate::ai::local_backend_is_llamacpp()
&& let Some(arc) = self.llamacpp.as_ref()
{
let mut c: LlamaCppClient = (**arc).clone();
if let Some(m) = custom_model {
c.primary_model = m.to_string();
}
if req.temperature.is_some()
|| req.top_p.is_some()
|| req.top_k.is_some()
|| req.min_p.is_some()
{
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
}
if let Some(ctx) = req.num_ctx {
c.set_num_ctx(Some(ctx));
}
return Ok((Box::new(c), ollama_client));
}
if let Some(m) = custom_model
&& m != self.ollama.primary_model
{
ollama_client = OllamaClient::new(
self.ollama.primary_url.clone(),
self.ollama.fallback_url.clone(),
m.to_string(),
Some(m.to_string()),
);
}
if req.temperature.is_some()
|| req.top_p.is_some()
|| req.top_k.is_some()
|| req.min_p.is_some()
{
ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
}
if let Some(ctx) = req.num_ctx {
ollama_client.set_num_ctx(Some(ctx));
}
Ok((Box::new(ollama_client.clone()), ollama_client))
}
/// Generate a short title via the same chat backend so voice stays /// Generate a short title via the same chat backend so voice stays
/// consistent with the body. Mirrors generate_agentic_insight_for_photo's /// consistent with the body. Mirrors generate_agentic_insight_for_photo's
/// titling pass. /// titling pass.
async fn generate_title( async fn generate_title(
&self, &self,
chat_backend: &dyn LlmClient, backend: &ResolvedBackend,
final_content: &str, final_content: &str,
) -> Result<String> { ) -> Result<String> {
let title_prompt = format!( let title_prompt = format!(
@@ -1248,7 +1058,8 @@ impl InsightChatService {
Capture the key moment or theme. Return ONLY the title, nothing else.", Capture the key moment or theme. Return ONLY the title, nothing else.",
final_content final_content
); );
let title_raw = chat_backend let title_raw = backend
.chat()
.generate( .generate(
&title_prompt, &title_prompt,
Some( Some(
@@ -1266,18 +1077,13 @@ impl InsightChatService {
/// final assistant content. /// final assistant content.
async fn run_streaming_agentic_loop( async fn run_streaming_agentic_loop(
&self, &self,
chat_backend: &dyn LlmClient, backend: &ResolvedBackend,
ollama_client: &OllamaClient,
messages: &mut Vec<ChatMessage>, messages: &mut Vec<ChatMessage>,
tools: Vec<Tool>, tools: Vec<Tool>,
image_base64: &Option<String>, image_base64: &Option<String>,
normalized: &str, normalized: &str,
user_id: i32, user_id: i32,
active_persona: &str, active_persona: &str,
// Provenance — stamped onto any store_fact tool call made
// during this loop. Mirrors the non-streaming chat path.
model_used: &str,
effective_backend: &str,
max_iterations: usize, max_iterations: usize,
tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>, tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>,
) -> Result<AgenticLoopOutcome> { ) -> Result<AgenticLoopOutcome> {
@@ -1296,7 +1102,8 @@ impl InsightChatService {
}) })
.await; .await;
let mut stream = chat_backend let mut stream = backend
.chat()
.chat_with_tools_stream(messages.clone(), tools.clone()) .chat_with_tools_stream(messages.clone(), tools.clone())
.await?; .await?;
@@ -1353,13 +1160,11 @@ impl InsightChatService {
.execute_tool( .execute_tool(
&tool_call.function.name, &tool_call.function.name,
&tool_call.function.arguments, &tool_call.function.arguments,
ollama_client, backend,
image_base64, image_base64,
normalized, normalized,
user_id, user_id,
active_persona, active_persona,
model_used,
effective_backend,
&cx, &cx,
) )
.await; .await;
@@ -1394,7 +1199,8 @@ impl InsightChatService {
messages.push(ChatMessage::user( messages.push(ChatMessage::user(
"Please write your final answer now without calling any more tools.", "Please write your final answer now without calling any more tools.",
)); ));
let mut stream = chat_backend let mut stream = backend
.chat()
.chat_with_tools_stream(messages.clone(), vec![]) .chat_with_tools_stream(messages.clone(), vec![])
.await?; .await?;
let mut final_message: Option<ChatMessage> = None; let mut final_message: Option<ChatMessage> = None;

View File

@@ -1594,29 +1594,24 @@ Return ONLY the summary, nothing else."#,
&self, &self,
tool_name: &str, tool_name: &str,
arguments: &serde_json::Value, arguments: &serde_json::Value,
ollama: &OllamaClient, backend: &ResolvedBackend,
image_base64: &Option<String>, image_base64: &Option<String>,
file_path: &str, file_path: &str,
user_id: i32, user_id: i32,
persona_id: &str, persona_id: &str,
// Provenance — written into entity_facts.created_by_* when
// the loop calls store_fact. The caller knows the actual
// chat-runtime model and backend (which may differ from
// ollama.primary_model in hybrid mode where chat lives on
// OpenRouter while Ollama still handles vision).
model: &str,
backend: &str,
cx: &opentelemetry::Context, cx: &opentelemetry::Context,
) -> String { ) -> String {
let model = backend.model();
let backend_label = backend.kind.as_str();
let result = match tool_name { let result = match tool_name {
"search_rag" => self.tool_search_rag(arguments, ollama, cx).await, "search_rag" => self.tool_search_rag(arguments, backend.local(), cx).await,
"search_messages" => self.tool_search_messages(arguments, cx).await, "search_messages" => self.tool_search_messages(arguments, cx).await,
"get_sms_messages" => self.tool_get_sms_messages(arguments, cx).await, "get_sms_messages" => self.tool_get_sms_messages(arguments, cx).await,
"get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await, "get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await,
"get_location_history" => self.tool_get_location_history(arguments, cx).await, "get_location_history" => self.tool_get_location_history(arguments, cx).await,
"get_file_tags" => self.tool_get_file_tags(arguments, cx).await, "get_file_tags" => self.tool_get_file_tags(arguments, cx).await,
"get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await, "get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await,
"describe_photo" => self.tool_describe_photo(ollama, image_base64).await, "describe_photo" => self.tool_describe_photo(backend.local(), image_base64).await,
"reverse_geocode" => self.tool_reverse_geocode(arguments).await, "reverse_geocode" => self.tool_reverse_geocode(arguments).await,
"get_personal_place_at" => self.tool_get_personal_place_at(arguments).await, "get_personal_place_at" => self.tool_get_personal_place_at(arguments).await,
"recall_entities" => self.tool_recall_entities(arguments, cx).await, "recall_entities" => self.tool_recall_entities(arguments, cx).await,
@@ -1624,19 +1619,19 @@ Return ONLY the summary, nothing else."#,
self.tool_recall_facts_for_photo(arguments, user_id, persona_id, cx) self.tool_recall_facts_for_photo(arguments, user_id, persona_id, cx)
.await .await
} }
"store_entity" => self.tool_store_entity(arguments, ollama, cx).await, "store_entity" => self.tool_store_entity(arguments, cx).await,
"store_fact" => { "store_fact" => {
self.tool_store_fact( self.tool_store_fact(
arguments, file_path, user_id, persona_id, model, backend, cx, arguments, file_path, user_id, persona_id, model, backend_label, cx,
) )
.await .await
} }
"update_fact" => { "update_fact" => {
self.tool_update_fact(arguments, user_id, persona_id, model, backend, cx) self.tool_update_fact(arguments, user_id, persona_id, model, backend_label, cx)
.await .await
} }
"supersede_fact" => { "supersede_fact" => {
self.tool_supersede_fact(arguments, user_id, persona_id, model, backend, cx) self.tool_supersede_fact(arguments, user_id, persona_id, model, backend_label, cx)
.await .await
} }
"get_current_datetime" => Self::tool_get_current_datetime(), "get_current_datetime" => Self::tool_get_current_datetime(),
@@ -1654,7 +1649,7 @@ Return ONLY the summary, nothing else."#,
async fn tool_search_rag( async fn tool_search_rag(
&self, &self,
args: &serde_json::Value, args: &serde_json::Value,
ollama: &OllamaClient, local: &dyn LlmClient,
_cx: &opentelemetry::Context, _cx: &opentelemetry::Context,
) -> String { ) -> String {
let query = match args.get("query").and_then(|v| v.as_str()) { let query = match args.get("query").and_then(|v| v.as_str()) {
@@ -1718,7 +1713,7 @@ Return ONLY the summary, nothing else."#,
}; };
let final_results = if rerank_enabled && results.len() > limit { let final_results = if rerank_enabled && results.len() > limit {
match self.rerank_with_llm(&query, &results, limit, ollama).await { match self.rerank_with_llm(&query, &results, limit, local).await {
Ok(reordered) => reordered, Ok(reordered) => reordered,
Err(e) => { Err(e) => {
log::warn!("rerank failed, using vector order: {}", e); log::warn!("rerank failed, using vector order: {}", e);
@@ -1744,7 +1739,7 @@ Return ONLY the summary, nothing else."#,
query: &str, query: &str,
candidates: &[String], candidates: &[String],
limit: usize, limit: usize,
ollama: &OllamaClient, local: &dyn LlmClient,
) -> Result<Vec<String>> { ) -> Result<Vec<String>> {
let query_preview: String = query.chars().take(60).collect(); let query_preview: String = query.chars().take(60).collect();
log::info!( log::info!(
@@ -1785,15 +1780,7 @@ Return ONLY the summary, nothing else."#,
let system = Some( let system = Some(
"You are a terse relevance ranker. You output only numbers separated by commas.", "You are a terse relevance ranker. You output only numbers separated by commas.",
); );
let response = if crate::ai::local_backend_is_llamacpp() { let response = local.generate(&prompt, system, None).await?;
if let Some(ref lc) = self.llamacpp {
lc.generate(&prompt, system, None).await?
} else {
ollama.generate_no_think(&prompt, system).await?
}
} else {
ollama.generate_no_think(&prompt, system).await?
};
log::info!( log::info!(
"rerank: finished in {} ms (prompt={} chars)", "rerank: finished in {} ms (prompt={} chars)",
started.elapsed().as_millis(), started.elapsed().as_millis(),
@@ -2365,31 +2352,17 @@ Return ONLY the summary, nothing else."#,
out out
} }
/// Tool: describe_photo — generate a visual description of the photo.
/// Routes through llama-swap when `LLM_BACKEND=llamacpp`, Ollama otherwise.
async fn tool_describe_photo( async fn tool_describe_photo(
&self, &self,
ollama: &OllamaClient, local: &dyn LlmClient,
image_base64: &Option<String>, image_base64: &Option<String>,
) -> String { ) -> String {
log::info!("tool_describe_photo: generating visual description"); log::info!("tool_describe_photo: generating visual description");
match image_base64 { match image_base64 {
Some(img) => { Some(img) => match local.describe_image(img).await {
let result = if crate::ai::local_backend_is_llamacpp() { Ok(desc) => desc,
if let Some(ref lc) = self.llamacpp { Err(e) => format!("Error describing photo: {}", e),
lc.describe_image(img).await },
} else {
ollama.generate_photo_description(img).await
}
} else {
ollama.generate_photo_description(img).await
};
match result {
Ok(desc) => desc,
Err(e) => format!("Error describing photo: {}", e),
}
}
None => "No image available for description.".to_string(), None => "No image available for description.".to_string(),
} }
} }
@@ -2635,7 +2608,6 @@ Return ONLY the summary, nothing else."#,
async fn tool_store_entity( async fn tool_store_entity(
&self, &self,
args: &serde_json::Value, args: &serde_json::Value,
_ollama: &OllamaClient,
cx: &opentelemetry::Context, cx: &opentelemetry::Context,
) -> String { ) -> String {
use crate::database::models::InsertEntity; use crate::database::models::InsertEntity;
@@ -3775,243 +3747,25 @@ Return ONLY the summary, nothing else."#,
span.set_attribute(KeyValue::new("file_path", file_path.clone())); span.set_attribute(KeyValue::new("file_path", file_path.clone()));
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64)); span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
// 1a. Resolve backend label (defaults to "local"). // 1. Resolve backend + build clients.
let backend_label = backend let kind = BackendKind::parse(
.as_deref() backend.as_deref().unwrap_or("local"),
.map(|s| s.trim().to_lowercase()) )?;
.filter(|s| !s.is_empty()) span.set_attribute(KeyValue::new("backend", kind.as_str()));
.unwrap_or_else(|| "local".to_string()); let overrides = SamplingOverrides {
if !matches!(backend_label.as_str(), "local" | "hybrid") { model: custom_model,
return Err(anyhow::anyhow!( num_ctx,
"unknown backend '{}'; expected 'local' or 'hybrid'", temperature,
backend_label top_p,
)); top_k,
} min_p,
span.set_attribute(KeyValue::new("backend", backend_label.clone()));
let is_hybrid = backend_label == "hybrid";
// `LLM_BACKEND=llamacpp` swaps Ollama out for llama-swap as the
// "local" stack — chat + embeddings route through llama-swap.
// llamacpp models receive images directly (vision-capable); only
// hybrid mode (OpenRouter chat) uses describe-then-inline.
let local_via_llamacpp =
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
let describes_then_inlines = is_hybrid;
let ollama_is_chat = !is_hybrid && !local_via_llamacpp;
// 1b. Always build an Ollama client. In local mode it owns the chat
// loop; in hybrid/llamacpp mode it still handles tool-local calls
// (e.g. future embedding-backed tools). The chat backend is
// selected separately below.
// Sampling overrides only apply when Ollama is the chat backend.
let apply_sampling_to_ollama = ollama_is_chat;
let mut ollama_client = if let Some(ref model) = custom_model
&& ollama_is_chat
{
log::info!("Using custom model for agentic: {}", model);
span.set_attribute(KeyValue::new("custom_model", model.clone()));
OllamaClient::new(
self.ollama.primary_url.clone(),
self.ollama.fallback_url.clone(),
model.clone(),
Some(model.clone()),
)
} else {
if ollama_is_chat {
span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone()));
}
self.ollama.clone()
};
if apply_sampling_to_ollama {
if let Some(ctx) = num_ctx {
log::info!("Using custom context size: {}", ctx);
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
ollama_client.set_num_ctx(Some(ctx));
}
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
log::info!(
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
temperature,
top_p,
top_k,
min_p
);
if let Some(t) = temperature {
span.set_attribute(KeyValue::new("temperature", t as f64));
}
if let Some(p) = top_p {
span.set_attribute(KeyValue::new("top_p", p as f64));
}
if let Some(k) = top_k {
span.set_attribute(KeyValue::new("top_k", k as i64));
}
if let Some(m) = min_p {
span.set_attribute(KeyValue::new("min_p", m as f64));
}
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
}
}
// 1c. In hybrid mode, clone the configured OpenRouter client and
// apply per-request overrides.
let openrouter_client: Option<OpenRouterClient> = if is_hybrid {
let arc = self.openrouter.as_ref().ok_or_else(|| {
anyhow::anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
})?;
let mut c: OpenRouterClient = (**arc).clone();
if let Some(ref m) = custom_model {
c.primary_model = m.clone();
span.set_attribute(KeyValue::new("custom_model", m.clone()));
}
span.set_attribute(KeyValue::new("openrouter_model", c.primary_model.clone()));
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
if let Some(t) = temperature {
span.set_attribute(KeyValue::new("temperature", t as f64));
}
if let Some(p) = top_p {
span.set_attribute(KeyValue::new("top_p", p as f64));
}
if let Some(k) = top_k {
span.set_attribute(KeyValue::new("top_k", k as i64));
}
if let Some(m) = min_p {
span.set_attribute(KeyValue::new("min_p", m as f64));
}
c.set_sampling_params(temperature, top_p, top_k, min_p);
}
if let Some(ctx) = num_ctx {
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
c.set_num_ctx(Some(ctx));
}
Some(c)
} else {
None
};
// 1d. When `LLM_BACKEND=llamacpp` and we're in local mode (not
// hybrid — hybrid keeps chat on OpenRouter), clone the llamacpp
// client and apply per-request overrides. Same shape as the
// openrouter branch above; describe_image will route through
// the vision slot configured on the client.
let llamacpp_client: Option<LlamaCppClient> = if local_via_llamacpp && !is_hybrid {
let arc = self.llamacpp.as_ref().ok_or_else(|| {
anyhow::anyhow!("LLM_BACKEND=llamacpp but LLAMA_SWAP_URL not configured")
})?;
let mut c: LlamaCppClient = (**arc).clone();
if let Some(ref m) = custom_model {
c.primary_model = m.clone();
span.set_attribute(KeyValue::new("custom_model", m.clone()));
}
span.set_attribute(KeyValue::new("llamacpp_model", c.primary_model.clone()));
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
if let Some(t) = temperature {
span.set_attribute(KeyValue::new("temperature", t as f64));
}
if let Some(p) = top_p {
span.set_attribute(KeyValue::new("top_p", p as f64));
}
if let Some(k) = top_k {
span.set_attribute(KeyValue::new("top_k", k as i64));
}
if let Some(m) = min_p {
span.set_attribute(KeyValue::new("min_p", m as f64));
}
c.set_sampling_params(temperature, top_p, top_k, min_p);
}
if let Some(ctx) = num_ctx {
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
c.set_num_ctx(Some(ctx));
}
Some(c)
} else {
None
}; };
let backend = self.resolve_backend(kind, &overrides).await?;
span.set_attribute(KeyValue::new("model", backend.model().to_string()));
span.set_attribute(KeyValue::new("images_inline", backend.images_inline));
let insight_cx = current_cx.with_span(span); let insight_cx = current_cx.with_span(span);
// 2. Verify chat model supports tool calling.
// - local: existing Ollama model availability + capability check.
// - hybrid: trust the operator's curated allowlist
// (OPENROUTER_ALLOWED_MODELS) — no live precheck. A bad model id
// surfaces as a chat-call error on the next step.
let has_vision = if describes_then_inlines {
// Hybrid: chat model never sees images — describe-then-inject.
true
} else if local_via_llamacpp {
// llama-swap models receive images directly via OpenAI content
// parts. Capability probing isn't available (no `/api/show`),
// so assume vision support; a misconfigured model surfaces as
// a chat-call error.
true
} else {
if let Some(ref model_name) = custom_model {
let available_on_primary =
OllamaClient::is_model_available(&ollama_client.primary_url, model_name)
.await
.unwrap_or(false);
let available_on_fallback =
if let Some(ref fallback_url) = ollama_client.fallback_url {
OllamaClient::is_model_available(fallback_url, model_name)
.await
.unwrap_or(false)
} else {
false
};
if !available_on_primary && !available_on_fallback {
anyhow::bail!(
"model not available: '{}' not found on any configured server",
model_name
);
}
}
let model_name_for_caps = &ollama_client.primary_model;
let capabilities = match OllamaClient::check_model_capabilities(
&ollama_client.primary_url,
model_name_for_caps,
)
.await
{
Ok(caps) => caps,
Err(_) => {
let fallback_url = ollama_client.fallback_url.as_deref().ok_or_else(|| {
anyhow::anyhow!(
"Failed to check model capabilities for '{}': model not found on primary server and no fallback configured",
model_name_for_caps
)
})?;
OllamaClient::check_model_capabilities(fallback_url, model_name_for_caps)
.await
.map_err(|e| {
anyhow::anyhow!(
"Failed to check model capabilities for '{}': {}",
model_name_for_caps,
e
)
})?
}
};
if !capabilities.has_tool_calling {
return Err(anyhow::anyhow!(
"tool calling not supported by model '{}'",
ollama_client.primary_model
));
}
insight_cx
.span()
.set_attribute(KeyValue::new("model_has_vision", capabilities.has_vision));
insight_cx
.span()
.set_attribute(KeyValue::new("model_has_tool_calling", true));
capabilities.has_vision
};
// 3. Fetch EXIF // 3. Fetch EXIF
let exif = { let exif = {
let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao"); let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
@@ -4103,60 +3857,33 @@ Return ONLY the summary, nothing else."#,
} }
}; };
// 7. Load image if vision capable. // 7. Load image. Always attempted — vision-capable models get the
// In hybrid mode we ALSO describe it locally now so the // base64 inline; hybrid mode describes it locally and injects text.
// description can be inlined as text — the OpenRouter chat model let image_base64 = match self.load_image_as_base64(&file_path) {
// never receives the base64 image directly. Ok(b64) => {
let image_base64 = if has_vision { log::info!("Loaded image for agentic model");
match self.load_image_as_base64(&file_path) { Some(b64)
Ok(b64) => { }
log::info!("Loaded image for vision-capable agentic model"); Err(e) => {
Some(b64) log::warn!("Failed to load image for agentic: {}", e);
} None
Err(e) => {
log::warn!("Failed to load image for agentic vision: {}", e);
None
}
} }
} else {
None
}; };
// describe-then-inline path (hybrid only). Vision describe routes // Describe-then-inline (hybrid only). Vision describe routes through
// through whichever local backend is configured — llama-swap when // the local backend so non-text work stays off OpenRouter.
// `local_via_llamacpp`, otherwise Ollama. let inlined_visual_description: Option<String> = if !backend.images_inline {
let inlined_visual_description: Option<String> = if describes_then_inlines {
match image_base64.as_deref() { match image_base64.as_deref() {
Some(b64) => { Some(b64) => match backend.local().describe_image(b64).await {
let described = if local_via_llamacpp { Ok(desc) => {
self.llamacpp log::info!("{}: vision describe succeeded ({} chars)", kind, desc.len());
.as_ref() Some(desc)
.expect("local_via_llamacpp guarantees Some")
.describe_image(b64)
.await
} else {
self.ollama.describe_image(b64).await
};
match described {
Ok(desc) => {
log::info!(
"{}: vision describe succeeded ({} chars)",
backend_label,
desc.len()
);
Some(desc)
}
Err(e) => {
log::warn!(
"{}: vision describe failed, continuing without: {}",
backend_label,
e
);
None
}
} }
} Err(e) => {
log::warn!("{}: vision describe failed, continuing without: {}", kind, e);
None
}
},
None => None, None => None,
} }
} else { } else {
@@ -4228,34 +3955,24 @@ Return ONLY the summary, nothing else."#,
date = date_taken.format("%B %d, %Y"), date = date_taken.format("%B %d, %Y"),
); );
// 10. Define tools. Gate flags computed from current data presence; // 10. Define tools. describe_photo offered only when the chat model
// hybrid mode omits describe_photo since the chat model receives // sees images directly (images_inline); in hybrid mode the visual
// the visual description inline (so we pass `false` for // description is already inlined as text.
// has_vision in that mode regardless of the model's actual let gate_opts = self.current_gate_opts(backend.images_inline);
// capability).
let gate_opts = self.current_gate_opts(has_vision && !describes_then_inlines);
let tools = Self::build_tool_definitions(gate_opts); let tools = Self::build_tool_definitions(gate_opts);
// 11. Build initial messages. In describe-then-inline modes images // 11. Build initial messages. images_inline → attach base64 to the
// are never attached to the wire message — the description is part // user message; describe-then-inline → text was already injected.
// of `user_content`.
let system_msg = ChatMessage::system(system_content); let system_msg = ChatMessage::system(system_content);
let mut user_msg = ChatMessage::user(user_content); let mut user_msg = ChatMessage::user(user_content);
if !describes_then_inlines && let Some(ref img) = image_base64 { if backend.images_inline {
user_msg.images = Some(vec![img.clone()]); if let Some(ref img) = image_base64 {
user_msg.images = Some(vec![img.clone()]);
}
} }
let mut messages = vec![system_msg, user_msg]; let mut messages = vec![system_msg, user_msg];
// 12. Agentic loop — dispatch through the selected backend.
let chat_backend: &dyn LlmClient = if let Some(ref lc_c) = llamacpp_client {
lc_c
} else if let Some(ref or_c) = openrouter_client {
or_c
} else {
&ollama_client
};
let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx); let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx);
let loop_cx = insight_cx.with_span(loop_span); let loop_cx = insight_cx.with_span(loop_span);
@@ -4268,7 +3985,8 @@ Return ONLY the summary, nothing else."#,
iterations_used = iteration + 1; iterations_used = iteration + 1;
log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations); log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations);
let (response, prompt_tokens, eval_tokens) = chat_backend let (response, prompt_tokens, eval_tokens) = backend
.chat()
.chat_with_tools(messages.clone(), tools.clone()) .chat_with_tools(messages.clone(), tools.clone())
.await?; .await?;
@@ -4308,13 +4026,11 @@ Return ONLY the summary, nothing else."#,
.execute_tool( .execute_tool(
&tool_call.function.name, &tool_call.function.name,
&tool_call.function.arguments, &tool_call.function.arguments,
&ollama_client, &backend,
&image_base64, &image_base64,
&file_path, &file_path,
user_id, user_id,
&persona_id, &persona_id,
chat_backend.primary_model(),
&backend_label,
&loop_cx, &loop_cx,
) )
.await; .await;
@@ -4338,7 +4054,8 @@ Return ONLY the summary, nothing else."#,
"Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as {}.", "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as {}.",
user_display_name() user_display_name()
))); )));
let (final_response, prompt_tokens, eval_tokens) = chat_backend let (final_response, prompt_tokens, eval_tokens) = backend
.chat()
.chat_with_tools(messages.clone(), vec![]) .chat_with_tools(messages.clone(), vec![])
.await?; .await?;
last_prompt_eval_count = prompt_tokens; last_prompt_eval_count = prompt_tokens;
@@ -4360,7 +4077,8 @@ Return ONLY the summary, nothing else."#,
let title_system = custom_system_prompt.as_deref().unwrap_or( let title_system = custom_system_prompt.as_deref().unwrap_or(
"You are my long term memory assistant. Use only the information provided. Do not invent details.", "You are my long term memory assistant. Use only the information provided. Do not invent details.",
); );
let title_raw = chat_backend let title_raw = backend
.chat()
.generate(&title_prompt, Some(title_system), None) .generate(&title_prompt, Some(title_system), None)
.await?; .await?;
let title = title_raw.trim().trim_matches('"').to_string(); let title = title_raw.trim().trim_matches('"').to_string();
@@ -4383,7 +4101,7 @@ Return ONLY the summary, nothing else."#,
}; };
// 15. Store insight (returns the persisted row including its new id) // 15. Store insight (returns the persisted row including its new id)
let model_version = chat_backend.primary_model().to_string(); let model_version = backend.model().to_string();
let fewshot_source_ids_json = if fewshot_source_ids.is_empty() { let fewshot_source_ids_json = if fewshot_source_ids.is_empty() {
None None
} else { } else {
@@ -4398,7 +4116,7 @@ Return ONLY the summary, nothing else."#,
model_version, model_version,
is_current: true, is_current: true,
training_messages, training_messages,
backend: backend_label.clone(), backend: kind.as_str().to_string(),
fewshot_source_ids: fewshot_source_ids_json, fewshot_source_ids: fewshot_source_ids_json,
content_hash: None, content_hash: None,
}; };

View File

@@ -290,9 +290,6 @@ impl Default for AppState {
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new())); Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
let insight_chat = Arc::new(InsightChatService::new( let insight_chat = Arc::new(InsightChatService::new(
Arc::new(insight_generator.clone()), Arc::new(insight_generator.clone()),
ollama.clone(),
openrouter.clone(),
llamacpp.clone(),
insight_dao.clone(), insight_dao.clone(),
chat_locks, chat_locks,
)); ));
@@ -470,9 +467,6 @@ impl AppState {
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new())); Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
let insight_chat = Arc::new(InsightChatService::new( let insight_chat = Arc::new(InsightChatService::new(
Arc::new(insight_generator.clone()), Arc::new(insight_generator.clone()),
ollama.clone(),
None,
None,
insight_dao.clone(), insight_dao.clone(),
chat_locks, chat_locks,
)); ));