ai: extract ResolvedBackend, remove ~480 lines of duplicated dispatch
Replace 5 copies of the ~80-line backend resolution pattern with a single InsightGenerator::resolve_backend() builder that returns a ResolvedBackend (chat + local clients, BackendKind enum, images_inline flag). Tool dispatch now takes &ResolvedBackend instead of &OllamaClient + model + backend strings. Remove duplicated ollama/openrouter/llamacpp fields from InsightChatService — InsightGenerator owns them and resolve_backend uses them. Delete build_chat_clients (replaced by resolve_backend). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+84
-278
@@ -6,11 +6,9 @@ use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tokio::sync::Mutex as TokioMutex;
|
||||
|
||||
use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
|
||||
use crate::ai::insight_generator::InsightGenerator;
|
||||
use crate::ai::llm_client::{ChatMessage, LlmClient, LlmStreamEvent, Tool};
|
||||
use crate::ai::ollama::OllamaClient;
|
||||
use crate::ai::llamacpp::LlamaCppClient;
|
||||
use crate::ai::openrouter::OpenRouterClient;
|
||||
use crate::ai::llm_client::{ChatMessage, LlmStreamEvent, Tool};
|
||||
use crate::database::InsightDao;
|
||||
use crate::database::models::InsertPhotoInsight;
|
||||
use crate::otel::global_tracer;
|
||||
@@ -92,9 +90,6 @@ pub struct ChatTurnResult {
|
||||
#[derive(Clone)]
|
||||
pub struct InsightChatService {
|
||||
generator: Arc<InsightGenerator>,
|
||||
ollama: OllamaClient,
|
||||
openrouter: Option<Arc<OpenRouterClient>>,
|
||||
llamacpp: Option<Arc<LlamaCppClient>>,
|
||||
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
||||
chat_locks: ChatLockMap,
|
||||
}
|
||||
@@ -102,17 +97,11 @@ pub struct InsightChatService {
|
||||
impl InsightChatService {
|
||||
pub fn new(
|
||||
generator: Arc<InsightGenerator>,
|
||||
ollama: OllamaClient,
|
||||
openrouter: Option<Arc<OpenRouterClient>>,
|
||||
llamacpp: Option<Arc<LlamaCppClient>>,
|
||||
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
||||
chat_locks: ChatLockMap,
|
||||
) -> Self {
|
||||
Self {
|
||||
generator,
|
||||
ollama,
|
||||
openrouter,
|
||||
llamacpp,
|
||||
insight_dao,
|
||||
chat_locks,
|
||||
}
|
||||
@@ -308,16 +297,9 @@ impl InsightChatService {
|
||||
.filter(|s| !s.is_empty())
|
||||
.unwrap_or_else(|| stored_backend.clone());
|
||||
validate_cross_replay(&stored_backend, &effective_backend)?;
|
||||
let is_hybrid = effective_backend == "hybrid";
|
||||
let local_via_llamacpp =
|
||||
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
||||
let describes_then_inlines = is_hybrid;
|
||||
span.set_attribute(KeyValue::new("backend", effective_backend.clone()));
|
||||
let kind = BackendKind::parse(&effective_backend)?;
|
||||
span.set_attribute(KeyValue::new("backend", kind.as_str()));
|
||||
|
||||
// 4. Build the chat backend client. Hybrid → OpenRouter; local with
|
||||
// `LLM_BACKEND=llamacpp` → llama-swap; otherwise Ollama. Clones
|
||||
// so per-request sampling/model overrides don't leak into shared
|
||||
// state.
|
||||
let max_iterations = req
|
||||
.max_iterations
|
||||
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
||||
@@ -325,113 +307,36 @@ impl InsightChatService {
|
||||
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
||||
|
||||
let stored_model = insight.model_version.clone();
|
||||
let custom_model = req
|
||||
.model
|
||||
.clone()
|
||||
.or_else(|| Some(stored_model.clone()))
|
||||
.filter(|m| !m.is_empty());
|
||||
|
||||
let mut ollama_client = self.ollama.clone();
|
||||
let mut openrouter_client: Option<OpenRouterClient> = None;
|
||||
let mut llamacpp_client: Option<LlamaCppClient> = None;
|
||||
|
||||
if is_hybrid {
|
||||
let arc = self.openrouter.as_ref().ok_or_else(|| {
|
||||
anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
|
||||
})?;
|
||||
let mut c: OpenRouterClient = (**arc).clone();
|
||||
if let Some(ref m) = custom_model {
|
||||
c.primary_model = m.clone();
|
||||
}
|
||||
if req.temperature.is_some()
|
||||
|| req.top_p.is_some()
|
||||
|| req.top_k.is_some()
|
||||
|| req.min_p.is_some()
|
||||
{
|
||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
||||
}
|
||||
if let Some(ctx) = req.num_ctx {
|
||||
c.set_num_ctx(Some(ctx));
|
||||
}
|
||||
openrouter_client = Some(c);
|
||||
} else if local_via_llamacpp {
|
||||
let arc = self.llamacpp.as_ref().ok_or_else(|| {
|
||||
anyhow!("LLM_BACKEND=llamacpp but LLAMA_SWAP_URL not configured")
|
||||
})?;
|
||||
let mut c: LlamaCppClient = (**arc).clone();
|
||||
if let Some(ref m) = custom_model {
|
||||
c.primary_model = m.clone();
|
||||
}
|
||||
if req.temperature.is_some()
|
||||
|| req.top_p.is_some()
|
||||
|| req.top_k.is_some()
|
||||
|| req.min_p.is_some()
|
||||
{
|
||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
||||
}
|
||||
if let Some(ctx) = req.num_ctx {
|
||||
c.set_num_ctx(Some(ctx));
|
||||
}
|
||||
llamacpp_client = Some(c);
|
||||
} else {
|
||||
// Pure local (Ollama): model swap. Build a new client when the
|
||||
// chat model differs from the configured one.
|
||||
if let Some(ref m) = custom_model
|
||||
&& m != &self.ollama.primary_model
|
||||
{
|
||||
ollama_client = OllamaClient::new(
|
||||
self.ollama.primary_url.clone(),
|
||||
self.ollama.fallback_url.clone(),
|
||||
m.clone(),
|
||||
Some(m.clone()),
|
||||
);
|
||||
}
|
||||
if req.temperature.is_some()
|
||||
|| req.top_p.is_some()
|
||||
|| req.top_k.is_some()
|
||||
|| req.min_p.is_some()
|
||||
{
|
||||
ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
||||
}
|
||||
if let Some(ctx) = req.num_ctx {
|
||||
ollama_client.set_num_ctx(Some(ctx));
|
||||
}
|
||||
}
|
||||
|
||||
let chat_backend: &dyn LlmClient = if let Some(ref c) = llamacpp_client {
|
||||
c
|
||||
} else if let Some(ref c) = openrouter_client {
|
||||
c
|
||||
} else {
|
||||
&ollama_client
|
||||
let overrides = SamplingOverrides {
|
||||
model: req.model.clone()
|
||||
.or_else(|| Some(stored_model.clone()))
|
||||
.filter(|m| !m.is_empty()),
|
||||
num_ctx: req.num_ctx,
|
||||
temperature: req.temperature,
|
||||
top_p: req.top_p,
|
||||
top_k: req.top_k,
|
||||
min_p: req.min_p,
|
||||
};
|
||||
let model_used = chat_backend.primary_model().to_string();
|
||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||
let model_used = backend.model().to_string();
|
||||
span.set_attribute(KeyValue::new("model", model_used.clone()));
|
||||
|
||||
// 5. Decide vision + tool set. In describe-then-inline mode
|
||||
// (hybrid only) we omit `describe_photo`. In local and llamacpp
|
||||
// we trust the stored history's first-user shape: if it carries
|
||||
// `images`, the original model was vision-capable, and we keep
|
||||
// `describe_photo` available.
|
||||
// 5. Decide vision + tool set. In hybrid (describe-then-inline) mode
|
||||
// we omit `describe_photo`. Otherwise trust the stored history:
|
||||
// if the first user message carries images, describe_photo stays.
|
||||
let local_first_user_has_image = messages
|
||||
.iter()
|
||||
.find(|m| m.role == "user")
|
||||
.and_then(|m| m.images.as_ref())
|
||||
.map(|imgs| !imgs.is_empty())
|
||||
.unwrap_or(false);
|
||||
let offer_describe_tool = !describes_then_inlines && local_first_user_has_image;
|
||||
// current_gate_opts(has_vision) sets gate_opts.has_vision = has_vision
|
||||
// and probes the per-table presence flags. Pass `offer_describe_tool`
|
||||
// directly — the `!is_hybrid && local_first_user_has_image` decision
|
||||
// is the chat-path's vision predicate.
|
||||
let offer_describe_tool = backend.images_inline && local_first_user_has_image;
|
||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||
offer_describe_tool,
|
||||
Some((req.user_id, &active_persona)),
|
||||
);
|
||||
let tools = InsightGenerator::build_tool_definitions(gate_opts);
|
||||
|
||||
// Image base64 only needed when describe_photo is on the menu. Load
|
||||
// lazily to avoid disk IO when the loop never invokes it.
|
||||
let image_base64: Option<String> = if offer_describe_tool {
|
||||
self.generator.load_image_as_base64(&normalized).ok()
|
||||
} else {
|
||||
@@ -480,13 +385,13 @@ impl InsightChatService {
|
||||
iterations_used = iteration + 1;
|
||||
log::info!("Chat iteration {}/{}", iterations_used, max_iterations);
|
||||
|
||||
let (response, prompt_tokens, eval_tokens) = chat_backend
|
||||
let (response, prompt_tokens, eval_tokens) = backend
|
||||
.chat()
|
||||
.chat_with_tools(messages.clone(), tools.clone())
|
||||
.await?;
|
||||
last_prompt_eval_count = prompt_tokens;
|
||||
last_eval_count = eval_tokens;
|
||||
|
||||
// Ollama rejects non-object tool-call arguments on replay.
|
||||
let mut response = response;
|
||||
if let Some(ref mut tcs) = response.tool_calls {
|
||||
for tc in tcs.iter_mut() {
|
||||
@@ -514,13 +419,11 @@ impl InsightChatService {
|
||||
.execute_tool(
|
||||
&tool_call.function.name,
|
||||
&tool_call.function.arguments,
|
||||
&ollama_client,
|
||||
&backend,
|
||||
&image_base64,
|
||||
&normalized,
|
||||
req.user_id,
|
||||
&active_persona,
|
||||
&model_used,
|
||||
&effective_backend,
|
||||
&loop_cx,
|
||||
)
|
||||
.await;
|
||||
@@ -534,8 +437,6 @@ impl InsightChatService {
|
||||
}
|
||||
|
||||
if final_content.is_empty() {
|
||||
// The model never produced a final answer; ask once more without
|
||||
// tools to force a textual reply.
|
||||
log::info!(
|
||||
"Chat loop exhausted after {} iterations, requesting final answer",
|
||||
iterations_used
|
||||
@@ -543,7 +444,8 @@ impl InsightChatService {
|
||||
messages.push(ChatMessage::user(
|
||||
"Please write your final answer now without calling any more tools.",
|
||||
));
|
||||
let (final_response, prompt_tokens, eval_tokens) = chat_backend
|
||||
let (final_response, prompt_tokens, eval_tokens) = backend
|
||||
.chat()
|
||||
.chat_with_tools(messages.clone(), vec![])
|
||||
.await?;
|
||||
last_prompt_eval_count = prompt_tokens;
|
||||
@@ -579,7 +481,8 @@ impl InsightChatService {
|
||||
Capture the key moment or theme. Return ONLY the title, nothing else.",
|
||||
final_content
|
||||
);
|
||||
let title_raw = chat_backend
|
||||
let title_raw = backend
|
||||
.chat()
|
||||
.generate(
|
||||
&title_prompt,
|
||||
Some(
|
||||
@@ -604,7 +507,7 @@ impl InsightChatService {
|
||||
model_version: model_used.clone(),
|
||||
is_current: true,
|
||||
training_messages: Some(json),
|
||||
backend: effective_backend.clone(),
|
||||
backend: kind.as_str().to_string(),
|
||||
fewshot_source_ids: None,
|
||||
content_hash: None,
|
||||
};
|
||||
@@ -629,7 +532,7 @@ impl InsightChatService {
|
||||
prompt_eval_count: last_prompt_eval_count,
|
||||
eval_count: last_eval_count,
|
||||
amended_insight_id,
|
||||
backend_used: effective_backend,
|
||||
backend_used: kind.as_str().to_string(),
|
||||
model_used,
|
||||
})
|
||||
}
|
||||
@@ -818,9 +721,8 @@ impl InsightChatService {
|
||||
.map(|s| s.trim().to_lowercase())
|
||||
.filter(|s| !s.is_empty())
|
||||
.unwrap_or_else(|| stored_backend.clone());
|
||||
validate_cross_replay(&stored_backend, &effective_backend)?;
|
||||
let is_hybrid = effective_backend == "hybrid";
|
||||
let describes_then_inlines = is_hybrid;
|
||||
let kind = BackendKind::parse(&effective_backend)?;
|
||||
validate_cross_replay(&stored_backend, kind.as_str())?;
|
||||
|
||||
let max_iterations = req
|
||||
.max_iterations
|
||||
@@ -828,18 +730,20 @@ impl InsightChatService {
|
||||
.clamp(1, env_max_iterations());
|
||||
|
||||
let stored_model = insight.model_version.clone();
|
||||
let custom_model = req
|
||||
.model
|
||||
.clone()
|
||||
.or_else(|| Some(stored_model.clone()))
|
||||
.filter(|m| !m.is_empty());
|
||||
let overrides = SamplingOverrides {
|
||||
model: req.model.clone()
|
||||
.or_else(|| Some(stored_model.clone()))
|
||||
.filter(|m| !m.is_empty()),
|
||||
num_ctx: req.num_ctx,
|
||||
temperature: req.temperature,
|
||||
top_p: req.top_p,
|
||||
top_k: req.top_k,
|
||||
min_p: req.min_p,
|
||||
};
|
||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||
let model_used = backend.model().to_string();
|
||||
|
||||
let (chat_backend_holder, ollama_client) =
|
||||
self.build_chat_clients(&effective_backend, custom_model.as_deref(), &req)?;
|
||||
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
|
||||
let model_used = chat_backend.primary_model().to_string();
|
||||
|
||||
// Tool set — local/llamacpp mode + first user turn carries an image →
|
||||
// Tool set — images_inline mode + first user turn carries an image →
|
||||
// offer describe_photo. Describe-then-inline mode (hybrid only):
|
||||
// visual description was inlined at bootstrap, no describe tool needed.
|
||||
let local_first_user_has_image = messages
|
||||
@@ -848,7 +752,7 @@ impl InsightChatService {
|
||||
.and_then(|m| m.images.as_ref())
|
||||
.map(|imgs| !imgs.is_empty())
|
||||
.unwrap_or(false);
|
||||
let offer_describe_tool = !describes_then_inlines && local_first_user_has_image;
|
||||
let offer_describe_tool = backend.images_inline && local_first_user_has_image;
|
||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||
offer_describe_tool,
|
||||
Some((req.user_id, &active_persona)),
|
||||
@@ -879,16 +783,13 @@ impl InsightChatService {
|
||||
|
||||
let outcome = self
|
||||
.run_streaming_agentic_loop(
|
||||
chat_backend,
|
||||
&ollama_client,
|
||||
&backend,
|
||||
&mut messages,
|
||||
tools,
|
||||
&image_base64,
|
||||
&normalized,
|
||||
req.user_id,
|
||||
&active_persona,
|
||||
&model_used,
|
||||
&effective_backend,
|
||||
max_iterations,
|
||||
&tx,
|
||||
)
|
||||
@@ -916,7 +817,7 @@ impl InsightChatService {
|
||||
|
||||
let mut amended_insight_id: Option<i32> = None;
|
||||
if req.amend {
|
||||
let title = self.generate_title(chat_backend, &final_content).await?;
|
||||
let title = self.generate_title(&backend, &final_content).await?;
|
||||
|
||||
// Amended rows intentionally do not inherit the parent's
|
||||
// `fewshot_source_ids`. The parent's few-shot influence is still
|
||||
@@ -932,7 +833,7 @@ impl InsightChatService {
|
||||
model_version: model_used.clone(),
|
||||
is_current: true,
|
||||
training_messages: Some(json),
|
||||
backend: effective_backend.clone(),
|
||||
backend: kind.as_str().to_string(),
|
||||
fewshot_source_ids: None,
|
||||
content_hash: None,
|
||||
};
|
||||
@@ -958,7 +859,7 @@ impl InsightChatService {
|
||||
eval_tokens: last_eval_count,
|
||||
num_ctx: req.num_ctx,
|
||||
amended_insight_id,
|
||||
backend_used: effective_backend,
|
||||
backend_used: kind.as_str().to_string(),
|
||||
model_used,
|
||||
})
|
||||
.await;
|
||||
@@ -984,21 +885,23 @@ impl InsightChatService {
|
||||
.filter(|s| !s.trim().is_empty())
|
||||
.unwrap_or_else(|| "default".to_string());
|
||||
let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?;
|
||||
let is_hybrid = effective_backend == "hybrid";
|
||||
let local_via_llamacpp =
|
||||
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
||||
let describes_then_inlines = is_hybrid;
|
||||
let kind = BackendKind::parse(&effective_backend)?;
|
||||
|
||||
let max_iterations = req
|
||||
.max_iterations
|
||||
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
||||
.clamp(1, env_max_iterations());
|
||||
|
||||
let custom_model = req.model.clone().filter(|m| !m.is_empty());
|
||||
let (chat_backend_holder, ollama_client) =
|
||||
self.build_chat_clients(&effective_backend, custom_model.as_deref(), &req)?;
|
||||
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
|
||||
let model_used = chat_backend.primary_model().to_string();
|
||||
let overrides = SamplingOverrides {
|
||||
model: req.model.clone().filter(|m| !m.is_empty()),
|
||||
num_ctx: req.num_ctx,
|
||||
temperature: req.temperature,
|
||||
top_p: req.top_p,
|
||||
top_k: req.top_k,
|
||||
min_p: req.min_p,
|
||||
};
|
||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||
let model_used = backend.model().to_string();
|
||||
|
||||
// Load image bytes once. RAW preview fallback is handled inside
|
||||
// load_image_as_base64. Errors degrade silently — a chat that
|
||||
@@ -1020,26 +923,17 @@ impl InsightChatService {
|
||||
});
|
||||
|
||||
// Describe-then-inline (hybrid only): pre-describe the image so a
|
||||
// text-only chat model gets the visual description inline. llamacpp
|
||||
// sends images directly to the chat model.
|
||||
let visual_block = if describes_then_inlines {
|
||||
// text-only chat model gets the visual description inline.
|
||||
// images_inline backends send images directly to the chat model.
|
||||
let visual_block = if !backend.images_inline {
|
||||
match image_base64.as_deref() {
|
||||
Some(b64) => {
|
||||
let described = if local_via_llamacpp {
|
||||
self.llamacpp
|
||||
.as_ref()
|
||||
.expect("local_via_llamacpp guarantees Some")
|
||||
.describe_image(b64)
|
||||
.await
|
||||
} else {
|
||||
self.ollama.describe_image(b64).await
|
||||
};
|
||||
match described {
|
||||
match backend.local().describe_image(b64).await {
|
||||
Ok(desc) => {
|
||||
format!("Visual description (from local vision model):\n{}\n", desc)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("{} bootstrap: describe_image failed: {}", effective_backend, e);
|
||||
log::warn!("{} bootstrap: describe_image failed: {}", kind.as_str(), e);
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
@@ -1050,10 +944,10 @@ impl InsightChatService {
|
||||
String::new()
|
||||
};
|
||||
|
||||
// Tool gates. Local + image present → expose describe_photo so
|
||||
// the chat model can re-look at the photo on demand. Hybrid:
|
||||
// Tool gates. images_inline + image present → expose describe_photo so
|
||||
// the chat model can re-look at the photo on demand. Non-inline:
|
||||
// already inlined, no tool needed.
|
||||
let offer_describe_tool = !describes_then_inlines && image_base64.is_some();
|
||||
let offer_describe_tool = backend.images_inline && image_base64.is_some();
|
||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||
offer_describe_tool,
|
||||
Some((req.user_id, &active_persona)),
|
||||
@@ -1079,23 +973,22 @@ impl InsightChatService {
|
||||
);
|
||||
let system_msg = ChatMessage::system(system_content);
|
||||
let mut user_msg = ChatMessage::user(req.user_message.clone());
|
||||
if !describes_then_inlines && let Some(ref img) = image_base64 {
|
||||
user_msg.images = Some(vec![img.clone()]);
|
||||
if backend.images_inline {
|
||||
if let Some(ref img) = image_base64 {
|
||||
user_msg.images = Some(vec![img.clone()]);
|
||||
}
|
||||
}
|
||||
let mut messages = vec![system_msg, user_msg];
|
||||
|
||||
let outcome = self
|
||||
.run_streaming_agentic_loop(
|
||||
chat_backend,
|
||||
&ollama_client,
|
||||
&backend,
|
||||
&mut messages,
|
||||
tools,
|
||||
&image_base64,
|
||||
&normalized,
|
||||
req.user_id,
|
||||
&active_persona,
|
||||
&model_used,
|
||||
&effective_backend,
|
||||
max_iterations,
|
||||
&tx,
|
||||
)
|
||||
@@ -1108,7 +1001,7 @@ impl InsightChatService {
|
||||
final_content,
|
||||
} = outcome;
|
||||
|
||||
let title = self.generate_title(chat_backend, &final_content).await?;
|
||||
let title = self.generate_title(&backend, &final_content).await?;
|
||||
|
||||
let json = serde_json::to_string(&messages)
|
||||
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
|
||||
@@ -1121,7 +1014,7 @@ impl InsightChatService {
|
||||
model_version: model_used.clone(),
|
||||
is_current: true,
|
||||
training_messages: Some(json),
|
||||
backend: effective_backend.clone(),
|
||||
backend: kind.as_str().to_string(),
|
||||
fewshot_source_ids: None,
|
||||
content_hash: None,
|
||||
};
|
||||
@@ -1144,7 +1037,7 @@ impl InsightChatService {
|
||||
eval_tokens: last_eval_count,
|
||||
num_ctx: req.num_ctx,
|
||||
amended_insight_id: Some(stored.id),
|
||||
backend_used: effective_backend,
|
||||
backend_used: kind.as_str().to_string(),
|
||||
model_used,
|
||||
})
|
||||
.await;
|
||||
@@ -1152,95 +1045,12 @@ impl InsightChatService {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Set up chat clients (Ollama + optional OpenRouter / LlamaCpp) shared
|
||||
/// by bootstrap and continuation. Returns the chat-side backend client
|
||||
/// (boxed because each backend has a different concrete type) and the
|
||||
/// Ollama client used for describe-image / local tool calls.
|
||||
///
|
||||
/// `effective_backend` must be one of `"local"` or `"hybrid"` (validated
|
||||
/// upstream). Hybrid → OpenRouter; local with `LLM_BACKEND=llamacpp` →
|
||||
/// llama-swap; pure local → Ollama. Returns the dispatched chat client
|
||||
/// plus the (possibly per-request) Ollama client that the caller uses
|
||||
/// for non-chat helpers (image describe in non-llamacpp mode, tool ops).
|
||||
fn build_chat_clients(
|
||||
&self,
|
||||
effective_backend: &str,
|
||||
custom_model: Option<&str>,
|
||||
req: &ChatTurnRequest,
|
||||
) -> Result<(Box<dyn LlmClient>, OllamaClient)> {
|
||||
let mut ollama_client = self.ollama.clone();
|
||||
|
||||
if effective_backend == "hybrid" {
|
||||
let arc = self.openrouter.as_ref().ok_or_else(|| {
|
||||
anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
|
||||
})?;
|
||||
let mut c: OpenRouterClient = (**arc).clone();
|
||||
if let Some(m) = custom_model {
|
||||
c.primary_model = m.to_string();
|
||||
}
|
||||
if req.temperature.is_some()
|
||||
|| req.top_p.is_some()
|
||||
|| req.top_k.is_some()
|
||||
|| req.min_p.is_some()
|
||||
{
|
||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
||||
}
|
||||
if let Some(ctx) = req.num_ctx {
|
||||
c.set_num_ctx(Some(ctx));
|
||||
}
|
||||
return Ok((Box::new(c), ollama_client));
|
||||
}
|
||||
|
||||
// Local mode — env switch decides between Ollama and llama-swap.
|
||||
if crate::ai::local_backend_is_llamacpp()
|
||||
&& let Some(arc) = self.llamacpp.as_ref()
|
||||
{
|
||||
let mut c: LlamaCppClient = (**arc).clone();
|
||||
if let Some(m) = custom_model {
|
||||
c.primary_model = m.to_string();
|
||||
}
|
||||
if req.temperature.is_some()
|
||||
|| req.top_p.is_some()
|
||||
|| req.top_k.is_some()
|
||||
|| req.min_p.is_some()
|
||||
{
|
||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
||||
}
|
||||
if let Some(ctx) = req.num_ctx {
|
||||
c.set_num_ctx(Some(ctx));
|
||||
}
|
||||
return Ok((Box::new(c), ollama_client));
|
||||
}
|
||||
|
||||
if let Some(m) = custom_model
|
||||
&& m != self.ollama.primary_model
|
||||
{
|
||||
ollama_client = OllamaClient::new(
|
||||
self.ollama.primary_url.clone(),
|
||||
self.ollama.fallback_url.clone(),
|
||||
m.to_string(),
|
||||
Some(m.to_string()),
|
||||
);
|
||||
}
|
||||
if req.temperature.is_some()
|
||||
|| req.top_p.is_some()
|
||||
|| req.top_k.is_some()
|
||||
|| req.min_p.is_some()
|
||||
{
|
||||
ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
||||
}
|
||||
if let Some(ctx) = req.num_ctx {
|
||||
ollama_client.set_num_ctx(Some(ctx));
|
||||
}
|
||||
Ok((Box::new(ollama_client.clone()), ollama_client))
|
||||
}
|
||||
|
||||
/// Generate a short title via the same chat backend so voice stays
|
||||
/// consistent with the body. Mirrors generate_agentic_insight_for_photo's
|
||||
/// titling pass.
|
||||
async fn generate_title(
|
||||
&self,
|
||||
chat_backend: &dyn LlmClient,
|
||||
backend: &ResolvedBackend,
|
||||
final_content: &str,
|
||||
) -> Result<String> {
|
||||
let title_prompt = format!(
|
||||
@@ -1248,7 +1058,8 @@ impl InsightChatService {
|
||||
Capture the key moment or theme. Return ONLY the title, nothing else.",
|
||||
final_content
|
||||
);
|
||||
let title_raw = chat_backend
|
||||
let title_raw = backend
|
||||
.chat()
|
||||
.generate(
|
||||
&title_prompt,
|
||||
Some(
|
||||
@@ -1266,18 +1077,13 @@ impl InsightChatService {
|
||||
/// final assistant content.
|
||||
async fn run_streaming_agentic_loop(
|
||||
&self,
|
||||
chat_backend: &dyn LlmClient,
|
||||
ollama_client: &OllamaClient,
|
||||
backend: &ResolvedBackend,
|
||||
messages: &mut Vec<ChatMessage>,
|
||||
tools: Vec<Tool>,
|
||||
image_base64: &Option<String>,
|
||||
normalized: &str,
|
||||
user_id: i32,
|
||||
active_persona: &str,
|
||||
// Provenance — stamped onto any store_fact tool call made
|
||||
// during this loop. Mirrors the non-streaming chat path.
|
||||
model_used: &str,
|
||||
effective_backend: &str,
|
||||
max_iterations: usize,
|
||||
tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>,
|
||||
) -> Result<AgenticLoopOutcome> {
|
||||
@@ -1296,7 +1102,8 @@ impl InsightChatService {
|
||||
})
|
||||
.await;
|
||||
|
||||
let mut stream = chat_backend
|
||||
let mut stream = backend
|
||||
.chat()
|
||||
.chat_with_tools_stream(messages.clone(), tools.clone())
|
||||
.await?;
|
||||
|
||||
@@ -1353,13 +1160,11 @@ impl InsightChatService {
|
||||
.execute_tool(
|
||||
&tool_call.function.name,
|
||||
&tool_call.function.arguments,
|
||||
ollama_client,
|
||||
backend,
|
||||
image_base64,
|
||||
normalized,
|
||||
user_id,
|
||||
active_persona,
|
||||
model_used,
|
||||
effective_backend,
|
||||
&cx,
|
||||
)
|
||||
.await;
|
||||
@@ -1394,7 +1199,8 @@ impl InsightChatService {
|
||||
messages.push(ChatMessage::user(
|
||||
"Please write your final answer now without calling any more tools.",
|
||||
));
|
||||
let mut stream = chat_backend
|
||||
let mut stream = backend
|
||||
.chat()
|
||||
.chat_with_tools_stream(messages.clone(), vec![])
|
||||
.await?;
|
||||
let mut final_message: Option<ChatMessage> = None;
|
||||
|
||||
Reference in New Issue
Block a user