ai: extract ResolvedBackend, remove ~480 lines of duplicated dispatch
Replace 5 copies of the ~80-line backend resolution pattern with a single InsightGenerator::resolve_backend() builder that returns a ResolvedBackend (chat + local clients, BackendKind enum, images_inline flag).

Tool dispatch now takes &ResolvedBackend instead of &OllamaClient + model + backend strings. Remove duplicated ollama/openrouter/llamacpp fields from InsightChatService — InsightGenerator owns them and resolve_backend uses them. Delete build_chat_clients (replaced by resolve_backend).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,11 +6,9 @@ use std::collections::HashMap;
|
|||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use tokio::sync::Mutex as TokioMutex;
|
use tokio::sync::Mutex as TokioMutex;
|
||||||
|
|
||||||
|
use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
|
||||||
use crate::ai::insight_generator::InsightGenerator;
|
use crate::ai::insight_generator::InsightGenerator;
|
||||||
use crate::ai::llm_client::{ChatMessage, LlmClient, LlmStreamEvent, Tool};
|
use crate::ai::llm_client::{ChatMessage, LlmStreamEvent, Tool};
|
||||||
use crate::ai::ollama::OllamaClient;
|
|
||||||
use crate::ai::llamacpp::LlamaCppClient;
|
|
||||||
use crate::ai::openrouter::OpenRouterClient;
|
|
||||||
use crate::database::InsightDao;
|
use crate::database::InsightDao;
|
||||||
use crate::database::models::InsertPhotoInsight;
|
use crate::database::models::InsertPhotoInsight;
|
||||||
use crate::otel::global_tracer;
|
use crate::otel::global_tracer;
|
||||||
@@ -92,9 +90,6 @@ pub struct ChatTurnResult {
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct InsightChatService {
|
pub struct InsightChatService {
|
||||||
generator: Arc<InsightGenerator>,
|
generator: Arc<InsightGenerator>,
|
||||||
ollama: OllamaClient,
|
|
||||||
openrouter: Option<Arc<OpenRouterClient>>,
|
|
||||||
llamacpp: Option<Arc<LlamaCppClient>>,
|
|
||||||
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
||||||
chat_locks: ChatLockMap,
|
chat_locks: ChatLockMap,
|
||||||
}
|
}
|
||||||
@@ -102,17 +97,11 @@ pub struct InsightChatService {
|
|||||||
impl InsightChatService {
|
impl InsightChatService {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
generator: Arc<InsightGenerator>,
|
generator: Arc<InsightGenerator>,
|
||||||
ollama: OllamaClient,
|
|
||||||
openrouter: Option<Arc<OpenRouterClient>>,
|
|
||||||
llamacpp: Option<Arc<LlamaCppClient>>,
|
|
||||||
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
|
||||||
chat_locks: ChatLockMap,
|
chat_locks: ChatLockMap,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
generator,
|
generator,
|
||||||
ollama,
|
|
||||||
openrouter,
|
|
||||||
llamacpp,
|
|
||||||
insight_dao,
|
insight_dao,
|
||||||
chat_locks,
|
chat_locks,
|
||||||
}
|
}
|
||||||
@@ -308,16 +297,9 @@ impl InsightChatService {
|
|||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
.unwrap_or_else(|| stored_backend.clone());
|
.unwrap_or_else(|| stored_backend.clone());
|
||||||
validate_cross_replay(&stored_backend, &effective_backend)?;
|
validate_cross_replay(&stored_backend, &effective_backend)?;
|
||||||
let is_hybrid = effective_backend == "hybrid";
|
let kind = BackendKind::parse(&effective_backend)?;
|
||||||
let local_via_llamacpp =
|
span.set_attribute(KeyValue::new("backend", kind.as_str()));
|
||||||
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
|
||||||
let describes_then_inlines = is_hybrid;
|
|
||||||
span.set_attribute(KeyValue::new("backend", effective_backend.clone()));
|
|
||||||
|
|
||||||
// 4. Build the chat backend client. Hybrid → OpenRouter; local with
|
|
||||||
// `LLM_BACKEND=llamacpp` → llama-swap; otherwise Ollama. Clones
|
|
||||||
// so per-request sampling/model overrides don't leak into shared
|
|
||||||
// state.
|
|
||||||
let max_iterations = req
|
let max_iterations = req
|
||||||
.max_iterations
|
.max_iterations
|
||||||
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
||||||
@@ -325,113 +307,36 @@ impl InsightChatService {
|
|||||||
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
||||||
|
|
||||||
let stored_model = insight.model_version.clone();
|
let stored_model = insight.model_version.clone();
|
||||||
let custom_model = req
|
let overrides = SamplingOverrides {
|
||||||
.model
|
model: req.model.clone()
|
||||||
.clone()
|
.or_else(|| Some(stored_model.clone()))
|
||||||
.or_else(|| Some(stored_model.clone()))
|
.filter(|m| !m.is_empty()),
|
||||||
.filter(|m| !m.is_empty());
|
num_ctx: req.num_ctx,
|
||||||
|
temperature: req.temperature,
|
||||||
let mut ollama_client = self.ollama.clone();
|
top_p: req.top_p,
|
||||||
let mut openrouter_client: Option<OpenRouterClient> = None;
|
top_k: req.top_k,
|
||||||
let mut llamacpp_client: Option<LlamaCppClient> = None;
|
min_p: req.min_p,
|
||||||
|
|
||||||
if is_hybrid {
|
|
||||||
let arc = self.openrouter.as_ref().ok_or_else(|| {
|
|
||||||
anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
|
|
||||||
})?;
|
|
||||||
let mut c: OpenRouterClient = (**arc).clone();
|
|
||||||
if let Some(ref m) = custom_model {
|
|
||||||
c.primary_model = m.clone();
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
c.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
openrouter_client = Some(c);
|
|
||||||
} else if local_via_llamacpp {
|
|
||||||
let arc = self.llamacpp.as_ref().ok_or_else(|| {
|
|
||||||
anyhow!("LLM_BACKEND=llamacpp but LLAMA_SWAP_URL not configured")
|
|
||||||
})?;
|
|
||||||
let mut c: LlamaCppClient = (**arc).clone();
|
|
||||||
if let Some(ref m) = custom_model {
|
|
||||||
c.primary_model = m.clone();
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
c.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
llamacpp_client = Some(c);
|
|
||||||
} else {
|
|
||||||
// Pure local (Ollama): model swap. Build a new client when the
|
|
||||||
// chat model differs from the configured one.
|
|
||||||
if let Some(ref m) = custom_model
|
|
||||||
&& m != &self.ollama.primary_model
|
|
||||||
{
|
|
||||||
ollama_client = OllamaClient::new(
|
|
||||||
self.ollama.primary_url.clone(),
|
|
||||||
self.ollama.fallback_url.clone(),
|
|
||||||
m.clone(),
|
|
||||||
Some(m.clone()),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
ollama_client.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let chat_backend: &dyn LlmClient = if let Some(ref c) = llamacpp_client {
|
|
||||||
c
|
|
||||||
} else if let Some(ref c) = openrouter_client {
|
|
||||||
c
|
|
||||||
} else {
|
|
||||||
&ollama_client
|
|
||||||
};
|
};
|
||||||
let model_used = chat_backend.primary_model().to_string();
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
|
let model_used = backend.model().to_string();
|
||||||
span.set_attribute(KeyValue::new("model", model_used.clone()));
|
span.set_attribute(KeyValue::new("model", model_used.clone()));
|
||||||
|
|
||||||
// 5. Decide vision + tool set. In describe-then-inline mode
|
// 5. Decide vision + tool set. In hybrid (describe-then-inline) mode
|
||||||
// (hybrid only) we omit `describe_photo`. In local and llamacpp
|
// we omit `describe_photo`. Otherwise trust the stored history:
|
||||||
// we trust the stored history's first-user shape: if it carries
|
// if the first user message carries images, describe_photo stays.
|
||||||
// `images`, the original model was vision-capable, and we keep
|
|
||||||
// `describe_photo` available.
|
|
||||||
let local_first_user_has_image = messages
|
let local_first_user_has_image = messages
|
||||||
.iter()
|
.iter()
|
||||||
.find(|m| m.role == "user")
|
.find(|m| m.role == "user")
|
||||||
.and_then(|m| m.images.as_ref())
|
.and_then(|m| m.images.as_ref())
|
||||||
.map(|imgs| !imgs.is_empty())
|
.map(|imgs| !imgs.is_empty())
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
let offer_describe_tool = !describes_then_inlines && local_first_user_has_image;
|
let offer_describe_tool = backend.images_inline && local_first_user_has_image;
|
||||||
// current_gate_opts(has_vision) sets gate_opts.has_vision = has_vision
|
|
||||||
// and probes the per-table presence flags. Pass `offer_describe_tool`
|
|
||||||
// directly — the `!is_hybrid && local_first_user_has_image` decision
|
|
||||||
// is the chat-path's vision predicate.
|
|
||||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||||
offer_describe_tool,
|
offer_describe_tool,
|
||||||
Some((req.user_id, &active_persona)),
|
Some((req.user_id, &active_persona)),
|
||||||
);
|
);
|
||||||
let tools = InsightGenerator::build_tool_definitions(gate_opts);
|
let tools = InsightGenerator::build_tool_definitions(gate_opts);
|
||||||
|
|
||||||
// Image base64 only needed when describe_photo is on the menu. Load
|
|
||||||
// lazily to avoid disk IO when the loop never invokes it.
|
|
||||||
let image_base64: Option<String> = if offer_describe_tool {
|
let image_base64: Option<String> = if offer_describe_tool {
|
||||||
self.generator.load_image_as_base64(&normalized).ok()
|
self.generator.load_image_as_base64(&normalized).ok()
|
||||||
} else {
|
} else {
|
||||||
@@ -480,13 +385,13 @@ impl InsightChatService {
|
|||||||
iterations_used = iteration + 1;
|
iterations_used = iteration + 1;
|
||||||
log::info!("Chat iteration {}/{}", iterations_used, max_iterations);
|
log::info!("Chat iteration {}/{}", iterations_used, max_iterations);
|
||||||
|
|
||||||
let (response, prompt_tokens, eval_tokens) = chat_backend
|
let (response, prompt_tokens, eval_tokens) = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools(messages.clone(), tools.clone())
|
.chat_with_tools(messages.clone(), tools.clone())
|
||||||
.await?;
|
.await?;
|
||||||
last_prompt_eval_count = prompt_tokens;
|
last_prompt_eval_count = prompt_tokens;
|
||||||
last_eval_count = eval_tokens;
|
last_eval_count = eval_tokens;
|
||||||
|
|
||||||
// Ollama rejects non-object tool-call arguments on replay.
|
|
||||||
let mut response = response;
|
let mut response = response;
|
||||||
if let Some(ref mut tcs) = response.tool_calls {
|
if let Some(ref mut tcs) = response.tool_calls {
|
||||||
for tc in tcs.iter_mut() {
|
for tc in tcs.iter_mut() {
|
||||||
@@ -514,13 +419,11 @@ impl InsightChatService {
|
|||||||
.execute_tool(
|
.execute_tool(
|
||||||
&tool_call.function.name,
|
&tool_call.function.name,
|
||||||
&tool_call.function.arguments,
|
&tool_call.function.arguments,
|
||||||
&ollama_client,
|
&backend,
|
||||||
&image_base64,
|
&image_base64,
|
||||||
&normalized,
|
&normalized,
|
||||||
req.user_id,
|
req.user_id,
|
||||||
&active_persona,
|
&active_persona,
|
||||||
&model_used,
|
|
||||||
&effective_backend,
|
|
||||||
&loop_cx,
|
&loop_cx,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -534,8 +437,6 @@ impl InsightChatService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if final_content.is_empty() {
|
if final_content.is_empty() {
|
||||||
// The model never produced a final answer; ask once more without
|
|
||||||
// tools to force a textual reply.
|
|
||||||
log::info!(
|
log::info!(
|
||||||
"Chat loop exhausted after {} iterations, requesting final answer",
|
"Chat loop exhausted after {} iterations, requesting final answer",
|
||||||
iterations_used
|
iterations_used
|
||||||
@@ -543,7 +444,8 @@ impl InsightChatService {
|
|||||||
messages.push(ChatMessage::user(
|
messages.push(ChatMessage::user(
|
||||||
"Please write your final answer now without calling any more tools.",
|
"Please write your final answer now without calling any more tools.",
|
||||||
));
|
));
|
||||||
let (final_response, prompt_tokens, eval_tokens) = chat_backend
|
let (final_response, prompt_tokens, eval_tokens) = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools(messages.clone(), vec![])
|
.chat_with_tools(messages.clone(), vec![])
|
||||||
.await?;
|
.await?;
|
||||||
last_prompt_eval_count = prompt_tokens;
|
last_prompt_eval_count = prompt_tokens;
|
||||||
@@ -579,7 +481,8 @@ impl InsightChatService {
|
|||||||
Capture the key moment or theme. Return ONLY the title, nothing else.",
|
Capture the key moment or theme. Return ONLY the title, nothing else.",
|
||||||
final_content
|
final_content
|
||||||
);
|
);
|
||||||
let title_raw = chat_backend
|
let title_raw = backend
|
||||||
|
.chat()
|
||||||
.generate(
|
.generate(
|
||||||
&title_prompt,
|
&title_prompt,
|
||||||
Some(
|
Some(
|
||||||
@@ -604,7 +507,7 @@ impl InsightChatService {
|
|||||||
model_version: model_used.clone(),
|
model_version: model_used.clone(),
|
||||||
is_current: true,
|
is_current: true,
|
||||||
training_messages: Some(json),
|
training_messages: Some(json),
|
||||||
backend: effective_backend.clone(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
};
|
};
|
||||||
@@ -629,7 +532,7 @@ impl InsightChatService {
|
|||||||
prompt_eval_count: last_prompt_eval_count,
|
prompt_eval_count: last_prompt_eval_count,
|
||||||
eval_count: last_eval_count,
|
eval_count: last_eval_count,
|
||||||
amended_insight_id,
|
amended_insight_id,
|
||||||
backend_used: effective_backend,
|
backend_used: kind.as_str().to_string(),
|
||||||
model_used,
|
model_used,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -818,9 +721,8 @@ impl InsightChatService {
|
|||||||
.map(|s| s.trim().to_lowercase())
|
.map(|s| s.trim().to_lowercase())
|
||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
.unwrap_or_else(|| stored_backend.clone());
|
.unwrap_or_else(|| stored_backend.clone());
|
||||||
validate_cross_replay(&stored_backend, &effective_backend)?;
|
let kind = BackendKind::parse(&effective_backend)?;
|
||||||
let is_hybrid = effective_backend == "hybrid";
|
validate_cross_replay(&stored_backend, kind.as_str())?;
|
||||||
let describes_then_inlines = is_hybrid;
|
|
||||||
|
|
||||||
let max_iterations = req
|
let max_iterations = req
|
||||||
.max_iterations
|
.max_iterations
|
||||||
@@ -828,18 +730,20 @@ impl InsightChatService {
|
|||||||
.clamp(1, env_max_iterations());
|
.clamp(1, env_max_iterations());
|
||||||
|
|
||||||
let stored_model = insight.model_version.clone();
|
let stored_model = insight.model_version.clone();
|
||||||
let custom_model = req
|
let overrides = SamplingOverrides {
|
||||||
.model
|
model: req.model.clone()
|
||||||
.clone()
|
.or_else(|| Some(stored_model.clone()))
|
||||||
.or_else(|| Some(stored_model.clone()))
|
.filter(|m| !m.is_empty()),
|
||||||
.filter(|m| !m.is_empty());
|
num_ctx: req.num_ctx,
|
||||||
|
temperature: req.temperature,
|
||||||
|
top_p: req.top_p,
|
||||||
|
top_k: req.top_k,
|
||||||
|
min_p: req.min_p,
|
||||||
|
};
|
||||||
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
|
let model_used = backend.model().to_string();
|
||||||
|
|
||||||
let (chat_backend_holder, ollama_client) =
|
// Tool set — images_inline mode + first user turn carries an image →
|
||||||
self.build_chat_clients(&effective_backend, custom_model.as_deref(), &req)?;
|
|
||||||
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
|
|
||||||
let model_used = chat_backend.primary_model().to_string();
|
|
||||||
|
|
||||||
// Tool set — local/llamacpp mode + first user turn carries an image →
|
|
||||||
// offer describe_photo. Describe-then-inline mode (hybrid only):
|
// offer describe_photo. Describe-then-inline mode (hybrid only):
|
||||||
// visual description was inlined at bootstrap, no describe tool needed.
|
// visual description was inlined at bootstrap, no describe tool needed.
|
||||||
let local_first_user_has_image = messages
|
let local_first_user_has_image = messages
|
||||||
@@ -848,7 +752,7 @@ impl InsightChatService {
|
|||||||
.and_then(|m| m.images.as_ref())
|
.and_then(|m| m.images.as_ref())
|
||||||
.map(|imgs| !imgs.is_empty())
|
.map(|imgs| !imgs.is_empty())
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
let offer_describe_tool = !describes_then_inlines && local_first_user_has_image;
|
let offer_describe_tool = backend.images_inline && local_first_user_has_image;
|
||||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||||
offer_describe_tool,
|
offer_describe_tool,
|
||||||
Some((req.user_id, &active_persona)),
|
Some((req.user_id, &active_persona)),
|
||||||
@@ -879,16 +783,13 @@ impl InsightChatService {
|
|||||||
|
|
||||||
let outcome = self
|
let outcome = self
|
||||||
.run_streaming_agentic_loop(
|
.run_streaming_agentic_loop(
|
||||||
chat_backend,
|
&backend,
|
||||||
&ollama_client,
|
|
||||||
&mut messages,
|
&mut messages,
|
||||||
tools,
|
tools,
|
||||||
&image_base64,
|
&image_base64,
|
||||||
&normalized,
|
&normalized,
|
||||||
req.user_id,
|
req.user_id,
|
||||||
&active_persona,
|
&active_persona,
|
||||||
&model_used,
|
|
||||||
&effective_backend,
|
|
||||||
max_iterations,
|
max_iterations,
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
@@ -916,7 +817,7 @@ impl InsightChatService {
|
|||||||
|
|
||||||
let mut amended_insight_id: Option<i32> = None;
|
let mut amended_insight_id: Option<i32> = None;
|
||||||
if req.amend {
|
if req.amend {
|
||||||
let title = self.generate_title(chat_backend, &final_content).await?;
|
let title = self.generate_title(&backend, &final_content).await?;
|
||||||
|
|
||||||
// Amended rows intentionally do not inherit the parent's
|
// Amended rows intentionally do not inherit the parent's
|
||||||
// `fewshot_source_ids`. The parent's few-shot influence is still
|
// `fewshot_source_ids`. The parent's few-shot influence is still
|
||||||
@@ -932,7 +833,7 @@ impl InsightChatService {
|
|||||||
model_version: model_used.clone(),
|
model_version: model_used.clone(),
|
||||||
is_current: true,
|
is_current: true,
|
||||||
training_messages: Some(json),
|
training_messages: Some(json),
|
||||||
backend: effective_backend.clone(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
};
|
};
|
||||||
@@ -958,7 +859,7 @@ impl InsightChatService {
|
|||||||
eval_tokens: last_eval_count,
|
eval_tokens: last_eval_count,
|
||||||
num_ctx: req.num_ctx,
|
num_ctx: req.num_ctx,
|
||||||
amended_insight_id,
|
amended_insight_id,
|
||||||
backend_used: effective_backend,
|
backend_used: kind.as_str().to_string(),
|
||||||
model_used,
|
model_used,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
@@ -984,21 +885,23 @@ impl InsightChatService {
|
|||||||
.filter(|s| !s.trim().is_empty())
|
.filter(|s| !s.trim().is_empty())
|
||||||
.unwrap_or_else(|| "default".to_string());
|
.unwrap_or_else(|| "default".to_string());
|
||||||
let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?;
|
let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?;
|
||||||
let is_hybrid = effective_backend == "hybrid";
|
let kind = BackendKind::parse(&effective_backend)?;
|
||||||
let local_via_llamacpp =
|
|
||||||
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
|
||||||
let describes_then_inlines = is_hybrid;
|
|
||||||
|
|
||||||
let max_iterations = req
|
let max_iterations = req
|
||||||
.max_iterations
|
.max_iterations
|
||||||
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
||||||
.clamp(1, env_max_iterations());
|
.clamp(1, env_max_iterations());
|
||||||
|
|
||||||
let custom_model = req.model.clone().filter(|m| !m.is_empty());
|
let overrides = SamplingOverrides {
|
||||||
let (chat_backend_holder, ollama_client) =
|
model: req.model.clone().filter(|m| !m.is_empty()),
|
||||||
self.build_chat_clients(&effective_backend, custom_model.as_deref(), &req)?;
|
num_ctx: req.num_ctx,
|
||||||
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
|
temperature: req.temperature,
|
||||||
let model_used = chat_backend.primary_model().to_string();
|
top_p: req.top_p,
|
||||||
|
top_k: req.top_k,
|
||||||
|
min_p: req.min_p,
|
||||||
|
};
|
||||||
|
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||||
|
let model_used = backend.model().to_string();
|
||||||
|
|
||||||
// Load image bytes once. RAW preview fallback is handled inside
|
// Load image bytes once. RAW preview fallback is handled inside
|
||||||
// load_image_as_base64. Errors degrade silently — a chat that
|
// load_image_as_base64. Errors degrade silently — a chat that
|
||||||
@@ -1020,26 +923,17 @@ impl InsightChatService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Describe-then-inline (hybrid only): pre-describe the image so a
|
// Describe-then-inline (hybrid only): pre-describe the image so a
|
||||||
// text-only chat model gets the visual description inline. llamacpp
|
// text-only chat model gets the visual description inline.
|
||||||
// sends images directly to the chat model.
|
// images_inline backends send images directly to the chat model.
|
||||||
let visual_block = if describes_then_inlines {
|
let visual_block = if !backend.images_inline {
|
||||||
match image_base64.as_deref() {
|
match image_base64.as_deref() {
|
||||||
Some(b64) => {
|
Some(b64) => {
|
||||||
let described = if local_via_llamacpp {
|
match backend.local().describe_image(b64).await {
|
||||||
self.llamacpp
|
|
||||||
.as_ref()
|
|
||||||
.expect("local_via_llamacpp guarantees Some")
|
|
||||||
.describe_image(b64)
|
|
||||||
.await
|
|
||||||
} else {
|
|
||||||
self.ollama.describe_image(b64).await
|
|
||||||
};
|
|
||||||
match described {
|
|
||||||
Ok(desc) => {
|
Ok(desc) => {
|
||||||
format!("Visual description (from local vision model):\n{}\n", desc)
|
format!("Visual description (from local vision model):\n{}\n", desc)
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::warn!("{} bootstrap: describe_image failed: {}", effective_backend, e);
|
log::warn!("{} bootstrap: describe_image failed: {}", kind.as_str(), e);
|
||||||
String::new()
|
String::new()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1050,10 +944,10 @@ impl InsightChatService {
|
|||||||
String::new()
|
String::new()
|
||||||
};
|
};
|
||||||
|
|
||||||
// Tool gates. Local + image present → expose describe_photo so
|
// Tool gates. images_inline + image present → expose describe_photo so
|
||||||
// the chat model can re-look at the photo on demand. Hybrid:
|
// the chat model can re-look at the photo on demand. Non-inline:
|
||||||
// already inlined, no tool needed.
|
// already inlined, no tool needed.
|
||||||
let offer_describe_tool = !describes_then_inlines && image_base64.is_some();
|
let offer_describe_tool = backend.images_inline && image_base64.is_some();
|
||||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||||
offer_describe_tool,
|
offer_describe_tool,
|
||||||
Some((req.user_id, &active_persona)),
|
Some((req.user_id, &active_persona)),
|
||||||
@@ -1079,23 +973,22 @@ impl InsightChatService {
|
|||||||
);
|
);
|
||||||
let system_msg = ChatMessage::system(system_content);
|
let system_msg = ChatMessage::system(system_content);
|
||||||
let mut user_msg = ChatMessage::user(req.user_message.clone());
|
let mut user_msg = ChatMessage::user(req.user_message.clone());
|
||||||
if !describes_then_inlines && let Some(ref img) = image_base64 {
|
if backend.images_inline {
|
||||||
user_msg.images = Some(vec![img.clone()]);
|
if let Some(ref img) = image_base64 {
|
||||||
|
user_msg.images = Some(vec![img.clone()]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
let mut messages = vec![system_msg, user_msg];
|
let mut messages = vec![system_msg, user_msg];
|
||||||
|
|
||||||
let outcome = self
|
let outcome = self
|
||||||
.run_streaming_agentic_loop(
|
.run_streaming_agentic_loop(
|
||||||
chat_backend,
|
&backend,
|
||||||
&ollama_client,
|
|
||||||
&mut messages,
|
&mut messages,
|
||||||
tools,
|
tools,
|
||||||
&image_base64,
|
&image_base64,
|
||||||
&normalized,
|
&normalized,
|
||||||
req.user_id,
|
req.user_id,
|
||||||
&active_persona,
|
&active_persona,
|
||||||
&model_used,
|
|
||||||
&effective_backend,
|
|
||||||
max_iterations,
|
max_iterations,
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
@@ -1108,7 +1001,7 @@ impl InsightChatService {
|
|||||||
final_content,
|
final_content,
|
||||||
} = outcome;
|
} = outcome;
|
||||||
|
|
||||||
let title = self.generate_title(chat_backend, &final_content).await?;
|
let title = self.generate_title(&backend, &final_content).await?;
|
||||||
|
|
||||||
let json = serde_json::to_string(&messages)
|
let json = serde_json::to_string(&messages)
|
||||||
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
|
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
|
||||||
@@ -1121,7 +1014,7 @@ impl InsightChatService {
|
|||||||
model_version: model_used.clone(),
|
model_version: model_used.clone(),
|
||||||
is_current: true,
|
is_current: true,
|
||||||
training_messages: Some(json),
|
training_messages: Some(json),
|
||||||
backend: effective_backend.clone(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: None,
|
fewshot_source_ids: None,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
};
|
};
|
||||||
@@ -1144,7 +1037,7 @@ impl InsightChatService {
|
|||||||
eval_tokens: last_eval_count,
|
eval_tokens: last_eval_count,
|
||||||
num_ctx: req.num_ctx,
|
num_ctx: req.num_ctx,
|
||||||
amended_insight_id: Some(stored.id),
|
amended_insight_id: Some(stored.id),
|
||||||
backend_used: effective_backend,
|
backend_used: kind.as_str().to_string(),
|
||||||
model_used,
|
model_used,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
@@ -1152,95 +1045,12 @@ impl InsightChatService {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Set up chat clients (Ollama + optional OpenRouter / LlamaCpp) shared
|
|
||||||
/// by bootstrap and continuation. Returns the chat-side backend client
|
|
||||||
/// (boxed because each backend has a different concrete type) and the
|
|
||||||
/// Ollama client used for describe-image / local tool calls.
|
|
||||||
///
|
|
||||||
/// `effective_backend` must be one of `"local"` or `"hybrid"` (validated
|
|
||||||
/// upstream). Hybrid → OpenRouter; local with `LLM_BACKEND=llamacpp` →
|
|
||||||
/// llama-swap; pure local → Ollama. Returns the dispatched chat client
|
|
||||||
/// plus the (possibly per-request) Ollama client that the caller uses
|
|
||||||
/// for non-chat helpers (image describe in non-llamacpp mode, tool ops).
|
|
||||||
fn build_chat_clients(
|
|
||||||
&self,
|
|
||||||
effective_backend: &str,
|
|
||||||
custom_model: Option<&str>,
|
|
||||||
req: &ChatTurnRequest,
|
|
||||||
) -> Result<(Box<dyn LlmClient>, OllamaClient)> {
|
|
||||||
let mut ollama_client = self.ollama.clone();
|
|
||||||
|
|
||||||
if effective_backend == "hybrid" {
|
|
||||||
let arc = self.openrouter.as_ref().ok_or_else(|| {
|
|
||||||
anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
|
|
||||||
})?;
|
|
||||||
let mut c: OpenRouterClient = (**arc).clone();
|
|
||||||
if let Some(m) = custom_model {
|
|
||||||
c.primary_model = m.to_string();
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
c.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
return Ok((Box::new(c), ollama_client));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Local mode — env switch decides between Ollama and llama-swap.
|
|
||||||
if crate::ai::local_backend_is_llamacpp()
|
|
||||||
&& let Some(arc) = self.llamacpp.as_ref()
|
|
||||||
{
|
|
||||||
let mut c: LlamaCppClient = (**arc).clone();
|
|
||||||
if let Some(m) = custom_model {
|
|
||||||
c.primary_model = m.to_string();
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
c.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
return Ok((Box::new(c), ollama_client));
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(m) = custom_model
|
|
||||||
&& m != self.ollama.primary_model
|
|
||||||
{
|
|
||||||
ollama_client = OllamaClient::new(
|
|
||||||
self.ollama.primary_url.clone(),
|
|
||||||
self.ollama.fallback_url.clone(),
|
|
||||||
m.to_string(),
|
|
||||||
Some(m.to_string()),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if req.temperature.is_some()
|
|
||||||
|| req.top_p.is_some()
|
|
||||||
|| req.top_k.is_some()
|
|
||||||
|| req.min_p.is_some()
|
|
||||||
{
|
|
||||||
ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = req.num_ctx {
|
|
||||||
ollama_client.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
Ok((Box::new(ollama_client.clone()), ollama_client))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate a short title via the same chat backend so voice stays
|
/// Generate a short title via the same chat backend so voice stays
|
||||||
/// consistent with the body. Mirrors generate_agentic_insight_for_photo's
|
/// consistent with the body. Mirrors generate_agentic_insight_for_photo's
|
||||||
/// titling pass.
|
/// titling pass.
|
||||||
async fn generate_title(
|
async fn generate_title(
|
||||||
&self,
|
&self,
|
||||||
chat_backend: &dyn LlmClient,
|
backend: &ResolvedBackend,
|
||||||
final_content: &str,
|
final_content: &str,
|
||||||
) -> Result<String> {
|
) -> Result<String> {
|
||||||
let title_prompt = format!(
|
let title_prompt = format!(
|
||||||
@@ -1248,7 +1058,8 @@ impl InsightChatService {
|
|||||||
Capture the key moment or theme. Return ONLY the title, nothing else.",
|
Capture the key moment or theme. Return ONLY the title, nothing else.",
|
||||||
final_content
|
final_content
|
||||||
);
|
);
|
||||||
let title_raw = chat_backend
|
let title_raw = backend
|
||||||
|
.chat()
|
||||||
.generate(
|
.generate(
|
||||||
&title_prompt,
|
&title_prompt,
|
||||||
Some(
|
Some(
|
||||||
@@ -1266,18 +1077,13 @@ impl InsightChatService {
|
|||||||
/// final assistant content.
|
/// final assistant content.
|
||||||
async fn run_streaming_agentic_loop(
|
async fn run_streaming_agentic_loop(
|
||||||
&self,
|
&self,
|
||||||
chat_backend: &dyn LlmClient,
|
backend: &ResolvedBackend,
|
||||||
ollama_client: &OllamaClient,
|
|
||||||
messages: &mut Vec<ChatMessage>,
|
messages: &mut Vec<ChatMessage>,
|
||||||
tools: Vec<Tool>,
|
tools: Vec<Tool>,
|
||||||
image_base64: &Option<String>,
|
image_base64: &Option<String>,
|
||||||
normalized: &str,
|
normalized: &str,
|
||||||
user_id: i32,
|
user_id: i32,
|
||||||
active_persona: &str,
|
active_persona: &str,
|
||||||
// Provenance — stamped onto any store_fact tool call made
|
|
||||||
// during this loop. Mirrors the non-streaming chat path.
|
|
||||||
model_used: &str,
|
|
||||||
effective_backend: &str,
|
|
||||||
max_iterations: usize,
|
max_iterations: usize,
|
||||||
tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>,
|
tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>,
|
||||||
) -> Result<AgenticLoopOutcome> {
|
) -> Result<AgenticLoopOutcome> {
|
||||||
@@ -1296,7 +1102,8 @@ impl InsightChatService {
|
|||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let mut stream = chat_backend
|
let mut stream = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools_stream(messages.clone(), tools.clone())
|
.chat_with_tools_stream(messages.clone(), tools.clone())
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
@@ -1353,13 +1160,11 @@ impl InsightChatService {
|
|||||||
.execute_tool(
|
.execute_tool(
|
||||||
&tool_call.function.name,
|
&tool_call.function.name,
|
||||||
&tool_call.function.arguments,
|
&tool_call.function.arguments,
|
||||||
ollama_client,
|
backend,
|
||||||
image_base64,
|
image_base64,
|
||||||
normalized,
|
normalized,
|
||||||
user_id,
|
user_id,
|
||||||
active_persona,
|
active_persona,
|
||||||
model_used,
|
|
||||||
effective_backend,
|
|
||||||
&cx,
|
&cx,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -1394,7 +1199,8 @@ impl InsightChatService {
|
|||||||
messages.push(ChatMessage::user(
|
messages.push(ChatMessage::user(
|
||||||
"Please write your final answer now without calling any more tools.",
|
"Please write your final answer now without calling any more tools.",
|
||||||
));
|
));
|
||||||
let mut stream = chat_backend
|
let mut stream = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools_stream(messages.clone(), vec![])
|
.chat_with_tools_stream(messages.clone(), vec![])
|
||||||
.await?;
|
.await?;
|
||||||
let mut final_message: Option<ChatMessage> = None;
|
let mut final_message: Option<ChatMessage> = None;
|
||||||
|
|||||||
@@ -1594,29 +1594,24 @@ Return ONLY the summary, nothing else."#,
|
|||||||
&self,
|
&self,
|
||||||
tool_name: &str,
|
tool_name: &str,
|
||||||
arguments: &serde_json::Value,
|
arguments: &serde_json::Value,
|
||||||
ollama: &OllamaClient,
|
backend: &ResolvedBackend,
|
||||||
image_base64: &Option<String>,
|
image_base64: &Option<String>,
|
||||||
file_path: &str,
|
file_path: &str,
|
||||||
user_id: i32,
|
user_id: i32,
|
||||||
persona_id: &str,
|
persona_id: &str,
|
||||||
// Provenance — written into entity_facts.created_by_* when
|
|
||||||
// the loop calls store_fact. The caller knows the actual
|
|
||||||
// chat-runtime model and backend (which may differ from
|
|
||||||
// ollama.primary_model in hybrid mode where chat lives on
|
|
||||||
// OpenRouter while Ollama still handles vision).
|
|
||||||
model: &str,
|
|
||||||
backend: &str,
|
|
||||||
cx: &opentelemetry::Context,
|
cx: &opentelemetry::Context,
|
||||||
) -> String {
|
) -> String {
|
||||||
|
let model = backend.model();
|
||||||
|
let backend_label = backend.kind.as_str();
|
||||||
let result = match tool_name {
|
let result = match tool_name {
|
||||||
"search_rag" => self.tool_search_rag(arguments, ollama, cx).await,
|
"search_rag" => self.tool_search_rag(arguments, backend.local(), cx).await,
|
||||||
"search_messages" => self.tool_search_messages(arguments, cx).await,
|
"search_messages" => self.tool_search_messages(arguments, cx).await,
|
||||||
"get_sms_messages" => self.tool_get_sms_messages(arguments, cx).await,
|
"get_sms_messages" => self.tool_get_sms_messages(arguments, cx).await,
|
||||||
"get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await,
|
"get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await,
|
||||||
"get_location_history" => self.tool_get_location_history(arguments, cx).await,
|
"get_location_history" => self.tool_get_location_history(arguments, cx).await,
|
||||||
"get_file_tags" => self.tool_get_file_tags(arguments, cx).await,
|
"get_file_tags" => self.tool_get_file_tags(arguments, cx).await,
|
||||||
"get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await,
|
"get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await,
|
||||||
"describe_photo" => self.tool_describe_photo(ollama, image_base64).await,
|
"describe_photo" => self.tool_describe_photo(backend.local(), image_base64).await,
|
||||||
"reverse_geocode" => self.tool_reverse_geocode(arguments).await,
|
"reverse_geocode" => self.tool_reverse_geocode(arguments).await,
|
||||||
"get_personal_place_at" => self.tool_get_personal_place_at(arguments).await,
|
"get_personal_place_at" => self.tool_get_personal_place_at(arguments).await,
|
||||||
"recall_entities" => self.tool_recall_entities(arguments, cx).await,
|
"recall_entities" => self.tool_recall_entities(arguments, cx).await,
|
||||||
@@ -1624,19 +1619,19 @@ Return ONLY the summary, nothing else."#,
|
|||||||
self.tool_recall_facts_for_photo(arguments, user_id, persona_id, cx)
|
self.tool_recall_facts_for_photo(arguments, user_id, persona_id, cx)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
"store_entity" => self.tool_store_entity(arguments, ollama, cx).await,
|
"store_entity" => self.tool_store_entity(arguments, cx).await,
|
||||||
"store_fact" => {
|
"store_fact" => {
|
||||||
self.tool_store_fact(
|
self.tool_store_fact(
|
||||||
arguments, file_path, user_id, persona_id, model, backend, cx,
|
arguments, file_path, user_id, persona_id, model, backend_label, cx,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
"update_fact" => {
|
"update_fact" => {
|
||||||
self.tool_update_fact(arguments, user_id, persona_id, model, backend, cx)
|
self.tool_update_fact(arguments, user_id, persona_id, model, backend_label, cx)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
"supersede_fact" => {
|
"supersede_fact" => {
|
||||||
self.tool_supersede_fact(arguments, user_id, persona_id, model, backend, cx)
|
self.tool_supersede_fact(arguments, user_id, persona_id, model, backend_label, cx)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
"get_current_datetime" => Self::tool_get_current_datetime(),
|
"get_current_datetime" => Self::tool_get_current_datetime(),
|
||||||
@@ -1654,7 +1649,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
async fn tool_search_rag(
|
async fn tool_search_rag(
|
||||||
&self,
|
&self,
|
||||||
args: &serde_json::Value,
|
args: &serde_json::Value,
|
||||||
ollama: &OllamaClient,
|
local: &dyn LlmClient,
|
||||||
_cx: &opentelemetry::Context,
|
_cx: &opentelemetry::Context,
|
||||||
) -> String {
|
) -> String {
|
||||||
let query = match args.get("query").and_then(|v| v.as_str()) {
|
let query = match args.get("query").and_then(|v| v.as_str()) {
|
||||||
@@ -1718,7 +1713,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
};
|
};
|
||||||
|
|
||||||
let final_results = if rerank_enabled && results.len() > limit {
|
let final_results = if rerank_enabled && results.len() > limit {
|
||||||
match self.rerank_with_llm(&query, &results, limit, ollama).await {
|
match self.rerank_with_llm(&query, &results, limit, local).await {
|
||||||
Ok(reordered) => reordered,
|
Ok(reordered) => reordered,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::warn!("rerank failed, using vector order: {}", e);
|
log::warn!("rerank failed, using vector order: {}", e);
|
||||||
@@ -1744,7 +1739,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
query: &str,
|
query: &str,
|
||||||
candidates: &[String],
|
candidates: &[String],
|
||||||
limit: usize,
|
limit: usize,
|
||||||
ollama: &OllamaClient,
|
local: &dyn LlmClient,
|
||||||
) -> Result<Vec<String>> {
|
) -> Result<Vec<String>> {
|
||||||
let query_preview: String = query.chars().take(60).collect();
|
let query_preview: String = query.chars().take(60).collect();
|
||||||
log::info!(
|
log::info!(
|
||||||
@@ -1785,15 +1780,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
let system = Some(
|
let system = Some(
|
||||||
"You are a terse relevance ranker. You output only numbers separated by commas.",
|
"You are a terse relevance ranker. You output only numbers separated by commas.",
|
||||||
);
|
);
|
||||||
let response = if crate::ai::local_backend_is_llamacpp() {
|
let response = local.generate(&prompt, system, None).await?;
|
||||||
if let Some(ref lc) = self.llamacpp {
|
|
||||||
lc.generate(&prompt, system, None).await?
|
|
||||||
} else {
|
|
||||||
ollama.generate_no_think(&prompt, system).await?
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ollama.generate_no_think(&prompt, system).await?
|
|
||||||
};
|
|
||||||
log::info!(
|
log::info!(
|
||||||
"rerank: finished in {} ms (prompt={} chars)",
|
"rerank: finished in {} ms (prompt={} chars)",
|
||||||
started.elapsed().as_millis(),
|
started.elapsed().as_millis(),
|
||||||
@@ -2365,31 +2352,17 @@ Return ONLY the summary, nothing else."#,
|
|||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tool: describe_photo — generate a visual description of the photo.
|
|
||||||
/// Routes through llama-swap when `LLM_BACKEND=llamacpp`, Ollama otherwise.
|
|
||||||
async fn tool_describe_photo(
|
async fn tool_describe_photo(
|
||||||
&self,
|
&self,
|
||||||
ollama: &OllamaClient,
|
local: &dyn LlmClient,
|
||||||
image_base64: &Option<String>,
|
image_base64: &Option<String>,
|
||||||
) -> String {
|
) -> String {
|
||||||
log::info!("tool_describe_photo: generating visual description");
|
log::info!("tool_describe_photo: generating visual description");
|
||||||
|
|
||||||
match image_base64 {
|
match image_base64 {
|
||||||
Some(img) => {
|
Some(img) => match local.describe_image(img).await {
|
||||||
let result = if crate::ai::local_backend_is_llamacpp() {
|
Ok(desc) => desc,
|
||||||
if let Some(ref lc) = self.llamacpp {
|
Err(e) => format!("Error describing photo: {}", e),
|
||||||
lc.describe_image(img).await
|
},
|
||||||
} else {
|
|
||||||
ollama.generate_photo_description(img).await
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ollama.generate_photo_description(img).await
|
|
||||||
};
|
|
||||||
match result {
|
|
||||||
Ok(desc) => desc,
|
|
||||||
Err(e) => format!("Error describing photo: {}", e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => "No image available for description.".to_string(),
|
None => "No image available for description.".to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2635,7 +2608,6 @@ Return ONLY the summary, nothing else."#,
|
|||||||
async fn tool_store_entity(
|
async fn tool_store_entity(
|
||||||
&self,
|
&self,
|
||||||
args: &serde_json::Value,
|
args: &serde_json::Value,
|
||||||
_ollama: &OllamaClient,
|
|
||||||
cx: &opentelemetry::Context,
|
cx: &opentelemetry::Context,
|
||||||
) -> String {
|
) -> String {
|
||||||
use crate::database::models::InsertEntity;
|
use crate::database::models::InsertEntity;
|
||||||
@@ -3775,243 +3747,25 @@ Return ONLY the summary, nothing else."#,
|
|||||||
span.set_attribute(KeyValue::new("file_path", file_path.clone()));
|
span.set_attribute(KeyValue::new("file_path", file_path.clone()));
|
||||||
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64));
|
||||||
|
|
||||||
// 1a. Resolve backend label (defaults to "local").
|
// 1. Resolve backend + build clients.
|
||||||
let backend_label = backend
|
let kind = BackendKind::parse(
|
||||||
.as_deref()
|
backend.as_deref().unwrap_or("local"),
|
||||||
.map(|s| s.trim().to_lowercase())
|
)?;
|
||||||
.filter(|s| !s.is_empty())
|
span.set_attribute(KeyValue::new("backend", kind.as_str()));
|
||||||
.unwrap_or_else(|| "local".to_string());
|
let overrides = SamplingOverrides {
|
||||||
if !matches!(backend_label.as_str(), "local" | "hybrid") {
|
model: custom_model,
|
||||||
return Err(anyhow::anyhow!(
|
num_ctx,
|
||||||
"unknown backend '{}'; expected 'local' or 'hybrid'",
|
temperature,
|
||||||
backend_label
|
top_p,
|
||||||
));
|
top_k,
|
||||||
}
|
min_p,
|
||||||
span.set_attribute(KeyValue::new("backend", backend_label.clone()));
|
|
||||||
let is_hybrid = backend_label == "hybrid";
|
|
||||||
// `LLM_BACKEND=llamacpp` swaps Ollama out for llama-swap as the
|
|
||||||
// "local" stack — chat + embeddings route through llama-swap.
|
|
||||||
// llamacpp models receive images directly (vision-capable); only
|
|
||||||
// hybrid mode (OpenRouter chat) uses describe-then-inline.
|
|
||||||
let local_via_llamacpp =
|
|
||||||
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
|
||||||
let describes_then_inlines = is_hybrid;
|
|
||||||
let ollama_is_chat = !is_hybrid && !local_via_llamacpp;
|
|
||||||
|
|
||||||
// 1b. Always build an Ollama client. In local mode it owns the chat
|
|
||||||
// loop; in hybrid/llamacpp mode it still handles tool-local calls
|
|
||||||
// (e.g. future embedding-backed tools). The chat backend is
|
|
||||||
// selected separately below.
|
|
||||||
// Sampling overrides only apply when Ollama is the chat backend.
|
|
||||||
let apply_sampling_to_ollama = ollama_is_chat;
|
|
||||||
let mut ollama_client = if let Some(ref model) = custom_model
|
|
||||||
&& ollama_is_chat
|
|
||||||
{
|
|
||||||
log::info!("Using custom model for agentic: {}", model);
|
|
||||||
span.set_attribute(KeyValue::new("custom_model", model.clone()));
|
|
||||||
OllamaClient::new(
|
|
||||||
self.ollama.primary_url.clone(),
|
|
||||||
self.ollama.fallback_url.clone(),
|
|
||||||
model.clone(),
|
|
||||||
Some(model.clone()),
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
if ollama_is_chat {
|
|
||||||
span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone()));
|
|
||||||
}
|
|
||||||
self.ollama.clone()
|
|
||||||
};
|
|
||||||
|
|
||||||
if apply_sampling_to_ollama {
|
|
||||||
if let Some(ctx) = num_ctx {
|
|
||||||
log::info!("Using custom context size: {}", ctx);
|
|
||||||
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
|
|
||||||
ollama_client.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
|
|
||||||
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
|
||||||
log::info!(
|
|
||||||
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
|
|
||||||
temperature,
|
|
||||||
top_p,
|
|
||||||
top_k,
|
|
||||||
min_p
|
|
||||||
);
|
|
||||||
if let Some(t) = temperature {
|
|
||||||
span.set_attribute(KeyValue::new("temperature", t as f64));
|
|
||||||
}
|
|
||||||
if let Some(p) = top_p {
|
|
||||||
span.set_attribute(KeyValue::new("top_p", p as f64));
|
|
||||||
}
|
|
||||||
if let Some(k) = top_k {
|
|
||||||
span.set_attribute(KeyValue::new("top_k", k as i64));
|
|
||||||
}
|
|
||||||
if let Some(m) = min_p {
|
|
||||||
span.set_attribute(KeyValue::new("min_p", m as f64));
|
|
||||||
}
|
|
||||||
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 1c. In hybrid mode, clone the configured OpenRouter client and
|
|
||||||
// apply per-request overrides.
|
|
||||||
let openrouter_client: Option<OpenRouterClient> = if is_hybrid {
|
|
||||||
let arc = self.openrouter.as_ref().ok_or_else(|| {
|
|
||||||
anyhow::anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
|
|
||||||
})?;
|
|
||||||
let mut c: OpenRouterClient = (**arc).clone();
|
|
||||||
if let Some(ref m) = custom_model {
|
|
||||||
c.primary_model = m.clone();
|
|
||||||
span.set_attribute(KeyValue::new("custom_model", m.clone()));
|
|
||||||
}
|
|
||||||
span.set_attribute(KeyValue::new("openrouter_model", c.primary_model.clone()));
|
|
||||||
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
|
||||||
if let Some(t) = temperature {
|
|
||||||
span.set_attribute(KeyValue::new("temperature", t as f64));
|
|
||||||
}
|
|
||||||
if let Some(p) = top_p {
|
|
||||||
span.set_attribute(KeyValue::new("top_p", p as f64));
|
|
||||||
}
|
|
||||||
if let Some(k) = top_k {
|
|
||||||
span.set_attribute(KeyValue::new("top_k", k as i64));
|
|
||||||
}
|
|
||||||
if let Some(m) = min_p {
|
|
||||||
span.set_attribute(KeyValue::new("min_p", m as f64));
|
|
||||||
}
|
|
||||||
c.set_sampling_params(temperature, top_p, top_k, min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = num_ctx {
|
|
||||||
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
|
|
||||||
c.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
Some(c)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
// 1d. When `LLM_BACKEND=llamacpp` and we're in local mode (not
|
|
||||||
// hybrid — hybrid keeps chat on OpenRouter), clone the llamacpp
|
|
||||||
// client and apply per-request overrides. Same shape as the
|
|
||||||
// openrouter branch above; describe_image will route through
|
|
||||||
// the vision slot configured on the client.
|
|
||||||
let llamacpp_client: Option<LlamaCppClient> = if local_via_llamacpp && !is_hybrid {
|
|
||||||
let arc = self.llamacpp.as_ref().ok_or_else(|| {
|
|
||||||
anyhow::anyhow!("LLM_BACKEND=llamacpp but LLAMA_SWAP_URL not configured")
|
|
||||||
})?;
|
|
||||||
let mut c: LlamaCppClient = (**arc).clone();
|
|
||||||
if let Some(ref m) = custom_model {
|
|
||||||
c.primary_model = m.clone();
|
|
||||||
span.set_attribute(KeyValue::new("custom_model", m.clone()));
|
|
||||||
}
|
|
||||||
span.set_attribute(KeyValue::new("llamacpp_model", c.primary_model.clone()));
|
|
||||||
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
|
||||||
if let Some(t) = temperature {
|
|
||||||
span.set_attribute(KeyValue::new("temperature", t as f64));
|
|
||||||
}
|
|
||||||
if let Some(p) = top_p {
|
|
||||||
span.set_attribute(KeyValue::new("top_p", p as f64));
|
|
||||||
}
|
|
||||||
if let Some(k) = top_k {
|
|
||||||
span.set_attribute(KeyValue::new("top_k", k as i64));
|
|
||||||
}
|
|
||||||
if let Some(m) = min_p {
|
|
||||||
span.set_attribute(KeyValue::new("min_p", m as f64));
|
|
||||||
}
|
|
||||||
c.set_sampling_params(temperature, top_p, top_k, min_p);
|
|
||||||
}
|
|
||||||
if let Some(ctx) = num_ctx {
|
|
||||||
span.set_attribute(KeyValue::new("num_ctx", ctx as i64));
|
|
||||||
c.set_num_ctx(Some(ctx));
|
|
||||||
}
|
|
||||||
Some(c)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
};
|
||||||
|
let backend = self.resolve_backend(kind, &overrides).await?;
|
||||||
|
span.set_attribute(KeyValue::new("model", backend.model().to_string()));
|
||||||
|
span.set_attribute(KeyValue::new("images_inline", backend.images_inline));
|
||||||
|
|
||||||
let insight_cx = current_cx.with_span(span);
|
let insight_cx = current_cx.with_span(span);
|
||||||
|
|
||||||
// 2. Verify chat model supports tool calling.
|
|
||||||
// - local: existing Ollama model availability + capability check.
|
|
||||||
// - hybrid: trust the operator's curated allowlist
|
|
||||||
// (OPENROUTER_ALLOWED_MODELS) — no live precheck. A bad model id
|
|
||||||
// surfaces as a chat-call error on the next step.
|
|
||||||
let has_vision = if describes_then_inlines {
|
|
||||||
// Hybrid: chat model never sees images — describe-then-inject.
|
|
||||||
true
|
|
||||||
} else if local_via_llamacpp {
|
|
||||||
// llama-swap models receive images directly via OpenAI content
|
|
||||||
// parts. Capability probing isn't available (no `/api/show`),
|
|
||||||
// so assume vision support; a misconfigured model surfaces as
|
|
||||||
// a chat-call error.
|
|
||||||
true
|
|
||||||
} else {
|
|
||||||
if let Some(ref model_name) = custom_model {
|
|
||||||
let available_on_primary =
|
|
||||||
OllamaClient::is_model_available(&ollama_client.primary_url, model_name)
|
|
||||||
.await
|
|
||||||
.unwrap_or(false);
|
|
||||||
|
|
||||||
let available_on_fallback =
|
|
||||||
if let Some(ref fallback_url) = ollama_client.fallback_url {
|
|
||||||
OllamaClient::is_model_available(fallback_url, model_name)
|
|
||||||
.await
|
|
||||||
.unwrap_or(false)
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
};
|
|
||||||
|
|
||||||
if !available_on_primary && !available_on_fallback {
|
|
||||||
anyhow::bail!(
|
|
||||||
"model not available: '{}' not found on any configured server",
|
|
||||||
model_name
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let model_name_for_caps = &ollama_client.primary_model;
|
|
||||||
let capabilities = match OllamaClient::check_model_capabilities(
|
|
||||||
&ollama_client.primary_url,
|
|
||||||
model_name_for_caps,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
Ok(caps) => caps,
|
|
||||||
Err(_) => {
|
|
||||||
let fallback_url = ollama_client.fallback_url.as_deref().ok_or_else(|| {
|
|
||||||
anyhow::anyhow!(
|
|
||||||
"Failed to check model capabilities for '{}': model not found on primary server and no fallback configured",
|
|
||||||
model_name_for_caps
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
OllamaClient::check_model_capabilities(fallback_url, model_name_for_caps)
|
|
||||||
.await
|
|
||||||
.map_err(|e| {
|
|
||||||
anyhow::anyhow!(
|
|
||||||
"Failed to check model capabilities for '{}': {}",
|
|
||||||
model_name_for_caps,
|
|
||||||
e
|
|
||||||
)
|
|
||||||
})?
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if !capabilities.has_tool_calling {
|
|
||||||
return Err(anyhow::anyhow!(
|
|
||||||
"tool calling not supported by model '{}'",
|
|
||||||
ollama_client.primary_model
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
insight_cx
|
|
||||||
.span()
|
|
||||||
.set_attribute(KeyValue::new("model_has_vision", capabilities.has_vision));
|
|
||||||
insight_cx
|
|
||||||
.span()
|
|
||||||
.set_attribute(KeyValue::new("model_has_tool_calling", true));
|
|
||||||
|
|
||||||
capabilities.has_vision
|
|
||||||
};
|
|
||||||
|
|
||||||
// 3. Fetch EXIF
|
// 3. Fetch EXIF
|
||||||
let exif = {
|
let exif = {
|
||||||
let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
|
let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
|
||||||
@@ -4103,60 +3857,33 @@ Return ONLY the summary, nothing else."#,
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// 7. Load image if vision capable.
|
// 7. Load image. Always attempted — vision-capable models get the
|
||||||
// In hybrid mode we ALSO describe it locally now so the
|
// base64 inline; hybrid mode describes it locally and injects text.
|
||||||
// description can be inlined as text — the OpenRouter chat model
|
let image_base64 = match self.load_image_as_base64(&file_path) {
|
||||||
// never receives the base64 image directly.
|
Ok(b64) => {
|
||||||
let image_base64 = if has_vision {
|
log::info!("Loaded image for agentic model");
|
||||||
match self.load_image_as_base64(&file_path) {
|
Some(b64)
|
||||||
Ok(b64) => {
|
}
|
||||||
log::info!("Loaded image for vision-capable agentic model");
|
Err(e) => {
|
||||||
Some(b64)
|
log::warn!("Failed to load image for agentic: {}", e);
|
||||||
}
|
None
|
||||||
Err(e) => {
|
|
||||||
log::warn!("Failed to load image for agentic vision: {}", e);
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// describe-then-inline path (hybrid only). Vision describe routes
|
// Describe-then-inline (hybrid only). Vision describe routes through
|
||||||
// through whichever local backend is configured — llama-swap when
|
// the local backend so non-text work stays off OpenRouter.
|
||||||
// `local_via_llamacpp`, otherwise Ollama.
|
let inlined_visual_description: Option<String> = if !backend.images_inline {
|
||||||
let inlined_visual_description: Option<String> = if describes_then_inlines {
|
|
||||||
match image_base64.as_deref() {
|
match image_base64.as_deref() {
|
||||||
Some(b64) => {
|
Some(b64) => match backend.local().describe_image(b64).await {
|
||||||
let described = if local_via_llamacpp {
|
Ok(desc) => {
|
||||||
self.llamacpp
|
log::info!("{}: vision describe succeeded ({} chars)", kind, desc.len());
|
||||||
.as_ref()
|
Some(desc)
|
||||||
.expect("local_via_llamacpp guarantees Some")
|
|
||||||
.describe_image(b64)
|
|
||||||
.await
|
|
||||||
} else {
|
|
||||||
self.ollama.describe_image(b64).await
|
|
||||||
};
|
|
||||||
|
|
||||||
match described {
|
|
||||||
Ok(desc) => {
|
|
||||||
log::info!(
|
|
||||||
"{}: vision describe succeeded ({} chars)",
|
|
||||||
backend_label,
|
|
||||||
desc.len()
|
|
||||||
);
|
|
||||||
Some(desc)
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
log::warn!(
|
|
||||||
"{}: vision describe failed, continuing without: {}",
|
|
||||||
backend_label,
|
|
||||||
e
|
|
||||||
);
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
Err(e) => {
|
||||||
|
log::warn!("{}: vision describe failed, continuing without: {}", kind, e);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
},
|
||||||
None => None,
|
None => None,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -4228,34 +3955,24 @@ Return ONLY the summary, nothing else."#,
|
|||||||
date = date_taken.format("%B %d, %Y"),
|
date = date_taken.format("%B %d, %Y"),
|
||||||
);
|
);
|
||||||
|
|
||||||
// 10. Define tools. Gate flags computed from current data presence;
|
// 10. Define tools. describe_photo offered only when the chat model
|
||||||
// hybrid mode omits describe_photo since the chat model receives
|
// sees images directly (images_inline); in hybrid mode the visual
|
||||||
// the visual description inline (so we pass `false` for
|
// description is already inlined as text.
|
||||||
// has_vision in that mode regardless of the model's actual
|
let gate_opts = self.current_gate_opts(backend.images_inline);
|
||||||
// capability).
|
|
||||||
let gate_opts = self.current_gate_opts(has_vision && !describes_then_inlines);
|
|
||||||
let tools = Self::build_tool_definitions(gate_opts);
|
let tools = Self::build_tool_definitions(gate_opts);
|
||||||
|
|
||||||
// 11. Build initial messages. In describe-then-inline modes images
|
// 11. Build initial messages. images_inline → attach base64 to the
|
||||||
// are never attached to the wire message — the description is part
|
// user message; describe-then-inline → text was already injected.
|
||||||
// of `user_content`.
|
|
||||||
let system_msg = ChatMessage::system(system_content);
|
let system_msg = ChatMessage::system(system_content);
|
||||||
let mut user_msg = ChatMessage::user(user_content);
|
let mut user_msg = ChatMessage::user(user_content);
|
||||||
if !describes_then_inlines && let Some(ref img) = image_base64 {
|
if backend.images_inline {
|
||||||
user_msg.images = Some(vec![img.clone()]);
|
if let Some(ref img) = image_base64 {
|
||||||
|
user_msg.images = Some(vec![img.clone()]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut messages = vec![system_msg, user_msg];
|
let mut messages = vec![system_msg, user_msg];
|
||||||
|
|
||||||
// 12. Agentic loop — dispatch through the selected backend.
|
|
||||||
let chat_backend: &dyn LlmClient = if let Some(ref lc_c) = llamacpp_client {
|
|
||||||
lc_c
|
|
||||||
} else if let Some(ref or_c) = openrouter_client {
|
|
||||||
or_c
|
|
||||||
} else {
|
|
||||||
&ollama_client
|
|
||||||
};
|
|
||||||
|
|
||||||
let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx);
|
let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx);
|
||||||
let loop_cx = insight_cx.with_span(loop_span);
|
let loop_cx = insight_cx.with_span(loop_span);
|
||||||
|
|
||||||
@@ -4268,7 +3985,8 @@ Return ONLY the summary, nothing else."#,
|
|||||||
iterations_used = iteration + 1;
|
iterations_used = iteration + 1;
|
||||||
log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations);
|
log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations);
|
||||||
|
|
||||||
let (response, prompt_tokens, eval_tokens) = chat_backend
|
let (response, prompt_tokens, eval_tokens) = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools(messages.clone(), tools.clone())
|
.chat_with_tools(messages.clone(), tools.clone())
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
@@ -4308,13 +4026,11 @@ Return ONLY the summary, nothing else."#,
|
|||||||
.execute_tool(
|
.execute_tool(
|
||||||
&tool_call.function.name,
|
&tool_call.function.name,
|
||||||
&tool_call.function.arguments,
|
&tool_call.function.arguments,
|
||||||
&ollama_client,
|
&backend,
|
||||||
&image_base64,
|
&image_base64,
|
||||||
&file_path,
|
&file_path,
|
||||||
user_id,
|
user_id,
|
||||||
&persona_id,
|
&persona_id,
|
||||||
chat_backend.primary_model(),
|
|
||||||
&backend_label,
|
|
||||||
&loop_cx,
|
&loop_cx,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -4338,7 +4054,8 @@ Return ONLY the summary, nothing else."#,
|
|||||||
"Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as {}.",
|
"Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as {}.",
|
||||||
user_display_name()
|
user_display_name()
|
||||||
)));
|
)));
|
||||||
let (final_response, prompt_tokens, eval_tokens) = chat_backend
|
let (final_response, prompt_tokens, eval_tokens) = backend
|
||||||
|
.chat()
|
||||||
.chat_with_tools(messages.clone(), vec![])
|
.chat_with_tools(messages.clone(), vec![])
|
||||||
.await?;
|
.await?;
|
||||||
last_prompt_eval_count = prompt_tokens;
|
last_prompt_eval_count = prompt_tokens;
|
||||||
@@ -4360,7 +4077,8 @@ Return ONLY the summary, nothing else."#,
|
|||||||
let title_system = custom_system_prompt.as_deref().unwrap_or(
|
let title_system = custom_system_prompt.as_deref().unwrap_or(
|
||||||
"You are my long term memory assistant. Use only the information provided. Do not invent details.",
|
"You are my long term memory assistant. Use only the information provided. Do not invent details.",
|
||||||
);
|
);
|
||||||
let title_raw = chat_backend
|
let title_raw = backend
|
||||||
|
.chat()
|
||||||
.generate(&title_prompt, Some(title_system), None)
|
.generate(&title_prompt, Some(title_system), None)
|
||||||
.await?;
|
.await?;
|
||||||
let title = title_raw.trim().trim_matches('"').to_string();
|
let title = title_raw.trim().trim_matches('"').to_string();
|
||||||
@@ -4383,7 +4101,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
};
|
};
|
||||||
|
|
||||||
// 15. Store insight (returns the persisted row including its new id)
|
// 15. Store insight (returns the persisted row including its new id)
|
||||||
let model_version = chat_backend.primary_model().to_string();
|
let model_version = backend.model().to_string();
|
||||||
let fewshot_source_ids_json = if fewshot_source_ids.is_empty() {
|
let fewshot_source_ids_json = if fewshot_source_ids.is_empty() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
@@ -4398,7 +4116,7 @@ Return ONLY the summary, nothing else."#,
|
|||||||
model_version,
|
model_version,
|
||||||
is_current: true,
|
is_current: true,
|
||||||
training_messages,
|
training_messages,
|
||||||
backend: backend_label.clone(),
|
backend: kind.as_str().to_string(),
|
||||||
fewshot_source_ids: fewshot_source_ids_json,
|
fewshot_source_ids: fewshot_source_ids_json,
|
||||||
content_hash: None,
|
content_hash: None,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -290,9 +290,6 @@ impl Default for AppState {
|
|||||||
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
|
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
|
||||||
let insight_chat = Arc::new(InsightChatService::new(
|
let insight_chat = Arc::new(InsightChatService::new(
|
||||||
Arc::new(insight_generator.clone()),
|
Arc::new(insight_generator.clone()),
|
||||||
ollama.clone(),
|
|
||||||
openrouter.clone(),
|
|
||||||
llamacpp.clone(),
|
|
||||||
insight_dao.clone(),
|
insight_dao.clone(),
|
||||||
chat_locks,
|
chat_locks,
|
||||||
));
|
));
|
||||||
@@ -470,9 +467,6 @@ impl AppState {
|
|||||||
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
|
Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
|
||||||
let insight_chat = Arc::new(InsightChatService::new(
|
let insight_chat = Arc::new(InsightChatService::new(
|
||||||
Arc::new(insight_generator.clone()),
|
Arc::new(insight_generator.clone()),
|
||||||
ollama.clone(),
|
|
||||||
None,
|
|
||||||
None,
|
|
||||||
insight_dao.clone(),
|
insight_dao.clone(),
|
||||||
chat_locks,
|
chat_locks,
|
||||||
));
|
));
|
||||||
|
|||||||
Reference in New Issue
Block a user