ai: send images directly to llamacpp chat models + add ResolvedBackend

llamacpp models now receive images via OpenAI content-parts instead of
the describe-then-inline strategy (hybrid mode unchanged). Fixes
assistant messages with tool_calls emitting content: null instead of ""
to satisfy strict Jinja template role-alternation checks. Adds debug
logging of message role sequences on llamacpp requests.

Introduces BackendKind enum, SamplingOverrides, and ResolvedBackend in
a new backend.rs module. InsightGenerator::resolve_backend centralises
client construction and vision-capability detection; a follow-up change
will wire the existing inline dispatch through it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-24 14:00:37 -04:00
parent be51421b38
commit 0631820fbf
6 changed files with 395 additions and 70 deletions
+14 -19
View File
@@ -311,7 +311,7 @@ impl InsightChatService {
let is_hybrid = effective_backend == "hybrid";
let local_via_llamacpp =
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
let describes_then_inlines = is_hybrid || local_via_llamacpp;
let describes_then_inlines = is_hybrid;
span.set_attribute(KeyValue::new("backend", effective_backend.clone()));
// 4. Build the chat backend client. Hybrid → OpenRouter; local with
@@ -408,12 +408,11 @@ impl InsightChatService {
let model_used = chat_backend.primary_model().to_string();
span.set_attribute(KeyValue::new("model", model_used.clone()));
// 5. Decide vision + tool set. In describe-then-inline modes
// (hybrid, llamacpp) we always omit `describe_photo` (matches the
// original generation flow). In local we trust the stored
// history's first-user shape: if it carries `images`, the
// original model was vision-capable, and we keep `describe_photo`
// available.
// 5. Decide vision + tool set. In describe-then-inline mode
// (hybrid only) we omit `describe_photo`. In local and llamacpp
// we trust the stored history's first-user shape: if it carries
// `images`, the original model was vision-capable, and we keep
// `describe_photo` available.
let local_first_user_has_image = messages
.iter()
.find(|m| m.role == "user")
@@ -821,9 +820,7 @@ impl InsightChatService {
.unwrap_or_else(|| stored_backend.clone());
validate_cross_replay(&stored_backend, &effective_backend)?;
let is_hybrid = effective_backend == "hybrid";
let local_via_llamacpp =
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
let describes_then_inlines = is_hybrid || local_via_llamacpp;
let describes_then_inlines = is_hybrid;
let max_iterations = req
.max_iterations
@@ -842,10 +839,9 @@ impl InsightChatService {
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
let model_used = chat_backend.primary_model().to_string();
// Tool set — local mode + first user turn carries an image →
// offer describe_photo. Describe-then-inline modes (hybrid OR
// local_via_llamacpp): visual description was inlined when the
// insight was bootstrapped, no describe tool needed.
// Tool set — local/llamacpp mode + first user turn carries an image →
// offer describe_photo. Describe-then-inline mode (hybrid only):
// visual description was inlined at bootstrap, no describe tool needed.
let local_first_user_has_image = messages
.iter()
.find(|m| m.role == "user")
@@ -991,7 +987,7 @@ impl InsightChatService {
let is_hybrid = effective_backend == "hybrid";
let local_via_llamacpp =
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
let describes_then_inlines = is_hybrid || local_via_llamacpp;
let describes_then_inlines = is_hybrid;
let max_iterations = req
.max_iterations
@@ -1023,10 +1019,9 @@ impl InsightChatService {
_ => None,
});
// Describe-then-inline (hybrid OR local_via_llamacpp): pre-describe
// the image so a text-only chat model gets the visual description
// inline. Vision source follows `LLM_BACKEND`: llama-swap when
// `local_via_llamacpp`, else Ollama.
// Describe-then-inline (hybrid only): pre-describe the image so a
// text-only chat model gets the visual description inline. llamacpp
// sends images directly to the chat model.
let visual_block = if describes_then_inlines {
match image_base64.as_deref() {
Some(b64) => {