ai: send images directly to llamacpp chat models + add ResolvedBackend
llamacpp models now receive images via OpenAI content-parts instead of the describe-then-inline strategy (hybrid mode is unchanged). Fixes assistant messages with tool_calls emitting content: null; they now emit "" instead, to satisfy strict Jinja template role-alternation checks. Adds debug logging of message role sequences on llamacpp requests. Introduces the BackendKind enum, SamplingOverrides, and ResolvedBackend in a new backend.rs module. InsightGenerator::resolve_backend centralises client construction and vision-capability detection; the next step wires the existing inline dispatch through it. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+14
-19
@@ -311,7 +311,7 @@ impl InsightChatService {
|
||||
let is_hybrid = effective_backend == "hybrid";
|
||||
let local_via_llamacpp =
|
||||
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
||||
let describes_then_inlines = is_hybrid || local_via_llamacpp;
|
||||
let describes_then_inlines = is_hybrid;
|
||||
span.set_attribute(KeyValue::new("backend", effective_backend.clone()));
|
||||
|
||||
// 4. Build the chat backend client. Hybrid → OpenRouter; local with
|
||||
@@ -408,12 +408,11 @@ impl InsightChatService {
|
||||
let model_used = chat_backend.primary_model().to_string();
|
||||
span.set_attribute(KeyValue::new("model", model_used.clone()));
|
||||
|
||||
// 5. Decide vision + tool set. In describe-then-inline modes
|
||||
// (hybrid, llamacpp) we always omit `describe_photo` (matches the
|
||||
// original generation flow). In local we trust the stored
|
||||
// history's first-user shape: if it carries `images`, the
|
||||
// original model was vision-capable, and we keep `describe_photo`
|
||||
// available.
|
||||
// 5. Decide vision + tool set. In describe-then-inline mode
|
||||
// (hybrid only) we omit `describe_photo`. In local and llamacpp
|
||||
// we trust the stored history's first-user shape: if it carries
|
||||
// `images`, the original model was vision-capable, and we keep
|
||||
// `describe_photo` available.
|
||||
let local_first_user_has_image = messages
|
||||
.iter()
|
||||
.find(|m| m.role == "user")
|
||||
@@ -821,9 +820,7 @@ impl InsightChatService {
|
||||
.unwrap_or_else(|| stored_backend.clone());
|
||||
validate_cross_replay(&stored_backend, &effective_backend)?;
|
||||
let is_hybrid = effective_backend == "hybrid";
|
||||
let local_via_llamacpp =
|
||||
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
||||
let describes_then_inlines = is_hybrid || local_via_llamacpp;
|
||||
let describes_then_inlines = is_hybrid;
|
||||
|
||||
let max_iterations = req
|
||||
.max_iterations
|
||||
@@ -842,10 +839,9 @@ impl InsightChatService {
|
||||
let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
|
||||
let model_used = chat_backend.primary_model().to_string();
|
||||
|
||||
// Tool set — local mode + first user turn carries an image →
|
||||
// offer describe_photo. Describe-then-inline modes (hybrid OR
|
||||
// local_via_llamacpp): visual description was inlined when the
|
||||
// insight was bootstrapped, no describe tool needed.
|
||||
// Tool set — local/llamacpp mode + first user turn carries an image →
|
||||
// offer describe_photo. Describe-then-inline mode (hybrid only):
|
||||
// visual description was inlined at bootstrap, no describe tool needed.
|
||||
let local_first_user_has_image = messages
|
||||
.iter()
|
||||
.find(|m| m.role == "user")
|
||||
@@ -991,7 +987,7 @@ impl InsightChatService {
|
||||
let is_hybrid = effective_backend == "hybrid";
|
||||
let local_via_llamacpp =
|
||||
crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
|
||||
let describes_then_inlines = is_hybrid || local_via_llamacpp;
|
||||
let describes_then_inlines = is_hybrid;
|
||||
|
||||
let max_iterations = req
|
||||
.max_iterations
|
||||
@@ -1023,10 +1019,9 @@ impl InsightChatService {
|
||||
_ => None,
|
||||
});
|
||||
|
||||
// Describe-then-inline (hybrid OR local_via_llamacpp): pre-describe
|
||||
// the image so a text-only chat model gets the visual description
|
||||
// inline. Vision source follows `LLM_BACKEND`: llama-swap when
|
||||
// `local_via_llamacpp`, else Ollama.
|
||||
// Describe-then-inline (hybrid only): pre-describe the image so a
|
||||
// text-only chat model gets the visual description inline. llamacpp
|
||||
// sends images directly to the chat model.
|
||||
let visual_block = if describes_then_inlines {
|
||||
match image_base64.as_deref() {
|
||||
Some(b64) => {
|
||||
|
||||
Reference in New Issue
Block a user