ai: send images directly to llamacpp chat models + add ResolvedBackend

llamacpp models now receive images via OpenAI content-parts instead of the describe-then-inline strategy (hybrid mode is unchanged). Assistant messages with tool_calls now emit content: "" instead of content: null, to satisfy strict Jinja template role-alternation checks. Adds debug logging of message role sequences on llamacpp requests. Introduces the BackendKind enum, SamplingOverrides, and ResolvedBackend in a new backend.rs module. InsightGenerator::resolve_backend centralises client construction and vision-capability detection; the next step is to wire the existing inline dispatch through it. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 14:00:37 -04:00
parent be51421b38
commit 0631820fbf
6 changed files with 395 additions and 70 deletions
@@ -311,7 +311,7 @@ impl InsightChatService {
        let is_hybrid = effective_backend == "hybrid";
        let local_via_llamacpp =
            crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
-        let describes_then_inlines = is_hybrid || local_via_llamacpp;
+        let describes_then_inlines = is_hybrid;
        span.set_attribute(KeyValue::new("backend", effective_backend.clone()));

        // 4. Build the chat backend client. Hybrid → OpenRouter; local with
@@ -408,12 +408,11 @@ impl InsightChatService {
        let model_used = chat_backend.primary_model().to_string();
        span.set_attribute(KeyValue::new("model", model_used.clone()));

-        // 5. Decide vision + tool set. In describe-then-inline modes
-        //    (hybrid, llamacpp) we always omit `describe_photo` (matches the
-        //    original generation flow). In local we trust the stored
-        //    history's first-user shape: if it carries `images`, the
-        //    original model was vision-capable, and we keep `describe_photo`
-        //    available.
+        // 5. Decide vision + tool set. In describe-then-inline mode
+        //    (hybrid only) we omit `describe_photo`. In local and llamacpp
+        //    we trust the stored history's first-user shape: if it carries
+        //    `images`, the original model was vision-capable, and we keep
+        //    `describe_photo` available.
        let local_first_user_has_image = messages
            .iter()
            .find(|m| m.role == "user")
@@ -821,9 +820,7 @@ impl InsightChatService {
            .unwrap_or_else(|| stored_backend.clone());
        validate_cross_replay(&stored_backend, &effective_backend)?;
        let is_hybrid = effective_backend == "hybrid";
-        let local_via_llamacpp =
-            crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
-        let describes_then_inlines = is_hybrid || local_via_llamacpp;
+        let describes_then_inlines = is_hybrid;

        let max_iterations = req
            .max_iterations
@@ -842,10 +839,9 @@ impl InsightChatService {
        let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
        let model_used = chat_backend.primary_model().to_string();

-        // Tool set — local mode + first user turn carries an image →
-        // offer describe_photo. Describe-then-inline modes (hybrid OR
-        // local_via_llamacpp): visual description was inlined when the
-        // insight was bootstrapped, no describe tool needed.
+        // Tool set — local/llamacpp mode + first user turn carries an image →
+        // offer describe_photo. Describe-then-inline mode (hybrid only):
+        // visual description was inlined at bootstrap, no describe tool needed.
        let local_first_user_has_image = messages
            .iter()
            .find(|m| m.role == "user")
@@ -991,7 +987,7 @@ impl InsightChatService {
        let is_hybrid = effective_backend == "hybrid";
        let local_via_llamacpp =
            crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
-        let describes_then_inlines = is_hybrid || local_via_llamacpp;
+        let describes_then_inlines = is_hybrid;

        let max_iterations = req
            .max_iterations
@@ -1023,10 +1019,9 @@ impl InsightChatService {
                _ => None,
            });

-        // Describe-then-inline (hybrid OR local_via_llamacpp): pre-describe
-        // the image so a text-only chat model gets the visual description
-        // inline. Vision source follows `LLM_BACKEND`: llama-swap when
-        // `local_via_llamacpp`, else Ollama.
+        // Describe-then-inline (hybrid only): pre-describe the image so a
+        // text-only chat model gets the visual description inline. llamacpp
+        // sends images directly to the chat model.
        let visual_block = if describes_then_inlines {
            match image_base64.as_deref() {
                Some(b64) => {