ai: send images directly to llamacpp chat models + add ResolvedBackend
llamacpp models now receive images via OpenAI content-parts instead of the describe-then-inline strategy (hybrid mode unchanged). Fixes assistant messages with tool_calls emitting content: null instead of "" to satisfy strict Jinja template role-alternation checks. Adds debug logging of message role sequences on llamacpp requests. Introduces BackendKind enum, SamplingOverrides, and ResolvedBackend in a new backend.rs module. InsightGenerator::resolve_backend centralises client construction + vision capability detection — next step wires the existing inline dispatch through it. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
118
src/ai/backend.rs
Normal file
118
src/ai/backend.rs
Normal file
@@ -0,0 +1,118 @@
|
||||
use anyhow::{Result, anyhow};
|
||||
|
||||
use crate::ai::llm_client::LlmClient;
|
||||
|
||||
/// Identifies which backend arrangement a request is resolved against.
///
/// The textual forms are `"local"` and `"hybrid"`; see [`BackendKind::parse`]
/// and [`BackendKind::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum BackendKind {
    /// Spelled `"local"`. An empty selector also parses to this variant,
    /// which is why it is the `Default`.
    #[default]
    Local,
    /// Spelled `"hybrid"`.
    Hybrid,
}
|
||||
|
||||
impl BackendKind {
|
||||
pub fn parse(s: &str) -> Result<Self> {
|
||||
match s.trim().to_lowercase().as_str() {
|
||||
"local" | "" => Ok(Self::Local),
|
||||
"hybrid" => Ok(Self::Hybrid),
|
||||
other => Err(anyhow!("unknown backend '{}'; expected 'local' or 'hybrid'", other)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Local => "local",
|
||||
Self::Hybrid => "hybrid",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for BackendKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(self.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
/// Optional per-request settings.
///
/// Every field is an `Option`; `Default` yields a value with all of them
/// `None`. Presumably `None` means "leave the backend's own setting in place"
/// — confirm against the code that consumes this struct.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct SamplingOverrides {
    /// Model name override. Not counted by `has_sampling`.
    pub model: Option<String>,
    /// `num_ctx` override (presumably the context window size — confirm with
    /// the client that sends it). Not counted by `has_sampling`.
    pub num_ctx: Option<i32>,
    /// Sampling temperature override.
    pub temperature: Option<f32>,
    /// `top_p` sampling override.
    pub top_p: Option<f32>,
    /// `top_k` sampling override.
    pub top_k: Option<i32>,
    /// `min_p` sampling override.
    pub min_p: Option<f32>,
}
|
||||
|
||||
impl SamplingOverrides {
|
||||
pub fn has_sampling(&self) -> bool {
|
||||
self.temperature.is_some()
|
||||
|| self.top_p.is_some()
|
||||
|| self.top_k.is_some()
|
||||
|| self.min_p.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ResolvedBackend {
|
||||
chat: Box<dyn LlmClient>,
|
||||
local: Box<dyn LlmClient>,
|
||||
pub kind: BackendKind,
|
||||
/// `true` when the chat model receives images directly (Ollama with
|
||||
/// vision, or llamacpp). `false` for hybrid where we describe-then-inline.
|
||||
pub images_inline: bool,
|
||||
}
|
||||
|
||||
impl ResolvedBackend {
|
||||
pub fn new(
|
||||
chat: Box<dyn LlmClient>,
|
||||
local: Box<dyn LlmClient>,
|
||||
kind: BackendKind,
|
||||
images_inline: bool,
|
||||
) -> Self {
|
||||
Self { chat, local, kind, images_inline }
|
||||
}
|
||||
|
||||
pub fn chat(&self) -> &dyn LlmClient {
|
||||
self.chat.as_ref()
|
||||
}
|
||||
|
||||
pub fn local(&self) -> &dyn LlmClient {
|
||||
self.local.as_ref()
|
||||
}
|
||||
|
||||
pub fn model(&self) -> &str {
|
||||
self.chat.primary_model()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Known spellings parse regardless of case and padding, the empty string
    /// falls back to `Local`, and anything else is rejected.
    #[test]
    fn parse_backend_kind() {
        let accepted = [
            ("local", BackendKind::Local),
            ("hybrid", BackendKind::Hybrid),
            (" Local ", BackendKind::Local),
            ("HYBRID", BackendKind::Hybrid),
            ("", BackendKind::Local),
        ];
        for (input, expected) in accepted {
            assert_eq!(BackendKind::parse(input).unwrap(), expected);
        }

        assert!(BackendKind::parse("vllm").is_err());
    }

    /// `as_str` output must be accepted by `parse` and map back to the same variant.
    #[test]
    fn backend_kind_as_str_roundtrips() {
        for kind in [BackendKind::Local, BackendKind::Hybrid] {
            assert_eq!(BackendKind::parse(kind.as_str()).unwrap(), kind);
        }
    }

    /// `num_ctx` alone does not count as sampling; `temperature` does.
    #[test]
    fn sampling_overrides_has_sampling() {
        let empty = SamplingOverrides {
            model: None,
            num_ctx: None,
            temperature: None,
            top_p: None,
            top_k: None,
            min_p: None,
        };
        assert!(!empty.has_sampling());

        let with_temp = SamplingOverrides {
            model: None,
            num_ctx: Some(4096),
            temperature: Some(0.7),
            top_p: None,
            top_k: None,
            min_p: None,
        };
        assert!(with_temp.has_sampling());
    }
}
|
||||
Reference in New Issue
Block a user