llamacpp models now receive images via OpenAI content-parts instead of the describe-then-inline strategy (hybrid mode unchanged). Fixes assistant messages with tool_calls emitting content: null instead of "" to satisfy strict Jinja template role-alternation checks. Adds debug logging of message role sequences on llamacpp requests. Introduces BackendKind enum, SamplingOverrides, and ResolvedBackend in a new backend.rs module. InsightGenerator::resolve_backend centralises client construction + vision capability detection — next step wires the existing inline dispatch through it. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
119 lines
3.2 KiB
Rust
119 lines
3.2 KiB
Rust
use anyhow::{Result, anyhow};
|
|
|
|
use crate::ai::llm_client::LlmClient;
|
|
|
|
/// Selects between the `local` and `hybrid` backends.
///
/// The textual forms accepted by `BackendKind::parse` and produced by
/// `BackendKind::as_str` are `"local"` and `"hybrid"`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum BackendKind {
    /// The `"local"` backend. An empty backend string also parses to this
    /// variant, which is why it is the `Default`.
    #[default]
    Local,
    /// The `"hybrid"` backend.
    Hybrid,
}
|
|
|
|
impl BackendKind {
|
|
pub fn parse(s: &str) -> Result<Self> {
|
|
match s.trim().to_lowercase().as_str() {
|
|
"local" | "" => Ok(Self::Local),
|
|
"hybrid" => Ok(Self::Hybrid),
|
|
other => Err(anyhow!("unknown backend '{}'; expected 'local' or 'hybrid'", other)),
|
|
}
|
|
}
|
|
|
|
pub fn as_str(&self) -> &'static str {
|
|
match self {
|
|
Self::Local => "local",
|
|
Self::Hybrid => "hybrid",
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for BackendKind {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.write_str(self.as_str())
|
|
}
|
|
}
|
|
|
|
/// A set of optional overrides; every field is an `Option`, and
/// `SamplingOverrides::default()` leaves all of them unset.
///
/// Only `temperature`, `top_p`, `top_k` and `min_p` are treated as sampling
/// parameters by [`SamplingOverrides::has_sampling`]; `model` and `num_ctx`
/// are carried alongside them but are not considered sampling settings.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct SamplingOverrides {
    /// Model override, if any.
    pub model: Option<String>,
    /// `num_ctx` override, if any (presumably the context window size —
    /// confirm against the consumer of this struct).
    pub num_ctx: Option<i32>,
    /// Sampling temperature override, if any.
    pub temperature: Option<f32>,
    /// `top_p` sampling override, if any.
    pub top_p: Option<f32>,
    /// `top_k` sampling override, if any.
    pub top_k: Option<i32>,
    /// `min_p` sampling override, if any.
    pub min_p: Option<f32>,
}

impl SamplingOverrides {
    /// Returns `true` when at least one of `temperature`, `top_p`, `top_k`
    /// or `min_p` is set.
    ///
    /// `model` and `num_ctx` do not influence the result.
    pub fn has_sampling(&self) -> bool {
        self.temperature.is_some()
            || self.top_p.is_some()
            || self.top_k.is_some()
            || self.min_p.is_some()
    }
}
|
|
|
|
pub struct ResolvedBackend {
|
|
chat: Box<dyn LlmClient>,
|
|
local: Box<dyn LlmClient>,
|
|
pub kind: BackendKind,
|
|
/// `true` when the chat model receives images directly (Ollama with
|
|
/// vision, or llamacpp). `false` for hybrid where we describe-then-inline.
|
|
pub images_inline: bool,
|
|
}
|
|
|
|
impl ResolvedBackend {
|
|
pub fn new(
|
|
chat: Box<dyn LlmClient>,
|
|
local: Box<dyn LlmClient>,
|
|
kind: BackendKind,
|
|
images_inline: bool,
|
|
) -> Self {
|
|
Self { chat, local, kind, images_inline }
|
|
}
|
|
|
|
pub fn chat(&self) -> &dyn LlmClient {
|
|
self.chat.as_ref()
|
|
}
|
|
|
|
pub fn local(&self) -> &dyn LlmClient {
|
|
self.local.as_ref()
|
|
}
|
|
|
|
pub fn model(&self) -> &str {
|
|
self.chat.primary_model()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Overrides with every field unset. Built field by field so these
    /// tests do not require any trait impls on `SamplingOverrides`.
    fn no_overrides() -> SamplingOverrides {
        SamplingOverrides {
            model: None,
            num_ctx: None,
            temperature: None,
            top_p: None,
            top_k: None,
            min_p: None,
        }
    }

    #[test]
    fn parse_backend_kind() {
        let accepted = [
            ("local", BackendKind::Local),
            ("hybrid", BackendKind::Hybrid),
            (" Local ", BackendKind::Local),
            ("HYBRID", BackendKind::Hybrid),
            ("", BackendKind::Local),
            // Whitespace-only input trims down to the empty string.
            ("   ", BackendKind::Local),
        ];
        for (input, expected) in accepted.iter() {
            assert_eq!(
                BackendKind::parse(input).unwrap(),
                *expected,
                "input: {:?}",
                input
            );
        }

        assert!(BackendKind::parse("vllm").is_err());
    }

    #[test]
    fn parse_error_reports_normalised_input() {
        // The message quotes the trimmed, lower-cased value.
        let err = BackendKind::parse("  VLLM ").unwrap_err();
        assert_eq!(
            err.to_string(),
            "unknown backend 'vllm'; expected 'local' or 'hybrid'"
        );
    }

    #[test]
    fn backend_kind_as_str_roundtrips() {
        for kind in [BackendKind::Local, BackendKind::Hybrid].iter() {
            assert_eq!(BackendKind::parse(kind.as_str()).unwrap(), *kind);
        }
    }

    #[test]
    fn display_matches_as_str() {
        for kind in [BackendKind::Local, BackendKind::Hybrid].iter() {
            assert_eq!(kind.to_string(), kind.as_str());
        }
    }

    #[test]
    fn sampling_overrides_has_sampling() {
        assert!(!no_overrides().has_sampling());

        let with_temp = SamplingOverrides {
            num_ctx: Some(4096),
            temperature: Some(0.7),
            ..no_overrides()
        };
        assert!(with_temp.has_sampling());
    }

    #[test]
    fn model_and_num_ctx_are_not_sampling() {
        let non_sampling = SamplingOverrides {
            model: Some("some-model".to_string()),
            num_ctx: Some(4096),
            ..no_overrides()
        };
        assert!(!non_sampling.has_sampling());
    }

    #[test]
    fn each_sampling_field_counts_on_its_own() {
        let only_top_p = SamplingOverrides { top_p: Some(0.9), ..no_overrides() };
        let only_top_k = SamplingOverrides { top_k: Some(40), ..no_overrides() };
        let only_min_p = SamplingOverrides { min_p: Some(0.05), ..no_overrides() };

        assert!(only_top_p.has_sampling());
        assert!(only_top_k.has_sampling());
        assert!(only_min_p.has_sampling());
    }
}
|