Files
ImageApi/src/ai/backend.rs
T
Cameron Cordes 475072810e AI: add enable_thinking reasoning toggle plumbed to llama.cpp
New optional SamplingOverride forwarded to llama-server as
chat_template_kwargs.enable_thinking (gates Qwen3-style reasoning
blocks). None leaves the template default; other backends ignore it.
Wired through the agentic-insight and chat-turn request bodies/handlers.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-29 12:39:16 -04:00

147 lines
3.8 KiB
Rust

use anyhow::{Result, anyhow};
use crate::ai::llm_client::LlmClient;
/// Selects which backend configuration is in use.
///
/// Each variant has a canonical lowercase name (`"local"` / `"hybrid"`) that
/// `BackendKind::parse` accepts and `BackendKind::as_str` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendKind {
/// Canonical name `"local"`. Also what `parse` yields for an empty or
/// whitespace-only string.
Local,
/// Canonical name `"hybrid"`.
// NOTE(review): the `ResolvedBackend::images_inline` docs suggest hybrid
// means images are described first and the description inlined; confirm.
Hybrid,
}
impl BackendKind {
pub fn parse(s: &str) -> Result<Self> {
match s.trim().to_lowercase().as_str() {
"local" | "" => Ok(Self::Local),
"hybrid" => Ok(Self::Hybrid),
other => Err(anyhow!(
"unknown backend '{}'; expected 'local' or 'hybrid'",
other
)),
}
}
pub fn as_str(&self) -> &'static str {
match self {
Self::Local => "local",
Self::Hybrid => "hybrid",
}
}
}
/// Formats a [`BackendKind`] as its canonical lowercase name.
impl std::fmt::Display for BackendKind {
    /// Writes exactly the string returned by [`BackendKind::as_str`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label: &'static str = self.as_str();
        f.write_str(label)
    }
}
/// A bundle of optional override values.
///
/// Every field is an `Option`. [`SamplingOverrides::has_sampling`] reports
/// whether any of the four sampling knobs (`temperature`, `top_p`, `top_k`,
/// `min_p`) carries a value.
pub struct SamplingOverrides {
    /// Model name override.
    // NOTE(review): presumably replaces the backend's default model; confirm
    // against callers.
    pub model: Option<String>,
    /// Context-size override; not counted by `has_sampling`.
    // NOTE(review): presumably a context window length in tokens; confirm.
    pub num_ctx: Option<i32>,
    /// Sampling temperature override.
    pub temperature: Option<f32>,
    /// `top_p` sampling override.
    pub top_p: Option<f32>,
    /// `top_k` sampling override.
    pub top_k: Option<i32>,
    /// `min_p` sampling override.
    pub min_p: Option<f32>,
    /// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as
    /// `chat_template_kwargs.enable_thinking`); other backends ignore it.
    /// `None` leaves the model/template default in place.
    pub enable_thinking: Option<bool>,
}

impl SamplingOverrides {
    /// Returns `true` when at least one of `temperature`, `top_p`, `top_k` or
    /// `min_p` is set.
    ///
    /// `model`, `num_ctx` and `enable_thinking` are deliberately not
    /// consulted: setting only those still yields `false`.
    pub fn has_sampling(&self) -> bool {
        let every_knob_unset = self.temperature.is_none()
            && self.top_p.is_none()
            && self.top_k.is_none()
            && self.min_p.is_none();
        !every_knob_unset
    }
}
/// Groups two boxed [`LlmClient`] handles with the [`BackendKind`] they were
/// resolved for and a flag describing how images reach the chat model.
///
/// The client fields are private; they are exposed read-only through
/// `ResolvedBackend::chat` and `ResolvedBackend::local`.
pub struct ResolvedBackend {
/// Client returned by `chat()`; `model()` reports this client's primary model.
chat: Box<dyn LlmClient>,
/// Client returned by `local()`.
// NOTE(review): presumably the local-side client used alongside `chat` in
// hybrid mode; confirm against the code that constructs this struct.
local: Box<dyn LlmClient>,
/// Backend kind this pair of clients belongs to.
pub kind: BackendKind,
/// `true` when the chat model receives images directly (Ollama with
/// vision, or llamacpp). `false` for hybrid where we describe-then-inline.
pub images_inline: bool,
}
impl ResolvedBackend {
pub fn new(
chat: Box<dyn LlmClient>,
local: Box<dyn LlmClient>,
kind: BackendKind,
images_inline: bool,
) -> Self {
Self {
chat,
local,
kind,
images_inline,
}
}
pub fn chat(&self) -> &dyn LlmClient {
self.chat.as_ref()
}
pub fn local(&self) -> &dyn LlmClient {
self.local.as_ref()
}
pub fn model(&self) -> &str {
self.chat.primary_model()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `SamplingOverrides` with every field left as `None`.
    fn unset_overrides() -> SamplingOverrides {
        SamplingOverrides {
            model: None,
            num_ctx: None,
            temperature: None,
            top_p: None,
            top_k: None,
            min_p: None,
            enable_thinking: None,
        }
    }

    /// Parsing accepts both names regardless of case or surrounding
    /// whitespace, maps the empty string to `Local`, and rejects anything else.
    #[test]
    fn parse_backend_kind() {
        let accepted = [
            ("local", BackendKind::Local),
            ("hybrid", BackendKind::Hybrid),
            (" Local ", BackendKind::Local),
            ("HYBRID", BackendKind::Hybrid),
            ("", BackendKind::Local),
        ];
        for (raw, expected) in accepted.iter() {
            assert_eq!(BackendKind::parse(raw).unwrap(), *expected);
        }

        assert!(BackendKind::parse("vllm").is_err());
    }

    /// `as_str` output must parse back to the variant it came from.
    #[test]
    fn backend_kind_as_str_roundtrips() {
        for kind in [BackendKind::Local, BackendKind::Hybrid].iter().copied() {
            assert_eq!(BackendKind::parse(kind.as_str()).unwrap(), kind);
        }
    }

    /// `has_sampling` is false with nothing set and true once a sampling
    /// knob (here `temperature`) is present.
    #[test]
    fn sampling_overrides_has_sampling() {
        let empty = unset_overrides();
        assert!(!empty.has_sampling());

        let with_temp = SamplingOverrides {
            num_ctx: Some(4096),
            temperature: Some(0.7),
            ..unset_overrides()
        };
        assert!(with_temp.has_sampling());
    }
}