// ImageApi/src/ai/backend.rs

use anyhow::{Result, anyhow};

use crate::ai::llm_client::LlmClient;

/// Selects which backend arrangement is in use.
///
/// The textual spellings of each variant are defined by
/// [`BackendKind::parse`] (input) and [`BackendKind::as_str`] (output).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendKind {
    /// Spelled `"local"`; also what an empty string parses to.
    Local,
    /// Spelled `"hybrid"`.
    Hybrid,
}

impl BackendKind {
    pub fn parse(s: &str) -> Result<Self> {
        match s.trim().to_lowercase().as_str() {
            "local" | "" => Ok(Self::Local),
            "hybrid" => Ok(Self::Hybrid),
            other => Err(anyhow!(
                "unknown backend '{}'; expected 'local' or 'hybrid'",
                other
            )),
        }
    }

    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Local => "local",
            Self::Hybrid => "hybrid",
        }
    }
}

impl std::fmt::Display for BackendKind {
    /// Formats the kind as its canonical lowercase name (see `as_str`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` applies the caller's width, fill, alignment and precision
        // (e.g. `{:<8}`), which `write_str` would silently ignore. With a
        // plain `{}` the output is exactly the string from `as_str`.
        f.pad(self.as_str())
    }
}

/// Optional overrides for a model request.
///
/// Every field is an `Option`. The derived `Default` yields a value with all
/// fields `None`, so callers can set only what they need:
/// `SamplingOverrides { top_k: Some(40), ..Default::default() }`.
///
/// NOTE(review): apart from `enable_thinking`, the field descriptions below
/// are inferred from the field names — confirm against the code that
/// consumes this struct.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct SamplingOverrides {
    /// Model override, if any.
    pub model: Option<String>,
    /// Context-size override, if any. Not counted by `has_sampling`.
    pub num_ctx: Option<i32>,
    /// Temperature override, if any.
    pub temperature: Option<f32>,
    /// `top_p` override, if any.
    pub top_p: Option<f32>,
    /// `top_k` override, if any.
    pub top_k: Option<i32>,
    /// `min_p` override, if any.
    pub min_p: Option<f32>,
    /// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as
    /// `chat_template_kwargs.enable_thinking`); other backends ignore it.
    /// `None` leaves the model/template default in place.
    pub enable_thinking: Option<bool>,
}

impl SamplingOverrides {
    /// Returns `true` when at least one of `temperature`, `top_p`, `top_k`
    /// or `min_p` is set.
    ///
    /// `model`, `num_ctx` and `enable_thinking` are deliberately not
    /// consulted: setting only those still yields `false`.
    pub fn has_sampling(&self) -> bool {
        self.temperature.is_some()
            || self.top_p.is_some()
            || self.top_k.is_some()
            || self.min_p.is_some()
    }
}

/// A backend selection bundled with the two LLM clients that serve it.
///
/// The clients are private; read them through [`ResolvedBackend::chat`] and
/// [`ResolvedBackend::local`].
pub struct ResolvedBackend {
    /// Client handed out by `chat()`; `model()` reports its primary model.
    chat: Box<dyn LlmClient>,
    /// Client handed out by `local()`.
    local: Box<dyn LlmClient>,
    /// The backend kind this value was constructed with.
    pub kind: BackendKind,
    /// `true` when the chat model receives images directly (Ollama with
    /// vision, or llamacpp). `false` for hybrid where we describe-then-inline.
    pub images_inline: bool,
}

impl ResolvedBackend {
    pub fn new(
        chat: Box<dyn LlmClient>,
        local: Box<dyn LlmClient>,
        kind: BackendKind,
        images_inline: bool,
    ) -> Self {
        Self {
            chat,
            local,
            kind,
            images_inline,
        }
    }

    pub fn chat(&self) -> &dyn LlmClient {
        self.chat.as_ref()
    }

    pub fn local(&self) -> &dyn LlmClient {
        self.local.as_ref()
    }

    pub fn model(&self) -> &str {
        self.chat.primary_model()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `SamplingOverrides` with every field unset.
    fn no_overrides() -> SamplingOverrides {
        SamplingOverrides {
            model: None,
            num_ctx: None,
            temperature: None,
            top_p: None,
            top_k: None,
            min_p: None,
            enable_thinking: None,
        }
    }

    #[test]
    fn parse_backend_kind() {
        // Accepted spellings: exact, padded, mixed case, and empty input.
        let accepted = [
            ("local", BackendKind::Local),
            ("hybrid", BackendKind::Hybrid),
            ("  Local ", BackendKind::Local),
            ("HYBRID", BackendKind::Hybrid),
            ("", BackendKind::Local),
        ];
        for (input, expected) in accepted {
            assert_eq!(BackendKind::parse(input).unwrap(), expected);
        }

        // Anything else is rejected.
        assert!(BackendKind::parse("vllm").is_err());
    }

    #[test]
    fn backend_kind_as_str_roundtrips() {
        for kind in [BackendKind::Local, BackendKind::Hybrid] {
            assert_eq!(BackendKind::parse(kind.as_str()).unwrap(), kind);
        }
    }

    #[test]
    fn sampling_overrides_has_sampling() {
        let empty = no_overrides();
        assert!(!empty.has_sampling());

        // `num_ctx` alone would not count; `temperature` does.
        let with_temp = SamplingOverrides {
            num_ctx: Some(4096),
            temperature: Some(0.7),
            ..no_overrides()
        };
        assert!(with_temp.has_sampling());
    }
}