2026-04-21 01:54:34 +00:00
4 changed files with 131 additions and 2 deletions
@@ -18,6 +18,14 @@ pub struct GeneratePhotoInsightRequest {
    pub system_prompt: Option<String>,
    #[serde(default)]
    pub num_ctx: Option<i32>,
    #[serde(default)]
    pub temperature: Option<f32>,
    #[serde(default)]
    pub top_p: Option<f32>,
    #[serde(default)]
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
 }
 #[derive(Debug, Deserialize)]
@@ -108,6 +116,10 @@ pub async fn generate_insight_handler(
            request.model.clone(),
            request.system_prompt.clone(),
            request.num_ctx,
            request.temperature,
            request.top_p,
            request.top_k,
            request.min_p,
        )
        .await;
@@ -282,6 +294,10 @@ pub async fn generate_agentic_insight_handler(
            request.model.clone(),
            request.system_prompt.clone(),
            request.num_ctx,
            request.temperature,
            request.top_p,
            request.top_k,
            request.min_p,
            max_iterations,
        )
        .await;
@@ -644,6 +644,10 @@ impl InsightGenerator {
        custom_model: Option<String>,
        custom_system_prompt: Option<String>,
        num_ctx: Option<i32>,
        temperature: Option<f32>,
        top_p: Option<f32>,
        top_k: Option<i32>,
        min_p: Option<f32>,
    ) -> Result<()> {
        let tracer = global_tracer();
        let current_cx = opentelemetry::Context::current();
@@ -677,6 +681,30 @@ impl InsightGenerator {
            ollama_client.set_num_ctx(Some(ctx));
        }
        // Apply sampling parameters if any were provided
        if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
            log::info!(
                "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
                temperature,
                top_p,
                top_k,
                min_p
            );
            if let Some(t) = temperature {
                span.set_attribute(KeyValue::new("temperature", t as f64));
            }
            if let Some(p) = top_p {
                span.set_attribute(KeyValue::new("top_p", p as f64));
            }
            if let Some(k) = top_k {
                span.set_attribute(KeyValue::new("top_k", k as i64));
            }
            if let Some(m) = min_p {
                span.set_attribute(KeyValue::new("min_p", m as f64));
            }
            ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
        }
        // Create context with this span for child operations
        let insight_cx = current_cx.with_span(span);
@@ -2280,6 +2308,10 @@ Return ONLY the summary, nothing else."#,
        custom_model: Option<String>,
        custom_system_prompt: Option<String>,
        num_ctx: Option<i32>,
        temperature: Option<f32>,
        top_p: Option<f32>,
        top_k: Option<i32>,
        min_p: Option<f32>,
        max_iterations: usize,
    ) -> Result<(Option<i32>, Option<i32>)> {
        let tracer = global_tracer();
@@ -2313,6 +2345,29 @@ Return ONLY the summary, nothing else."#,
            ollama_client.set_num_ctx(Some(ctx));
        }
        if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
            log::info!(
                "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
                temperature,
                top_p,
                top_k,
                min_p
            );
            if let Some(t) = temperature {
                span.set_attribute(KeyValue::new("temperature", t as f64));
            }
            if let Some(p) = top_p {
                span.set_attribute(KeyValue::new("top_p", p as f64));
            }
            if let Some(k) = top_k {
                span.set_attribute(KeyValue::new("top_k", k as i64));
            }
            if let Some(m) = min_p {
                span.set_attribute(KeyValue::new("min_p", m as f64));
            }
            ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
        }
        let insight_cx = current_cx.with_span(span);
        // 2a. Verify the model exists on at least one server before checking capabilities
@@ -46,6 +46,10 @@ pub struct OllamaClient {
    pub primary_model: String,
    pub fallback_model: Option<String>,
    num_ctx: Option<i32>,
    temperature: Option<f32>,
    top_p: Option<f32>,
    top_k: Option<i32>,
    min_p: Option<f32>,
 }
 impl OllamaClient {
@@ -66,6 +70,10 @@ impl OllamaClient {
            primary_model,
            fallback_model,
            num_ctx: None,
            temperature: None,
            top_p: None,
            top_k: None,
            min_p: None,
        }
    }
@@ -73,6 +81,43 @@ impl OllamaClient {
        self.num_ctx = num_ctx;
    }
    /// Store the sampling knobs that subsequent generation requests will use.
    ///
    /// All four values are overwritten on every call. A parameter passed as
    /// `None` is cleared on the client, which leaves the server-side default
    /// in place for it.
    pub fn set_sampling_params(
        &mut self,
        temperature: Option<f32>,
        top_p: Option<f32>,
        top_k: Option<i32>,
        min_p: Option<f32>,
    ) {
        // Single destructuring assignment: the four fields always change together.
        (self.temperature, self.top_p, self.top_k, self.min_p) =
            (temperature, top_p, top_k, min_p);
    }
    /// Assemble the `OllamaOptions` payload from the fields currently
    /// configured on this client.
    ///
    /// Yields `None` when nothing at all is configured, so the `options`
    /// field is omitted from the request entirely. Otherwise every field is
    /// copied across as-is, including the ones that are still `None`.
    fn build_options(&self) -> Option<OllamaOptions> {
        let anything_configured = self.num_ctx.is_some()
            || self.temperature.is_some()
            || self.top_p.is_some()
            || self.top_k.is_some()
            || self.min_p.is_some();

        // `then` only builds the payload when at least one option is set.
        anything_configured.then(|| OllamaOptions {
            num_ctx: self.num_ctx,
            temperature: self.temperature,
            top_p: self.top_p,
            top_k: self.top_k,
            min_p: self.min_p,
        })
    }
    /// Replace the HTTP client with one using a custom request timeout.
    /// Useful for slow models where the default 120s may be insufficient.
    pub fn with_request_timeout(mut self, secs: u64) -> Self {
@@ -269,7 +314,7 @@ impl OllamaClient {
            prompt: prompt.to_string(),
            stream: false,
            system: system.map(|s| s.to_string()),
-            options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) }),
+            options: self.build_options(),
            images,
        };
@@ -592,7 +637,7 @@ Analyze the image and use specific details from both the visual content and the
                .unwrap_or(&self.primary_model)
        };
-        let options = self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) });
+        let options = self.build_options();
        let request_body = OllamaChatRequest {
            model,
@@ -785,7 +830,16 @@ struct OllamaRequest {
 /// Optional generation settings serialized as the `options` value of a request.
 ///
 /// Every field is an `Option` marked `skip_serializing_if = "Option::is_none"`,
 /// so only the values that were explicitly set appear in the serialized output.
 #[derive(Serialize)]
 struct OllamaOptions {
    /// Context-size override (presumably Ollama's `num_ctx` context-window
    /// length — confirm against the Ollama API docs).
    #[serde(skip_serializing_if = "Option::is_none")]
    num_ctx: Option<i32>,
    /// Sampling parameter `temperature`; left out of the payload when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    /// Sampling parameter `top_p`; left out of the payload when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_p: Option<f32>,
    /// Sampling parameter `top_k`; left out of the payload when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    top_k: Option<i32>,
    /// Sampling parameter `min_p`; left out of the payload when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    min_p: Option<f32>,
 }
 /// Tool definition sent in /api/chat requests (OpenAI-compatible format)
@@ -202,6 +202,10 @@ async fn main() -> anyhow::Result<()> {
                args.model.clone(),
                None,
                args.num_ctx,
                None,
                None,
                None,
                None,
                args.max_iterations,
            )
            .await