feat: add temperature, top_p, top_k, min_p params to insight generation

Expose Ollama sampling params through the insight generation endpoints so users can tune creativity/determinism per request. All four are optional — omitted values fall through to the model's server-side defaults.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-15 09:27:59 -04:00
parent c703a47f17
commit b599f7a34b
4 changed files with 131 additions and 2 deletions
--- a/src/ai/ollama.rs
+++ b/src/ai/ollama.rs
@@ -46,6 +46,10 @@ pub struct OllamaClient {
    pub primary_model: String,
    pub fallback_model: Option<String>,
    num_ctx: Option<i32>,
+    temperature: Option<f32>,
+    top_p: Option<f32>,
+    top_k: Option<i32>,
+    min_p: Option<f32>,
 }

 impl OllamaClient {
@@ -66,6 +70,10 @@ impl OllamaClient {
            primary_model,
            fallback_model,
            num_ctx: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            min_p: None,
        }
    }

@@ -73,6 +81,43 @@ impl OllamaClient {
        self.num_ctx = num_ctx;
    }

+    /// Replace all four sampling parameters in a single call.
+    ///
+    /// Every field is overwritten, so passing `None` clears any earlier value
+    /// and generation falls back to the server-side default for that field.
+    pub fn set_sampling_params(
+        &mut self,
+        temperature: Option<f32>,
+        top_p: Option<f32>,
+        top_k: Option<i32>,
+        min_p: Option<f32>,
+    ) {
+        (self.temperature, self.top_p, self.top_k, self.min_p) =
+            (temperature, top_p, top_k, min_p);
+    }
+
+    /// Assemble the `OllamaOptions` payload from the fields configured on this
+    /// client. Yields `None` when nothing is set, which lets the request omit
+    /// its `options` field entirely.
+    fn build_options(&self) -> Option<OllamaOptions> {
+        let any_configured = self.num_ctx.is_some()
+            || self.temperature.is_some()
+            || self.top_p.is_some()
+            || self.top_k.is_some()
+            || self.min_p.is_some();
+        if !any_configured {
+            return None;
+        }
+
+        Some(OllamaOptions {
+            num_ctx: self.num_ctx,
+            temperature: self.temperature,
+            top_p: self.top_p,
+            top_k: self.top_k,
+            min_p: self.min_p,
+        })
+    }
+
    /// Replace the HTTP client with one using a custom request timeout.
    /// Useful for slow models where the default 120s may be insufficient.
    pub fn with_request_timeout(mut self, secs: u64) -> Self {
@@ -269,7 +314,7 @@ impl OllamaClient {
            prompt: prompt.to_string(),
            stream: false,
            system: system.map(|s| s.to_string()),
-            options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) }),
+            options: self.build_options(),
            images,
        };

@@ -592,7 +637,7 @@ Analyze the image and use specific details from both the visual content and the
                .unwrap_or(&self.primary_model)
        };

-        let options = self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) });
+        let options = self.build_options();

        let request_body = OllamaChatRequest {
            model,
@@ -785,7 +830,16 @@ struct OllamaRequest {

 #[derive(Serialize)]
 struct OllamaOptions {
+    #[serde(skip_serializing_if = "Option::is_none")] // omit the field when it is `None`
     num_ctx: Option<i32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    temperature: Option<f32>, // first of the four sampling parameters
+    #[serde(skip_serializing_if = "Option::is_none")]
+    top_p: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    top_k: Option<i32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    min_p: Option<f32>,
 }

 /// Tool definition sent in /api/chat requests (OpenAI-compatible format)