From b599f7a34b99eafac92c4c6d2971570bb53463f1 Mon Sep 17 00:00:00 2001
From: Cameron
Date: Wed, 15 Apr 2026 09:27:59 -0400
Subject: [PATCH] feat: add temperature, top_p, top_k, min_p params to
 insight generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Expose Ollama sampling params through the insight generation endpoints
so users can tune creativity/determinism per request. All four are
optional — omitted values fall through to the model's server-side
defaults.

Co-Authored-By: Claude Opus 4.6
---
 src/ai/handlers.rs            | 16 ++++++++++
 src/ai/insight_generator.rs   | 55 +++++++++++++++++++++++++++++++++
 src/ai/ollama.rs              | 58 +++++++++++++++++++++++++++++++++--
 src/bin/populate_knowledge.rs |  4 +++
 4 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs
index 60e0964..cf7fd5b 100644
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -18,6 +18,14 @@ pub struct GeneratePhotoInsightRequest {
     pub system_prompt: Option<String>,
     #[serde(default)]
     pub num_ctx: Option<u32>,
+    #[serde(default)]
+    pub temperature: Option<f32>,
+    #[serde(default)]
+    pub top_p: Option<f32>,
+    #[serde(default)]
+    pub top_k: Option<u32>,
+    #[serde(default)]
+    pub min_p: Option<f32>,
 }
 
 #[derive(Debug, Deserialize)]
@@ -108,6 +116,10 @@ pub async fn generate_insight_handler(
         request.model.clone(),
         request.system_prompt.clone(),
         request.num_ctx,
+        request.temperature,
+        request.top_p,
+        request.top_k,
+        request.min_p,
     )
     .await;
 
@@ -282,6 +294,10 @@ pub async fn generate_agentic_insight_handler(
         request.model.clone(),
         request.system_prompt.clone(),
         request.num_ctx,
+        request.temperature,
+        request.top_p,
+        request.top_k,
+        request.min_p,
         max_iterations,
     )
     .await;
diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs
index 603f704..8c7c934 100644
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -644,6 +644,10 @@ impl InsightGenerator {
         custom_model: Option<String>,
         custom_system_prompt: Option<String>,
         num_ctx: Option<u32>,
+        temperature: Option<f32>,
+        top_p: Option<f32>,
+        top_k: Option<u32>,
+        min_p: Option<f32>,
     ) -> Result<()> {
         let tracer = global_tracer();
         let current_cx = opentelemetry::Context::current();
@@ -677,6 +681,30 @@ impl InsightGenerator {
             ollama_client.set_num_ctx(Some(ctx));
         }
 
+        // Apply sampling parameters if any were provided
+        if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
+            log::info!(
+                "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
+                temperature,
+                top_p,
+                top_k,
+                min_p
+            );
+            if let Some(t) = temperature {
+                span.set_attribute(KeyValue::new("temperature", t as f64));
+            }
+            if let Some(p) = top_p {
+                span.set_attribute(KeyValue::new("top_p", p as f64));
+            }
+            if let Some(k) = top_k {
+                span.set_attribute(KeyValue::new("top_k", k as i64));
+            }
+            if let Some(m) = min_p {
+                span.set_attribute(KeyValue::new("min_p", m as f64));
+            }
+            ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
+        }
+
         // Create context with this span for child operations
         let insight_cx = current_cx.with_span(span);
 
@@ -2280,6 +2308,10 @@ Return ONLY the summary, nothing else."#,
         custom_model: Option<String>,
         custom_system_prompt: Option<String>,
         num_ctx: Option<u32>,
+        temperature: Option<f32>,
+        top_p: Option<f32>,
+        top_k: Option<u32>,
+        min_p: Option<f32>,
         max_iterations: usize,
     ) -> Result<(Option<String>, Option<String>)> {
         let tracer = global_tracer();
@@ -2313,6 +2345,29 @@
             ollama_client.set_num_ctx(Some(ctx));
         }
 
+        if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
+            log::info!(
+                "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
+                temperature,
+                top_p,
+                top_k,
+                min_p
+            );
+            if let Some(t) = temperature {
+                span.set_attribute(KeyValue::new("temperature", t as f64));
+            }
+            if let Some(p) = top_p {
+                span.set_attribute(KeyValue::new("top_p", p as f64));
+            }
+            if let Some(k) = top_k {
+                span.set_attribute(KeyValue::new("top_k", k as i64));
+            }
+            if let Some(m) = min_p {
+                span.set_attribute(KeyValue::new("min_p", m as f64));
+            }
+            ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
+        }
+
         let insight_cx = current_cx.with_span(span);
 
         // 2a. Verify the model exists on at least one server before checking capabilities
diff --git a/src/ai/ollama.rs b/src/ai/ollama.rs
index 3728da7..1f42b6c 100644
--- a/src/ai/ollama.rs
+++ b/src/ai/ollama.rs
@@ -46,6 +46,10 @@ pub struct OllamaClient {
     pub primary_model: String,
     pub fallback_model: Option<String>,
     num_ctx: Option<u32>,
+    temperature: Option<f32>,
+    top_p: Option<f32>,
+    top_k: Option<u32>,
+    min_p: Option<f32>,
 }
 
 impl OllamaClient {
@@ -66,6 +70,10 @@
             primary_model,
             fallback_model,
             num_ctx: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            min_p: None,
         }
     }
 
@@ -73,6 +81,43 @@
         self.num_ctx = num_ctx;
     }
 
+    /// Set sampling parameters for generation. `None` values leave the
+    /// server-side default in place.
+    pub fn set_sampling_params(
+        &mut self,
+        temperature: Option<f32>,
+        top_p: Option<f32>,
+        top_k: Option<u32>,
+        min_p: Option<f32>,
+    ) {
+        self.temperature = temperature;
+        self.top_p = top_p;
+        self.top_k = top_k;
+        self.min_p = min_p;
+    }
+
+    /// Build an `OllamaOptions` payload from the currently configured fields.
+    /// Returns `None` if no options would be set, so the `options` field is
+    /// omitted from the request entirely.
+    fn build_options(&self) -> Option<OllamaOptions> {
+        if self.num_ctx.is_none()
+            && self.temperature.is_none()
+            && self.top_p.is_none()
+            && self.top_k.is_none()
+            && self.min_p.is_none()
+        {
+            None
+        } else {
+            Some(OllamaOptions {
+                num_ctx: self.num_ctx,
+                temperature: self.temperature,
+                top_p: self.top_p,
+                top_k: self.top_k,
+                min_p: self.min_p,
+            })
+        }
+    }
+
     /// Replace the HTTP client with one using a custom request timeout.
     /// Useful for slow models where the default 120s may be insufficient.
     pub fn with_request_timeout(mut self, secs: u64) -> Self {
@@ -269,7 +314,7 @@
             prompt: prompt.to_string(),
             stream: false,
             system: system.map(|s| s.to_string()),
-            options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) }),
+            options: self.build_options(),
             images,
         };
 
@@ -592,7 +637,7 @@ Analyze the image and use specific details from both the visual content and the
                 .unwrap_or(&self.primary_model)
         };
 
-        let options = self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) });
+        let options = self.build_options();
 
         let request_body = OllamaChatRequest {
             model,
@@ -785,7 +830,16 @@ struct OllamaRequest {
 
 #[derive(Serialize)]
 struct OllamaOptions {
+    #[serde(skip_serializing_if = "Option::is_none")]
     num_ctx: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    temperature: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    top_p: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    top_k: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    min_p: Option<f32>,
 }
 
 /// Tool definition sent in /api/chat requests (OpenAI-compatible format)
diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs
index 432084b..2c53fdc 100644
--- a/src/bin/populate_knowledge.rs
+++ b/src/bin/populate_knowledge.rs
@@ -202,6 +202,10 @@ async fn main() -> anyhow::Result<()> {
         args.model.clone(),
         None,
         args.num_ctx,
+        None,
+        None,
+        None,
+        None,
         args.max_iterations,
     )
     .await
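
Example request (illustrative: the endpoint path, photo id, and model
name below are placeholders not taken from this patch, which does not
touch the router; the JSON keys are exactly the fields of
GeneratePhotoInsightRequest, and any subset of the four sampling fields
may be omitted):

    POST /api/photos/42/insight
    Content-Type: application/json

    {
      "model": "llava:13b",
      "num_ctx": 8192,
      "temperature": 0.2,
      "top_p": 0.9,
      "top_k": 40,
      "min_p": 0.05
    }

Omitted keys deserialize to None via #[serde(default)], so nothing is
forwarded for them and Ollama applies its own defaults.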
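On the outbound side, every OllamaOptions field carries
#[serde(skip_serializing_if = "Option::is_none")], so unset parameters
never appear in the payload sent to Ollama. A minimal sketch of that
behavior, assuming serde_json is available as a dev-dependency and the
test sits in src/ai/ollama.rs where the private struct is visible (the
module and test names are hypothetical):

    #[cfg(test)]
    mod sampling_option_tests {
        use super::*;

        #[test]
        fn unset_sampling_fields_are_omitted() {
            // Only temperature is set; the other four fields should
            // disappear from the JSON rather than serialize as null.
            let opts = OllamaOptions {
                num_ctx: None,
                temperature: Some(0.2),
                top_p: None,
                top_k: None,
                min_p: None,
            };
            assert_eq!(
                serde_json::to_string(&opts).unwrap(),
                r#"{"temperature":0.2}"#
            );
        }
    }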