From b599f7a34b99eafac92c4c6d2971570bb53463f1 Mon Sep 17 00:00:00 2001
From: Cameron
Date: Wed, 15 Apr 2026 09:27:59 -0400
Subject: [PATCH] feat: add temperature, top_p, top_k, min_p params to
 insight generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Expose Ollama sampling params through the insight generation endpoints
so users can tune creativity/determinism per request. All four are
optional — omitted values fall through to the model's server-side
defaults.

Co-Authored-By: Claude Opus 4.6
---
 src/ai/handlers.rs            | 16 ++++++++++
 src/ai/insight_generator.rs   | 55 +++++++++++++++++++++++++++++++++
 src/ai/ollama.rs              | 58 +++++++++++++++++++++++++++++++++--
 src/bin/populate_knowledge.rs |  4 +++
 4 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs
index 60e0964..cf7fd5b 100644
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -18,6 +18,14 @@ pub struct GeneratePhotoInsightRequest {
     pub system_prompt: Option<String>,
     #[serde(default)]
     pub num_ctx: Option<u32>,
+    #[serde(default)]
+    pub temperature: Option<f32>,
+    #[serde(default)]
+    pub top_p: Option<f32>,
+    #[serde(default)]
+    pub top_k: Option<u32>,
+    #[serde(default)]
+    pub min_p: Option<f32>,
 }
 
 #[derive(Debug, Deserialize)]
@@ -108,6 +116,10 @@ pub async fn generate_insight_handler(
         request.model.clone(),
         request.system_prompt.clone(),
         request.num_ctx,
+        request.temperature,
+        request.top_p,
+        request.top_k,
+        request.min_p,
     )
     .await;
 
@@ -282,6 +294,10 @@ pub async fn generate_agentic_insight_handler(
         request.model.clone(),
         request.system_prompt.clone(),
         request.num_ctx,
+        request.temperature,
+        request.top_p,
+        request.top_k,
+        request.min_p,
         max_iterations,
     )
     .await;
diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs
index 603f704..8c7c934 100644
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -644,6 +644,10 @@ impl InsightGenerator {
         custom_model: Option<String>,
         custom_system_prompt: Option<String>,
         num_ctx: Option<u32>,
+        temperature: Option<f32>,
+        top_p: Option<f32>,
+        top_k: Option<u32>,
+        min_p: Option<f32>,
     ) -> Result<()> {
         let tracer = global_tracer();
         let current_cx = opentelemetry::Context::current();
@@ -677,6 +681,30 @@ impl InsightGenerator {
             ollama_client.set_num_ctx(Some(ctx));
         }
 
+        // Apply sampling parameters if any were provided
+        if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
+            log::info!(
+                "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
+                temperature,
+                top_p,
+                top_k,
+                min_p
+            );
+            if let Some(t) = temperature {
+                span.set_attribute(KeyValue::new("temperature", t as f64));
+            }
+            if let Some(p) = top_p {
+                span.set_attribute(KeyValue::new("top_p", p as f64));
+            }
+            if let Some(k) = top_k {
+                span.set_attribute(KeyValue::new("top_k", k as i64));
+            }
+            if let Some(m) = min_p {
+                span.set_attribute(KeyValue::new("min_p", m as f64));
+            }
+            ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
+        }
+
         // Create context with this span for child operations
         let insight_cx = current_cx.with_span(span);
 
@@ -2280,6 +2308,10 @@ Return ONLY the summary, nothing else."#,
         custom_model: Option<String>,
         custom_system_prompt: Option<String>,
         num_ctx: Option<u32>,
+        temperature: Option<f32>,
+        top_p: Option<f32>,
+        top_k: Option<u32>,
+        min_p: Option<f32>,
         max_iterations: usize,
     ) -> Result<(Option<String>, Option<String>)> {
         let tracer = global_tracer();
@@ -2313,6 +2345,29 @@
             ollama_client.set_num_ctx(Some(ctx));
         }
 
+        if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
+            log::info!(
+                "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
+                temperature,
+                top_p,
+                top_k,
+                min_p
+            );
+            if let Some(t) = temperature {
+                span.set_attribute(KeyValue::new("temperature", t as f64));
+            }
+            if let Some(p) = top_p {
+                span.set_attribute(KeyValue::new("top_p", p as f64));
+            }
+            if let Some(k) = top_k {
+                span.set_attribute(KeyValue::new("top_k", k as i64));
+            }
+            if let Some(m) = min_p {
+                span.set_attribute(KeyValue::new("min_p", m as f64));
+            }
+            ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
+        }
+
         let insight_cx = current_cx.with_span(span);
 
         // 2a. Verify the model exists on at least one server before checking capabilities
diff --git a/src/ai/ollama.rs b/src/ai/ollama.rs
index 3728da7..1f42b6c 100644
--- a/src/ai/ollama.rs
+++ b/src/ai/ollama.rs
@@ -46,6 +46,10 @@ pub struct OllamaClient {
     pub primary_model: String,
     pub fallback_model: Option<String>,
     num_ctx: Option<u32>,
+    temperature: Option<f32>,
+    top_p: Option<f32>,
+    top_k: Option<u32>,
+    min_p: Option<f32>,
 }
 
 impl OllamaClient {
@@ -66,6 +70,10 @@
             primary_model,
             fallback_model,
             num_ctx: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            min_p: None,
         }
     }
 
@@ -73,6 +81,43 @@
         self.num_ctx = num_ctx;
     }
 
+    /// Set sampling parameters for generation. `None` values leave the
+    /// server-side default in place.
+    pub fn set_sampling_params(
+        &mut self,
+        temperature: Option<f32>,
+        top_p: Option<f32>,
+        top_k: Option<u32>,
+        min_p: Option<f32>,
+    ) {
+        self.temperature = temperature;
+        self.top_p = top_p;
+        self.top_k = top_k;
+        self.min_p = min_p;
+    }
+
+    /// Build an `OllamaOptions` payload from the currently configured fields.
+    /// Returns `None` if no options would be set, so the `options` field is
+    /// omitted from the request entirely.
+    fn build_options(&self) -> Option<OllamaOptions> {
+        if self.num_ctx.is_none()
+            && self.temperature.is_none()
+            && self.top_p.is_none()
+            && self.top_k.is_none()
+            && self.min_p.is_none()
+        {
+            None
+        } else {
+            Some(OllamaOptions {
+                num_ctx: self.num_ctx,
+                temperature: self.temperature,
+                top_p: self.top_p,
+                top_k: self.top_k,
+                min_p: self.min_p,
+            })
+        }
+    }
+
     /// Replace the HTTP client with one using a custom request timeout.
     /// Useful for slow models where the default 120s may be insufficient.
     pub fn with_request_timeout(mut self, secs: u64) -> Self {
@@ -269,7 +314,7 @@
             prompt: prompt.to_string(),
             stream: false,
             system: system.map(|s| s.to_string()),
-            options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) }),
+            options: self.build_options(),
             images,
         };
 
@@ -592,7 +637,7 @@ Analyze the image and use specific details from both the visual content and the
                 .unwrap_or(&self.primary_model)
         };
 
-        let options = self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) });
+        let options = self.build_options();
 
         let request_body = OllamaChatRequest {
             model,
@@ -785,7 +830,16 @@ struct OllamaRequest {
 
 #[derive(Serialize)]
 struct OllamaOptions {
+    #[serde(skip_serializing_if = "Option::is_none")]
     num_ctx: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    temperature: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    top_p: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    top_k: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    min_p: Option<f32>,
 }
 
 /// Tool definition sent in /api/chat requests (OpenAI-compatible format)
diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs
index 432084b..2c53fdc 100644
--- a/src/bin/populate_knowledge.rs
+++ b/src/bin/populate_knowledge.rs
@@ -202,6 +202,10 @@ async fn main() -> anyhow::Result<()> {
         args.model.clone(),
         None,
         args.num_ctx,
+        None,
+        None,
+        None,
+        None,
         args.max_iterations,
     )
     .await
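
Example request (illustrative: the endpoint path, photo id, and model
name below are placeholders not taken from this patch, which does not
touch the router; the JSON keys are exactly the fields of
GeneratePhotoInsightRequest, and any subset of the four sampling fields
may be omitted):

    POST /api/photos/42/insight
    Content-Type: application/json

    {
      "model": "llava:13b",
      "num_ctx": 8192,
      "temperature": 0.2,
      "top_p": 0.9,
      "top_k": 40,
      "min_p": 0.05
    }

Omitted keys deserialize to None via #[serde(default)], so nothing is
forwarded for them and Ollama applies its own defaults.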
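On the outbound side, every OllamaOptions field carries
#[serde(skip_serializing_if = "Option::is_none")], so unset parameters
never appear in the payload sent to Ollama. A minimal sketch of that
behavior, assuming serde_json is available as a dev-dependency and the
test sits in src/ai/ollama.rs where the private struct is visible (the
module and test names are hypothetical):

    #[cfg(test)]
    mod sampling_option_tests {
        use super::*;

        #[test]
        fn unset_sampling_fields_are_omitted() {
            // Only temperature is set; the other four fields should
            // disappear from the JSON rather than serialize as null.
            let opts = OllamaOptions {
                num_ctx: None,
                temperature: Some(0.2),
                top_p: None,
                top_k: None,
                min_p: None,
            };
            assert_eq!(
                serde_json::to_string(&opts).unwrap(),
                r#"{"temperature":0.2}"#
            );
        }
    }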