feat: add temperature, top_p, top_k, min_p params to insight generation
Expose Ollama sampling parameters through the insight generation endpoints so users can tune creativity/determinism per request. All four are optional — omitted values fall through to the model's server-side defaults.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -46,6 +46,10 @@ pub struct OllamaClient {
|
||||
pub primary_model: String,
|
||||
pub fallback_model: Option<String>,
|
||||
num_ctx: Option<i32>,
|
||||
temperature: Option<f32>,
|
||||
top_p: Option<f32>,
|
||||
top_k: Option<i32>,
|
||||
min_p: Option<f32>,
|
||||
}
|
||||
|
||||
impl OllamaClient {
|
||||
@@ -66,6 +70,10 @@ impl OllamaClient {
|
||||
primary_model,
|
||||
fallback_model,
|
||||
num_ctx: None,
|
||||
temperature: None,
|
||||
top_p: None,
|
||||
top_k: None,
|
||||
min_p: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,6 +81,43 @@ impl OllamaClient {
|
||||
self.num_ctx = num_ctx;
|
||||
}
|
||||
|
||||
/// Set sampling parameters for generation. `None` values leave the
|
||||
/// server-side default in place.
|
||||
pub fn set_sampling_params(
|
||||
&mut self,
|
||||
temperature: Option<f32>,
|
||||
top_p: Option<f32>,
|
||||
top_k: Option<i32>,
|
||||
min_p: Option<f32>,
|
||||
) {
|
||||
self.temperature = temperature;
|
||||
self.top_p = top_p;
|
||||
self.top_k = top_k;
|
||||
self.min_p = min_p;
|
||||
}
|
||||
|
||||
/// Build an `OllamaOptions` payload from the currently configured fields.
|
||||
/// Returns `None` if no options would be set, so the `options` field is
|
||||
/// omitted from the request entirely.
|
||||
fn build_options(&self) -> Option<OllamaOptions> {
|
||||
if self.num_ctx.is_none()
|
||||
&& self.temperature.is_none()
|
||||
&& self.top_p.is_none()
|
||||
&& self.top_k.is_none()
|
||||
&& self.min_p.is_none()
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(OllamaOptions {
|
||||
num_ctx: self.num_ctx,
|
||||
temperature: self.temperature,
|
||||
top_p: self.top_p,
|
||||
top_k: self.top_k,
|
||||
min_p: self.min_p,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Replace the HTTP client with one using a custom request timeout.
|
||||
/// Useful for slow models where the default 120s may be insufficient.
|
||||
pub fn with_request_timeout(mut self, secs: u64) -> Self {
|
||||
@@ -269,7 +314,7 @@ impl OllamaClient {
|
||||
prompt: prompt.to_string(),
|
||||
stream: false,
|
||||
system: system.map(|s| s.to_string()),
|
||||
options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) }),
|
||||
options: self.build_options(),
|
||||
images,
|
||||
};
|
||||
|
||||
@@ -592,7 +637,7 @@ Analyze the image and use specific details from both the visual content and the
|
||||
.unwrap_or(&self.primary_model)
|
||||
};
|
||||
|
||||
let options = self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) });
|
||||
let options = self.build_options();
|
||||
|
||||
let request_body = OllamaChatRequest {
|
||||
model,
|
||||
@@ -785,7 +830,16 @@ struct OllamaRequest {
|
||||
|
||||
/// Optional generation options carried in the `options` field of an Ollama
/// request.
///
/// Every field is skipped during serialization when it is `None`, so only
/// explicitly configured values are sent.
#[derive(Serialize)]
|
||||
struct OllamaOptions {
|
||||
/// NOTE(review): presumably the context window size in tokens — confirm
/// against the Ollama API documentation.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
num_ctx: Option<i32>,
|
||||
/// Sampling temperature override; omitted from the payload when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
temperature: Option<f32>,
|
||||
/// `top_p` sampling override; omitted from the payload when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
top_p: Option<f32>,
|
||||
/// `top_k` sampling override; omitted from the payload when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
top_k: Option<i32>,
|
||||
/// `min_p` sampling override; omitted from the payload when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
min_p: Option<f32>,
|
||||
}
|
||||
|
||||
/// Tool definition sent in /api/chat requests (OpenAI-compatible format)
|
||||
|
||||
Reference in New Issue
Block a user