feat: add temperature, top_p, top_k, min_p params to insight generation
Expose Ollama sampling params through the insight generation endpoints so users can tune creativity/determinism per request. All four are optional — omitted values fall through to the model's server-side defaults.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -644,6 +644,10 @@ impl InsightGenerator {
|
||||
custom_model: Option<String>,
|
||||
custom_system_prompt: Option<String>,
|
||||
num_ctx: Option<i32>,
|
||||
temperature: Option<f32>,
|
||||
top_p: Option<f32>,
|
||||
top_k: Option<i32>,
|
||||
min_p: Option<f32>,
|
||||
) -> Result<()> {
|
||||
let tracer = global_tracer();
|
||||
let current_cx = opentelemetry::Context::current();
|
||||
@@ -677,6 +681,30 @@ impl InsightGenerator {
|
||||
ollama_client.set_num_ctx(Some(ctx));
|
||||
}
|
||||
|
||||
// Apply sampling parameters if any were provided
|
||||
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
||||
log::info!(
|
||||
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
|
||||
temperature,
|
||||
top_p,
|
||||
top_k,
|
||||
min_p
|
||||
);
|
||||
if let Some(t) = temperature {
|
||||
span.set_attribute(KeyValue::new("temperature", t as f64));
|
||||
}
|
||||
if let Some(p) = top_p {
|
||||
span.set_attribute(KeyValue::new("top_p", p as f64));
|
||||
}
|
||||
if let Some(k) = top_k {
|
||||
span.set_attribute(KeyValue::new("top_k", k as i64));
|
||||
}
|
||||
if let Some(m) = min_p {
|
||||
span.set_attribute(KeyValue::new("min_p", m as f64));
|
||||
}
|
||||
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
|
||||
}
|
||||
|
||||
// Create context with this span for child operations
|
||||
let insight_cx = current_cx.with_span(span);
|
||||
|
||||
@@ -2280,6 +2308,10 @@ Return ONLY the summary, nothing else."#,
|
||||
custom_model: Option<String>,
|
||||
custom_system_prompt: Option<String>,
|
||||
num_ctx: Option<i32>,
|
||||
temperature: Option<f32>,
|
||||
top_p: Option<f32>,
|
||||
top_k: Option<i32>,
|
||||
min_p: Option<f32>,
|
||||
max_iterations: usize,
|
||||
) -> Result<(Option<i32>, Option<i32>)> {
|
||||
let tracer = global_tracer();
|
||||
@@ -2313,6 +2345,29 @@ Return ONLY the summary, nothing else."#,
|
||||
ollama_client.set_num_ctx(Some(ctx));
|
||||
}
|
||||
|
||||
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
||||
log::info!(
|
||||
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
|
||||
temperature,
|
||||
top_p,
|
||||
top_k,
|
||||
min_p
|
||||
);
|
||||
if let Some(t) = temperature {
|
||||
span.set_attribute(KeyValue::new("temperature", t as f64));
|
||||
}
|
||||
if let Some(p) = top_p {
|
||||
span.set_attribute(KeyValue::new("top_p", p as f64));
|
||||
}
|
||||
if let Some(k) = top_k {
|
||||
span.set_attribute(KeyValue::new("top_k", k as i64));
|
||||
}
|
||||
if let Some(m) = min_p {
|
||||
span.set_attribute(KeyValue::new("min_p", m as f64));
|
||||
}
|
||||
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
|
||||
}
|
||||
|
||||
let insight_cx = current_cx.with_span(span);
|
||||
|
||||
// 2a. Verify the model exists on at least one server before checking capabilities
|
||||
|
||||
Reference in New Issue
Block a user