003-knowledge-memory #55
@@ -18,6 +18,14 @@ pub struct GeneratePhotoInsightRequest {
|
|||||||
pub system_prompt: Option<String>,
|
pub system_prompt: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub num_ctx: Option<i32>,
|
pub num_ctx: Option<i32>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub temperature: Option<f32>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub top_p: Option<f32>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub top_k: Option<i32>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub min_p: Option<f32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
@@ -108,6 +116,10 @@ pub async fn generate_insight_handler(
|
|||||||
request.model.clone(),
|
request.model.clone(),
|
||||||
request.system_prompt.clone(),
|
request.system_prompt.clone(),
|
||||||
request.num_ctx,
|
request.num_ctx,
|
||||||
|
request.temperature,
|
||||||
|
request.top_p,
|
||||||
|
request.top_k,
|
||||||
|
request.min_p,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -282,6 +294,10 @@ pub async fn generate_agentic_insight_handler(
|
|||||||
request.model.clone(),
|
request.model.clone(),
|
||||||
request.system_prompt.clone(),
|
request.system_prompt.clone(),
|
||||||
request.num_ctx,
|
request.num_ctx,
|
||||||
|
request.temperature,
|
||||||
|
request.top_p,
|
||||||
|
request.top_k,
|
||||||
|
request.min_p,
|
||||||
max_iterations,
|
max_iterations,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|||||||
@@ -644,6 +644,10 @@ impl InsightGenerator {
|
|||||||
custom_model: Option<String>,
|
custom_model: Option<String>,
|
||||||
custom_system_prompt: Option<String>,
|
custom_system_prompt: Option<String>,
|
||||||
num_ctx: Option<i32>,
|
num_ctx: Option<i32>,
|
||||||
|
temperature: Option<f32>,
|
||||||
|
top_p: Option<f32>,
|
||||||
|
top_k: Option<i32>,
|
||||||
|
min_p: Option<f32>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let tracer = global_tracer();
|
let tracer = global_tracer();
|
||||||
let current_cx = opentelemetry::Context::current();
|
let current_cx = opentelemetry::Context::current();
|
||||||
@@ -677,6 +681,30 @@ impl InsightGenerator {
|
|||||||
ollama_client.set_num_ctx(Some(ctx));
|
ollama_client.set_num_ctx(Some(ctx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply sampling parameters if any were provided
|
||||||
|
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
||||||
|
log::info!(
|
||||||
|
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
|
||||||
|
temperature,
|
||||||
|
top_p,
|
||||||
|
top_k,
|
||||||
|
min_p
|
||||||
|
);
|
||||||
|
if let Some(t) = temperature {
|
||||||
|
span.set_attribute(KeyValue::new("temperature", t as f64));
|
||||||
|
}
|
||||||
|
if let Some(p) = top_p {
|
||||||
|
span.set_attribute(KeyValue::new("top_p", p as f64));
|
||||||
|
}
|
||||||
|
if let Some(k) = top_k {
|
||||||
|
span.set_attribute(KeyValue::new("top_k", k as i64));
|
||||||
|
}
|
||||||
|
if let Some(m) = min_p {
|
||||||
|
span.set_attribute(KeyValue::new("min_p", m as f64));
|
||||||
|
}
|
||||||
|
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
|
||||||
|
}
|
||||||
|
|
||||||
// Create context with this span for child operations
|
// Create context with this span for child operations
|
||||||
let insight_cx = current_cx.with_span(span);
|
let insight_cx = current_cx.with_span(span);
|
||||||
|
|
||||||
@@ -2280,6 +2308,10 @@ Return ONLY the summary, nothing else."#,
|
|||||||
custom_model: Option<String>,
|
custom_model: Option<String>,
|
||||||
custom_system_prompt: Option<String>,
|
custom_system_prompt: Option<String>,
|
||||||
num_ctx: Option<i32>,
|
num_ctx: Option<i32>,
|
||||||
|
temperature: Option<f32>,
|
||||||
|
top_p: Option<f32>,
|
||||||
|
top_k: Option<i32>,
|
||||||
|
min_p: Option<f32>,
|
||||||
max_iterations: usize,
|
max_iterations: usize,
|
||||||
) -> Result<(Option<i32>, Option<i32>)> {
|
) -> Result<(Option<i32>, Option<i32>)> {
|
||||||
let tracer = global_tracer();
|
let tracer = global_tracer();
|
||||||
@@ -2313,6 +2345,29 @@ Return ONLY the summary, nothing else."#,
|
|||||||
ollama_client.set_num_ctx(Some(ctx));
|
ollama_client.set_num_ctx(Some(ctx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
|
||||||
|
log::info!(
|
||||||
|
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
|
||||||
|
temperature,
|
||||||
|
top_p,
|
||||||
|
top_k,
|
||||||
|
min_p
|
||||||
|
);
|
||||||
|
if let Some(t) = temperature {
|
||||||
|
span.set_attribute(KeyValue::new("temperature", t as f64));
|
||||||
|
}
|
||||||
|
if let Some(p) = top_p {
|
||||||
|
span.set_attribute(KeyValue::new("top_p", p as f64));
|
||||||
|
}
|
||||||
|
if let Some(k) = top_k {
|
||||||
|
span.set_attribute(KeyValue::new("top_k", k as i64));
|
||||||
|
}
|
||||||
|
if let Some(m) = min_p {
|
||||||
|
span.set_attribute(KeyValue::new("min_p", m as f64));
|
||||||
|
}
|
||||||
|
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
|
||||||
|
}
|
||||||
|
|
||||||
let insight_cx = current_cx.with_span(span);
|
let insight_cx = current_cx.with_span(span);
|
||||||
|
|
||||||
// 2a. Verify the model exists on at least one server before checking capabilities
|
// 2a. Verify the model exists on at least one server before checking capabilities
|
||||||
|
|||||||
@@ -46,6 +46,10 @@ pub struct OllamaClient {
|
|||||||
pub primary_model: String,
|
pub primary_model: String,
|
||||||
pub fallback_model: Option<String>,
|
pub fallback_model: Option<String>,
|
||||||
num_ctx: Option<i32>,
|
num_ctx: Option<i32>,
|
||||||
|
temperature: Option<f32>,
|
||||||
|
top_p: Option<f32>,
|
||||||
|
top_k: Option<i32>,
|
||||||
|
min_p: Option<f32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl OllamaClient {
|
impl OllamaClient {
|
||||||
@@ -66,6 +70,10 @@ impl OllamaClient {
|
|||||||
primary_model,
|
primary_model,
|
||||||
fallback_model,
|
fallback_model,
|
||||||
num_ctx: None,
|
num_ctx: None,
|
||||||
|
temperature: None,
|
||||||
|
top_p: None,
|
||||||
|
top_k: None,
|
||||||
|
min_p: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -73,6 +81,43 @@ impl OllamaClient {
|
|||||||
self.num_ctx = num_ctx;
|
self.num_ctx = num_ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set sampling parameters for generation. `None` values leave the
|
||||||
|
/// server-side default in place.
|
||||||
|
pub fn set_sampling_params(
|
||||||
|
&mut self,
|
||||||
|
temperature: Option<f32>,
|
||||||
|
top_p: Option<f32>,
|
||||||
|
top_k: Option<i32>,
|
||||||
|
min_p: Option<f32>,
|
||||||
|
) {
|
||||||
|
self.temperature = temperature;
|
||||||
|
self.top_p = top_p;
|
||||||
|
self.top_k = top_k;
|
||||||
|
self.min_p = min_p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build an `OllamaOptions` payload from the currently configured fields.
|
||||||
|
/// Returns `None` if no options would be set, so the `options` field is
|
||||||
|
/// omitted from the request entirely.
|
||||||
|
fn build_options(&self) -> Option<OllamaOptions> {
|
||||||
|
if self.num_ctx.is_none()
|
||||||
|
&& self.temperature.is_none()
|
||||||
|
&& self.top_p.is_none()
|
||||||
|
&& self.top_k.is_none()
|
||||||
|
&& self.min_p.is_none()
|
||||||
|
{
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(OllamaOptions {
|
||||||
|
num_ctx: self.num_ctx,
|
||||||
|
temperature: self.temperature,
|
||||||
|
top_p: self.top_p,
|
||||||
|
top_k: self.top_k,
|
||||||
|
min_p: self.min_p,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Replace the HTTP client with one using a custom request timeout.
|
/// Replace the HTTP client with one using a custom request timeout.
|
||||||
/// Useful for slow models where the default 120s may be insufficient.
|
/// Useful for slow models where the default 120s may be insufficient.
|
||||||
pub fn with_request_timeout(mut self, secs: u64) -> Self {
|
pub fn with_request_timeout(mut self, secs: u64) -> Self {
|
||||||
@@ -269,7 +314,7 @@ impl OllamaClient {
|
|||||||
prompt: prompt.to_string(),
|
prompt: prompt.to_string(),
|
||||||
stream: false,
|
stream: false,
|
||||||
system: system.map(|s| s.to_string()),
|
system: system.map(|s| s.to_string()),
|
||||||
options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) }),
|
options: self.build_options(),
|
||||||
images,
|
images,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -592,7 +637,7 @@ Analyze the image and use specific details from both the visual content and the
|
|||||||
.unwrap_or(&self.primary_model)
|
.unwrap_or(&self.primary_model)
|
||||||
};
|
};
|
||||||
|
|
||||||
let options = self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) });
|
let options = self.build_options();
|
||||||
|
|
||||||
let request_body = OllamaChatRequest {
|
let request_body = OllamaChatRequest {
|
||||||
model,
|
model,
|
||||||
@@ -785,7 +830,16 @@ struct OllamaRequest {
|
|||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct OllamaOptions {
|
struct OllamaOptions {
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
num_ctx: Option<i32>,
|
num_ctx: Option<i32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
temperature: Option<f32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
top_p: Option<f32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
top_k: Option<i32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
min_p: Option<f32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tool definition sent in /api/chat requests (OpenAI-compatible format)
|
/// Tool definition sent in /api/chat requests (OpenAI-compatible format)
|
||||||
|
|||||||
@@ -202,6 +202,10 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
args.model.clone(),
|
args.model.clone(),
|
||||||
None,
|
None,
|
||||||
args.num_ctx,
|
args.num_ctx,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
args.max_iterations,
|
args.max_iterations,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
|
|||||||
Reference in New Issue
Block a user