003-knowledge-memory #55

Merged
cameron merged 12 commits from 003-knowledge-memory into master 2026-04-21 01:54:34 +00:00
4 changed files with 131 additions and 2 deletions
Showing only changes of commit b599f7a34b - Show all commits

View File

@@ -18,6 +18,14 @@ pub struct GeneratePhotoInsightRequest {
pub system_prompt: Option<String>, pub system_prompt: Option<String>,
#[serde(default)] #[serde(default)]
pub num_ctx: Option<i32>, pub num_ctx: Option<i32>,
#[serde(default)]
pub temperature: Option<f32>,
#[serde(default)]
pub top_p: Option<f32>,
#[serde(default)]
pub top_k: Option<i32>,
#[serde(default)]
pub min_p: Option<f32>,
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
@@ -108,6 +116,10 @@ pub async fn generate_insight_handler(
request.model.clone(), request.model.clone(),
request.system_prompt.clone(), request.system_prompt.clone(),
request.num_ctx, request.num_ctx,
request.temperature,
request.top_p,
request.top_k,
request.min_p,
) )
.await; .await;
@@ -282,6 +294,10 @@ pub async fn generate_agentic_insight_handler(
request.model.clone(), request.model.clone(),
request.system_prompt.clone(), request.system_prompt.clone(),
request.num_ctx, request.num_ctx,
request.temperature,
request.top_p,
request.top_k,
request.min_p,
max_iterations, max_iterations,
) )
.await; .await;

View File

@@ -644,6 +644,10 @@ impl InsightGenerator {
custom_model: Option<String>, custom_model: Option<String>,
custom_system_prompt: Option<String>, custom_system_prompt: Option<String>,
num_ctx: Option<i32>, num_ctx: Option<i32>,
temperature: Option<f32>,
top_p: Option<f32>,
top_k: Option<i32>,
min_p: Option<f32>,
) -> Result<()> { ) -> Result<()> {
let tracer = global_tracer(); let tracer = global_tracer();
let current_cx = opentelemetry::Context::current(); let current_cx = opentelemetry::Context::current();
@@ -677,6 +681,30 @@ impl InsightGenerator {
ollama_client.set_num_ctx(Some(ctx)); ollama_client.set_num_ctx(Some(ctx));
} }
// Apply sampling parameters if any were provided
// NOTE(review): this block is duplicated verbatim in the agentic
// generation path below — consider extracting a shared helper.
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
log::info!(
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
temperature,
top_p,
top_k,
min_p
);
// Record each requested knob on the tracing span; f32/i32 are widened
// to f64/i64 because OTel attribute values are 64-bit.
if let Some(t) = temperature {
span.set_attribute(KeyValue::new("temperature", t as f64));
}
if let Some(p) = top_p {
span.set_attribute(KeyValue::new("top_p", p as f64));
}
if let Some(k) = top_k {
span.set_attribute(KeyValue::new("top_k", k as i64));
}
if let Some(m) = min_p {
span.set_attribute(KeyValue::new("min_p", m as f64));
}
// Forward all four values; the setter overwrites unconditionally, so
// `None`s clear any previously configured value on the client.
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
}
// Create context with this span for child operations // Create context with this span for child operations
let insight_cx = current_cx.with_span(span); let insight_cx = current_cx.with_span(span);
@@ -2280,6 +2308,10 @@ Return ONLY the summary, nothing else."#,
custom_model: Option<String>, custom_model: Option<String>,
custom_system_prompt: Option<String>, custom_system_prompt: Option<String>,
num_ctx: Option<i32>, num_ctx: Option<i32>,
temperature: Option<f32>,
top_p: Option<f32>,
top_k: Option<i32>,
min_p: Option<f32>,
max_iterations: usize, max_iterations: usize,
) -> Result<(Option<i32>, Option<i32>)> { ) -> Result<(Option<i32>, Option<i32>)> {
let tracer = global_tracer(); let tracer = global_tracer();
@@ -2313,6 +2345,29 @@ Return ONLY the summary, nothing else."#,
ollama_client.set_num_ctx(Some(ctx)); ollama_client.set_num_ctx(Some(ctx));
} }
// Apply sampling parameters if any were provided (same logic as the
// non-agentic insight path — NOTE(review): duplicated verbatim there;
// consider extracting a shared helper).
if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() {
log::info!(
"Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}",
temperature,
top_p,
top_k,
min_p
);
// Record each requested knob on the tracing span; f32/i32 are widened
// to f64/i64 because OTel attribute values are 64-bit.
if let Some(t) = temperature {
span.set_attribute(KeyValue::new("temperature", t as f64));
}
if let Some(p) = top_p {
span.set_attribute(KeyValue::new("top_p", p as f64));
}
if let Some(k) = top_k {
span.set_attribute(KeyValue::new("top_k", k as i64));
}
if let Some(m) = min_p {
span.set_attribute(KeyValue::new("min_p", m as f64));
}
ollama_client.set_sampling_params(temperature, top_p, top_k, min_p);
}
let insight_cx = current_cx.with_span(span); let insight_cx = current_cx.with_span(span);
// 2a. Verify the model exists on at least one server before checking capabilities // 2a. Verify the model exists on at least one server before checking capabilities

View File

@@ -46,6 +46,10 @@ pub struct OllamaClient {
pub primary_model: String, pub primary_model: String,
pub fallback_model: Option<String>, pub fallback_model: Option<String>,
num_ctx: Option<i32>, num_ctx: Option<i32>,
temperature: Option<f32>,
top_p: Option<f32>,
top_k: Option<i32>,
min_p: Option<f32>,
} }
impl OllamaClient { impl OllamaClient {
@@ -66,6 +70,10 @@ impl OllamaClient {
primary_model, primary_model,
fallback_model, fallback_model,
num_ctx: None, num_ctx: None,
temperature: None,
top_p: None,
top_k: None,
min_p: None,
} }
} }
@@ -73,6 +81,43 @@ impl OllamaClient {
self.num_ctx = num_ctx; self.num_ctx = num_ctx;
} }
/// Configure the per-request sampling knobs for this client.
///
/// Every stored field is overwritten, so passing `None` for a parameter
/// clears any previously configured value and falls back to the Ollama
/// server-side default for that option.
pub fn set_sampling_params(
&mut self,
temperature: Option<f32>,
top_p: Option<f32>,
top_k: Option<i32>,
min_p: Option<f32>,
) {
self.min_p = min_p;
self.top_k = top_k;
self.top_p = top_p;
self.temperature = temperature;
}
/// Assemble an `OllamaOptions` payload from whichever knobs are
/// currently configured on this client.
///
/// Returns `None` when every field is unset, so the serialized request
/// omits the `options` object entirely and the server uses its own
/// defaults.
fn build_options(&self) -> Option<OllamaOptions> {
let any_set = self.num_ctx.is_some()
|| self.temperature.is_some()
|| self.top_p.is_some()
|| self.top_k.is_some()
|| self.min_p.is_some();
any_set.then(|| OllamaOptions {
num_ctx: self.num_ctx,
temperature: self.temperature,
top_p: self.top_p,
top_k: self.top_k,
min_p: self.min_p,
})
}
/// Replace the HTTP client with one using a custom request timeout. /// Replace the HTTP client with one using a custom request timeout.
/// Useful for slow models where the default 120s may be insufficient. /// Useful for slow models where the default 120s may be insufficient.
pub fn with_request_timeout(mut self, secs: u64) -> Self { pub fn with_request_timeout(mut self, secs: u64) -> Self {
@@ -269,7 +314,7 @@ impl OllamaClient {
prompt: prompt.to_string(), prompt: prompt.to_string(),
stream: false, stream: false,
system: system.map(|s| s.to_string()), system: system.map(|s| s.to_string()),
options: self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) }), options: self.build_options(),
images, images,
}; };
@@ -592,7 +637,7 @@ Analyze the image and use specific details from both the visual content and the
.unwrap_or(&self.primary_model) .unwrap_or(&self.primary_model)
}; };
let options = self.num_ctx.map(|ctx| OllamaOptions { num_ctx: Some(ctx) }); let options = self.build_options();
let request_body = OllamaChatRequest { let request_body = OllamaChatRequest {
model, model,
@@ -785,7 +830,16 @@ struct OllamaRequest {
#[derive(Serialize)] #[derive(Serialize)]
struct OllamaOptions { struct OllamaOptions {
#[serde(skip_serializing_if = "Option::is_none")]
num_ctx: Option<i32>, num_ctx: Option<i32>,
#[serde(skip_serializing_if = "Option::is_none")]
temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
top_p: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
top_k: Option<i32>,
#[serde(skip_serializing_if = "Option::is_none")]
min_p: Option<f32>,
} }
/// Tool definition sent in /api/chat requests (OpenAI-compatible format) /// Tool definition sent in /api/chat requests (OpenAI-compatible format)

View File

@@ -202,6 +202,10 @@ async fn main() -> anyhow::Result<()> {
args.model.clone(), args.model.clone(),
None, None,
args.num_ctx, args.num_ctx,
None,
None,
None,
None,
args.max_iterations, args.max_iterations,
) )
.await .await