feat: surface Ollama context token usage in agentic insight response
Captures prompt_eval_count and eval_count from Ollama /api/chat responses during the agentic loop, and returns them in the POST /insights/generate/agentic response so the frontend can display context-window usage to the user.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -33,6 +33,10 @@ pub struct PhotoInsightResponse {
|
||||
pub summary: String,
|
||||
pub generated_at: i64,
|
||||
pub model_version: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub prompt_eval_count: Option<i32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub eval_count: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -133,6 +137,8 @@ pub async fn get_insight_handler(
|
||||
summary: insight.summary,
|
||||
generated_at: insight.generated_at,
|
||||
model_version: insight.model_version,
|
||||
prompt_eval_count: None,
|
||||
eval_count: None,
|
||||
};
|
||||
HttpResponse::Ok().json(response)
|
||||
}
|
||||
@@ -197,6 +203,8 @@ pub async fn get_all_insights_handler(
|
||||
summary: insight.summary,
|
||||
generated_at: insight.generated_at,
|
||||
model_version: insight.model_version,
|
||||
prompt_eval_count: None,
|
||||
eval_count: None,
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -263,7 +271,7 @@ pub async fn generate_agentic_insight_handler(
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(()) => {
|
||||
Ok((prompt_eval_count, eval_count)) => {
|
||||
span.set_status(Status::Ok);
|
||||
// Fetch the stored insight to return it
|
||||
let otel_context = opentelemetry::Context::new();
|
||||
@@ -277,6 +285,8 @@ pub async fn generate_agentic_insight_handler(
|
||||
summary: insight.summary,
|
||||
generated_at: insight.generated_at,
|
||||
model_version: insight.model_version,
|
||||
prompt_eval_count,
|
||||
eval_count,
|
||||
};
|
||||
HttpResponse::Ok().json(response)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user