feat: surface Ollama context token usage in agentic insight response

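Captures `prompt_eval_count` and `eval_count` from Ollama `/api/chat` responses
during the agentic loop and returns them in the response body of
`POST /insights/generate/agentic` so the frontend can display context window
usage to the user. Both fields are optional and are omitted from the JSON when
they are `None`; `get_insight_handler` and `get_all_insights_handler` always
set them to `None`.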

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-03 17:25:35 -04:00
parent 50cf526b46
commit b2cf99c857
3 changed files with 530 additions and 36 deletions

View File

@@ -33,6 +33,10 @@ pub struct PhotoInsightResponse {
pub summary: String,
pub generated_at: i64,
pub model_version: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_eval_count: Option<i32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub eval_count: Option<i32>,
}
#[derive(Debug, Serialize)]
@@ -133,6 +137,8 @@ pub async fn get_insight_handler(
summary: insight.summary,
generated_at: insight.generated_at,
model_version: insight.model_version,
prompt_eval_count: None,
eval_count: None,
};
HttpResponse::Ok().json(response)
}
@@ -197,6 +203,8 @@ pub async fn get_all_insights_handler(
summary: insight.summary,
generated_at: insight.generated_at,
model_version: insight.model_version,
prompt_eval_count: None,
eval_count: None,
})
.collect();
@@ -263,7 +271,7 @@ pub async fn generate_agentic_insight_handler(
.await;
match result {
Ok(()) => {
Ok((prompt_eval_count, eval_count)) => {
span.set_status(Status::Ok);
// Fetch the stored insight to return it
let otel_context = opentelemetry::Context::new();
@@ -277,6 +285,8 @@ pub async fn generate_agentic_insight_handler(
summary: insight.summary,
generated_at: insight.generated_at,
model_version: insight.model_version,
prompt_eval_count,
eval_count,
};
HttpResponse::Ok().json(response)
}