feat: surface Ollama context token usage in agentic insight response

Captures prompt_eval_count and eval_count from Ollama /api/chat responses during the agentic loop and returns them in the POST /insights/generate/agentic response so the frontend can display context window usage to the user.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 17:25:35 -04:00
parent 50cf526b46
commit b2cf99c857
3 changed files with 530 additions and 36 deletions
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -33,6 +33,10 @@ pub struct PhotoInsightResponse {
    pub summary: String,
    pub generated_at: i64,
    pub model_version: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt_eval_count: Option<i32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub eval_count: Option<i32>,
 }

 #[derive(Debug, Serialize)]
@@ -133,6 +137,8 @@ pub async fn get_insight_handler(
                summary: insight.summary,
                generated_at: insight.generated_at,
                model_version: insight.model_version,
+                prompt_eval_count: None,
+                eval_count: None,
            };
            HttpResponse::Ok().json(response)
        }
@@ -197,6 +203,8 @@ pub async fn get_all_insights_handler(
                    summary: insight.summary,
                    generated_at: insight.generated_at,
                    model_version: insight.model_version,
+                    prompt_eval_count: None,
+                    eval_count: None,
                })
                .collect();

@@ -263,7 +271,7 @@ pub async fn generate_agentic_insight_handler(
        .await;

    match result {
-        Ok(()) => {
+        Ok((prompt_eval_count, eval_count)) => {
            span.set_status(Status::Ok);
            // Fetch the stored insight to return it
            let otel_context = opentelemetry::Context::new();
@@ -277,6 +285,8 @@ pub async fn generate_agentic_insight_handler(
                        summary: insight.summary,
                        generated_at: insight.generated_at,
                        model_version: insight.model_version,
+                        prompt_eval_count,
+                        eval_count,
                    };
                    HttpResponse::Ok().json(response)
                }