feat: surface Ollama context token usage in agentic insight response

Captures prompt_eval_count and eval_count from Ollama /api/chat responses
during the agentic loop and returns them in POST /insights/generate/agentic
so the frontend can display context window usage to the user.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Author: Cameron
Date: 2026-04-03 17:25:35 -04:00
parent 50cf526b46
commit b2cf99c857
3 changed files with 530 additions and 36 deletions

View File

@@ -507,7 +507,7 @@ Analyze the image and use specific details from both the visual content and the
&self,
messages: Vec<ChatMessage>,
tools: Vec<Tool>,
) -> Result<ChatMessage> {
) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
// Try primary server first
log::info!(
"Attempting chat_with_tools with primary server: {} (model: {})",
@@ -519,9 +519,9 @@ Analyze the image and use specific details from both the visual content and the
.await;
match primary_result {
Ok(response) => {
Ok(result) => {
log::info!("Successfully got chat_with_tools response from primary server");
Ok(response)
Ok(result)
}
Err(e) => {
log::warn!("Primary server chat_with_tools failed: {}", e);
@@ -540,11 +540,11 @@ Analyze the image and use specific details from both the visual content and the
.try_chat_with_tools(fallback_url, messages, tools)
.await
{
Ok(response) => {
Ok(result) => {
log::info!(
"Successfully got chat_with_tools response from fallback server"
);
Ok(response)
Ok(result)
}
Err(fallback_e) => {
log::error!(
@@ -571,7 +571,7 @@ Analyze the image and use specific details from both the visual content and the
base_url: &str,
messages: Vec<ChatMessage>,
tools: Vec<Tool>,
) -> Result<ChatMessage> {
) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
let url = format!("{}/api/chat", base_url);
let model = if base_url == self.primary_url {
&self.primary_model
@@ -623,7 +623,11 @@ Analyze the image and use specific details from both the visual content and the
.await
.with_context(|| "Failed to parse Ollama chat response")?;
Ok(chat_response.message)
Ok((
chat_response.message,
chat_response.prompt_eval_count,
chat_response.eval_count,
))
}
/// Generate an embedding vector for text using nomic-embed-text:v1.5
@@ -876,6 +880,10 @@ struct OllamaChatResponse {
#[serde(default)]
#[allow(dead_code)]
done_reason: String,
#[serde(default)]
prompt_eval_count: Option<i32>,
#[serde(default)]
eval_count: Option<i32>,
}
#[derive(Deserialize)]