feat: surface Ollama context token usage in agentic insight response

Captures prompt_eval_count and eval_count from Ollama /api/chat responses
during the agentic loop and returns them in POST /insights/generate/agentic
so the frontend can display context window usage to the user.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Author: Cameron
Date: 2026-04-03 17:25:35 -04:00
parent 50cf526b46
commit b2cf99c857
3 changed files with 530 additions and 36 deletions

View File

@@ -507,7 +507,7 @@ Analyze the image and use specific details from both the visual content and the
&self,
messages: Vec<ChatMessage>,
tools: Vec<Tool>,
) -> Result<ChatMessage> {
) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
// Try primary server first
log::info!(
"Attempting chat_with_tools with primary server: {} (model: {})",
@@ -519,9 +519,9 @@ Analyze the image and use specific details from both the visual content and the
.await;
match primary_result {
Ok(response) => {
Ok(result) => {
log::info!("Successfully got chat_with_tools response from primary server");
Ok(response)
Ok(result)
}
Err(e) => {
log::warn!("Primary server chat_with_tools failed: {}", e);
@@ -540,11 +540,11 @@ Analyze the image and use specific details from both the visual content and the
.try_chat_with_tools(fallback_url, messages, tools)
.await
{
Ok(response) => {
Ok(result) => {
log::info!(
"Successfully got chat_with_tools response from fallback server"
);
Ok(response)
Ok(result)
}
Err(fallback_e) => {
log::error!(
@@ -571,7 +571,7 @@ Analyze the image and use specific details from both the visual content and the
base_url: &str,
messages: Vec<ChatMessage>,
tools: Vec<Tool>,
) -> Result<ChatMessage> {
) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
let url = format!("{}/api/chat", base_url);
let model = if base_url == self.primary_url {
&self.primary_model
@@ -623,7 +623,11 @@ Analyze the image and use specific details from both the visual content and the
.await
.with_context(|| "Failed to parse Ollama chat response")?;
Ok(chat_response.message)
Ok((
chat_response.message,
chat_response.prompt_eval_count,
chat_response.eval_count,
))
}
/// Generate an embedding vector for text using nomic-embed-text:v1.5
@@ -876,6 +880,10 @@ struct OllamaChatResponse {
#[serde(default)]
#[allow(dead_code)]
done_reason: String,
#[serde(default)]
prompt_eval_count: Option<i32>,
#[serde(default)]
eval_count: Option<i32>,
}
#[derive(Deserialize)]