feat: surface Ollama context token usage in agentic insight response
Captures prompt_eval_count and eval_count from Ollama /api/chat responses during the agentic loop and returns them in POST /insights/generate/agentic so the frontend can display context window usage to the user.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -507,7 +507,7 @@ Analyze the image and use specific details from both the visual content and the
|
||||
&self,
|
||||
messages: Vec<ChatMessage>,
|
||||
tools: Vec<Tool>,
|
||||
) -> Result<ChatMessage> {
|
||||
) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
|
||||
// Try primary server first
|
||||
log::info!(
|
||||
"Attempting chat_with_tools with primary server: {} (model: {})",
|
||||
@@ -519,9 +519,9 @@ Analyze the image and use specific details from both the visual content and the
|
||||
.await;
|
||||
|
||||
match primary_result {
|
||||
Ok(response) => {
|
||||
Ok(result) => {
|
||||
log::info!("Successfully got chat_with_tools response from primary server");
|
||||
Ok(response)
|
||||
Ok(result)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Primary server chat_with_tools failed: {}", e);
|
||||
@@ -540,11 +540,11 @@ Analyze the image and use specific details from both the visual content and the
|
||||
.try_chat_with_tools(fallback_url, messages, tools)
|
||||
.await
|
||||
{
|
||||
Ok(response) => {
|
||||
Ok(result) => {
|
||||
log::info!(
|
||||
"Successfully got chat_with_tools response from fallback server"
|
||||
);
|
||||
Ok(response)
|
||||
Ok(result)
|
||||
}
|
||||
Err(fallback_e) => {
|
||||
log::error!(
|
||||
@@ -571,7 +571,7 @@ Analyze the image and use specific details from both the visual content and the
|
||||
base_url: &str,
|
||||
messages: Vec<ChatMessage>,
|
||||
tools: Vec<Tool>,
|
||||
) -> Result<ChatMessage> {
|
||||
) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
|
||||
let url = format!("{}/api/chat", base_url);
|
||||
let model = if base_url == self.primary_url {
|
||||
&self.primary_model
|
||||
@@ -623,7 +623,11 @@ Analyze the image and use specific details from both the visual content and the
|
||||
.await
|
||||
.with_context(|| "Failed to parse Ollama chat response")?;
|
||||
|
||||
Ok(chat_response.message)
|
||||
Ok((
|
||||
chat_response.message,
|
||||
chat_response.prompt_eval_count,
|
||||
chat_response.eval_count,
|
||||
))
|
||||
}
|
||||
|
||||
/// Generate an embedding vector for text using nomic-embed-text:v1.5
|
||||
@@ -876,6 +880,10 @@ struct OllamaChatResponse {
|
||||
#[serde(default)]
|
||||
#[allow(dead_code)]
|
||||
done_reason: String,
|
||||
#[serde(default)]
|
||||
prompt_eval_count: Option<i32>,
|
||||
#[serde(default)]
|
||||
eval_count: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
||||
Reference in New Issue
Block a user