feat(ai): surface tool invocations in chat history

load_history now groups the tool_call + tool_result scaffolding that precedes each assistant reply under that reply as `tools: [{name, arguments, result}]`. Result bodies over 2000 bytes are truncated for payload size (cut back to a UTF-8 char boundary) and flagged with `result_truncated`; the full value remains in training_messages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 16:03:53 -04:00
parent 65ab10e9a8
commit c2bd3c08e1
2 changed files with 102 additions and 1 deletions
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -704,6 +704,17 @@ pub struct RenderedHistoryMessage {
    pub role: String,
    pub content: String,
    pub is_initial: bool,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<HistoryToolInvocation>,
 }
 /// Serializable view of a single tool invocation, carried in
 /// `RenderedHistoryMessage::tools`.
 ///
 /// `chat_history_handler` fills this field-for-field from the invocation
 /// value produced by the chat service.
 #[derive(Debug, Serialize)]
 pub struct HistoryToolInvocation {
    /// Name of the invoked tool, copied from the service-layer invocation.
    pub name: String,
    /// Arguments of the call as an arbitrary JSON value.
    pub arguments: serde_json::Value,
    /// Text of the tool's result; may be a shortened preview, in which case
    /// `result_truncated` is `true`.
    pub result: String,
    /// Whether `result` was shortened. Left out of the serialized output
    /// entirely when `false` (the `Not::not` predicate skips the field).
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    pub result_truncated: bool,
 }
 #[derive(Debug, Deserialize)]
@@ -787,6 +798,16 @@ pub async fn chat_history_handler(
                    role: m.role,
                    content: m.content,
                    is_initial: m.is_initial,
                    tools: m
                        .tools
                        .into_iter()
                        .map(|t| HistoryToolInvocation {
                            name: t.name,
                            arguments: t.arguments,
                            result: t.result,
                            result_truncated: t.result_truncated,
                        })
                        .collect(),
                })
                .collect(),
            turn_count: view.turn_count,
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -115,10 +115,39 @@ impl InsightChatService {
        let mut rendered = Vec::new();
        let mut user_turns_seen = 0usize;
        let mut assistant_turns_seen = 0usize;
        // Accumulate tool invocations seen since the last user turn. An
        // invocation is: one assistant tool_call message (which may hold
        // multiple calls) + the N following tool-role messages (one per call,
        // in order). They attach to the next assistant-with-content, which
        // is the "final" reply for the current turn.
        //
        // Wire shape from the model:
        //   assistant { tool_calls: [A, B], content: "" }
        //   tool      { content: "result of A" }
        //   tool      { content: "result of B" }
        //   assistant { content: "here's the answer" }  ← rendered as final
        let mut pending_tools: Vec<ToolInvocation> = Vec::new();
        // Queue of (name, arguments) awaiting a tool_result to pair with.
        let mut pending_calls: std::collections::VecDeque<(String, serde_json::Value)> =
            std::collections::VecDeque::new();
        for msg in &messages {
            match msg.role.as_str() {
                "system" => continue,
-                "tool" => continue,
+                "tool" => {
                    if let Some((name, arguments)) = pending_calls.pop_front() {
                        let (result, result_truncated) = truncate_tool_result(&msg.content);
                        pending_tools.push(ToolInvocation {
                            name,
                            arguments,
                            result,
                            result_truncated,
                        });
                    }
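                    // If there's no pending call, the tool message is an
                    // orphan and is skipped silently. This also covers results
                    // for calls issued by an assistant message that carried
                    // non-empty content alongside its tool_calls: that message
                    // is rendered as a final reply and its calls are never
                    // enqueued.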
                }
                "assistant" => {
                    let has_tool_calls = msg
                        .tool_calls
@@ -126,22 +155,41 @@ impl InsightChatService {
                        .map(|c| !c.is_empty())
                        .unwrap_or(false);
                    if has_tool_calls && msg.content.trim().is_empty() {
                        // Tool-dispatch turn: enqueue calls, wait for tool
                        // results on subsequent messages.
                        if let Some(ref tcs) = msg.tool_calls {
                            for tc in tcs {
                                pending_calls.push_back((
                                    tc.function.name.clone(),
                                    tc.function.arguments.clone(),
                                ));
                            }
                        }
                        continue;
                    }
                    // Final assistant reply for this turn — drain accumulated
                    // tools into it.
                    assistant_turns_seen += 1;
                    let tools = std::mem::take(&mut pending_tools);
                    pending_calls.clear(); // any leftover unpaired calls are dropped
                    rendered.push(RenderedMessage {
                        role: "assistant".to_string(),
                        content: msg.content.clone(),
                        is_initial: false,
                        tools,
                    });
                }
                "user" => {
                    let is_initial = user_turns_seen == 0;
                    user_turns_seen += 1;
                    // New user turn resets any in-flight tool state.
                    pending_tools.clear();
                    pending_calls.clear();
                    rendered.push(RenderedMessage {
                        role: "user".to_string(),
                        content: msg.content.clone(),
                        is_initial,
                        tools: Vec::new(),
                    });
                }
                _ => continue,
@@ -609,6 +657,38 @@ pub struct RenderedMessage {
    pub role: String,
    pub content: String,
    pub is_initial: bool,
    /// Tools invoked during this turn (only populated for assistant replies).
    /// Empty for user messages and for assistant replies that didn't involve
    /// tool calls.
    pub tools: Vec<ToolInvocation>,
 }
 /// One tool call paired with the tool-role message that answered it,
 /// collected while rendering chat history and attached to an assistant
 /// reply via `RenderedMessage::tools`.
 #[derive(Debug, Clone)]
 pub struct ToolInvocation {
    /// Function name taken from the assistant's tool call.
    pub name: String,
    /// Arguments of that call, as a JSON value.
    pub arguments: serde_json::Value,
    /// Content of the paired tool message after passing through
    /// `truncate_tool_result`, so it may be a preview rather than the full
    /// body.
    pub result: String,
    /// True when `result` was trimmed for payload size. Full value remains
    /// available in the raw training_messages blob.
    pub result_truncated: bool,
 }
 /// Maximum size, in bytes, of a tool-result body returned through the
 /// history API. Longer bodies are shortened to a preview so payloads stay
 /// small for the mobile client; verbose SMS / geocoding responses do not
 /// need to be shipped in full for inspection.
 const TOOL_RESULT_PREVIEW_MAX: usize = 2000;
 /// Produces the preview form of a tool result.
 ///
 /// Returns `(text, truncated)`. When `s` is at most
 /// `TOOL_RESULT_PREVIEW_MAX` bytes long, `text` is a copy of `s` and
 /// `truncated` is `false`. Otherwise `text` is the longest prefix of `s`
 /// that fits within the cap and ends on a UTF-8 char boundary, and
 /// `truncated` is `true`.
 fn truncate_tool_result(s: &str) -> (String, bool) {
    if s.len() > TOOL_RESULT_PREVIEW_MAX {
        // Search downward from the cap for the nearest char boundary so the
        // slice below never splits a multi-byte character. Index 0 is always
        // a boundary, so the search cannot come up empty.
        let end = (0..=TOOL_RESULT_PREVIEW_MAX)
            .rev()
            .find(|&idx| s.is_char_boundary(idx))
            .unwrap_or(0);
        return (s[..end].to_owned(), true);
    }
    (s.to_owned(), false)
 }
 /// Trim history to fit within `budget_bytes` of serialized JSON. Preserves