feat(ai): chat rewind + ollama metrics logging
Rewind: POST /insights/chat/rewind truncates training_messages at a given rendered index, dropping the target message plus any preceding tool-call scaffolding. The initial user prompt is protected. Metrics: log prompt_eval_count/duration and eval_count/duration from every Ollama chat response, rendered as tokens + ms + tok/s. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -479,6 +479,109 @@ impl InsightChatService {
|
||||
model_used,
|
||||
})
|
||||
}
|
||||
|
||||
/// Truncate the stored conversation so the rendered message at
|
||||
/// `discard_from_rendered_index` (and everything after it — including
|
||||
/// the tool-call scaffolding that produced a discarded assistant reply)
|
||||
/// is removed. The initial user turn cannot be discarded; attempting to
|
||||
/// do so returns an error.
|
||||
///
|
||||
/// Holds the per-file chat mutex so it serialises with `chat_turn`.
|
||||
pub async fn rewind_history(
|
||||
&self,
|
||||
library_id: i32,
|
||||
file_path: &str,
|
||||
discard_from_rendered_index: usize,
|
||||
) -> Result<()> {
|
||||
if discard_from_rendered_index == 0 {
|
||||
bail!("cannot discard the initial user message");
|
||||
}
|
||||
let normalized = normalize_path(file_path);
|
||||
|
||||
let lock_key = (library_id, normalized.clone());
|
||||
let entry_lock = {
|
||||
let mut locks = self.chat_locks.lock().await;
|
||||
locks
|
||||
.entry(lock_key.clone())
|
||||
.or_insert_with(|| Arc::new(TokioMutex::new(())))
|
||||
.clone()
|
||||
};
|
||||
let _guard = entry_lock.lock().await;
|
||||
|
||||
let insight = {
|
||||
let cx = opentelemetry::Context::new();
|
||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
dao.get_insight(&cx, &normalized)
|
||||
.map_err(|e| anyhow!("failed to load insight: {:?}", e))?
|
||||
.ok_or_else(|| anyhow!("no insight found for path"))?
|
||||
};
|
||||
let raw_history = insight
|
||||
.training_messages
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("insight has no chat history"))?;
|
||||
let messages: Vec<ChatMessage> = serde_json::from_str(raw_history)
|
||||
.map_err(|e| anyhow!("failed to deserialize chat history: {}", e))?;
|
||||
|
||||
let cut_at = find_raw_cut(&messages, discard_from_rendered_index)
|
||||
.ok_or_else(|| anyhow!("discard_from_rendered_index out of range"))?;
|
||||
|
||||
let truncated = &messages[..cut_at];
|
||||
let json = serde_json::to_string(truncated)
|
||||
.map_err(|e| anyhow!("failed to serialize truncated history: {}", e))?;
|
||||
|
||||
let cx = opentelemetry::Context::new();
|
||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
dao.update_training_messages(&cx, library_id, &normalized, &json)
|
||||
.map_err(|e| anyhow!("failed to persist truncated history: {:?}", e))?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this raw message visible in the rendered transcript? Must match
|
||||
/// `load_history`'s filter exactly — `find_raw_cut` depends on it to map
|
||||
/// rendered indices back to raw positions.
|
||||
fn is_rendered(m: &ChatMessage) -> bool {
|
||||
match m.role.as_str() {
|
||||
"user" => true,
|
||||
"assistant" => {
|
||||
let has_tool_calls = m
|
||||
.tool_calls
|
||||
.as_ref()
|
||||
.map(|c| !c.is_empty())
|
||||
.unwrap_or(false);
|
||||
!(has_tool_calls && m.content.trim().is_empty())
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a rendered index to start discarding from, find the raw index at
|
||||
/// which to truncate. The cut position is the raw length after all prior
|
||||
/// rendered messages — which also strips any tool-call scaffolding that
|
||||
/// immediately precedes the discarded rendered message. Returns `None` if
|
||||
/// `discard_from_rendered_index` is past the end of the rendered view.
|
||||
pub(crate) fn find_raw_cut(
|
||||
messages: &[ChatMessage],
|
||||
discard_from_rendered_index: usize,
|
||||
) -> Option<usize> {
|
||||
let mut rendered_count = 0usize;
|
||||
let mut last_kept_raw_end = 0usize;
|
||||
for (i, m) in messages.iter().enumerate() {
|
||||
if !is_rendered(m) {
|
||||
continue;
|
||||
}
|
||||
if rendered_count == discard_from_rendered_index {
|
||||
return Some(last_kept_raw_end);
|
||||
}
|
||||
rendered_count += 1;
|
||||
last_kept_raw_end = i + 1;
|
||||
}
|
||||
if rendered_count == discard_from_rendered_index {
|
||||
// Discarding past the last rendered message is a no-op, but we
|
||||
// surface it as "nothing to cut" rather than silent success.
|
||||
return None;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Read AGENTIC_CHAT_MAX_ITERATIONS once per call. Cheap; keeps the code
|
||||
@@ -637,4 +740,46 @@ mod tests {
|
||||
let dropped = apply_context_budget(&mut msgs, 1);
|
||||
assert!(!dropped);
|
||||
}
|
||||
|
||||
#[test]
fn rewind_strips_assistant_and_tool_scaffolding() {
    // Rendered view is [q1, a1, q2, "a2 final"]. Discarding rendered index 3
    // (the final assistant reply) must also remove the tool-call request and
    // tool result that precede it, so the kept prefix ends at user("q2").
    let history = vec![
        ChatMessage::system("sys"),
        ChatMessage::user("q1"),
        assistant_text("a1"),
        ChatMessage::user("q2"),
        assistant_with_tool_call("lookup"),
        ChatMessage::tool_result("data"),
        assistant_text("a2 final"),
    ];

    let keep = find_raw_cut(&history, 3).expect("cut found");

    // Raw indices 0..=3 survive, i.e. four messages.
    assert_eq!(keep, 4);
    let last_kept = &history[keep - 1];
    assert_eq!(last_kept.role, "user");
    assert_eq!(last_kept.content, "q2");
}
|
||||
|
||||
#[test]
fn rewind_at_second_rendered_cuts_after_first_user() {
    // The first assistant reply is rendered index 1. Discarding it should
    // keep only the system prompt and the initial user message.
    let history = vec![
        ChatMessage::system("s"),
        ChatMessage::user("q1"),
        assistant_with_tool_call("tool"),
        ChatMessage::tool_result("r"),
        assistant_text("a1"),
    ];

    let keep = find_raw_cut(&history, 1).expect("cut found");

    // sys + user("q1")
    assert_eq!(keep, 2);
}
|
||||
|
||||
#[test]
fn rewind_beyond_range_returns_none() {
    // Only two rendered messages exist, so rendered index 5 names nothing.
    let history = vec![ChatMessage::user("q1"), assistant_text("a1")];
    assert_eq!(find_raw_cut(&history, 5), None);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user