feat(ai): chat rewind + ollama metrics logging

Rewind: POST /insights/chat/rewind truncates training_messages at a
given rendered index, dropping the target message, everything after it,
and any preceding tool-call scaffolding. The initial user prompt is
protected and cannot be discarded.

Metrics: log prompt_eval_count/duration and eval_count/duration from
every Ollama chat response, rendered as tokens + ms + tok/s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-21 15:16:32 -04:00
parent 0b9528f61e
commit 65ab10e9a8
5 changed files with 270 additions and 4 deletions

View File

@@ -706,6 +706,63 @@ pub struct RenderedHistoryMessage {
pub is_initial: bool,
}
/// JSON body accepted by `POST /insights/chat/rewind`.
#[derive(Debug, Deserialize)]
pub struct ChatRewindHttpRequest {
    /// Path identifying the file whose stored conversation is rewound;
    /// forwarded unchanged to the chat service.
    pub file_path: String,

    /// Optional library selector. Deserializes to `None` when the field is
    /// missing from the body.
    #[serde(default)]
    pub library: Option<String>,

    /// 0-based position in the rendered transcript at which truncation
    /// starts: the message at this index and every later message are
    /// discarded. Must be greater than 0, because the initial user message
    /// is protected.
    pub discard_from_rendered_index: usize,
}
/// POST /insights/chat/rewind — truncate the stored conversation so the
/// rendered message at `discard_from_rendered_index` (and everything after)
/// is removed. Use when a user wants to retry a turn with a different
/// prompt without prior replies poisoning context.
#[post("/insights/chat/rewind")]
pub async fn chat_rewind_handler(
_claims: Claims,
request: web::Json<ChatRewindHttpRequest>,
app_state: web::Data<AppState>,
) -> impl Responder {
let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
Ok(Some(lib)) => lib,
Ok(None) => app_state.primary_library(),
Err(e) => {
return HttpResponse::BadRequest().json(serde_json::json!({
"error": format!("invalid library: {}", e)
}));
}
};
match app_state
.insight_chat
.rewind_history(
library.id,
&request.file_path,
request.discard_from_rendered_index,
)
.await
{
Ok(()) => HttpResponse::Ok().json(serde_json::json!({ "success": true })),
Err(e) => {
let msg = format!("{}", e);
log::error!("Chat rewind failed: {}", msg);
if msg.contains("no insight found") {
HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
} else if msg.contains("no chat history") {
HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
} else if msg.contains("cannot discard the initial") || msg.contains("out of range") {
HttpResponse::BadRequest().json(serde_json::json!({ "error": msg }))
} else {
HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
}
}
}
}
/// GET /insights/chat/history — return the rendered transcript for a photo.
#[get("/insights/chat/history")]
pub async fn chat_history_handler(