feat(ai): streaming chat endpoint with live tool events
Add LlmClient::chat_with_tools_stream and an SSE endpoint, POST /insights/chat/stream, that emits text deltas, tool_call / tool_result pairs, a truncated notice, and a terminal done frame as the agentic loop runs.

- Ollama: parses NDJSON from the /api/chat stream, accumulates content deltas, and emits Done with the tool_calls from the final chunk.
- OpenRouter: parses OpenAI-compatible SSE, reassembles tool_call argument deltas by index, and asks for stream_options.include_usage.
- InsightChatService spawns the loop on a tokio task, feeds events through an mpsc channel, and persists training_messages at the end.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,7 @@ use opentelemetry::KeyValue;
|
||||
use opentelemetry::trace::{Span, Status, Tracer};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::ai::insight_chat::ChatTurnRequest;
|
||||
use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
|
||||
use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
|
||||
use crate::data::Claims;
|
||||
use crate::database::{ExifDao, InsightDao};
|
||||
@@ -826,3 +826,109 @@ pub async fn chat_history_handler(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// POST /insights/chat/stream — streaming variant of /insights/chat.
|
||||
/// Returns `text/event-stream` with one event per chat stream event.
|
||||
#[post("/insights/chat/stream")]
|
||||
pub async fn chat_stream_handler(
|
||||
_claims: Claims,
|
||||
request: web::Json<ChatTurnHttpRequest>,
|
||||
app_state: web::Data<AppState>,
|
||||
) -> HttpResponse {
|
||||
let library = match libraries::resolve_library_param(&app_state, request.library.as_deref()) {
|
||||
Ok(Some(lib)) => lib,
|
||||
Ok(None) => app_state.primary_library(),
|
||||
Err(e) => {
|
||||
return HttpResponse::BadRequest().json(serde_json::json!({
|
||||
"error": format!("invalid library: {}", e)
|
||||
}));
|
||||
}
|
||||
};
|
||||
|
||||
let chat_req = ChatTurnRequest {
|
||||
library_id: library.id,
|
||||
file_path: request.file_path.clone(),
|
||||
user_message: request.user_message.clone(),
|
||||
model: request.model.clone(),
|
||||
backend: request.backend.clone(),
|
||||
num_ctx: request.num_ctx,
|
||||
temperature: request.temperature,
|
||||
top_p: request.top_p,
|
||||
top_k: request.top_k,
|
||||
min_p: request.min_p,
|
||||
max_iterations: request.max_iterations,
|
||||
amend: request.amend,
|
||||
};
|
||||
|
||||
let service = app_state.insight_chat.clone();
|
||||
let events = service.chat_turn_stream(chat_req);
|
||||
|
||||
// Map ChatStreamEvent → SSE frame bytes.
|
||||
let sse_stream = futures::stream::StreamExt::map(events, |ev| {
|
||||
let frame = render_sse_frame(&ev);
|
||||
Ok::<_, actix_web::Error>(actix_web::web::Bytes::from(frame))
|
||||
});
|
||||
|
||||
HttpResponse::Ok()
|
||||
.content_type("text/event-stream")
|
||||
.insert_header(("Cache-Control", "no-cache"))
|
||||
.insert_header(("X-Accel-Buffering", "no")) // nginx: disable response buffering
|
||||
.streaming(sse_stream)
|
||||
}
|
||||
|
||||
fn render_sse_frame(ev: &ChatStreamEvent) -> String {
|
||||
let (event_name, payload) = match ev {
|
||||
ChatStreamEvent::IterationStart { n, max } => {
|
||||
("iteration_start", serde_json::json!({ "n": n, "max": max }))
|
||||
}
|
||||
ChatStreamEvent::Truncated => ("truncated", serde_json::json!({})),
|
||||
ChatStreamEvent::TextDelta(delta) => ("text", serde_json::json!({ "delta": delta })),
|
||||
ChatStreamEvent::ToolCall {
|
||||
index,
|
||||
name,
|
||||
arguments,
|
||||
} => (
|
||||
"tool_call",
|
||||
serde_json::json!({ "index": index, "name": name, "arguments": arguments }),
|
||||
),
|
||||
ChatStreamEvent::ToolResult {
|
||||
index,
|
||||
name,
|
||||
result,
|
||||
result_truncated,
|
||||
} => (
|
||||
"tool_result",
|
||||
serde_json::json!({
|
||||
"index": index,
|
||||
"name": name,
|
||||
"result": result,
|
||||
"result_truncated": result_truncated,
|
||||
}),
|
||||
),
|
||||
ChatStreamEvent::Done {
|
||||
tool_calls_made,
|
||||
iterations_used,
|
||||
truncated,
|
||||
prompt_eval_count,
|
||||
eval_count,
|
||||
amended_insight_id,
|
||||
backend_used,
|
||||
model_used,
|
||||
} => (
|
||||
"done",
|
||||
serde_json::json!({
|
||||
"tool_calls_made": tool_calls_made,
|
||||
"iterations_used": iterations_used,
|
||||
"truncated": truncated,
|
||||
"prompt_eval_count": prompt_eval_count,
|
||||
"eval_count": eval_count,
|
||||
"amended_insight_id": amended_insight_id,
|
||||
"backend": backend_used,
|
||||
"model": model_used,
|
||||
}),
|
||||
),
|
||||
ChatStreamEvent::Error(msg) => ("error", serde_json::json!({ "message": msg })),
|
||||
};
|
||||
let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string());
|
||||
format!("event: {}\ndata: {}\n\n", event_name, data)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user