Add reconnectable async chat-turn flow with in-memory TurnRegistry
Replace the one-shot SSE chat stream with an async dispatch + reconnectable
replay flow so the mobile client survives backgrounding, network blips, and
OS-killed sockets without losing an in-flight agentic turn.
- TurnRegistry/TurnEntry: in-memory per-turn event buffer (cap 500, front
eviction) shared by the agentic loop (writer) and SSE replay readers.
ReplayOutcome + replay_from/next_batch distinguish Events/CaughtUp/Gone;
next_batch registers the Notify before reading state (no lost wakeup) and
drains every buffered event before signaling terminal, so the final
Done/Error is never dropped and the stream closes cleanly.
- Endpoints: POST /insights/chat/turn (202 + turn_id), GET
/insights/chat/turn/{id} (SSE replay, ?skip_before= resume, per-event seq,
410 on eviction), DELETE /insights/chat/turn/{id} (real task abort +
cooperative is_running() check at each loop boundary).
- Cancellation actually stops the task (AbortHandle stored on the entry) and
emits a Done{cancelled:true}; callers skip persistence on cancel.
- Background sweeper drops stale turns; interval clamped to <=300s.
- OpenTelemetry spans: ai.chat.turn.execute/replay/cancel.
- Legacy POST /insights/chat/stream path preserved unchanged.
Tests: registry coverage for terminal delivery (race guard), waiting, Gone,
abort, eviction; handler integration tests for 404/410, skip_before, seq
stamping, completed replay, and cancel.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -9,11 +9,14 @@ use tokio::sync::Mutex as TokioMutex;
|
||||
use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
|
||||
use crate::ai::insight_generator::InsightGenerator;
|
||||
use crate::ai::llm_client::{ChatMessage, LlmStreamEvent, Tool};
|
||||
use crate::ai::turn_registry::TurnEntry;
|
||||
use crate::ai::turn_registry::TurnRegistry;
|
||||
use crate::database::InsightDao;
|
||||
use crate::database::models::InsertPhotoInsight;
|
||||
use crate::otel::global_tracer;
|
||||
use crate::utils::normalize_path;
|
||||
use futures::stream::{BoxStream, StreamExt};
|
||||
use uuid::Uuid;
|
||||
|
||||
const DEFAULT_MAX_ITERATIONS: usize = 6;
|
||||
const DEFAULT_NUM_CTX: i32 = 8192;
|
||||
@@ -678,6 +681,626 @@ impl InsightChatService {
|
||||
Ok(rx)
|
||||
}
|
||||
|
||||
/// Async turn dispatch: creates a TurnEntry in the registry, spawns the
|
||||
/// agentic loop on a Tokio task, and returns the turn_id immediately.
|
||||
/// Events are buffered in the TurnEntry for SSE replay.
|
||||
pub async fn chat_turn_async(
|
||||
self: Arc<Self>,
|
||||
registry: Arc<TurnRegistry>,
|
||||
req: ChatTurnRequest,
|
||||
) -> String {
|
||||
let turn_id = Uuid::new_v4().to_string();
|
||||
let entry = Arc::new(TurnEntry::new(
|
||||
turn_id.clone(),
|
||||
req.file_path.clone(),
|
||||
req.library_id,
|
||||
));
|
||||
registry.insert(entry.clone()).await;
|
||||
|
||||
let svc = self.clone();
|
||||
let entry_clone = entry.clone();
|
||||
let turn_id_for_span = turn_id.clone();
|
||||
let library_id = req.library_id;
|
||||
let handle = tokio::spawn(async move {
|
||||
// Span covering the whole spawned turn execution. Created here (not
|
||||
// in the HTTP handler) because the dispatch span ends at the 202
|
||||
// response, long before this work runs.
|
||||
let tracer = global_tracer();
|
||||
let mut span = tracer.start("ai.chat.turn.execute");
|
||||
span.set_attribute(KeyValue::new("turn_id", turn_id_for_span));
|
||||
span.set_attribute(KeyValue::new("library_id", library_id as i64));
|
||||
|
||||
let result = svc
|
||||
.run_streaming_turn_with_entry(req, entry_clone.clone())
|
||||
.await;
|
||||
if let Err(ref e) = result {
|
||||
span.set_attribute(KeyValue::new("status", "error"));
|
||||
span.set_status(Status::error(format!("{e}")));
|
||||
// Push the terminal event BEFORE flipping status: a replay
|
||||
// reader treats a terminal status with no buffered tail as
|
||||
// "closed", so the Error must be in the buffer first.
|
||||
let _ = entry_clone
|
||||
.push_event(ChatStreamEvent::Error(format!("{}", e)))
|
||||
.await;
|
||||
entry_clone.set_terminal_status(crate::ai::turn_registry::TurnStatus::Error);
|
||||
} else {
|
||||
span.set_attribute(KeyValue::new("status", "done"));
|
||||
span.set_status(Status::Ok);
|
||||
}
|
||||
});
|
||||
|
||||
// Install the abort handle so DELETE can actually stop the task.
|
||||
entry.set_abort_handle(handle.abort_handle());
|
||||
|
||||
turn_id
|
||||
}
|
||||
|
||||
/// Variant of `run_streaming_turn` that pushes events to a `TurnEntry`
|
||||
/// buffer instead of an `mpsc::Sender`.
|
||||
async fn run_streaming_turn_with_entry(
|
||||
self: Arc<Self>,
|
||||
req: ChatTurnRequest,
|
||||
entry: Arc<TurnEntry>,
|
||||
) -> Result<()> {
|
||||
if req.user_message.trim().is_empty() {
|
||||
bail!("user_message must not be empty");
|
||||
}
|
||||
if req.user_message.len() > 8192 {
|
||||
bail!("user_message exceeds 8192 chars");
|
||||
}
|
||||
let normalized = normalize_path(&req.file_path);
|
||||
|
||||
let lock_key = (req.library_id, normalized.clone());
|
||||
let entry_lock = {
|
||||
let mut locks = self.chat_locks.lock().await;
|
||||
locks
|
||||
.entry(lock_key.clone())
|
||||
.or_insert_with(|| Arc::new(TokioMutex::new(())))
|
||||
.clone()
|
||||
};
|
||||
let _guard = entry_lock.lock().await;
|
||||
|
||||
// Look up existing insight scoped to this turn's library_id.
|
||||
let existing_insight = {
|
||||
let cx = opentelemetry::Context::new();
|
||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
dao.get_current_insight_for_library(&cx, req.library_id, &normalized)
|
||||
.map_err(|e| anyhow!("failed to load insight: {:?}", e))?
|
||||
};
|
||||
|
||||
if req.regenerate || existing_insight.is_none() {
|
||||
return self
|
||||
.run_bootstrap_streaming_with_entry(req, normalized, entry)
|
||||
.await;
|
||||
}
|
||||
let insight = existing_insight.expect("just checked Some above");
|
||||
self.run_continuation_streaming_with_entry(req, normalized, insight, entry)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Continuation path with TurnEntry buffer.
|
||||
async fn run_continuation_streaming_with_entry(
|
||||
&self,
|
||||
req: ChatTurnRequest,
|
||||
normalized: String,
|
||||
insight: crate::database::models::PhotoInsight,
|
||||
entry: Arc<TurnEntry>,
|
||||
) -> Result<()> {
|
||||
let active_persona = req
|
||||
.persona_id
|
||||
.clone()
|
||||
.filter(|s| !s.trim().is_empty())
|
||||
.unwrap_or_else(|| "default".to_string());
|
||||
let raw_history = insight.training_messages.as_ref().ok_or_else(|| {
|
||||
anyhow!("insight has no chat history; regenerate this insight in agentic mode")
|
||||
})?;
|
||||
let mut messages: Vec<ChatMessage> = serde_json::from_str(raw_history)
|
||||
.map_err(|e| anyhow!("failed to deserialize chat history: {}", e))?;
|
||||
|
||||
let stored_backend = insight.backend.clone();
|
||||
let effective_backend = req
|
||||
.backend
|
||||
.as_deref()
|
||||
.map(|s| s.trim().to_lowercase())
|
||||
.filter(|s| !s.is_empty())
|
||||
.unwrap_or_else(|| stored_backend.clone());
|
||||
let kind = BackendKind::parse(&effective_backend)?;
|
||||
validate_cross_replay(&stored_backend, kind.as_str())?;
|
||||
|
||||
let max_iterations = req
|
||||
.max_iterations
|
||||
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
||||
.clamp(1, env_max_iterations());
|
||||
|
||||
let stored_model = insight.model_version.clone();
|
||||
let overrides = SamplingOverrides {
|
||||
model: req
|
||||
.model
|
||||
.clone()
|
||||
.or_else(|| Some(stored_model.clone()))
|
||||
.filter(|m| !m.is_empty()),
|
||||
num_ctx: req.num_ctx,
|
||||
temperature: req.temperature,
|
||||
top_p: req.top_p,
|
||||
top_k: req.top_k,
|
||||
min_p: req.min_p,
|
||||
};
|
||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||
let model_used = backend.model().to_string();
|
||||
|
||||
let local_first_user_has_image = messages
|
||||
.iter()
|
||||
.find(|m| m.role == "user")
|
||||
.and_then(|m| m.images.as_ref())
|
||||
.map(|imgs| !imgs.is_empty())
|
||||
.unwrap_or(false);
|
||||
let offer_describe_tool = backend.images_inline && local_first_user_has_image;
|
||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||
offer_describe_tool,
|
||||
Some((req.user_id, &active_persona)),
|
||||
);
|
||||
let tools = InsightGenerator::build_tool_definitions(gate_opts);
|
||||
|
||||
let image_base64: Option<String> = if offer_describe_tool {
|
||||
self.generator.load_image_as_base64(&normalized).ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let budget_tokens = (req.num_ctx.unwrap_or(DEFAULT_NUM_CTX) as usize)
|
||||
.saturating_sub(RESPONSE_HEADROOM_TOKENS);
|
||||
let budget_bytes = budget_tokens.saturating_mul(BYTES_PER_TOKEN);
|
||||
let truncated = apply_context_budget(&mut messages, budget_bytes);
|
||||
if truncated {
|
||||
let _ = entry.push_event(ChatStreamEvent::Truncated).await;
|
||||
}
|
||||
|
||||
messages.push(ChatMessage::user(req.user_message.clone()));
|
||||
|
||||
let override_stash =
|
||||
apply_system_prompt_override(&mut messages, req.system_prompt.as_deref());
|
||||
let original_system_content = annotate_system_with_budget(&mut messages, max_iterations);
|
||||
|
||||
let outcome = self
|
||||
.run_streaming_agentic_loop_with_entry(
|
||||
&backend,
|
||||
&mut messages,
|
||||
tools,
|
||||
&image_base64,
|
||||
&normalized,
|
||||
req.user_id,
|
||||
&active_persona,
|
||||
max_iterations,
|
||||
&entry,
|
||||
)
|
||||
.await?;
|
||||
let AgenticLoopOutcome {
|
||||
tool_calls_made,
|
||||
iterations_used,
|
||||
last_prompt_eval_count,
|
||||
last_eval_count,
|
||||
final_content,
|
||||
cancelled,
|
||||
} = outcome;
|
||||
|
||||
// Turn was cancelled mid-flight: the DELETE handler already pushed the
|
||||
// terminal event and flipped status. Don't persist a partial turn or
|
||||
// push a second terminal event.
|
||||
if cancelled {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
restore_system_content(&mut messages, original_system_content);
|
||||
|
||||
if !req.amend {
|
||||
restore_system_prompt_override(&mut messages, override_stash);
|
||||
}
|
||||
|
||||
let json = serde_json::to_string(&messages)
|
||||
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
|
||||
|
||||
let mut amended_insight_id: Option<i32> = None;
|
||||
if req.amend {
|
||||
let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
|
||||
let final_content = body;
|
||||
|
||||
let new_row = InsertPhotoInsight {
|
||||
library_id: req.library_id,
|
||||
file_path: normalized.clone(),
|
||||
title,
|
||||
summary: final_content.clone(),
|
||||
generated_at: Utc::now().timestamp(),
|
||||
model_version: model_used.clone(),
|
||||
is_current: true,
|
||||
training_messages: Some(json),
|
||||
backend: kind.as_str().to_string(),
|
||||
fewshot_source_ids: None,
|
||||
content_hash: None,
|
||||
num_ctx: req.num_ctx,
|
||||
temperature: req.temperature,
|
||||
top_p: req.top_p,
|
||||
top_k: req.top_k,
|
||||
min_p: req.min_p,
|
||||
system_prompt: req.system_prompt.clone(),
|
||||
persona_id: req.persona_id.clone(),
|
||||
prompt_eval_count: None,
|
||||
eval_count: None,
|
||||
};
|
||||
let cx = opentelemetry::Context::new();
|
||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
let stored = dao
|
||||
.store_insight(&cx, new_row)
|
||||
.map_err(|e| anyhow!("failed to store amended insight: {:?}", e))?;
|
||||
amended_insight_id = Some(stored.id);
|
||||
} else {
|
||||
let cx = opentelemetry::Context::new();
|
||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
let rows = dao
|
||||
.update_training_messages(&cx, req.library_id, &normalized, &json)
|
||||
.map_err(|e| anyhow!("failed to persist chat history: {:?}", e))?;
|
||||
if rows == 0 {
|
||||
log::warn!(
|
||||
"update_training_messages (stream) updated 0 rows for {} (lib {}), \
|
||||
concurrent regenerate likely flipped is_current",
|
||||
normalized,
|
||||
req.library_id
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let _ = entry
|
||||
.push_event(ChatStreamEvent::Done {
|
||||
tool_calls_made,
|
||||
iterations_used,
|
||||
truncated,
|
||||
prompt_tokens: last_prompt_eval_count,
|
||||
eval_tokens: last_eval_count,
|
||||
num_ctx: req.num_ctx,
|
||||
amended_insight_id,
|
||||
backend_used: kind.as_str().to_string(),
|
||||
model_used,
|
||||
cancelled: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
entry.set_terminal_status(crate::ai::turn_registry::TurnStatus::Done);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Bootstrap path with TurnEntry buffer.
|
||||
async fn run_bootstrap_streaming_with_entry(
|
||||
&self,
|
||||
req: ChatTurnRequest,
|
||||
normalized: String,
|
||||
entry: Arc<TurnEntry>,
|
||||
) -> Result<()> {
|
||||
let active_persona = req
|
||||
.persona_id
|
||||
.clone()
|
||||
.filter(|s| !s.trim().is_empty())
|
||||
.unwrap_or_else(|| "default".to_string());
|
||||
let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?;
|
||||
let kind = BackendKind::parse(&effective_backend)?;
|
||||
|
||||
let max_iterations = req
|
||||
.max_iterations
|
||||
.unwrap_or(DEFAULT_MAX_ITERATIONS)
|
||||
.clamp(1, env_max_iterations());
|
||||
|
||||
let overrides = SamplingOverrides {
|
||||
model: req.model.clone().filter(|m| !m.is_empty()),
|
||||
num_ctx: req.num_ctx,
|
||||
temperature: req.temperature,
|
||||
top_p: req.top_p,
|
||||
top_k: req.top_k,
|
||||
min_p: req.min_p,
|
||||
};
|
||||
let backend = self.generator.resolve_backend(kind, &overrides).await?;
|
||||
let model_used = backend.model().to_string();
|
||||
|
||||
let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
|
||||
|
||||
let exif = self.generator.fetch_exif(&normalized);
|
||||
let date_taken_str = resolve_date_taken_for_context(&exif, &normalized);
|
||||
let gps = exif
|
||||
.as_ref()
|
||||
.and_then(|e| match (e.gps_latitude, e.gps_longitude) {
|
||||
(Some(lat), Some(lon)) => Some((lat as f64, lon as f64)),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let visual_block = if !backend.images_inline {
|
||||
match image_base64.as_deref() {
|
||||
Some(b64) => match backend.local().describe_image(b64).await {
|
||||
Ok(desc) => {
|
||||
format!("Visual description (from local vision model):\n{}\n", desc)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("{} bootstrap: describe_image failed: {}", kind.as_str(), e);
|
||||
String::new()
|
||||
}
|
||||
},
|
||||
None => String::new(),
|
||||
}
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
let offer_describe_tool = backend.images_inline && image_base64.is_some();
|
||||
let gate_opts = self.generator.current_gate_opts_for_persona(
|
||||
offer_describe_tool,
|
||||
Some((req.user_id, &active_persona)),
|
||||
);
|
||||
let tools = InsightGenerator::build_tool_definitions(gate_opts);
|
||||
|
||||
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
|
||||
let system_content = build_bootstrap_system_message(
|
||||
&persona,
|
||||
&normalized,
|
||||
date_taken_str.as_deref(),
|
||||
gps,
|
||||
&visual_block,
|
||||
);
|
||||
let system_msg = ChatMessage::system(system_content);
|
||||
let mut user_msg = ChatMessage::user(req.user_message.clone());
|
||||
if backend.images_inline
|
||||
&& let Some(ref img) = image_base64
|
||||
{
|
||||
user_msg.images = Some(vec![img.clone()]);
|
||||
}
|
||||
let mut messages = vec![system_msg, user_msg];
|
||||
|
||||
let outcome = self
|
||||
.run_streaming_agentic_loop_with_entry(
|
||||
&backend,
|
||||
&mut messages,
|
||||
tools,
|
||||
&image_base64,
|
||||
&normalized,
|
||||
req.user_id,
|
||||
&active_persona,
|
||||
max_iterations,
|
||||
&entry,
|
||||
)
|
||||
.await?;
|
||||
let AgenticLoopOutcome {
|
||||
tool_calls_made,
|
||||
iterations_used,
|
||||
last_prompt_eval_count,
|
||||
last_eval_count,
|
||||
final_content,
|
||||
cancelled,
|
||||
} = outcome;
|
||||
|
||||
// Turn was cancelled mid-flight: the DELETE handler already pushed the
|
||||
// terminal event and flipped status. Don't persist a partial turn or
|
||||
// push a second terminal event.
|
||||
if cancelled {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
|
||||
|
||||
let json = serde_json::to_string(&messages)
|
||||
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
|
||||
let new_row = InsertPhotoInsight {
|
||||
library_id: req.library_id,
|
||||
file_path: normalized.clone(),
|
||||
title,
|
||||
summary: body,
|
||||
generated_at: Utc::now().timestamp(),
|
||||
model_version: model_used.clone(),
|
||||
is_current: true,
|
||||
training_messages: Some(json),
|
||||
backend: kind.as_str().to_string(),
|
||||
fewshot_source_ids: None,
|
||||
content_hash: None,
|
||||
num_ctx: req.num_ctx,
|
||||
temperature: req.temperature,
|
||||
top_p: req.top_p,
|
||||
top_k: req.top_k,
|
||||
min_p: req.min_p,
|
||||
system_prompt: req.system_prompt.clone(),
|
||||
persona_id: req.persona_id.clone(),
|
||||
prompt_eval_count: None,
|
||||
eval_count: None,
|
||||
};
|
||||
let stored = {
|
||||
let cx = opentelemetry::Context::new();
|
||||
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
|
||||
dao.store_insight(&cx, new_row)
|
||||
.map_err(|e| anyhow!("failed to store bootstrap insight: {:?}", e))?
|
||||
};
|
||||
|
||||
let _ = entry
|
||||
.push_event(ChatStreamEvent::Done {
|
||||
tool_calls_made,
|
||||
iterations_used,
|
||||
truncated: false,
|
||||
prompt_tokens: last_prompt_eval_count,
|
||||
eval_tokens: last_eval_count,
|
||||
num_ctx: req.num_ctx,
|
||||
amended_insight_id: Some(stored.id),
|
||||
backend_used: kind.as_str().to_string(),
|
||||
model_used,
|
||||
cancelled: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
entry.set_terminal_status(crate::ai::turn_registry::TurnStatus::Done);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Agentic loop variant that pushes events to a `TurnEntry` buffer.
|
||||
async fn run_streaming_agentic_loop_with_entry(
|
||||
&self,
|
||||
backend: &ResolvedBackend,
|
||||
messages: &mut Vec<ChatMessage>,
|
||||
tools: Vec<Tool>,
|
||||
image_base64: &Option<String>,
|
||||
normalized: &str,
|
||||
user_id: i32,
|
||||
active_persona: &str,
|
||||
max_iterations: usize,
|
||||
entry: &Arc<TurnEntry>,
|
||||
) -> Result<AgenticLoopOutcome> {
|
||||
let mut tool_calls_made = 0usize;
|
||||
let mut iterations_used = 0usize;
|
||||
let mut last_prompt_eval_count: Option<i32> = None;
|
||||
let mut last_eval_count: Option<i32> = None;
|
||||
let mut final_content = String::new();
|
||||
|
||||
for iteration in 0..max_iterations {
|
||||
// Cooperative cancellation: a DELETE flips status out of Running
|
||||
// (and aborts this task). Check at the iteration boundary so an
|
||||
// in-flight tool round finishes cleanly rather than mid-write.
|
||||
if !entry.is_running() {
|
||||
return Ok(AgenticLoopOutcome {
|
||||
tool_calls_made,
|
||||
iterations_used,
|
||||
last_prompt_eval_count,
|
||||
last_eval_count,
|
||||
final_content,
|
||||
cancelled: true,
|
||||
});
|
||||
}
|
||||
|
||||
iterations_used = iteration + 1;
|
||||
let _ = entry
|
||||
.push_event(ChatStreamEvent::IterationStart {
|
||||
n: iterations_used,
|
||||
max: max_iterations,
|
||||
})
|
||||
.await;
|
||||
|
||||
let mut stream = backend
|
||||
.chat()
|
||||
.chat_with_tools_stream(messages.clone(), tools.clone())
|
||||
.await?;
|
||||
|
||||
let mut final_message: Option<ChatMessage> = None;
|
||||
while let Some(ev) = stream.next().await {
|
||||
let ev = ev?;
|
||||
match ev {
|
||||
LlmStreamEvent::TextDelta(delta) => {
|
||||
let _ = entry.push_event(ChatStreamEvent::TextDelta(delta)).await;
|
||||
}
|
||||
LlmStreamEvent::Done {
|
||||
message,
|
||||
prompt_eval_count,
|
||||
eval_count,
|
||||
} => {
|
||||
last_prompt_eval_count = prompt_eval_count;
|
||||
last_eval_count = eval_count;
|
||||
final_message = Some(message);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut response =
|
||||
final_message.ok_or_else(|| anyhow!("stream ended without a Done event"))?;
|
||||
|
||||
if let Some(ref mut tcs) = response.tool_calls {
|
||||
for tc in tcs.iter_mut() {
|
||||
if !tc.function.arguments.is_object() {
|
||||
tc.function.arguments = serde_json::Value::Object(Default::default());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
messages.push(response.clone());
|
||||
|
||||
if let Some(ref tool_calls) = response.tool_calls
|
||||
&& !tool_calls.is_empty()
|
||||
{
|
||||
for tool_call in tool_calls {
|
||||
tool_calls_made += 1;
|
||||
let call_index = tool_calls_made - 1;
|
||||
let _ = entry
|
||||
.push_event(ChatStreamEvent::ToolCall {
|
||||
index: call_index,
|
||||
name: tool_call.function.name.clone(),
|
||||
arguments: tool_call.function.arguments.clone(),
|
||||
})
|
||||
.await;
|
||||
let cx = opentelemetry::Context::new();
|
||||
let result = self
|
||||
.generator
|
||||
.execute_tool(
|
||||
&tool_call.function.name,
|
||||
&tool_call.function.arguments,
|
||||
backend,
|
||||
image_base64,
|
||||
normalized,
|
||||
user_id,
|
||||
active_persona,
|
||||
&cx,
|
||||
)
|
||||
.await;
|
||||
let (result_preview, result_truncated) = truncate_tool_result(&result);
|
||||
let _ = entry
|
||||
.push_event(ChatStreamEvent::ToolResult {
|
||||
index: call_index,
|
||||
name: tool_call.function.name.clone(),
|
||||
result: result_preview,
|
||||
result_truncated,
|
||||
})
|
||||
.await;
|
||||
messages.push(ChatMessage::tool_result(result));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
final_content = response.content;
|
||||
break;
|
||||
}
|
||||
|
||||
// No-tools fallback
|
||||
if final_content.is_empty() {
|
||||
let synthetic_idx = messages.len();
|
||||
messages.push(ChatMessage::user(
|
||||
"Please write your final answer now without calling any more tools.",
|
||||
));
|
||||
let mut stream = backend
|
||||
.chat()
|
||||
.chat_with_tools_stream(messages.clone(), vec![])
|
||||
.await?;
|
||||
let mut final_message: Option<ChatMessage> = None;
|
||||
while let Some(ev) = stream.next().await {
|
||||
let ev = ev?;
|
||||
match ev {
|
||||
LlmStreamEvent::TextDelta(delta) => {
|
||||
let _ = entry.push_event(ChatStreamEvent::TextDelta(delta)).await;
|
||||
}
|
||||
LlmStreamEvent::Done {
|
||||
message,
|
||||
prompt_eval_count,
|
||||
eval_count,
|
||||
} => {
|
||||
last_prompt_eval_count = prompt_eval_count;
|
||||
last_eval_count = eval_count;
|
||||
final_message = Some(message);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
let final_response =
|
||||
final_message.ok_or_else(|| anyhow!("final stream ended without a Done event"))?;
|
||||
final_content = final_response.content.clone();
|
||||
messages.push(final_response);
|
||||
messages.remove(synthetic_idx);
|
||||
}
|
||||
|
||||
Ok(AgenticLoopOutcome {
|
||||
tool_calls_made,
|
||||
iterations_used,
|
||||
last_prompt_eval_count,
|
||||
last_eval_count,
|
||||
final_content,
|
||||
cancelled: false,
|
||||
})
|
||||
}
|
||||
|
||||
async fn run_streaming_turn(
|
||||
self: Arc<Self>,
|
||||
req: ChatTurnRequest,
|
||||
@@ -836,6 +1459,8 @@ impl InsightChatService {
|
||||
last_prompt_eval_count,
|
||||
last_eval_count,
|
||||
final_content,
|
||||
// The mpsc (legacy) path has no cancellation channel.
|
||||
cancelled: _,
|
||||
} = outcome;
|
||||
|
||||
// Drop the per-turn iteration-budget note before persisting so it
|
||||
@@ -916,6 +1541,7 @@ impl InsightChatService {
|
||||
amended_insight_id,
|
||||
backend_used: kind.as_str().to_string(),
|
||||
model_used,
|
||||
cancelled: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1052,6 +1678,8 @@ impl InsightChatService {
|
||||
last_prompt_eval_count,
|
||||
last_eval_count,
|
||||
final_content,
|
||||
// The mpsc (legacy) path has no cancellation channel.
|
||||
cancelled: _,
|
||||
} = outcome;
|
||||
|
||||
let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
|
||||
@@ -1101,6 +1729,7 @@ impl InsightChatService {
|
||||
amended_insight_id: Some(stored.id),
|
||||
backend_used: kind.as_str().to_string(),
|
||||
model_used,
|
||||
cancelled: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1274,6 +1903,7 @@ impl InsightChatService {
|
||||
last_prompt_eval_count,
|
||||
last_eval_count,
|
||||
final_content,
|
||||
cancelled: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1402,6 +2032,10 @@ struct AgenticLoopOutcome {
|
||||
last_prompt_eval_count: Option<i32>,
|
||||
last_eval_count: Option<i32>,
|
||||
final_content: String,
|
||||
/// True when the loop exited early because the turn was cancelled
|
||||
/// (status flipped out of `Running`). Callers skip persistence and the
|
||||
/// terminal `Done` push — the cancel handler owns the terminal event.
|
||||
cancelled: bool,
|
||||
}
|
||||
|
||||
/// Events emitted by `chat_turn_stream`. One stream per turn; ends after
|
||||
@@ -1456,6 +2090,10 @@ pub enum ChatStreamEvent {
|
||||
amended_insight_id: Option<i32>,
|
||||
backend_used: String,
|
||||
model_used: String,
|
||||
/// True only for the synthetic terminal event emitted by the cancel
|
||||
/// handler, so clients can distinguish a user-cancelled turn from a
|
||||
/// natural completion. Always false on the normal success path.
|
||||
cancelled: bool,
|
||||
},
|
||||
/// Terminal failure event. No further events follow.
|
||||
Error(String),
|
||||
|
||||
Reference in New Issue
Block a user