Add reconnectable async chat-turn flow with in-memory TurnRegistry

Replace the one-shot SSE chat stream with an async dispatch + reconnectable
replay flow so the mobile client survives backgrounding, network blips, and
OS-killed sockets without losing an in-flight agentic turn.

- TurnRegistry/TurnEntry: in-memory per-turn event buffer (cap 500, front
  eviction) shared by the agentic loop (writer) and SSE replay readers.
  ReplayOutcome + replay_from/next_batch distinguish Events/CaughtUp/Gone;
  next_batch registers the Notify before reading state (no lost wakeup) and
  drains every buffered event before signaling terminal, so the final
  Done/Error is never dropped and the stream closes cleanly.
- Endpoints: POST /insights/chat/turn (202 + turn_id), GET
  /insights/chat/turn/{id} (SSE replay, ?skip_before= resume, per-event seq,
  410 on eviction), DELETE /insights/chat/turn/{id} (real task abort +
  cooperative is_running() check at each loop boundary).
- Cancellation actually stops the task (AbortHandle stored on the entry) and
  emits a Done{cancelled:true}; callers skip persistence on cancel.
- Background sweeper drops stale turns; interval clamped to <=300s.
- OpenTelemetry spans: ai.chat.turn.execute/replay/cancel.
- Legacy POST /insights/chat/stream path preserved unchanged.

Tests: registry coverage for terminal delivery (race guard), waiting, Gone,
abort, eviction; handler integration tests for 404/410, skip_before, seq
stamping, completed replay, and cancel.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-29 19:50:25 -04:00
parent 0c1c1c6792
commit 962f7bf05c
8 changed files with 1946 additions and 17 deletions
+638
View File
@@ -9,11 +9,14 @@ use tokio::sync::Mutex as TokioMutex;
use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
use crate::ai::insight_generator::InsightGenerator;
use crate::ai::llm_client::{ChatMessage, LlmStreamEvent, Tool};
use crate::ai::turn_registry::TurnEntry;
use crate::ai::turn_registry::TurnRegistry;
use crate::database::InsightDao;
use crate::database::models::InsertPhotoInsight;
use crate::otel::global_tracer;
use crate::utils::normalize_path;
use futures::stream::{BoxStream, StreamExt};
use uuid::Uuid;
const DEFAULT_MAX_ITERATIONS: usize = 6;
const DEFAULT_NUM_CTX: i32 = 8192;
@@ -678,6 +681,626 @@ impl InsightChatService {
Ok(rx)
}
/// Async turn dispatch: creates a TurnEntry in the registry, spawns the
/// agentic loop on a Tokio task, and returns the turn_id immediately.
/// Events are buffered in the TurnEntry for SSE replay.
pub async fn chat_turn_async(
self: Arc<Self>,
registry: Arc<TurnRegistry>,
req: ChatTurnRequest,
) -> String {
let turn_id = Uuid::new_v4().to_string();
let entry = Arc::new(TurnEntry::new(
turn_id.clone(),
req.file_path.clone(),
req.library_id,
));
registry.insert(entry.clone()).await;
let svc = self.clone();
let entry_clone = entry.clone();
let turn_id_for_span = turn_id.clone();
let library_id = req.library_id;
let handle = tokio::spawn(async move {
// Span covering the whole spawned turn execution. Created here (not
// in the HTTP handler) because the dispatch span ends at the 202
// response, long before this work runs.
let tracer = global_tracer();
let mut span = tracer.start("ai.chat.turn.execute");
span.set_attribute(KeyValue::new("turn_id", turn_id_for_span));
span.set_attribute(KeyValue::new("library_id", library_id as i64));
let result = svc
.run_streaming_turn_with_entry(req, entry_clone.clone())
.await;
if let Err(ref e) = result {
span.set_attribute(KeyValue::new("status", "error"));
span.set_status(Status::error(format!("{e}")));
// Push the terminal event BEFORE flipping status: a replay
// reader treats a terminal status with no buffered tail as
// "closed", so the Error must be in the buffer first.
let _ = entry_clone
.push_event(ChatStreamEvent::Error(format!("{}", e)))
.await;
entry_clone.set_terminal_status(crate::ai::turn_registry::TurnStatus::Error);
} else {
span.set_attribute(KeyValue::new("status", "done"));
span.set_status(Status::Ok);
}
});
// Install the abort handle so DELETE can actually stop the task.
entry.set_abort_handle(handle.abort_handle());
turn_id
}
/// Entry-buffered counterpart of `run_streaming_turn`: events go to a
/// `TurnEntry` buffer instead of an `mpsc::Sender`.
///
/// Validates the user message, serialises turns for the same
/// `(library_id, normalized path)` pair behind a per-key async mutex, then
/// routes to the bootstrap path (regeneration requested, or no current
/// insight exists) or to the continuation path (a current insight exists).
///
/// # Errors
///
/// Fails when the message is blank or longer than 8192 bytes, when the
/// current insight cannot be loaded, or when the selected path fails.
async fn run_streaming_turn_with_entry(
    self: Arc<Self>,
    req: ChatTurnRequest,
    entry: Arc<TurnEntry>,
) -> Result<()> {
    // Input validation. `str::len` is a byte count, so the cap below is
    // 8192 UTF-8 bytes even though the message text says "chars".
    let message = &req.user_message;
    if message.trim().is_empty() {
        bail!("user_message must not be empty");
    }
    if message.len() > 8192 {
        bail!("user_message exceeds 8192 chars");
    }

    let normalized = normalize_path(&req.file_path);

    // Fetch (or lazily create) the mutex for this (library, path) key. The
    // lock on the table itself is released at the end of this block.
    let turn_mutex = {
        let mut lock_table = self.chat_locks.lock().await;
        let slot = lock_table
            .entry((req.library_id, normalized.clone()))
            .or_insert_with(|| Arc::new(TokioMutex::new(())));
        slot.clone()
    };
    // Held until this function returns, i.e. for the whole turn.
    let _turn_guard = turn_mutex.lock().await;

    // Look up the current insight scoped to this turn's library_id. The DAO
    // guard lives only inside this block, so it is never held across an await.
    let existing_insight = {
        let cx = opentelemetry::Context::new();
        let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
        dao.get_current_insight_for_library(&cx, req.library_id, &normalized)
            .map_err(|e| anyhow!("failed to load insight: {:?}", e))?
    };

    match existing_insight {
        // Existing insight and no regenerate flag: continue its chat history.
        Some(insight) if !req.regenerate => {
            self.run_continuation_streaming_with_entry(req, normalized, insight, entry)
                .await
        }
        // Regeneration requested, or nothing stored yet: start from scratch.
        _ => {
            self.run_bootstrap_streaming_with_entry(req, normalized, entry)
                .await
        }
    }
}
/// Continuation path with TurnEntry buffer.
///
/// Continues the chat stored on an existing insight: deserializes the saved
/// history, resolves backend and sampling options, trims the history to a
/// context budget, appends the new user message, runs the agentic loop, and
/// persists the updated history. Progress and the terminal `Done` event are
/// pushed to `entry`.
///
/// # Parameters
///
/// * `req` - the turn request (message, overrides, `amend` flag, ...).
/// * `normalized` - the already-normalized file path of the photo.
/// * `insight` - the current insight row whose `training_messages` hold the
///   serialized chat history.
/// * `entry` - the per-turn buffer that receives stream events.
///
/// # Errors
///
/// Returns an error when the insight has no stored history, the history fails
/// to (de)serialize, the backend cannot be parsed/validated/resolved, the
/// agentic loop fails, or persistence fails. No terminal event is pushed on
/// those paths from within this function.
async fn run_continuation_streaming_with_entry(
&self,
req: ChatTurnRequest,
normalized: String,
insight: crate::database::models::PhotoInsight,
entry: Arc<TurnEntry>,
) -> Result<()> {
// Blank / missing persona ids fall back to "default".
let active_persona = req
.persona_id
.clone()
.filter(|s| !s.trim().is_empty())
.unwrap_or_else(|| "default".to_string());
// The stored history is JSON in `training_messages`; without it there is
// nothing to continue.
let raw_history = insight.training_messages.as_ref().ok_or_else(|| {
anyhow!("insight has no chat history; regenerate this insight in agentic mode")
})?;
let mut messages: Vec<ChatMessage> = serde_json::from_str(raw_history)
.map_err(|e| anyhow!("failed to deserialize chat history: {}", e))?;
// Backend: a non-empty (trimmed, lowercased) request value wins, otherwise
// the backend recorded on the insight is reused.
let stored_backend = insight.backend.clone();
let effective_backend = req
.backend
.as_deref()
.map(|s| s.trim().to_lowercase())
.filter(|s| !s.is_empty())
.unwrap_or_else(|| stored_backend.clone());
let kind = BackendKind::parse(&effective_backend)?;
// NOTE(review): presumably rejects replaying a history on an incompatible
// backend — confirm against `validate_cross_replay`.
validate_cross_replay(&stored_backend, kind.as_str())?;
// Iteration budget is bounded to [1, env_max_iterations()].
let max_iterations = req
.max_iterations
.unwrap_or(DEFAULT_MAX_ITERATIONS)
.clamp(1, env_max_iterations());
let stored_model = insight.model_version.clone();
// Model: request override first, then the insight's stored model; an empty
// string is treated as "no override".
let overrides = SamplingOverrides {
model: req
.model
.clone()
.or_else(|| Some(stored_model.clone()))
.filter(|m| !m.is_empty()),
num_ctx: req.num_ctx,
temperature: req.temperature,
top_p: req.top_p,
top_k: req.top_k,
min_p: req.min_p,
};
let backend = self.generator.resolve_backend(kind, &overrides).await?;
let model_used = backend.model().to_string();
// True when the first user message in the stored history carries at least
// one image.
let local_first_user_has_image = messages
.iter()
.find(|m| m.role == "user")
.and_then(|m| m.images.as_ref())
.map(|imgs| !imgs.is_empty())
.unwrap_or(false);
let offer_describe_tool = backend.images_inline && local_first_user_has_image;
let gate_opts = self.generator.current_gate_opts_for_persona(
offer_describe_tool,
Some((req.user_id, &active_persona)),
);
let tools = InsightGenerator::build_tool_definitions(gate_opts);
// The image is only loaded when the describe tool is offered; a load
// failure is discarded (`.ok()`) and the turn proceeds without an image.
let image_base64: Option<String> = if offer_describe_tool {
self.generator.load_image_as_base64(&normalized).ok()
} else {
None
};
// Context budget in bytes: (num_ctx or default) minus response headroom,
// converted with BYTES_PER_TOKEN. Saturating math keeps it at >= 0.
let budget_tokens = (req.num_ctx.unwrap_or(DEFAULT_NUM_CTX) as usize)
.saturating_sub(RESPONSE_HEADROOM_TOKENS);
let budget_bytes = budget_tokens.saturating_mul(BYTES_PER_TOKEN);
let truncated = apply_context_budget(&mut messages, budget_bytes);
if truncated {
let _ = entry.push_event(ChatStreamEvent::Truncated).await;
}
messages.push(ChatMessage::user(req.user_message.clone()));
// Both helpers mutate `messages` and hand back what is needed to undo the
// change before persisting (see the restore calls below).
let override_stash =
apply_system_prompt_override(&mut messages, req.system_prompt.as_deref());
let original_system_content = annotate_system_with_budget(&mut messages, max_iterations);
let outcome = self
.run_streaming_agentic_loop_with_entry(
&backend,
&mut messages,
tools,
&image_base64,
&normalized,
req.user_id,
&active_persona,
max_iterations,
&entry,
)
.await?;
let AgenticLoopOutcome {
tool_calls_made,
iterations_used,
last_prompt_eval_count,
last_eval_count,
final_content,
cancelled,
} = outcome;
// Turn was cancelled mid-flight: the DELETE handler already pushed the
// terminal event and flipped status. Don't persist a partial turn or
// push a second terminal event.
if cancelled {
return Ok(());
}
// Undo the per-turn system-message annotation before persisting. The
// system-prompt override is only kept in the persisted history when the
// request amends the insight.
restore_system_content(&mut messages, original_system_content);
if !req.amend {
restore_system_prompt_override(&mut messages, override_stash);
}
let json = serde_json::to_string(&messages)
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
let mut amended_insight_id: Option<i32> = None;
if req.amend {
// Amend: store a brand-new current insight row built from the final
// answer (split into title/body) plus the full updated history.
let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
let final_content = body;
let new_row = InsertPhotoInsight {
library_id: req.library_id,
file_path: normalized.clone(),
title,
summary: final_content.clone(),
generated_at: Utc::now().timestamp(),
model_version: model_used.clone(),
is_current: true,
training_messages: Some(json),
backend: kind.as_str().to_string(),
fewshot_source_ids: None,
content_hash: None,
num_ctx: req.num_ctx,
temperature: req.temperature,
top_p: req.top_p,
top_k: req.top_k,
min_p: req.min_p,
system_prompt: req.system_prompt.clone(),
persona_id: req.persona_id.clone(),
prompt_eval_count: None,
eval_count: None,
};
let cx = opentelemetry::Context::new();
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
let stored = dao
.store_insight(&cx, new_row)
.map_err(|e| anyhow!("failed to store amended insight: {:?}", e))?;
amended_insight_id = Some(stored.id);
} else {
// Plain continuation: only the stored history is updated in place.
let cx = opentelemetry::Context::new();
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
let rows = dao
.update_training_messages(&cx, req.library_id, &normalized, &json)
.map_err(|e| anyhow!("failed to persist chat history: {:?}", e))?;
// Zero affected rows is logged but not treated as a failure.
if rows == 0 {
log::warn!(
"update_training_messages (stream) updated 0 rows for {} (lib {}), \
concurrent regenerate likely flipped is_current",
normalized,
req.library_id
);
}
}
// Terminal event first, status flip second, so the `Done` event is already
// buffered by the time the entry reports a terminal status.
let _ = entry
.push_event(ChatStreamEvent::Done {
tool_calls_made,
iterations_used,
truncated,
prompt_tokens: last_prompt_eval_count,
eval_tokens: last_eval_count,
num_ctx: req.num_ctx,
amended_insight_id,
backend_used: kind.as_str().to_string(),
model_used,
cancelled: false,
})
.await;
entry.set_terminal_status(crate::ai::turn_registry::TurnStatus::Done);
Ok(())
}
/// Bootstrap path with TurnEntry buffer.
///
/// Starts a brand-new conversation for a photo: builds a system message from
/// the persona prompt plus photo context (date taken, GPS, optional visual
/// description), sends the user's message (with the image attached when the
/// backend accepts inline images), runs the agentic loop, and stores the
/// result as a new current insight. Progress and the terminal `Done` event
/// are pushed to `entry`.
///
/// # Parameters
///
/// * `req` - the turn request (message, backend/model/sampling overrides, ...).
/// * `normalized` - the already-normalized file path of the photo.
/// * `entry` - the per-turn buffer that receives stream events.
///
/// # Errors
///
/// Returns an error when the backend cannot be resolved, the agentic loop
/// fails, the history cannot be serialized, or the insight cannot be stored.
/// No terminal event is pushed on those paths from within this function.
async fn run_bootstrap_streaming_with_entry(
&self,
req: ChatTurnRequest,
normalized: String,
entry: Arc<TurnEntry>,
) -> Result<()> {
// Blank / missing persona ids fall back to "default".
let active_persona = req
.persona_id
.clone()
.filter(|s| !s.trim().is_empty())
.unwrap_or_else(|| "default".to_string());
let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?;
let kind = BackendKind::parse(&effective_backend)?;
// Iteration budget is bounded to [1, env_max_iterations()].
let max_iterations = req
.max_iterations
.unwrap_or(DEFAULT_MAX_ITERATIONS)
.clamp(1, env_max_iterations());
// An empty model string is treated as "no override".
let overrides = SamplingOverrides {
model: req.model.clone().filter(|m| !m.is_empty()),
num_ctx: req.num_ctx,
temperature: req.temperature,
top_p: req.top_p,
top_k: req.top_k,
min_p: req.min_p,
};
let backend = self.generator.resolve_backend(kind, &overrides).await?;
let model_used = backend.model().to_string();
// Image load failures are discarded (`.ok()`); the turn then runs without
// an image.
let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
let exif = self.generator.fetch_exif(&normalized);
let date_taken_str = resolve_date_taken_for_context(&exif, &normalized);
// GPS is only used when both latitude and longitude are present.
let gps = exif
.as_ref()
.and_then(|e| match (e.gps_latitude, e.gps_longitude) {
(Some(lat), Some(lon)) => Some((lat as f64, lon as f64)),
_ => None,
});
// When the backend cannot take images inline, ask the local model for a
// textual description instead. A failed description is logged and replaced
// by an empty block rather than failing the turn.
let visual_block = if !backend.images_inline {
match image_base64.as_deref() {
Some(b64) => match backend.local().describe_image(b64).await {
Ok(desc) => {
format!("Visual description (from local vision model):\n{}\n", desc)
}
Err(e) => {
log::warn!("{} bootstrap: describe_image failed: {}", kind.as_str(), e);
String::new()
}
},
None => String::new(),
}
} else {
String::new()
};
// The describe tool is offered only for inline-image backends that
// actually have an image loaded.
let offer_describe_tool = backend.images_inline && image_base64.is_some();
let gate_opts = self.generator.current_gate_opts_for_persona(
offer_describe_tool,
Some((req.user_id, &active_persona)),
);
let tools = InsightGenerator::build_tool_definitions(gate_opts);
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
let system_content = build_bootstrap_system_message(
&persona,
&normalized,
date_taken_str.as_deref(),
gps,
&visual_block,
);
let system_msg = ChatMessage::system(system_content);
let mut user_msg = ChatMessage::user(req.user_message.clone());
// Attach the image to the user message for inline-image backends.
if backend.images_inline
&& let Some(ref img) = image_base64
{
user_msg.images = Some(vec![img.clone()]);
}
// A fresh conversation is exactly: system message + first user message.
let mut messages = vec![system_msg, user_msg];
let outcome = self
.run_streaming_agentic_loop_with_entry(
&backend,
&mut messages,
tools,
&image_base64,
&normalized,
req.user_id,
&active_persona,
max_iterations,
&entry,
)
.await?;
let AgenticLoopOutcome {
tool_calls_made,
iterations_used,
last_prompt_eval_count,
last_eval_count,
final_content,
cancelled,
} = outcome;
// Turn was cancelled mid-flight: the DELETE handler already pushed the
// terminal event and flipped status. Don't persist a partial turn or
// push a second terminal event.
if cancelled {
return Ok(());
}
// The final answer is split into title/body; the whole message list is
// stored alongside as the insight's chat history.
let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
let json = serde_json::to_string(&messages)
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
let new_row = InsertPhotoInsight {
library_id: req.library_id,
file_path: normalized.clone(),
title,
summary: body,
generated_at: Utc::now().timestamp(),
model_version: model_used.clone(),
is_current: true,
training_messages: Some(json),
backend: kind.as_str().to_string(),
fewshot_source_ids: None,
content_hash: None,
num_ctx: req.num_ctx,
temperature: req.temperature,
top_p: req.top_p,
top_k: req.top_k,
min_p: req.min_p,
system_prompt: req.system_prompt.clone(),
persona_id: req.persona_id.clone(),
prompt_eval_count: None,
eval_count: None,
};
// The DAO guard is confined to this block so it is released before the
// next await.
let stored = {
let cx = opentelemetry::Context::new();
let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
dao.store_insight(&cx, new_row)
.map_err(|e| anyhow!("failed to store bootstrap insight: {:?}", e))?
};
// Terminal event first, status flip second, so the `Done` event is already
// buffered by the time the entry reports a terminal status. `truncated` is
// always false here: a bootstrap turn never trims a prior history.
let _ = entry
.push_event(ChatStreamEvent::Done {
tool_calls_made,
iterations_used,
truncated: false,
prompt_tokens: last_prompt_eval_count,
eval_tokens: last_eval_count,
num_ctx: req.num_ctx,
amended_insight_id: Some(stored.id),
backend_used: kind.as_str().to_string(),
model_used,
cancelled: false,
})
.await;
entry.set_terminal_status(crate::ai::turn_registry::TurnStatus::Done);
Ok(())
}
/// Agentic loop variant that pushes events to a `TurnEntry` buffer.
///
/// Runs up to `max_iterations` model rounds. Each round streams a model
/// response (forwarding text deltas to `entry`), then either executes the
/// requested tool calls and loops again, or takes the response content as the
/// final answer. If no final answer was produced by the time the loop ends,
/// one extra tool-less round is run to force one.
///
/// `messages` is extended in place with every assistant response and tool
/// result, so on success it holds the full updated conversation.
///
/// # Returns
///
/// An `AgenticLoopOutcome` with the tool-call and iteration counters, the
/// token counts reported by the last completed model round, and the final
/// content. `cancelled` is `true` only when the loop stopped early because
/// `entry` was no longer running at an iteration boundary.
///
/// # Errors
///
/// Propagates failures from opening or reading the model stream, and fails
/// when a stream ends without a `Done` event.
async fn run_streaming_agentic_loop_with_entry(
&self,
backend: &ResolvedBackend,
messages: &mut Vec<ChatMessage>,
tools: Vec<Tool>,
image_base64: &Option<String>,
normalized: &str,
user_id: i32,
active_persona: &str,
max_iterations: usize,
entry: &Arc<TurnEntry>,
) -> Result<AgenticLoopOutcome> {
let mut tool_calls_made = 0usize;
let mut iterations_used = 0usize;
let mut last_prompt_eval_count: Option<i32> = None;
let mut last_eval_count: Option<i32> = None;
let mut final_content = String::new();
for iteration in 0..max_iterations {
// Cooperative cancellation: a DELETE flips status out of Running
// (and aborts this task). Check at the iteration boundary so an
// in-flight tool round finishes cleanly rather than mid-write.
if !entry.is_running() {
return Ok(AgenticLoopOutcome {
tool_calls_made,
iterations_used,
last_prompt_eval_count,
last_eval_count,
final_content,
cancelled: true,
});
}
// Reported iteration numbers are 1-based.
iterations_used = iteration + 1;
let _ = entry
.push_event(ChatStreamEvent::IterationStart {
n: iterations_used,
max: max_iterations,
})
.await;
let mut stream = backend
.chat()
.chat_with_tools_stream(messages.clone(), tools.clone())
.await?;
// Drain the stream: forward text deltas as they arrive and stop at the
// `Done` event, which carries the complete message and token counts.
let mut final_message: Option<ChatMessage> = None;
while let Some(ev) = stream.next().await {
let ev = ev?;
match ev {
LlmStreamEvent::TextDelta(delta) => {
let _ = entry.push_event(ChatStreamEvent::TextDelta(delta)).await;
}
LlmStreamEvent::Done {
message,
prompt_eval_count,
eval_count,
} => {
last_prompt_eval_count = prompt_eval_count;
last_eval_count = eval_count;
final_message = Some(message);
break;
}
}
}
let mut response =
final_message.ok_or_else(|| anyhow!("stream ended without a Done event"))?;
// Normalize tool-call arguments: anything that is not a JSON object is
// replaced with an empty object.
if let Some(ref mut tcs) = response.tool_calls {
for tc in tcs.iter_mut() {
if !tc.function.arguments.is_object() {
tc.function.arguments = serde_json::Value::Object(Default::default());
}
}
}
messages.push(response.clone());
if let Some(ref tool_calls) = response.tool_calls
&& !tool_calls.is_empty()
{
for tool_call in tool_calls {
// `call_index` is 0-based and counts across the whole turn, not
// per iteration.
tool_calls_made += 1;
let call_index = tool_calls_made - 1;
let _ = entry
.push_event(ChatStreamEvent::ToolCall {
index: call_index,
name: tool_call.function.name.clone(),
arguments: tool_call.function.arguments.clone(),
})
.await;
let cx = opentelemetry::Context::new();
let result = self
.generator
.execute_tool(
&tool_call.function.name,
&tool_call.function.arguments,
backend,
image_base64,
normalized,
user_id,
active_persona,
&cx,
)
.await;
// The event carries the preview from `truncate_tool_result`; the
// conversation history receives the full result.
let (result_preview, result_truncated) = truncate_tool_result(&result);
let _ = entry
.push_event(ChatStreamEvent::ToolResult {
index: call_index,
name: tool_call.function.name.clone(),
result: result_preview,
result_truncated,
})
.await;
messages.push(ChatMessage::tool_result(result));
}
// Tool results were appended; give the model another round.
continue;
}
// No tool calls: this response is the final answer.
final_content = response.content;
break;
}
// No-tools fallback
// Reached when the iteration budget ran out on tool calls, or when the
// model's final response had empty content: ask once more with no tools
// offered. Note this round has no cancellation check of its own.
if final_content.is_empty() {
// Remember where the synthetic prompt lands so it can be removed again
// and never ends up in the conversation handed back to the caller.
let synthetic_idx = messages.len();
messages.push(ChatMessage::user(
"Please write your final answer now without calling any more tools.",
));
let mut stream = backend
.chat()
.chat_with_tools_stream(messages.clone(), vec![])
.await?;
let mut final_message: Option<ChatMessage> = None;
while let Some(ev) = stream.next().await {
let ev = ev?;
match ev {
LlmStreamEvent::TextDelta(delta) => {
let _ = entry.push_event(ChatStreamEvent::TextDelta(delta)).await;
}
LlmStreamEvent::Done {
message,
prompt_eval_count,
eval_count,
} => {
last_prompt_eval_count = prompt_eval_count;
last_eval_count = eval_count;
final_message = Some(message);
break;
}
}
}
let final_response =
final_message.ok_or_else(|| anyhow!("final stream ended without a Done event"))?;
final_content = final_response.content.clone();
// Append the answer, then drop the synthetic prompt that preceded it.
messages.push(final_response);
messages.remove(synthetic_idx);
}
Ok(AgenticLoopOutcome {
tool_calls_made,
iterations_used,
last_prompt_eval_count,
last_eval_count,
final_content,
cancelled: false,
})
}
async fn run_streaming_turn(
self: Arc<Self>,
req: ChatTurnRequest,
@@ -836,6 +1459,8 @@ impl InsightChatService {
last_prompt_eval_count,
last_eval_count,
final_content,
// The mpsc (legacy) path has no cancellation channel.
cancelled: _,
} = outcome;
// Drop the per-turn iteration-budget note before persisting so it
@@ -916,6 +1541,7 @@ impl InsightChatService {
amended_insight_id,
backend_used: kind.as_str().to_string(),
model_used,
cancelled: false,
})
.await;
@@ -1052,6 +1678,8 @@ impl InsightChatService {
last_prompt_eval_count,
last_eval_count,
final_content,
// The mpsc (legacy) path has no cancellation channel.
cancelled: _,
} = outcome;
let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
@@ -1101,6 +1729,7 @@ impl InsightChatService {
amended_insight_id: Some(stored.id),
backend_used: kind.as_str().to_string(),
model_used,
cancelled: false,
})
.await;
@@ -1274,6 +1903,7 @@ impl InsightChatService {
last_prompt_eval_count,
last_eval_count,
final_content,
cancelled: false,
})
}
}
@@ -1402,6 +2032,10 @@ struct AgenticLoopOutcome {
last_prompt_eval_count: Option<i32>,
last_eval_count: Option<i32>,
final_content: String,
/// True when the loop exited early because the turn was cancelled
/// (status flipped out of `Running`). Callers skip persistence and the
/// terminal `Done` push — the cancel handler owns the terminal event.
cancelled: bool,
}
/// Events emitted by `chat_turn_stream`. One stream per turn; ends after
@@ -1456,6 +2090,10 @@ pub enum ChatStreamEvent {
amended_insight_id: Option<i32>,
backend_used: String,
model_used: String,
/// True only for the synthetic terminal event emitted by the cancel
/// handler, so clients can distinguish a user-cancelled turn from a
/// natural completion. Always false on the normal success path.
cancelled: bool,
},
/// Terminal failure event. No further events follow.
Error(String),