Add reconnectable async chat-turn flow with in-memory TurnRegistry

Replace the one-shot SSE chat stream with an async dispatch + reconnectable replay flow so the mobile client survives backgrounding, network blips, and OS-killed sockets without losing an in-flight agentic turn.

- TurnRegistry/TurnEntry: in-memory per-turn event buffer (cap 500, front eviction) shared by the agentic loop (writer) and SSE replay readers. ReplayOutcome + replay_from/next_batch distinguish Events/CaughtUp/Gone; next_batch registers the Notify before reading state (no lost wakeup) and drains every buffered event before signaling terminal, so the final Done/Error is never dropped and the stream closes cleanly.
- Endpoints: POST /insights/chat/turn (202 + turn_id), GET /insights/chat/turn/{id} (SSE replay, ?skip_before= resume, per-event seq, 410 on eviction), DELETE /insights/chat/turn/{id} (real task abort + cooperative is_running() check at each loop boundary).
- Cancellation actually stops the task (AbortHandle stored on the entry) and emits a Done{cancelled:true}; callers skip persistence on cancel.
- Background sweeper drops stale turns; interval clamped to <=300s.
- OpenTelemetry spans: ai.chat.turn.execute/replay/cancel.
- Legacy POST /insights/chat/stream path preserved unchanged.

Tests: registry coverage for terminal delivery (race guard), waiting, Gone, abort, eviction; handler integration tests for 404/410, skip_before, seq stamping, completed replay, and cancel.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 19:50:25 -04:00
parent 0c1c1c6792
commit 962f7bf05c
8 changed files with 1946 additions and 17 deletions
@@ -9,11 +9,14 @@ use tokio::sync::Mutex as TokioMutex;
 use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
 use crate::ai::insight_generator::InsightGenerator;
 use crate::ai::llm_client::{ChatMessage, LlmStreamEvent, Tool};
+use crate::ai::turn_registry::TurnEntry;
+use crate::ai::turn_registry::TurnRegistry;
 use crate::database::InsightDao;
 use crate::database::models::InsertPhotoInsight;
 use crate::otel::global_tracer;
 use crate::utils::normalize_path;
 use futures::stream::{BoxStream, StreamExt};
+use uuid::Uuid;

 const DEFAULT_MAX_ITERATIONS: usize = 6;
 const DEFAULT_NUM_CTX: i32 = 8192;
@@ -678,6 +681,626 @@ impl InsightChatService {
        Ok(rx)
    }

+    /// Dispatches a chat turn asynchronously and returns its id right away.
+    ///
+    /// A fresh `TurnEntry` keyed by a v4 UUID is inserted into `registry`, the
+    /// agentic loop is spawned on a Tokio task, and the task's abort handle is
+    /// stored on the entry so a later cancel can stop it. Events produced by
+    /// the turn are buffered in the entry for SSE replay.
+    ///
+    /// Returns the new `turn_id`. Failures inside the spawned task are never
+    /// returned to the caller; they surface as a terminal
+    /// `ChatStreamEvent::Error` in the entry's buffer.
+    pub async fn chat_turn_async(
+        self: Arc<Self>,
+        registry: Arc<TurnRegistry>,
+        req: ChatTurnRequest,
+    ) -> String {
+        let turn_id = Uuid::new_v4().to_string();
+        let entry = Arc::new(TurnEntry::new(
+            turn_id.clone(),
+            req.file_path.clone(),
+            req.library_id,
+        ));
+        registry.insert(entry.clone()).await;
+
+        // `self` is already an owned Arc, so it can move into the task as-is.
+        let svc = self;
+        let entry_clone = entry.clone();
+        let turn_id_for_task = turn_id.clone();
+        let library_id = req.library_id;
+        let handle = tokio::spawn(async move {
+            // Span covering the whole spawned turn execution. Created here (not
+            // in the HTTP handler) because the dispatch span ends at the 202
+            // response, long before this work runs.
+            let tracer = global_tracer();
+            let mut span = tracer.start("ai.chat.turn.execute");
+            span.set_attribute(KeyValue::new("turn_id", turn_id_for_task.clone()));
+            span.set_attribute(KeyValue::new("library_id", library_id as i64));
+
+            let result = svc
+                .run_streaming_turn_with_entry(req, entry_clone.clone())
+                .await;
+            match result {
+                Ok(()) => {
+                    span.set_attribute(KeyValue::new("status", "done"));
+                    span.set_status(Status::Ok);
+                }
+                // The turn already left `Running` while the loop was failing
+                // (presumably the cancel handler got there first). Whoever
+                // flipped the status owns the terminal event, so do not push a
+                // second one or overwrite that status with `Error`.
+                Err(e) if !entry_clone.is_running() => {
+                    log::debug!(
+                        "chat turn {} failed after leaving Running; error dropped: {}",
+                        turn_id_for_task,
+                        e
+                    );
+                    span.set_attribute(KeyValue::new("status", "cancelled"));
+                }
+                Err(e) => {
+                    let message = e.to_string();
+                    span.set_attribute(KeyValue::new("status", "error"));
+                    span.set_status(Status::error(message.clone()));
+                    // Push the terminal event BEFORE flipping status: a replay
+                    // reader treats a terminal status with no buffered tail as
+                    // "closed", so the Error must be in the buffer first.
+                    let _ = entry_clone
+                        .push_event(ChatStreamEvent::Error(message))
+                        .await;
+                    entry_clone.set_terminal_status(crate::ai::turn_registry::TurnStatus::Error);
+                }
+            }
+        });
+
+        // Install the abort handle so DELETE can actually stop the task.
+        entry.set_abort_handle(handle.abort_handle());
+
+        turn_id
+    }
+
+    /// Counterpart of `run_streaming_turn` that writes its events into a
+    /// `TurnEntry` buffer rather than an `mpsc::Sender`.
+    ///
+    /// Validates the user message, takes the per-(library, path) chat lock,
+    /// loads the current insight for that library and path, and then hands
+    /// off to the bootstrap path (no insight yet, or `regenerate` requested)
+    /// or to the continuation path (existing insight).
+    ///
+    /// Errors on an empty or oversized message, on a failed insight lookup,
+    /// and on anything the selected path returns.
+    async fn run_streaming_turn_with_entry(
+        self: Arc<Self>,
+        req: ChatTurnRequest,
+        entry: Arc<TurnEntry>,
+    ) -> Result<()> {
+        // Reject unusable input before any lock is taken.
+        if req.user_message.trim().is_empty() {
+            bail!("user_message must not be empty");
+        }
+        // NOTE(review): `len()` measures UTF-8 bytes while the message says
+        // "chars"; confirm which unit the limit is meant to use.
+        if req.user_message.len() > 8192 {
+            bail!("user_message exceeds 8192 chars");
+        }
+        let normalized = normalize_path(&req.file_path);
+
+        // Fetch (or lazily create) the mutex for this (library, path) pair.
+        // The map guard is released at the end of this block, before the
+        // per-photo mutex itself is awaited.
+        let turn_mutex = {
+            let mut lock_table = self.chat_locks.lock().await;
+            let slot = lock_table
+                .entry((req.library_id, normalized.clone()))
+                .or_insert_with(|| Arc::new(TokioMutex::new(())));
+            slot.clone()
+        };
+        let _guard = turn_mutex.lock().await;
+
+        // Current insight for this path, scoped to the turn's library_id.
+        let current_insight = {
+            let cx = opentelemetry::Context::new();
+            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
+            dao.get_current_insight_for_library(&cx, req.library_id, &normalized)
+                .map_err(|e| anyhow!("failed to load insight: {:?}", e))?
+        };
+
+        match current_insight {
+            Some(insight) if !req.regenerate => {
+                self.run_continuation_streaming_with_entry(req, normalized, insight, entry)
+                    .await
+            }
+            _ => {
+                self.run_bootstrap_streaming_with_entry(req, normalized, entry)
+                    .await
+            }
+        }
+    }
+
+    /// Continuation path with a `TurnEntry` buffer: appends one user turn to
+    /// the chat history stored on `insight` and runs the agentic loop on it.
+    ///
+    /// Steps, in order: resolve persona, backend and sampling overrides
+    /// (request values win, the insight's stored backend/model are the
+    /// fallback); trim the history to the context budget (emitting
+    /// `Truncated` if anything was dropped); run the loop; then persist.
+    /// With `req.amend` a new insight row is stored, otherwise the existing
+    /// row's `training_messages` is updated. A terminal `Done` event is pushed
+    /// and the entry is marked `Done` last.
+    ///
+    /// If the loop reports `cancelled`, nothing is persisted and no terminal
+    /// event is pushed here.
+    ///
+    /// Errors when the insight has no stored history, the history cannot be
+    /// (de)serialized, the backend cannot be parsed/validated/resolved, the
+    /// loop fails, or the DAO write fails.
+    async fn run_continuation_streaming_with_entry(
+        &self,
+        req: ChatTurnRequest,
+        normalized: String,
+        insight: crate::database::models::PhotoInsight,
+        entry: Arc<TurnEntry>,
+    ) -> Result<()> {
+        let active_persona = req
+            .persona_id
+            .clone()
+            .filter(|s| !s.trim().is_empty())
+            .unwrap_or_else(|| "default".to_string());
+        let raw_history = insight.training_messages.as_ref().ok_or_else(|| {
+            anyhow!("insight has no chat history; regenerate this insight in agentic mode")
+        })?;
+        let mut messages: Vec<ChatMessage> = serde_json::from_str(raw_history)
+            .map_err(|e| anyhow!("failed to deserialize chat history: {}", e))?;
+
+        let stored_backend = insight.backend.clone();
+        let effective_backend = req
+            .backend
+            .as_deref()
+            .map(|s| s.trim().to_lowercase())
+            .filter(|s| !s.is_empty())
+            .unwrap_or_else(|| stored_backend.clone());
+        let kind = BackendKind::parse(&effective_backend)?;
+        validate_cross_replay(&stored_backend, kind.as_str())?;
+
+        let max_iterations = req
+            .max_iterations
+            .unwrap_or(DEFAULT_MAX_ITERATIONS)
+            .clamp(1, env_max_iterations());
+
+        let stored_model = insight.model_version.clone();
+        let overrides = SamplingOverrides {
+            // Treat an empty requested model as "not specified" BEFORE falling
+            // back to the stored model, mirroring the backend override above.
+            // Filtering only after the fallback would let `Some("")` discard
+            // the stored model entirely.
+            model: req
+                .model
+                .clone()
+                .filter(|m| !m.is_empty())
+                .or_else(|| Some(stored_model.clone()))
+                .filter(|m| !m.is_empty()),
+            num_ctx: req.num_ctx,
+            temperature: req.temperature,
+            top_p: req.top_p,
+            top_k: req.top_k,
+            min_p: req.min_p,
+        };
+        let backend = self.generator.resolve_backend(kind, &overrides).await?;
+        let model_used = backend.model().to_string();
+
+        // Only offer the describe tool when the backend takes inline images
+        // and the first user message in the history actually carried one.
+        let local_first_user_has_image = messages
+            .iter()
+            .find(|m| m.role == "user")
+            .and_then(|m| m.images.as_ref())
+            .map(|imgs| !imgs.is_empty())
+            .unwrap_or(false);
+        let offer_describe_tool = backend.images_inline && local_first_user_has_image;
+        let gate_opts = self.generator.current_gate_opts_for_persona(
+            offer_describe_tool,
+            Some((req.user_id, &active_persona)),
+        );
+        let tools = InsightGenerator::build_tool_definitions(gate_opts);
+
+        // Best effort: a failed image load just means no image is passed on.
+        let image_base64: Option<String> = if offer_describe_tool {
+            self.generator.load_image_as_base64(&normalized).ok()
+        } else {
+            None
+        };
+
+        let budget_tokens = (req.num_ctx.unwrap_or(DEFAULT_NUM_CTX) as usize)
+            .saturating_sub(RESPONSE_HEADROOM_TOKENS);
+        let budget_bytes = budget_tokens.saturating_mul(BYTES_PER_TOKEN);
+        let truncated = apply_context_budget(&mut messages, budget_bytes);
+        if truncated {
+            let _ = entry.push_event(ChatStreamEvent::Truncated).await;
+        }
+
+        messages.push(ChatMessage::user(req.user_message.clone()));
+
+        let override_stash =
+            apply_system_prompt_override(&mut messages, req.system_prompt.as_deref());
+        let original_system_content = annotate_system_with_budget(&mut messages, max_iterations);
+
+        let outcome = self
+            .run_streaming_agentic_loop_with_entry(
+                &backend,
+                &mut messages,
+                tools,
+                &image_base64,
+                &normalized,
+                req.user_id,
+                &active_persona,
+                max_iterations,
+                &entry,
+            )
+            .await?;
+        let AgenticLoopOutcome {
+            tool_calls_made,
+            iterations_used,
+            last_prompt_eval_count,
+            last_eval_count,
+            final_content,
+            cancelled,
+        } = outcome;
+
+        // Turn was cancelled mid-flight: the DELETE handler already pushed the
+        // terminal event and flipped status. Don't persist a partial turn or
+        // push a second terminal event.
+        if cancelled {
+            return Ok(());
+        }
+
+        restore_system_content(&mut messages, original_system_content);
+
+        // When amending, the overridden system prompt stays in the history
+        // that is persisted with the new row; otherwise it is rolled back.
+        if !req.amend {
+            restore_system_prompt_override(&mut messages, override_stash);
+        }
+
+        let json = serde_json::to_string(&messages)
+            .map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
+
+        let mut amended_insight_id: Option<i32> = None;
+        if req.amend {
+            let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
+
+            let new_row = InsertPhotoInsight {
+                library_id: req.library_id,
+                file_path: normalized.clone(),
+                title,
+                summary: body,
+                generated_at: Utc::now().timestamp(),
+                model_version: model_used.clone(),
+                is_current: true,
+                training_messages: Some(json),
+                backend: kind.as_str().to_string(),
+                fewshot_source_ids: None,
+                content_hash: None,
+                num_ctx: req.num_ctx,
+                temperature: req.temperature,
+                top_p: req.top_p,
+                top_k: req.top_k,
+                min_p: req.min_p,
+                system_prompt: req.system_prompt.clone(),
+                persona_id: req.persona_id.clone(),
+                prompt_eval_count: None,
+                eval_count: None,
+            };
+            let cx = opentelemetry::Context::new();
+            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
+            let stored = dao
+                .store_insight(&cx, new_row)
+                .map_err(|e| anyhow!("failed to store amended insight: {:?}", e))?;
+            amended_insight_id = Some(stored.id);
+        } else {
+            let cx = opentelemetry::Context::new();
+            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
+            let rows = dao
+                .update_training_messages(&cx, req.library_id, &normalized, &json)
+                .map_err(|e| anyhow!("failed to persist chat history: {:?}", e))?;
+            if rows == 0 {
+                log::warn!(
+                    "update_training_messages (stream) updated 0 rows for {} (lib {}), \
+                     concurrent regenerate likely flipped is_current",
+                    normalized,
+                    req.library_id
+                );
+            }
+        }
+
+        // Buffer the terminal event first, then flip the status, so a replay
+        // reader that observes the terminal status still finds `Done` queued.
+        let _ = entry
+            .push_event(ChatStreamEvent::Done {
+                tool_calls_made,
+                iterations_used,
+                truncated,
+                prompt_tokens: last_prompt_eval_count,
+                eval_tokens: last_eval_count,
+                num_ctx: req.num_ctx,
+                amended_insight_id,
+                backend_used: kind.as_str().to_string(),
+                model_used,
+                cancelled: false,
+            })
+            .await;
+
+        entry.set_terminal_status(crate::ai::turn_registry::TurnStatus::Done);
+        Ok(())
+    }
+
+    /// Bootstrap path with a `TurnEntry` buffer: starts a brand-new
+    /// conversation for a photo and stores the result as a new insight row.
+    ///
+    /// Builds a system message from the persona prompt, the normalized path,
+    /// the resolved date taken, GPS coordinates and (for backends without
+    /// inline images) a locally generated visual description, runs the
+    /// agentic loop, stores the insight, then pushes the terminal `Done`
+    /// event and marks the entry `Done`.
+    ///
+    /// If the loop reports `cancelled`, nothing is stored and no terminal
+    /// event is pushed here. Errors when the backend cannot be resolved, the
+    /// loop fails, the history cannot be serialized, or the insert fails.
+    async fn run_bootstrap_streaming_with_entry(
+        &self,
+        req: ChatTurnRequest,
+        normalized: String,
+        entry: Arc<TurnEntry>,
+    ) -> Result<()> {
+        // A missing or blank persona id falls back to "default".
+        let active_persona = match req.persona_id.clone() {
+            Some(id) if !id.trim().is_empty() => id,
+            _ => "default".to_string(),
+        };
+        let backend_name = resolve_bootstrap_backend(req.backend.as_deref())?;
+        let kind = BackendKind::parse(&backend_name)?;
+
+        let max_iterations = req
+            .max_iterations
+            .unwrap_or(DEFAULT_MAX_ITERATIONS)
+            .clamp(1, env_max_iterations());
+
+        let requested_model = req.model.clone().filter(|m| !m.is_empty());
+        let sampling = SamplingOverrides {
+            model: requested_model,
+            num_ctx: req.num_ctx,
+            temperature: req.temperature,
+            top_p: req.top_p,
+            top_k: req.top_k,
+            min_p: req.min_p,
+        };
+        let backend = self.generator.resolve_backend(kind, &sampling).await?;
+        let model_used = backend.model().to_string();
+
+        // Best effort: a failed image load simply leaves the turn image-less.
+        let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
+
+        let exif = self.generator.fetch_exif(&normalized);
+        let date_taken_str = resolve_date_taken_for_context(&exif, &normalized);
+        // GPS is only usable when both coordinates are present.
+        let gps = exif.as_ref().and_then(|meta| {
+            meta.gps_latitude
+                .zip(meta.gps_longitude)
+                .map(|(lat, lon)| (lat as f64, lon as f64))
+        });
+
+        // Backends that cannot take the image inline get a text description
+        // from the local vision model instead; failures degrade to no block.
+        let visual_block = match image_base64.as_deref() {
+            Some(b64) if !backend.images_inline => {
+                match backend.local().describe_image(b64).await {
+                    Ok(desc) => {
+                        format!("Visual description (from local vision model):\n{}\n", desc)
+                    }
+                    Err(e) => {
+                        log::warn!("{} bootstrap: describe_image failed: {}", kind.as_str(), e);
+                        String::new()
+                    }
+                }
+            }
+            _ => String::new(),
+        };
+
+        let offer_describe_tool = backend.images_inline && image_base64.is_some();
+        let tools = InsightGenerator::build_tool_definitions(
+            self.generator.current_gate_opts_for_persona(
+                offer_describe_tool,
+                Some((req.user_id, &active_persona)),
+            ),
+        );
+
+        let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
+        let system_msg = ChatMessage::system(build_bootstrap_system_message(
+            &persona,
+            &normalized,
+            date_taken_str.as_deref(),
+            gps,
+            &visual_block,
+        ));
+        let mut user_msg = ChatMessage::user(req.user_message.clone());
+        // Attach the image to the user message only for inline-image backends.
+        if let Some(img) = image_base64.as_ref().filter(|_| backend.images_inline) {
+            user_msg.images = Some(vec![img.clone()]);
+        }
+        let mut messages = vec![system_msg, user_msg];
+
+        let AgenticLoopOutcome {
+            tool_calls_made,
+            iterations_used,
+            last_prompt_eval_count,
+            last_eval_count,
+            final_content,
+            cancelled,
+        } = self
+            .run_streaming_agentic_loop_with_entry(
+                &backend,
+                &mut messages,
+                tools,
+                &image_base64,
+                &normalized,
+                req.user_id,
+                &active_persona,
+                max_iterations,
+                &entry,
+            )
+            .await?;
+
+        // A cancelled turn is neither persisted nor terminated from here; the
+        // DELETE handler has already pushed the terminal event and flipped
+        // the status.
+        if cancelled {
+            return Ok(());
+        }
+
+        let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
+
+        let history_json = serde_json::to_string(&messages)
+            .map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
+        let row = InsertPhotoInsight {
+            library_id: req.library_id,
+            file_path: normalized.clone(),
+            title,
+            summary: body,
+            generated_at: Utc::now().timestamp(),
+            model_version: model_used.clone(),
+            is_current: true,
+            training_messages: Some(history_json),
+            backend: kind.as_str().to_string(),
+            fewshot_source_ids: None,
+            content_hash: None,
+            num_ctx: req.num_ctx,
+            temperature: req.temperature,
+            top_p: req.top_p,
+            top_k: req.top_k,
+            min_p: req.min_p,
+            system_prompt: req.system_prompt.clone(),
+            persona_id: req.persona_id.clone(),
+            prompt_eval_count: None,
+            eval_count: None,
+        };
+        // Scoped so the DAO guard is released before the next await.
+        let stored = {
+            let cx = opentelemetry::Context::new();
+            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
+            dao.store_insight(&cx, row)
+                .map_err(|e| anyhow!("failed to store bootstrap insight: {:?}", e))?
+        };
+
+        // Terminal event goes into the buffer before the status flips.
+        let done = ChatStreamEvent::Done {
+            tool_calls_made,
+            iterations_used,
+            truncated: false,
+            prompt_tokens: last_prompt_eval_count,
+            eval_tokens: last_eval_count,
+            num_ctx: req.num_ctx,
+            amended_insight_id: Some(stored.id),
+            backend_used: kind.as_str().to_string(),
+            model_used,
+            cancelled: false,
+        };
+        let _ = entry.push_event(done).await;
+
+        entry.set_terminal_status(crate::ai::turn_registry::TurnStatus::Done);
+        Ok(())
+    }
+
+    /// Agentic loop variant that pushes events to a `TurnEntry` buffer.
+    ///
+    /// Runs up to `max_iterations` rounds. Each round streams one model
+    /// response (forwarding `TextDelta`s), appends it to `messages`, and, if
+    /// the response carries tool calls, executes each one, emitting `ToolCall`
+    /// / `ToolResult` events and appending the results before the next round.
+    /// A response without tool calls ends the loop and supplies
+    /// `final_content`. If no content was produced, one extra tool-less
+    /// request asks the model for its final answer; the synthetic user prompt
+    /// used for that request is removed from `messages` afterwards.
+    ///
+    /// Cancellation is cooperative: `entry.is_running()` is consulted at the
+    /// top of every iteration, after the loop, and after the fallback request.
+    /// Once the entry has left `Running` the outcome is returned with
+    /// `cancelled: true`, so callers skip persistence and the terminal `Done`.
+    ///
+    /// Errors when the backend stream fails or ends without a `Done` event.
+    async fn run_streaming_agentic_loop_with_entry(
+        &self,
+        backend: &ResolvedBackend,
+        messages: &mut Vec<ChatMessage>,
+        tools: Vec<Tool>,
+        image_base64: &Option<String>,
+        normalized: &str,
+        user_id: i32,
+        active_persona: &str,
+        max_iterations: usize,
+        entry: &Arc<TurnEntry>,
+    ) -> Result<AgenticLoopOutcome> {
+        let mut tool_calls_made = 0usize;
+        let mut iterations_used = 0usize;
+        let mut last_prompt_eval_count: Option<i32> = None;
+        let mut last_eval_count: Option<i32> = None;
+        let mut final_content = String::new();
+        let mut cancelled = false;
+
+        for iteration in 0..max_iterations {
+            // Cooperative cancellation: a DELETE flips status out of Running
+            // (and aborts this task). Check at the iteration boundary so an
+            // in-flight tool round finishes cleanly rather than mid-write.
+            if !entry.is_running() {
+                cancelled = true;
+                break;
+            }
+
+            iterations_used = iteration + 1;
+            let _ = entry
+                .push_event(ChatStreamEvent::IterationStart {
+                    n: iterations_used,
+                    max: max_iterations,
+                })
+                .await;
+
+            let mut stream = backend
+                .chat()
+                .chat_with_tools_stream(messages.clone(), tools.clone())
+                .await?;
+
+            let mut final_message: Option<ChatMessage> = None;
+            while let Some(ev) = stream.next().await {
+                let ev = ev?;
+                match ev {
+                    LlmStreamEvent::TextDelta(delta) => {
+                        let _ = entry.push_event(ChatStreamEvent::TextDelta(delta)).await;
+                    }
+                    LlmStreamEvent::Done {
+                        message,
+                        prompt_eval_count,
+                        eval_count,
+                    } => {
+                        last_prompt_eval_count = prompt_eval_count;
+                        last_eval_count = eval_count;
+                        final_message = Some(message);
+                        break;
+                    }
+                }
+            }
+            let mut response =
+                final_message.ok_or_else(|| anyhow!("stream ended without a Done event"))?;
+
+            // Normalize tool-call arguments: anything that is not a JSON
+            // object is replaced with an empty object.
+            if let Some(ref mut tcs) = response.tool_calls {
+                for tc in tcs.iter_mut() {
+                    if !tc.function.arguments.is_object() {
+                        tc.function.arguments = serde_json::Value::Object(Default::default());
+                    }
+                }
+            }
+
+            messages.push(response.clone());
+
+            if let Some(ref tool_calls) = response.tool_calls
+                && !tool_calls.is_empty()
+            {
+                for tool_call in tool_calls {
+                    // Zero-based index of this call across the whole turn.
+                    let call_index = tool_calls_made;
+                    tool_calls_made += 1;
+                    let _ = entry
+                        .push_event(ChatStreamEvent::ToolCall {
+                            index: call_index,
+                            name: tool_call.function.name.clone(),
+                            arguments: tool_call.function.arguments.clone(),
+                        })
+                        .await;
+                    let cx = opentelemetry::Context::new();
+                    let result = self
+                        .generator
+                        .execute_tool(
+                            &tool_call.function.name,
+                            &tool_call.function.arguments,
+                            backend,
+                            image_base64,
+                            normalized,
+                            user_id,
+                            active_persona,
+                            &cx,
+                        )
+                        .await;
+                    // The event carries a preview; the full result goes into
+                    // the message history.
+                    let (result_preview, result_truncated) = truncate_tool_result(&result);
+                    let _ = entry
+                        .push_event(ChatStreamEvent::ToolResult {
+                            index: call_index,
+                            name: tool_call.function.name.clone(),
+                            result: result_preview,
+                            result_truncated,
+                        })
+                        .await;
+                    messages.push(ChatMessage::tool_result(result));
+                }
+                continue;
+            }
+
+            final_content = response.content;
+            break;
+        }
+
+        // A cancel that lands during the last iteration is not seen by the
+        // check at the top of the loop. Re-check here so a cancelled turn
+        // neither issues the fallback request nor reports `cancelled: false`.
+        cancelled = cancelled || !entry.is_running();
+
+        // No-tools fallback
+        if !cancelled && final_content.is_empty() {
+            let synthetic_idx = messages.len();
+            messages.push(ChatMessage::user(
+                "Please write your final answer now without calling any more tools.",
+            ));
+            let mut stream = backend
+                .chat()
+                .chat_with_tools_stream(messages.clone(), vec![])
+                .await?;
+            let mut final_message: Option<ChatMessage> = None;
+            while let Some(ev) = stream.next().await {
+                let ev = ev?;
+                match ev {
+                    LlmStreamEvent::TextDelta(delta) => {
+                        let _ = entry.push_event(ChatStreamEvent::TextDelta(delta)).await;
+                    }
+                    LlmStreamEvent::Done {
+                        message,
+                        prompt_eval_count,
+                        eval_count,
+                    } => {
+                        last_prompt_eval_count = prompt_eval_count;
+                        last_eval_count = eval_count;
+                        final_message = Some(message);
+                        break;
+                    }
+                }
+            }
+            let final_response =
+                final_message.ok_or_else(|| anyhow!("final stream ended without a Done event"))?;
+            final_content = final_response.content.clone();
+            messages.push(final_response);
+            // Drop the synthetic nudge so it is not part of the history.
+            messages.remove(synthetic_idx);
+
+            // Same reasoning as above: the cancel may have arrived while the
+            // fallback response was streaming.
+            cancelled = !entry.is_running();
+        }
+
+        Ok(AgenticLoopOutcome {
+            tool_calls_made,
+            iterations_used,
+            last_prompt_eval_count,
+            last_eval_count,
+            final_content,
+            cancelled,
+        })
+    }
+
    async fn run_streaming_turn(
        self: Arc<Self>,
        req: ChatTurnRequest,
@@ -836,6 +1459,8 @@ impl InsightChatService {
            last_prompt_eval_count,
            last_eval_count,
            final_content,
+            // The mpsc (legacy) path has no cancellation channel.
+            cancelled: _,
        } = outcome;

        // Drop the per-turn iteration-budget note before persisting so it
@@ -916,6 +1541,7 @@ impl InsightChatService {
                amended_insight_id,
                backend_used: kind.as_str().to_string(),
                model_used,
+                cancelled: false,
            })
            .await;

@@ -1052,6 +1678,8 @@ impl InsightChatService {
            last_prompt_eval_count,
            last_eval_count,
            final_content,
+            // The mpsc (legacy) path has no cancellation channel.
+            cancelled: _,
        } = outcome;

        let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
@@ -1101,6 +1729,7 @@ impl InsightChatService {
                amended_insight_id: Some(stored.id),
                backend_used: kind.as_str().to_string(),
                model_used,
+                cancelled: false,
            })
            .await;

@@ -1274,6 +1903,7 @@ impl InsightChatService {
            last_prompt_eval_count,
            last_eval_count,
            final_content,
+            cancelled: false,
        })
    }
 }
@@ -1402,6 +2032,10 @@ struct AgenticLoopOutcome {
    last_prompt_eval_count: Option<i32>,
    last_eval_count: Option<i32>,
    final_content: String,
+    /// True when the loop exited early because the turn was cancelled
+    /// (status flipped out of `Running`). Callers skip persistence and the
+    /// terminal `Done` push — the cancel handler owns the terminal event.
+    cancelled: bool,
 }

 /// Events emitted by `chat_turn_stream`. One stream per turn; ends after
@@ -1456,6 +2090,10 @@ pub enum ChatStreamEvent {
        amended_insight_id: Option<i32>,
        backend_used: String,
        model_used: String,
+        /// True only for the synthetic terminal event emitted by the cancel
+        /// handler, so clients can distinguish a user-cancelled turn from a
+        /// natural completion. Always false on the normal success path.
+        cancelled: bool,
    },
    /// Terminal failure event. No further events follow.
    Error(String),