From 928efe49f97c24c8c4ae2fd96d2e5a494909534d Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Fri, 8 May 2026 10:41:50 -0400
Subject: [PATCH 1/5] insight-chat: bootstrap insight on first Discuss message
 + regenerate flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tap-Discuss-on-no-insight previously failed silently: ImageApi's
/insights/chat/stream required an existing agentic insight, errored
when missing, and emitted the failure as `event: error` — which the
frontend SSE consumer ignored (it listens for `error_message`).

This commit closes both gaps with a server-side state machine:

- /insights/chat/stream now branches on insight presence. Missing
  insight (or `regenerate: true` in the body) → bootstrap path:
  builds [System(req.system_prompt), User(req.user_message + image)],
  runs the agentic loop, generates a title, persists a new row via
  store_insight (which auto-flips priors). Existing insight →
  continuation path (unchanged behaviour).
- New `regenerate: bool` request field forces bootstrap even when an
  insight exists. Takes precedence over `amend`.
- `done` SSE payload field-name alignment with Apollo's frontend
  convention: prompt_eval_count → prompt_tokens, eval_count →
  eval_tokens, num_ctx echo added.
- `amended_insight_id` semantics broaden — now populated whenever the
  turn produced a new row (bootstrap, regenerate, or amend). Existing
  amend clients keep working unchanged; new clients get the new row's
  id for free.
- `event: error` → `event: error_message` so frontend errors stop
  silently dropping.

Refactor: extracted run_streaming_agentic_loop, build_chat_clients,
and generate_title as shared helpers between bootstrap and
continuation. Continuation path's outer logic moves to
run_continuation_streaming with no behaviour change.

Mobile-ready: any client (Apollo backend, mobile, future) sends one
request to /insights/chat/stream and gets the right path. Apollo's
proxy stays a dumb pipe.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ai/handlers.rs     |  28 +-
 src/ai/insight_chat.rs | 579 ++++++++++++++++++++++++++++++-----------
 2 files changed, 447 insertions(+), 160 deletions(-)
diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs
index f6a0a37..24d93b8 100644
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -641,11 +641,18 @@ pub struct ChatTurnHttpRequest {
     #[serde(default)]
     pub max_iterations: Option<usize>,
     /// Per-turn system-prompt override. Ephemeral in append mode,
-    /// persisted in amend mode. See ChatTurnRequest for semantics.
+    /// persisted in amend / regenerate mode. See ChatTurnRequest for
+    /// semantics. Also seeds the bootstrap path when no insight exists.
     #[serde(default)]
     pub system_prompt: Option<String>,
     #[serde(default)]
     pub amend: bool,
+    /// When true, force the bootstrap path even if an insight already
+    /// exists: flip the existing row(s) to `is_current=false` and create
+    /// a new insight row from this turn. Takes precedence over `amend`.
+    /// Collapses to a normal bootstrap when no insight exists.
+    #[serde(default)]
+    pub regenerate: bool,
 }
 
 #[derive(Debug, Serialize)]
@@ -701,6 +708,7 @@ pub async fn chat_turn_handler(
         max_iterations: request.max_iterations,
         system_prompt: request.system_prompt.clone(),
         amend: request.amend,
+        regenerate: request.regenerate,
     };
 
     match app_state.insight_chat.chat_turn(chat_req).await {
@@ -916,6 +924,7 @@ pub async fn chat_stream_handler(
         max_iterations: request.max_iterations,
         system_prompt: request.system_prompt.clone(),
         amend: request.amend,
+        regenerate: request.regenerate,
     };
 
     let service = app_state.insight_chat.clone();
@@ -967,8 +976,9 @@ fn render_sse_frame(ev: &ChatStreamEvent) -> String {
             tool_calls_made,
             iterations_used,
             truncated,
-            prompt_eval_count,
-            eval_count,
+            prompt_tokens,
+            eval_tokens,
+            num_ctx,
             amended_insight_id,
             backend_used,
             model_used,
@@ -978,14 +988,20 @@ fn render_sse_frame(ev: &ChatStreamEvent) -> String {
                 "tool_calls_made": tool_calls_made,
                 "iterations_used": iterations_used,
                 "truncated": truncated,
-                "prompt_eval_count": prompt_eval_count,
-                "eval_count": eval_count,
+                "prompt_tokens": prompt_tokens,
+                "eval_tokens": eval_tokens,
+                "num_ctx": num_ctx,
                 "amended_insight_id": amended_insight_id,
                 "backend": backend_used,
                 "model": model_used,
             }),
         ),
-        ChatStreamEvent::Error(msg) => ("error", serde_json::json!({ "message": msg })),
+        // Apollo's frontend SSE consumer (and its free-chat backend, which
+        // is the de-facto convention) listens for `error_message`. Emitting
+        // `error` here meant any failure on the photo-chat path (e.g.
+        // "no insight found for path") was silently dropped, leaving an
+        // empty assistant bubble with no clue why the turn died.
+        ChatStreamEvent::Error(msg) => ("error_message", serde_json::json!({ "message": msg })),
     };
     let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string());
     format!("event: {}\ndata: {}\n\n", event_name, data)
diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index bbb03dd..1907728 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -7,7 +7,7 @@ use std::sync::{Arc, Mutex};
 use tokio::sync::Mutex as TokioMutex;
 
 use crate::ai::insight_generator::InsightGenerator;
-use crate::ai::llm_client::{ChatMessage, LlmClient, LlmStreamEvent};
+use crate::ai::llm_client::{ChatMessage, LlmClient, LlmStreamEvent, Tool};
 use crate::ai::ollama::OllamaClient;
 use crate::ai::openrouter::OpenRouterClient;
 use crate::database::InsightDao;
@@ -53,6 +53,12 @@ pub struct ChatTurnRequest {
     /// When true, write a new insight row (regenerating title) instead of
     /// updating training_messages on the existing row.
     pub amend: bool,
+    /// When true, force the bootstrap path even if an insight already exists:
+    /// flip prior rows to `is_current=false` and create a new insight row
+    /// from `system_prompt` + `user_message` + photo. Takes precedence over
+    /// `amend`. With no existing insight, collapses to a normal bootstrap
+    /// (the row-flip step is a no-op).
+    pub regenerate: bool,
 }
 
 #[derive(Debug)]
@@ -701,24 +707,44 @@ impl InsightChatService {
         };
         let _guard = entry_lock.lock().await;
 
-        let insight = {
+        // Look up existing insight (None when missing). The branch below
+        // decides bootstrap vs. continuation: `regenerate=true` forces
+        // bootstrap regardless of the row's presence; missing rows always
+        // bootstrap.
+        let existing_insight = {
             let cx = opentelemetry::Context::new();
             let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
             dao.get_insight(&cx, &normalized)
                 .map_err(|e| anyhow!("failed to load insight: {:?}", e))?
-                .ok_or_else(|| anyhow!("no insight found for path"))?
         };
-        let raw_history = insight
-            .training_messages
-            .as_ref()
-            .ok_or_else(|| {
-                anyhow!("insight has no chat history; regenerate this insight in agentic mode")
-            })?
-            .clone();
-        let mut messages: Vec<ChatMessage> = serde_json::from_str(&raw_history)
+
+        if req.regenerate || existing_insight.is_none() {
+            return self.run_bootstrap_streaming(req, normalized, tx).await;
+        }
+        let insight = existing_insight.expect("just checked Some above");
+        self.run_continuation_streaming(req, normalized, insight, tx)
+            .await
+    }
+
+    /// Continuation path: photo has an existing agentic insight with
+    /// `training_messages` populated. Replay the transcript, append a new
+    /// turn, run the agentic loop, persist (UPDATE for append; INSERT new
+    /// row for amend).
+    async fn run_continuation_streaming(
+        &self,
+        req: ChatTurnRequest,
+        normalized: String,
+        insight: crate::database::models::PhotoInsight,
+        tx: tokio::sync::mpsc::Sender<ChatStreamEvent>,
+    ) -> Result<()> {
+        let raw_history = insight.training_messages.as_ref().ok_or_else(|| {
+            anyhow!("insight has no chat history; regenerate this insight in agentic mode")
+        })?;
+        let mut messages: Vec<ChatMessage> = serde_json::from_str(raw_history)
             .map_err(|e| anyhow!("failed to deserialize chat history: {}", e))?;
 
-        // Backend selection — same rules as non-streaming chat_turn.
+        // Backend selection — defer to stored insight's backend unless the
+        // request supplies an override.
         let stored_backend = insight.backend.clone();
         let effective_backend = req
             .backend
@@ -752,59 +778,14 @@ impl InsightChatService {
             .or_else(|| Some(stored_model.clone()))
             .filter(|m| !m.is_empty());
 
-        let mut ollama_client = self.ollama.clone();
-        let mut openrouter_client: Option<OpenRouterClient> = None;
-
-        if is_hybrid {
-            let arc = self.openrouter.as_ref().ok_or_else(|| {
-                anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
-            })?;
-            let mut c: OpenRouterClient = (**arc).clone();
-            if let Some(ref m) = custom_model {
-                c.primary_model = m.clone();
-            }
-            if req.temperature.is_some()
-                || req.top_p.is_some()
-                || req.top_k.is_some()
-                || req.min_p.is_some()
-            {
-                c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
-            }
-            if let Some(ctx) = req.num_ctx {
-                c.set_num_ctx(Some(ctx));
-            }
-            openrouter_client = Some(c);
-        } else {
-            if let Some(ref m) = custom_model
-                && m != &self.ollama.primary_model
-            {
-                ollama_client = OllamaClient::new(
-                    self.ollama.primary_url.clone(),
-                    self.ollama.fallback_url.clone(),
-                    m.clone(),
-                    Some(m.clone()),
-                );
-            }
-            if req.temperature.is_some()
-                || req.top_p.is_some()
-                || req.top_k.is_some()
-                || req.min_p.is_some()
-            {
-                ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
-            }
-            if let Some(ctx) = req.num_ctx {
-                ollama_client.set_num_ctx(Some(ctx));
-            }
-        }
-
-        let chat_backend: &dyn LlmClient = if let Some(ref c) = openrouter_client {
-            c
-        } else {
-            &ollama_client
-        };
+        let (chat_backend_holder, ollama_client) =
+            self.build_chat_clients(is_hybrid, custom_model.as_deref(), &req)?;
+        let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
         let model_used = chat_backend.primary_model().to_string();
 
-        // Tool set.
+        // Tool set — local mode + first user turn carries an image →
+        // offer describe_photo. Hybrid: visual description was inlined
+        // when the insight was bootstrapped, no describe tool needed.
         let local_first_user_has_image = messages
             .iter()
             .find(|m| m.role == "user")
@@ -812,10 +793,6 @@ impl InsightChatService {
             .map(|imgs| !imgs.is_empty())
             .unwrap_or(false);
         let offer_describe_tool = !is_hybrid && local_first_user_has_image;
-        // current_gate_opts(has_vision) sets gate_opts.has_vision = has_vision
-        // and probes the per-table presence flags. Pass `offer_describe_tool`
-        // directly — the `!is_hybrid && local_first_user_has_image` decision
-        // is the chat-path's vision predicate.
         let gate_opts = self.generator.current_gate_opts(offer_describe_tool);
         let tools = InsightGenerator::build_tool_definitions(gate_opts);
 
@@ -839,9 +816,340 @@ impl InsightChatService {
         // Mirror chat_turn: per-turn override goes on first, budget note next.
         let override_stash =
             apply_system_prompt_override(&mut messages, req.system_prompt.as_deref());
-
         let original_system_content = annotate_system_with_budget(&mut messages, max_iterations);
 
+        let outcome = self
+            .run_streaming_agentic_loop(
+                chat_backend,
+                &ollama_client,
+                &mut messages,
+                tools,
+                &image_base64,
+                &normalized,
+                max_iterations,
+                &tx,
+            )
+            .await?;
+        let AgenticLoopOutcome {
+            tool_calls_made,
+            iterations_used,
+            last_prompt_eval_count,
+            last_eval_count,
+            final_content,
+        } = outcome;
+
+        // Drop the per-turn iteration-budget note before persisting so it
+        // doesn't snowball on subsequent turns.
+        restore_system_content(&mut messages, original_system_content);
+
+        // Append mode: undo the per-turn system-prompt override.
+        // Amend mode: keep it — it becomes the new row's system message.
+        if !req.amend {
+            restore_system_prompt_override(&mut messages, override_stash);
+        }
+
+        let json = serde_json::to_string(&messages)
+            .map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
+
+        let mut amended_insight_id: Option<i32> = None;
+        if req.amend {
+            let title = self.generate_title(chat_backend, &final_content).await?;
+
+            // Amended rows intentionally do not inherit the parent's
+            // `fewshot_source_ids`. The parent's few-shot influence is still
+            // present in this row's content; if you want strict lineage
+            // tracking for training-set filtering, fetch the parent here and
+            // copy its value forward.
+            let new_row = InsertPhotoInsight {
+                library_id: req.library_id,
+                file_path: normalized.clone(),
+                title,
+                summary: final_content.clone(),
+                generated_at: Utc::now().timestamp(),
+                model_version: model_used.clone(),
+                is_current: true,
+                training_messages: Some(json),
+                backend: effective_backend.clone(),
+                fewshot_source_ids: None,
+                content_hash: None,
+            };
+            let cx = opentelemetry::Context::new();
+            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
+            let stored = dao
+                .store_insight(&cx, new_row)
+                .map_err(|e| anyhow!("failed to store amended insight: {:?}", e))?;
+            amended_insight_id = Some(stored.id);
+        } else {
+            let cx = opentelemetry::Context::new();
+            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
+            dao.update_training_messages(&cx, req.library_id, &normalized, &json)
+                .map_err(|e| anyhow!("failed to persist chat history: {:?}", e))?;
+        }
+
+        let _ = tx
+            .send(ChatStreamEvent::Done {
+                tool_calls_made,
+                iterations_used,
+                truncated,
+                prompt_tokens: last_prompt_eval_count,
+                eval_tokens: last_eval_count,
+                num_ctx: req.num_ctx,
+                amended_insight_id,
+                backend_used: effective_backend,
+                model_used,
+            })
+            .await;
+
+        Ok(())
+    }
+
+    /// Bootstrap path: no insight row yet (or `regenerate=true`). Build a
+    /// fresh transcript from `req.system_prompt` + `req.user_message` +
+    /// the photo, run the agentic loop, generate a title, and INSERT a
+    /// new insight row. `store_insight` flips any prior rows for the same
+    /// `(library_id, file_path)` to `is_current=false` — that's how
+    /// `regenerate` shadows the previous insight.
+    async fn run_bootstrap_streaming(
+        &self,
+        req: ChatTurnRequest,
+        normalized: String,
+        tx: tokio::sync::mpsc::Sender<ChatStreamEvent>,
+    ) -> Result<()> {
+        let effective_backend = req
+            .backend
+            .as_deref()
+            .map(|s| s.trim().to_lowercase())
+            .filter(|s| !s.is_empty())
+            .unwrap_or_else(|| "local".to_string());
+        if !matches!(effective_backend.as_str(), "local" | "hybrid") {
+            bail!(
+                "unknown backend '{}'; expected 'local' or 'hybrid'",
+                effective_backend
+            );
+        }
+        let is_hybrid = effective_backend == "hybrid";
+
+        let max_iterations = req
+            .max_iterations
+            .unwrap_or(DEFAULT_MAX_ITERATIONS)
+            .clamp(1, env_max_iterations());
+
+        let custom_model = req.model.clone().filter(|m| !m.is_empty());
+        let (chat_backend_holder, ollama_client) =
+            self.build_chat_clients(is_hybrid, custom_model.as_deref(), &req)?;
+        let chat_backend: &dyn LlmClient = chat_backend_holder.as_ref();
+        let model_used = chat_backend.primary_model().to_string();
+
+        // Load image bytes once. RAW preview fallback is handled inside
+        // load_image_as_base64. Errors degrade silently — a chat that
+        // discusses metadata-only is still useful.
+        let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
+
+        // Hybrid backend: pre-describe the image via local Ollama vision
+        // and inline the description into the user turn. OpenRouter chat
+        // models don't see images directly. Mirrors the same pre-describe
+        // pass that `generate_agentic_insight_for_photo` does for hybrid.
+        let user_content = if is_hybrid {
+            let visual = match image_base64.as_deref() {
+                Some(b64) => match self.ollama.describe_image(b64).await {
+                    Ok(desc) => format!(
+                        "Visual description (from local vision model):\n{}\n\n",
+                        desc
+                    ),
+                    Err(e) => {
+                        log::warn!("hybrid bootstrap: local describe_image failed: {}", e);
+                        String::new()
+                    }
+                },
+                None => String::new(),
+            };
+            format!("{}{}", visual, req.user_message)
+        } else {
+            req.user_message.clone()
+        };
+
+        // Tool gates. Local + image present → expose describe_photo so
+        // the chat model can re-look at the photo on demand. Hybrid:
+        // already inlined, no tool needed.
+        let offer_describe_tool = !is_hybrid && image_base64.is_some();
+        let gate_opts = self.generator.current_gate_opts(offer_describe_tool);
+        let tools = InsightGenerator::build_tool_definitions(gate_opts);
+
+        // Build initial messages: persona system + user-turn-with-image.
+        let system_content = req
+            .system_prompt
+            .as_deref()
+            .map(str::trim)
+            .filter(|s| !s.is_empty())
+            .unwrap_or(BOOTSTRAP_DEFAULT_SYSTEM_PROMPT)
+            .to_string();
+        let system_msg = ChatMessage::system(system_content);
+        let mut user_msg = ChatMessage::user(user_content);
+        if !is_hybrid && let Some(ref img) = image_base64 {
+            user_msg.images = Some(vec![img.clone()]);
+        }
+        let mut messages = vec![system_msg, user_msg];
+
+        let outcome = self
+            .run_streaming_agentic_loop(
+                chat_backend,
+                &ollama_client,
+                &mut messages,
+                tools,
+                &image_base64,
+                &normalized,
+                max_iterations,
+                &tx,
+            )
+            .await?;
+        let AgenticLoopOutcome {
+            tool_calls_made,
+            iterations_used,
+            last_prompt_eval_count,
+            last_eval_count,
+            final_content,
+        } = outcome;
+
+        let title = self.generate_title(chat_backend, &final_content).await?;
+
+        let json = serde_json::to_string(&messages)
+            .map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
+        let new_row = InsertPhotoInsight {
+            library_id: req.library_id,
+            file_path: normalized.clone(),
+            title,
+            summary: final_content,
+            generated_at: Utc::now().timestamp(),
+            model_version: model_used.clone(),
+            is_current: true,
+            training_messages: Some(json),
+            backend: effective_backend.clone(),
+            fewshot_source_ids: None,
+            content_hash: None,
+        };
+        let stored = {
+            let cx = opentelemetry::Context::new();
+            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
+            dao.store_insight(&cx, new_row)
+                .map_err(|e| anyhow!("failed to store bootstrap insight: {:?}", e))?
+        };
+
+        // amended_insight_id semantics broaden on bootstrap/regenerate:
+        // populated whenever this turn produced a new insight row, so
+        // clients don't need a separate field to learn the new id.
+        let _ = tx
+            .send(ChatStreamEvent::Done {
+                tool_calls_made,
+                iterations_used,
+                truncated: false,
+                prompt_tokens: last_prompt_eval_count,
+                eval_tokens: last_eval_count,
+                num_ctx: req.num_ctx,
+                amended_insight_id: Some(stored.id),
+                backend_used: effective_backend,
+                model_used,
+            })
+            .await;
+
+        Ok(())
+    }
+
+    /// Set up chat clients (Ollama + optional OpenRouter) shared by
+    /// bootstrap and continuation. Returns the chat-side backend client
+    /// (boxed because hybrid and local return different concrete types)
+    /// and the Ollama client used for describe-image / local tool calls.
+    fn build_chat_clients(
+        &self,
+        is_hybrid: bool,
+        custom_model: Option<&str>,
+        req: &ChatTurnRequest,
+    ) -> Result<(Box<dyn LlmClient>, OllamaClient)> {
+        let mut ollama_client = self.ollama.clone();
+
+        if is_hybrid {
+            let arc = self.openrouter.as_ref().ok_or_else(|| {
+                anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
+            })?;
+            let mut c: OpenRouterClient = (**arc).clone();
+            if let Some(m) = custom_model {
+                c.primary_model = m.to_string();
+            }
+            if req.temperature.is_some()
+                || req.top_p.is_some()
+                || req.top_k.is_some()
+                || req.min_p.is_some()
+            {
+                c.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
+            }
+            if let Some(ctx) = req.num_ctx {
+                c.set_num_ctx(Some(ctx));
+            }
+            return Ok((Box::new(c), ollama_client));
+        }
+
+        if let Some(m) = custom_model
+            && m != self.ollama.primary_model
+        {
+            ollama_client = OllamaClient::new(
+                self.ollama.primary_url.clone(),
+                self.ollama.fallback_url.clone(),
+                m.to_string(),
+                Some(m.to_string()),
+            );
+        }
+        if req.temperature.is_some()
+            || req.top_p.is_some()
+            || req.top_k.is_some()
+            || req.min_p.is_some()
+        {
+            ollama_client.set_sampling_params(req.temperature, req.top_p, req.top_k, req.min_p);
+        }
+        if let Some(ctx) = req.num_ctx {
+            ollama_client.set_num_ctx(Some(ctx));
+        }
+        Ok((Box::new(ollama_client.clone()), ollama_client))
+    }
+
+    /// Generate a short title via the same chat backend so voice stays
+    /// consistent with the body. Mirrors generate_agentic_insight_for_photo's
+    /// titling pass.
+    async fn generate_title(
+        &self,
+        chat_backend: &dyn LlmClient,
+        final_content: &str,
+    ) -> Result<String> {
+        let title_prompt = format!(
+            "Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\n\
+             Capture the key moment or theme. Return ONLY the title, nothing else.",
+            final_content
+        );
+        let title_raw = chat_backend
+            .generate(
+                &title_prompt,
+                Some(
+                    "You are my long term memory assistant. Use only the information provided. Do not invent details.",
+                ),
+                None,
+            )
+            .await?;
+        Ok(title_raw.trim().trim_matches('"').to_string())
+    }
+
+    /// Drive the agentic loop with streaming SSE events. Shared between
+    /// bootstrap and continuation. Mutates `messages` in place (response
+    /// turns + tool results are appended) and returns counters + the
+    /// final assistant content.
+    async fn run_streaming_agentic_loop(
+        &self,
+        chat_backend: &dyn LlmClient,
+        ollama_client: &OllamaClient,
+        messages: &mut Vec<ChatMessage>,
+        tools: Vec<Tool>,
+        image_base64: &Option<String>,
+        normalized: &str,
+        max_iterations: usize,
+        tx: &tokio::sync::mpsc::Sender<ChatStreamEvent>,
+    ) -> Result<AgenticLoopOutcome> {
         let mut tool_calls_made = 0usize;
         let mut iterations_used = 0usize;
         let mut last_prompt_eval_count: Option<i32> = None;
@@ -883,7 +1191,8 @@ impl InsightChatService {
             let mut response =
                 final_message.ok_or_else(|| anyhow!("stream ended without a Done event"))?;
 
-            // Normalize non-object tool arguments (same as non-streaming path).
+            // Normalize non-object tool arguments (some models occasionally
+            // return null/string/bool which Ollama rejects on the next turn).
             if let Some(ref mut tcs) = response.tool_calls {
                 for tc in tcs.iter_mut() {
                     if !tc.function.arguments.is_object() {
@@ -897,7 +1206,7 @@ impl InsightChatService {
             if let Some(ref tool_calls) = response.tool_calls
                 && !tool_calls.is_empty()
             {
-                for (i, tool_call) in tool_calls.iter().enumerate() {
+                for tool_call in tool_calls {
                     tool_calls_made += 1;
                     let call_index = tool_calls_made - 1;
                     let _ = tx
@@ -913,9 +1222,9 @@ impl InsightChatService {
                         .execute_tool(
                             &tool_call.function.name,
                             &tool_call.function.arguments,
-                            &ollama_client,
-                            &image_base64,
-                            &normalized,
+                            ollama_client,
+                            image_base64,
+                            normalized,
                             &cx,
                         )
                         .await;
@@ -929,7 +1238,6 @@ impl InsightChatService {
                         })
                         .await;
                     messages.push(ChatMessage::tool_result(result));
-                    let _ = i; // reserved for per-call ordering if needed
                 }
                 continue;
             }
@@ -938,6 +1246,8 @@ impl InsightChatService {
             break;
         }
 
+        // No-tools fallback: loop exhausted iterations without producing a
+        // tool-free reply. Ask once more, with no tools attached.
         if final_content.is_empty() {
             messages.push(ChatMessage::user(
                 "Please write your final answer now without calling any more tools.",
@@ -970,87 +1280,33 @@ impl InsightChatService {
             messages.push(final_response);
         }
 
-        // Drop the per-turn iteration-budget note from the system message
-        // before we persist so it doesn't snowball on each subsequent turn.
-        restore_system_content(&mut messages, original_system_content);
-
-        // Append mode: undo the per-turn system-prompt override (mirrors
-        // chat_turn). Amend mode: keep the override — it becomes the new
-        // insight row's system message.
-        if !req.amend {
-            restore_system_prompt_override(&mut messages, override_stash);
-        }
-
-        // Persist.
-        let json = serde_json::to_string(&messages)
-            .map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
-
-        let mut amended_insight_id: Option<i32> = None;
-        if req.amend {
-            let title_prompt = format!(
-                "Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\n\
-                 Capture the key moment or theme. Return ONLY the title, nothing else.",
-                final_content
-            );
-            let title_raw = chat_backend
-                .generate(
-                    &title_prompt,
-                    Some(
-                        "You are my long term memory assistant. Use only the information provided. Do not invent details.",
-                    ),
-                    None,
-                )
-                .await?;
-            let title = title_raw.trim().trim_matches('"').to_string();
-
-            // Amended rows intentionally do not inherit the parent's
-            // `fewshot_source_ids`. The parent's few-shot influence is still
-            // present in this row's content; if you want strict lineage
-            // tracking for training-set filtering, fetch the parent here and
-            // copy its value forward.
-            let new_row = InsertPhotoInsight {
-                library_id: req.library_id,
-                file_path: normalized.clone(),
-                title,
-                summary: final_content.clone(),
-                generated_at: Utc::now().timestamp(),
-                model_version: model_used.clone(),
-                is_current: true,
-                training_messages: Some(json),
-                backend: effective_backend.clone(),
-                fewshot_source_ids: None,
-                content_hash: None,
-            };
-            let cx = opentelemetry::Context::new();
-            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
-            let stored = dao
-                .store_insight(&cx, new_row)
-                .map_err(|e| anyhow!("failed to store amended insight: {:?}", e))?;
-            amended_insight_id = Some(stored.id);
-        } else {
-            let cx = opentelemetry::Context::new();
-            let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao");
-            dao.update_training_messages(&cx, req.library_id, &normalized, &json)
-                .map_err(|e| anyhow!("failed to persist chat history: {:?}", e))?;
-        }
-
-        let _ = tx
-            .send(ChatStreamEvent::Done {
-                tool_calls_made,
-                iterations_used,
-                truncated,
-                prompt_eval_count: last_prompt_eval_count,
-                eval_count: last_eval_count,
-                amended_insight_id,
-                backend_used: effective_backend,
-                model_used,
-            })
-            .await;
-
-        Ok(())
+        Ok(AgenticLoopOutcome {
+            tool_calls_made,
+            iterations_used,
+            last_prompt_eval_count,
+            last_eval_count,
+            final_content,
+        })
     }
 }
 
+/// Default system prompt for bootstrap when the client didn't supply one
+/// (or supplied an empty string). Apollo's frontend always sends a persona
+/// prompt today, so this is a fallback for clients that don't.
+const BOOTSTRAP_DEFAULT_SYSTEM_PROMPT: &str = "You are a helpful AI assistant analyzing the user's photo. \
+     Use the available tools to gather context and answer their questions \
+     in a conversational tone.";
+
+/// Outcome of one streaming agentic loop pass. Shared between bootstrap
+/// and continuation.
+struct AgenticLoopOutcome {
+    tool_calls_made: usize,
+    iterations_used: usize,
+    last_prompt_eval_count: Option<i32>,
+    last_eval_count: Option<i32>,
+    final_content: String,
+}
+
 /// Events emitted by `chat_turn_stream`. One stream per turn; ends after
 /// `Done` or `Error`.
 #[derive(Debug, Clone)]
@@ -1083,8 +1339,23 @@ pub enum ChatStreamEvent {
         tool_calls_made: usize,
         iterations_used: usize,
         truncated: bool,
-        prompt_eval_count: Option<i32>,
-        eval_count: Option<i32>,
+        /// Renamed from `prompt_eval_count` to match the wire-name Apollo's
+        /// frontend (and the mobile client) consume on the `done` SSE
+        /// payload.
+        prompt_tokens: Option<i32>,
+        /// Renamed from `eval_count`. Same rationale.
+        eval_tokens: Option<i32>,
+        /// The configured context-window ceiling that ran this turn — echoes
+        /// the request's `num_ctx` (or the server default when none was
+        /// supplied). Lets clients render `prompt_tokens / num_ctx` without
+        /// remembering what they asked for.
+        num_ctx: Option<i32>,
+        /// Populated when this turn produced a NEW insight row — bootstrap
+        /// (no prior insight), regenerate (old row flipped to is_current=
+        /// false), or amend. Null on append. Clients should target this id
+        /// for any subsequent operation that previously needed a known
+        /// insight (e.g., a follow-up `/insights/chat` against the just-
+        /// created row).
         amended_insight_id: Option<i32>,
         backend_used: String,
         model_used: String,

From a29ff406a1244b6681dfd9e1242b5d7ca1779489 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Fri, 8 May 2026 10:56:22 -0400
Subject: [PATCH 2/5] insight-chat: extract bootstrap resolution helpers +
 unit-test them
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

resolve_bootstrap_system_prompt and resolve_bootstrap_backend run on
every bootstrap turn — they pick the persisted system prompt and the
chosen backend label. They were inline conditionals before; pulling
them out makes the rules testable without spinning up the full
streaming stack.

9 new tests cover:
- system prompt fallback to BOOTSTRAP_DEFAULT_SYSTEM_PROMPT for None,
  empty string, whitespace-only
- supplied non-empty prompts pass through verbatim, with interior
  newlines / spacing preserved (Apollo personas use multi-line tool
  listings)
- backend defaults to "local" for None / empty
- "local" / "hybrid" accepted case-insensitively with edge-trim
- unknown labels return a descriptive error

Total insight_chat tests: 24 (up from 15). No behaviour change.
---
 src/ai/insight_chat.rs | 118 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 99 insertions(+), 19 deletions(-)

diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index 1907728..b3126f4 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -915,18 +915,7 @@ impl InsightChatService {
         normalized: String,
         tx: tokio::sync::mpsc::Sender<ChatStreamEvent>,
     ) -> Result<()> {
-        let effective_backend = req
-            .backend
-            .as_deref()
-            .map(|s| s.trim().to_lowercase())
-            .filter(|s| !s.is_empty())
-            .unwrap_or_else(|| "local".to_string());
-        if !matches!(effective_backend.as_str(), "local" | "hybrid") {
-            bail!(
-                "unknown backend '{}'; expected 'local' or 'hybrid'",
-                effective_backend
-            );
-        }
+        let effective_backend = resolve_bootstrap_backend(req.backend.as_deref())?;
         let is_hybrid = effective_backend == "hybrid";
 
         let max_iterations = req
@@ -976,13 +965,7 @@ impl InsightChatService {
         let tools = InsightGenerator::build_tool_definitions(gate_opts);
 
         // Build initial messages: persona system + user-turn-with-image.
-        let system_content = req
-            .system_prompt
-            .as_deref()
-            .map(str::trim)
-            .filter(|s| !s.is_empty())
-            .unwrap_or(BOOTSTRAP_DEFAULT_SYSTEM_PROMPT)
-            .to_string();
+        let system_content = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
         let system_msg = ChatMessage::system(system_content);
         let mut user_msg = ChatMessage::user(user_content);
         if !is_hybrid && let Some(ref img) = image_base64 {
@@ -1297,6 +1280,34 @@ const BOOTSTRAP_DEFAULT_SYSTEM_PROMPT: &str = "You are a helpful AI assistant an
      Use the available tools to gather context and answer their questions \
      in a conversational tone.";
 
+/// Pick the system prompt for bootstrap. Trimmed-non-empty supplied wins;
+/// otherwise fall back to [`BOOTSTRAP_DEFAULT_SYSTEM_PROMPT`]. Returns an
+/// owned `String` because the bootstrap caller persists it on the new
+/// insight row.
+fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String {
+    supplied
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+        .map(str::to_string)
+        .unwrap_or_else(|| BOOTSTRAP_DEFAULT_SYSTEM_PROMPT.to_string())
+}
+
+/// Pick the backend label for bootstrap. Bootstrap has no stored insight
+/// to defer to (that's continuation's behaviour), so the default is
+/// `"local"`. Returns an error if the supplied label is non-empty but
+/// not one of the recognised values — same surface as continuation's
+/// validation.
+fn resolve_bootstrap_backend(supplied: Option<&str>) -> Result<String> {
+    let lower = supplied
+        .map(|s| s.trim().to_lowercase())
+        .filter(|s| !s.is_empty())
+        .unwrap_or_else(|| "local".to_string());
+    if !matches!(lower.as_str(), "local" | "hybrid") {
+        bail!("unknown backend '{}'; expected 'local' or 'hybrid'", lower);
+    }
+    Ok(lower)
+}
+
 /// Outcome of one streaming agentic loop pass. Shared between bootstrap
 /// and continuation.
 struct AgenticLoopOutcome {
@@ -1840,4 +1851,73 @@ mod tests {
         assert_eq!(msgs.len(), 1);
         assert_eq!(msgs[0].role, "user");
     }
+
+    // ── Bootstrap prompt / backend resolution ─────────────────────────
+    // Resolution helpers run on every bootstrap turn — they pick the
+    // persisted system prompt and the chosen backend label. Bugs here
+    // would silently swap the persona or miscategorise a backend.
+
+    #[test]
+    fn bootstrap_system_prompt_falls_back_to_default_for_none() {
+        let out = resolve_bootstrap_system_prompt(None);
+        assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
+    }
+
+    #[test]
+    fn bootstrap_system_prompt_falls_back_to_default_for_empty_string() {
+        // Apollo currently sends `''` when no persona is selected.
+        let out = resolve_bootstrap_system_prompt(Some(""));
+        assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
+    }
+
+    #[test]
+    fn bootstrap_system_prompt_falls_back_to_default_for_whitespace() {
+        let out = resolve_bootstrap_system_prompt(Some("   \n\t  "));
+        assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
+    }
+
+    #[test]
+    fn bootstrap_system_prompt_uses_supplied_when_non_empty() {
+        let out = resolve_bootstrap_system_prompt(Some("you are a journal"));
+        assert_eq!(out, "you are a journal");
+    }
+
+    #[test]
+    fn bootstrap_system_prompt_does_not_strip_inner_whitespace() {
+        // Trim only happens at the edges — interior newlines and spacing
+        // (which Apollo's persona uses for tool listings) must survive.
+        let prompt = "line one\nline two\n  bullet";
+        let out = resolve_bootstrap_system_prompt(Some(prompt));
+        assert_eq!(out, prompt);
+    }
+
+    #[test]
+    fn bootstrap_backend_defaults_to_local_when_none() {
+        let out = resolve_bootstrap_backend(None).unwrap();
+        assert_eq!(out, "local");
+    }
+
+    #[test]
+    fn bootstrap_backend_defaults_to_local_when_empty() {
+        let out = resolve_bootstrap_backend(Some("")).unwrap();
+        assert_eq!(out, "local");
+    }
+
+    #[test]
+    fn bootstrap_backend_accepts_local_and_hybrid_case_insensitively() {
+        assert_eq!(resolve_bootstrap_backend(Some("LOCAL")).unwrap(), "local");
+        assert_eq!(resolve_bootstrap_backend(Some("Hybrid")).unwrap(), "hybrid");
+        assert_eq!(
+            resolve_bootstrap_backend(Some("  local  ")).unwrap(),
+            "local"
+        );
+    }
+
+    #[test]
+    fn bootstrap_backend_rejects_unknown_label() {
+        let err = resolve_bootstrap_backend(Some("openrouter")).unwrap_err();
+        let msg = format!("{}", err);
+        assert!(msg.contains("unknown backend"));
+        assert!(msg.contains("openrouter"));
+    }
 }

From 24ecf2abd462868d302e02f4869b53eac6b43f79 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Fri, 8 May 2026 10:59:35 -0400
Subject: [PATCH 3/5] insight-chat: prepend Photo file path: <path> to
 bootstrap user turn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug: bootstrap user_content was just the user's typed message (plus
the hybrid visual description). Tools that take a file_path arg —
recall_facts_for_photo, get_file_tags, get_faces_in_photo — had no
way to learn the canonical path. Small models would invent
placeholders like "input_file_0.png" or call the tool with a name
guessed from a hidden multimodal input handle, neither of which
matched any real photo.

Fix: prepend a single-line "Photo file path: <normalized>\n\n" block
to user_content. Same shape generate_agentic_insight_for_photo
already uses for non-chat callers — kept the bootstrap minimal
(no date / GPS / tags pre-stuffing; the agentic loop can fetch
those via tools when needed).

Hybrid still injects the visual description block between the path
block and the user message; local mode just gets path + user text.
---
 src/ai/insight_chat.rs | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index b3126f4..f8a023f 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -934,12 +934,20 @@ impl InsightChatService {
         // discusses metadata-only is still useful.
         let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
 
+        // Photo path block. Several agentic tools (recall_facts_for_photo,
+        // get_file_tags, get_faces_in_photo, etc.) take a `file_path` arg
+        // that the model has no way to know unless we put it in the user
+        // turn. Without this block small models invent placeholders like
+        // "input_file_0.png" or refuse and ask the user. Mirrors the
+        // user_content layout `generate_agentic_insight_for_photo` uses.
+        let path_block = format!("Photo file path: {}\n\n", normalized);
+
         // Hybrid backend: pre-describe the image via local Ollama vision
         // and inline the description into the user turn. OpenRouter chat
         // models don't see images directly. Mirrors the same pre-describe
         // pass that `generate_agentic_insight_for_photo` does for hybrid.
-        let user_content = if is_hybrid {
-            let visual = match image_base64.as_deref() {
+        let visual_block = if is_hybrid {
+            match image_base64.as_deref() {
                 Some(b64) => match self.ollama.describe_image(b64).await {
                     Ok(desc) => format!(
                         "Visual description (from local vision model):\n{}\n\n",
@@ -951,12 +959,13 @@ impl InsightChatService {
                     }
                 },
                 None => String::new(),
-            };
-            format!("{}{}", visual, req.user_message)
+            }
         } else {
-            req.user_message.clone()
+            String::new()
         };
 
+        let user_content = format!("{}{}{}", path_block, visual_block, req.user_message);
+
         // Tool gates. Local + image present → expose describe_photo so
         // the chat model can re-look at the photo on demand. Hybrid:
         // already inlined, no tool needed.

From a0ec1a5080413e46cab69d936f69975868f7a63a Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Fri, 8 May 2026 11:07:03 -0400
Subject: [PATCH 4/5] insight-chat: photo context belongs in system msg, not
 user turn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After refresh, the rendered transcript was showing two unwanted
artifacts in the initial user bubble:

  Photo file path: pics/DSC_5171.jpg
  please tell me about this photo and what was going on around it

  Please write your final answer now without calling any more tools.

Two distinct bugs:

1. Bootstrap was prepending `Photo file path: <path>` (and, in
   hybrid mode, the visual description block) into the user-turn
   content. The model needed it to call file_path-keyed tools, but
   the user could see it in their own bubble on replay.

2. The no-tools fallback ("Please write your final answer now…")
   was a synthetic user message we never stripped from history,
   so it persisted into training_messages, rendered as a second
   user bubble, AND wiped the prior tool-call accumulator inside
   load_history (user-turn handler clears pending_tools), which
   is why the tool invocations disappeared from the assistant
   bubble after refresh.

Fixes:

- New `build_bootstrap_system_message` helper composes the persona
  with a `--- PHOTO CONTEXT ---` block (path + optional visual
  description). Lives in the system message, not the user turn.
  The user's bubble shows only what they typed.
- Streaming agentic loop's no-tools fallback now records its
  insertion index and removes the synthetic user prompt from
  `messages` after the model responds. Final assistant content
  stays — it reads coherently on replay without the synthetic
  prompt above it. Applies to both bootstrap and continuation.

3 new tests cover the system-message composer (path-only, with
visual block, persona-trim). Total insight_chat unit tests: 27.
---
 src/ai/insight_chat.rs | 97 +++++++++++++++++++++++++++++++++---------
 1 file changed, 78 insertions(+), 19 deletions(-)

diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index f8a023f..c58dc4d 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -934,25 +934,16 @@ impl InsightChatService {
         // discusses metadata-only is still useful.
         let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
 
-        // Photo path block. Several agentic tools (recall_facts_for_photo,
-        // get_file_tags, get_faces_in_photo, etc.) take a `file_path` arg
-        // that the model has no way to know unless we put it in the user
-        // turn. Without this block small models invent placeholders like
-        // "input_file_0.png" or refuse and ask the user. Mirrors the
-        // user_content layout `generate_agentic_insight_for_photo` uses.
-        let path_block = format!("Photo file path: {}\n\n", normalized);
-
         // Hybrid backend: pre-describe the image via local Ollama vision
-        // and inline the description into the user turn. OpenRouter chat
-        // models don't see images directly. Mirrors the same pre-describe
+        // so OpenRouter chat models (which can't see images directly) get
+        // the visual description as text. Mirrors the same pre-describe
         // pass that `generate_agentic_insight_for_photo` does for hybrid.
         let visual_block = if is_hybrid {
             match image_base64.as_deref() {
                 Some(b64) => match self.ollama.describe_image(b64).await {
-                    Ok(desc) => format!(
-                        "Visual description (from local vision model):\n{}\n\n",
-                        desc
-                    ),
+                    Ok(desc) => {
+                        format!("Visual description (from local vision model):\n{}\n", desc)
+                    }
                     Err(e) => {
                         log::warn!("hybrid bootstrap: local describe_image failed: {}", e);
                         String::new()
@@ -964,8 +955,6 @@ impl InsightChatService {
             String::new()
         };
 
-        let user_content = format!("{}{}{}", path_block, visual_block, req.user_message);
-
         // Tool gates. Local + image present → expose describe_photo so
         // the chat model can re-look at the photo on demand. Hybrid:
         // already inlined, no tool needed.
@@ -973,10 +962,17 @@ impl InsightChatService {
         let gate_opts = self.generator.current_gate_opts(offer_describe_tool);
         let tools = InsightGenerator::build_tool_definitions(gate_opts);
 
-        // Build initial messages: persona system + user-turn-with-image.
-        let system_content = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
+        // System message = persona + photo context block. Photo context
+        // is in the system message — not the user turn — so the user's
+        // bubble in the rendered transcript shows only what they typed.
+        // Several agentic tools (recall_facts_for_photo, get_file_tags,
+        // get_faces_in_photo, etc.) take a `file_path` arg the model
+        // can't know without being told; in hybrid mode the visual
+        // description belongs here for the same reason.
+        let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
+        let system_content = build_bootstrap_system_message(&persona, &normalized, &visual_block);
         let system_msg = ChatMessage::system(system_content);
-        let mut user_msg = ChatMessage::user(user_content);
+        let mut user_msg = ChatMessage::user(req.user_message.clone());
         if !is_hybrid && let Some(ref img) = image_base64 {
             user_msg.images = Some(vec![img.clone()]);
         }
@@ -1241,6 +1237,13 @@ impl InsightChatService {
         // No-tools fallback: loop exhausted iterations without producing a
         // tool-free reply. Ask once more, with no tools attached.
         if final_content.is_empty() {
+            // Index of the synthetic "please write your final answer"
+            // user message — we strip it from history after the model
+            // responds, so it never appears in the rendered transcript
+            // and load_history's user-turn handler doesn't reset
+            // pending_tools at this position (wiping the prior tool
+            // calls from the final assistant render).
+            let synthetic_idx = messages.len();
             messages.push(ChatMessage::user(
                 "Please write your final answer now without calling any more tools.",
             ));
@@ -1270,6 +1273,10 @@ impl InsightChatService {
                 final_message.ok_or_else(|| anyhow!("final stream ended without a Done event"))?;
             final_content = final_response.content.clone();
             messages.push(final_response);
+            // Drop the synthetic prompt — internal scaffolding only. The
+            // model's final_response (now at the end) was generated with
+            // it in context and reads coherently without it on replay.
+            messages.remove(synthetic_idx);
         }
 
         Ok(AgenticLoopOutcome {
@@ -1301,6 +1308,27 @@ fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String {
         .unwrap_or_else(|| BOOTSTRAP_DEFAULT_SYSTEM_PROMPT.to_string())
 }
 
+/// Compose the bootstrap system message: the persona on top, followed
+/// by a photo-context block carrying the file path (and, in hybrid
+/// mode, the local-vision visual description). Lives in the system
+/// message — not the user turn — so the rendered transcript shows
+/// only what the user typed.
+fn build_bootstrap_system_message(
+    persona: &str,
+    normalized_path: &str,
+    visual_block: &str,
+) -> String {
+    let mut out = persona.trim_end().to_string();
+    out.push_str("\n\n--- PHOTO CONTEXT ---\n");
+    out.push_str(&format!("Photo file path: {}\n", normalized_path));
+    if !visual_block.is_empty() {
+        // visual_block already ends with a newline; no extra separator
+        // needed.
+        out.push_str(visual_block);
+    }
+    out
+}
+
 /// Pick the backend label for bootstrap. Bootstrap has no stored insight
 /// to defer to (that's continuation's behaviour), so the default is
 /// `"local"`. Returns an error if the supplied label is non-empty but
@@ -1929,4 +1957,35 @@ mod tests {
         assert!(msg.contains("unknown backend"));
         assert!(msg.contains("openrouter"));
     }
+
+    #[test]
+    fn bootstrap_system_message_includes_path_and_persona() {
+        let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", "");
+        assert!(out.starts_with("you are helpful"));
+        assert!(out.contains("--- PHOTO CONTEXT ---"));
+        assert!(out.contains("Photo file path: pics/IMG.jpg"));
+        // No visual block — should not introduce a stray "Visual" line.
+        assert!(!out.contains("Visual description"));
+    }
+
+    #[test]
+    fn bootstrap_system_message_includes_visual_block_when_supplied() {
+        let visual = "Visual description (from local vision model):\nA dog in a park.\n";
+        let out = build_bootstrap_system_message("voice", "p.jpg", visual);
+        assert!(out.contains("Photo file path: p.jpg"));
+        assert!(out.contains("A dog in a park"));
+        // Path appears before visual.
+        let path_pos = out.find("Photo file path:").unwrap();
+        let visual_pos = out.find("A dog in a park").unwrap();
+        assert!(path_pos < visual_pos);
+    }
+
+    #[test]
+    fn bootstrap_system_message_trims_persona_trailing_whitespace() {
+        // Two consecutive newlines before the photo-context divider —
+        // any trailing whitespace from the persona must be collapsed
+        // so we don't end up with `\n\n\n\n--- PHOTO CONTEXT ---`.
+        let out = build_bootstrap_system_message("voice  \n\n\n", "p.jpg", "");
+        assert!(out.contains("voice\n\n--- PHOTO CONTEXT ---"));
+    }
 }

From 3699e059a227f2cd30cac98aed657572e7a7dac7 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Fri, 8 May 2026 11:14:39 -0400
Subject: [PATCH 5/5] insight-chat: include Date taken + GPS in bootstrap photo
 context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bootstrap system message gave the model a file path and (in
hybrid mode) a visual description, but no temporal anchor. Models
defaulted to today's date when calling get_sms_messages — Nov 2014
photos were getting "2024-03-11" passed as `date`, missing every
historical message and leading the model to confidently misreport
context.

This commit folds two more EXIF-sourced facts into the
--- PHOTO CONTEXT --- block:

  Date taken: <YYYY-MM-DD or "unknown">
  GPS: <lat, lon to 4dp>           (omitted when no GPS)

Resolution waterfall for date_taken matches the documented canonical
date pipeline at the EXIF / filename steps, but intentionally stops
short of the fs-time fallback `generate_agentic_insight_for_photo`
uses — for chat we'd rather show "unknown" than mislead the model
with an inode mtime. GPS is taken straight from EXIF when both
lat/lon are populated; absent GPS suppresses the line entirely so
the model doesn't hallucinate coordinates.

InsightGenerator gains a `fetch_exif(file_path)` accessor (crate-
visible) so the chat service doesn't need its own ExifDao plumbing.

build_bootstrap_system_message picks up two new params (date,
gps); existing tests updated and 5 new tests cover:
- date present / absent / waterfall (EXIF wins, filename fallback,
  None when neither source has it)
- GPS present / absent
- ordering (path → date → visual)

Total insight_chat unit tests: 33 (up from 27).
---
 src/ai/insight_chat.rs      | 171 ++++++++++++++++++++++++++++++++----
 src/ai/insight_generator.rs |  11 +++
 2 files changed, 166 insertions(+), 16 deletions(-)

diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index c58dc4d..e4d8a56 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -934,6 +934,20 @@ impl InsightChatService {
         // discusses metadata-only is still useful.
         let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
 
+        // EXIF lookup once — date_taken and GPS go into the photo
+        // context block in the system message. Without these the model
+        // hallucinates dates / GPS-keyed tool args (`get_sms_messages`
+        // would otherwise default to today's date and miss every
+        // historical photo).
+        let exif = self.generator.fetch_exif(&normalized);
+        let date_taken_str = resolve_date_taken_for_context(&exif, &normalized);
+        let gps = exif
+            .as_ref()
+            .and_then(|e| match (e.gps_latitude, e.gps_longitude) {
+                (Some(lat), Some(lon)) => Some((lat as f64, lon as f64)),
+                _ => None,
+            });
+
         // Hybrid backend: pre-describe the image via local Ollama vision
         // so OpenRouter chat models (which can't see images directly) get
         // the visual description as text. Mirrors the same pre-describe
@@ -967,10 +981,18 @@ impl InsightChatService {
         // bubble in the rendered transcript shows only what they typed.
         // Several agentic tools (recall_facts_for_photo, get_file_tags,
         // get_faces_in_photo, etc.) take a `file_path` arg the model
-        // can't know without being told; in hybrid mode the visual
-        // description belongs here for the same reason.
+        // can't know without being told. `Date taken:` and `GPS:` give
+        // get_sms_messages / reverse_geocode / get_personal_place_at
+        // the args they need. In hybrid mode the visual description
+        // belongs here for the same reason.
         let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
-        let system_content = build_bootstrap_system_message(&persona, &normalized, &visual_block);
+        let system_content = build_bootstrap_system_message(
+            &persona,
+            &normalized,
+            date_taken_str.as_deref(),
+            gps,
+            &visual_block,
+        );
         let system_msg = ChatMessage::system(system_content);
         let mut user_msg = ChatMessage::user(req.user_message.clone());
         if !is_hybrid && let Some(ref img) = image_base64 {
@@ -1309,18 +1331,29 @@ fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String {
 }
 
 /// Compose the bootstrap system message: the persona on top, followed
-/// by a photo-context block carrying the file path (and, in hybrid
-/// mode, the local-vision visual description). Lives in the system
-/// message — not the user turn — so the rendered transcript shows
-/// only what the user typed.
+/// by a photo-context block carrying the file path, date taken (when
+/// known), GPS (when present), and — in hybrid mode — the local-vision
+/// visual description. Lives in the system message — not the user
+/// turn — so the rendered transcript shows only what the user typed.
 fn build_bootstrap_system_message(
     persona: &str,
     normalized_path: &str,
+    date_taken: Option<&str>,
+    gps: Option<(f64, f64)>,
     visual_block: &str,
 ) -> String {
     let mut out = persona.trim_end().to_string();
     out.push_str("\n\n--- PHOTO CONTEXT ---\n");
     out.push_str(&format!("Photo file path: {}\n", normalized_path));
+    out.push_str(&format!(
+        "Date taken: {}\n",
+        date_taken.unwrap_or("unknown")
+    ));
+    if let Some((lat, lon)) = gps {
+        // Four decimal places ≈ 11 m of precision — plenty for any
+        // place-lookup tool, and keeps the prompt short.
+        out.push_str(&format!("GPS: {:.4}, {:.4}\n", lat, lon));
+    }
     if !visual_block.is_empty() {
         // visual_block already ends with a newline; no extra separator
         // needed.
@@ -1329,6 +1362,27 @@ fn build_bootstrap_system_message(
     out
 }
 
+/// Resolve a human-readable `YYYY-MM-DD` date string for the photo
+/// context block. Waterfall: EXIF `date_taken` → filename pattern →
+/// `None`. The fs-time fallback that `generate_agentic_insight_for_photo`
+/// uses is intentionally NOT applied here — for chat we'd rather show
+/// "unknown" than a misleading inode mtime as the photo's date.
+fn resolve_date_taken_for_context(
+    exif: &Option<crate::database::models::ImageExif>,
+    file_path: &str,
+) -> Option<String> {
+    let from_exif = exif
+        .as_ref()
+        .and_then(|e| e.date_taken)
+        .and_then(|ts| chrono::DateTime::from_timestamp(ts, 0))
+        .map(|dt| dt.format("%Y-%m-%d").to_string());
+    if from_exif.is_some() {
+        return from_exif;
+    }
+    crate::memories::extract_date_from_filename(file_path)
+        .map(|dt| dt.format("%Y-%m-%d").to_string())
+}
+
 /// Pick the backend label for bootstrap. Bootstrap has no stored insight
 /// to defer to (that's continuation's behaviour), so the default is
 /// `"local"`. Returns an error if the supplied label is non-empty but
@@ -1960,32 +2014,117 @@ mod tests {
 
     #[test]
     fn bootstrap_system_message_includes_path_and_persona() {
-        let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", "");
+        let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", None, None, "");
         assert!(out.starts_with("you are helpful"));
         assert!(out.contains("--- PHOTO CONTEXT ---"));
         assert!(out.contains("Photo file path: pics/IMG.jpg"));
-        // No visual block — should not introduce a stray "Visual" line.
+        // No date supplied → "unknown" so the model doesn't guess.
+        assert!(out.contains("Date taken: unknown"));
+        assert!(!out.contains("GPS:"));
         assert!(!out.contains("Visual description"));
     }
 
+    #[test]
+    fn bootstrap_system_message_includes_date_when_supplied() {
+        let out =
+            build_bootstrap_system_message("voice", "pics/IMG.jpg", Some("2014-11-08"), None, "");
+        assert!(out.contains("Date taken: 2014-11-08"));
+        assert!(!out.contains("Date taken: unknown"));
+    }
+
+    #[test]
+    fn bootstrap_system_message_includes_gps_when_present() {
+        let out = build_bootstrap_system_message(
+            "voice",
+            "p.jpg",
+            Some("2020-01-01"),
+            Some((42.36123, -71.05789)),
+            "",
+        );
+        // Four decimals — enough for place lookup, short enough to
+        // not bloat the system prompt.
+        assert!(out.contains("GPS: 42.3612, -71.0579"));
+    }
+
+    #[test]
+    fn bootstrap_system_message_omits_gps_when_none() {
+        let out = build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), None, "");
+        assert!(!out.contains("GPS:"));
+    }
+
     #[test]
     fn bootstrap_system_message_includes_visual_block_when_supplied() {
         let visual = "Visual description (from local vision model):\nA dog in a park.\n";
-        let out = build_bootstrap_system_message("voice", "p.jpg", visual);
+        let out =
+            build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), None, visual);
         assert!(out.contains("Photo file path: p.jpg"));
         assert!(out.contains("A dog in a park"));
-        // Path appears before visual.
+        // Path before date before visual.
         let path_pos = out.find("Photo file path:").unwrap();
+        let date_pos = out.find("Date taken:").unwrap();
         let visual_pos = out.find("A dog in a park").unwrap();
-        assert!(path_pos < visual_pos);
+        assert!(path_pos < date_pos);
+        assert!(date_pos < visual_pos);
     }
 
     #[test]
     fn bootstrap_system_message_trims_persona_trailing_whitespace() {
-        // Two consecutive newlines before the photo-context divider —
-        // any trailing whitespace from the persona must be collapsed
-        // so we don't end up with `\n\n\n\n--- PHOTO CONTEXT ---`.
-        let out = build_bootstrap_system_message("voice  \n\n\n", "p.jpg", "");
+        let out = build_bootstrap_system_message("voice  \n\n\n", "p.jpg", None, None, "");
         assert!(out.contains("voice\n\n--- PHOTO CONTEXT ---"));
     }
+
+    #[test]
+    fn date_taken_for_context_prefers_exif_over_filename() {
+        // EXIF wins when both are present (matches the canonical
+        // date_resolver waterfall — EXIF is more reliable than
+        // import-named filenames).
+        let exif = Some(crate::database::models::ImageExif {
+            id: 0,
+            library_id: 1,
+            file_path: "Screenshot_2014-06-01.png".to_string(),
+            camera_make: None,
+            camera_model: None,
+            lens_model: None,
+            width: None,
+            height: None,
+            orientation: None,
+            gps_latitude: None,
+            gps_longitude: None,
+            gps_altitude: None,
+            focal_length: None,
+            aperture: None,
+            shutter_speed: None,
+            iso: None,
+            // 2021-08-15 12:00:00 UTC
+            date_taken: Some(1_629_028_800),
+            created_time: 0,
+            last_modified: 0,
+            content_hash: None,
+            size_bytes: None,
+            phash_64: None,
+            dhash_64: None,
+            duplicate_of_hash: None,
+            duplicate_decided_at: None,
+            date_taken_source: None,
+            original_date_taken: None,
+            original_date_taken_source: None,
+        });
+        let out = resolve_date_taken_for_context(&exif, "Screenshot_2014-06-01.png");
+        assert_eq!(out.as_deref(), Some("2021-08-15"));
+    }
+
+    #[test]
+    fn date_taken_for_context_falls_back_to_filename_when_no_exif() {
+        // memories::extract_date_from_filename requires date+time in
+        // the filename — date-only patterns aren't matched. Use the
+        // canonical screenshot pattern for the regression case.
+        let out = resolve_date_taken_for_context(&None, "Screenshot_2014-06-01-20-44-50.png");
+        assert_eq!(out.as_deref(), Some("2014-06-01"));
+    }
+
+    #[test]
+    fn date_taken_for_context_returns_none_when_neither_source() {
+        let out = resolve_date_taken_for_context(&None, "DSC_5171.JPG");
+        assert!(out.is_none());
+    }
 }
diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs
index 3e0d7fb..2df22fa 100644
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -229,6 +229,17 @@ impl InsightGenerator {
         None
     }
 
+    /// Look up the EXIF row for a photo. Returns `None` when no row
+    /// exists yet (file watcher hasn't reached it) or the DAO call
+    /// fails. Used by callers — including the chat-bootstrap path —
+    /// that need a few specific fields (date_taken, GPS) without
+    /// duplicating DAO plumbing.
+    pub(crate) fn fetch_exif(&self, file_path: &str) -> Option<crate::database::models::ImageExif> {
+        let cx = opentelemetry::Context::current();
+        let mut dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
+        dao.get_exif(&cx, file_path).ok().flatten()
+    }
+
     /// Load image file, resize it, and encode as base64 for vision models
     /// Resizes to max 1024px on longest edge to reduce context usage
     pub(crate) fn load_image_as_base64(&self, file_path: &str) -> Result<String> {