diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index f8a023f..c58dc4d 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -934,25 +934,16 @@ impl InsightChatService { // discusses metadata-only is still useful. let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok(); - // Photo path block. Several agentic tools (recall_facts_for_photo, - // get_file_tags, get_faces_in_photo, etc.) take a `file_path` arg - // that the model has no way to know unless we put it in the user - // turn. Without this block small models invent placeholders like - // "input_file_0.png" or refuse and ask the user. Mirrors the - // user_content layout `generate_agentic_insight_for_photo` uses. - let path_block = format!("Photo file path: {}\n\n", normalized); - // Hybrid backend: pre-describe the image via local Ollama vision - // and inline the description into the user turn. OpenRouter chat - // models don't see images directly. Mirrors the same pre-describe + // so OpenRouter chat models (which can't see images directly) get + // the visual description as text. Mirrors the same pre-describe // pass that `generate_agentic_insight_for_photo` does for hybrid. let visual_block = if is_hybrid { match image_base64.as_deref() { Some(b64) => match self.ollama.describe_image(b64).await { - Ok(desc) => format!( - "Visual description (from local vision model):\n{}\n\n", - desc - ), + Ok(desc) => { + format!("Visual description (from local vision model):\n{}\n", desc) + } Err(e) => { log::warn!("hybrid bootstrap: local describe_image failed: {}", e); String::new() @@ -964,8 +955,6 @@ impl InsightChatService { String::new() }; - let user_content = format!("{}{}{}", path_block, visual_block, req.user_message); - // Tool gates. Local + image present → expose describe_photo so // the chat model can re-look at the photo on demand. Hybrid: // already inlined, no tool needed. 
@@ -973,10 +962,17 @@ impl InsightChatService { let gate_opts = self.generator.current_gate_opts(offer_describe_tool); let tools = InsightGenerator::build_tool_definitions(gate_opts); - // Build initial messages: persona system + user-turn-with-image. - let system_content = resolve_bootstrap_system_prompt(req.system_prompt.as_deref()); + // System message = persona + photo context block. Photo context + // is in the system message — not the user turn — so the user's + // bubble in the rendered transcript shows only what they typed. + // Several agentic tools (recall_facts_for_photo, get_file_tags, + // get_faces_in_photo, etc.) take a `file_path` arg the model + // can't know without being told; in hybrid mode the visual + // description belongs here for the same reason. + let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref()); + let system_content = build_bootstrap_system_message(&persona, &normalized, &visual_block); let system_msg = ChatMessage::system(system_content); - let mut user_msg = ChatMessage::user(user_content); + let mut user_msg = ChatMessage::user(req.user_message.clone()); if !is_hybrid && let Some(ref img) = image_base64 { user_msg.images = Some(vec![img.clone()]); } @@ -1241,6 +1237,13 @@ impl InsightChatService { // No-tools fallback: loop exhausted iterations without producing a // tool-free reply. Ask once more, with no tools attached. if final_content.is_empty() { + // Index of the synthetic "please write your final answer" + // user message — we strip it from history after the model + // responds, so it never appears in the rendered transcript + // and load_history's user-turn handler doesn't reset + // pending_tools at this position (wiping the prior tool + // calls from the final assistant render). 
+ let synthetic_idx = messages.len(); messages.push(ChatMessage::user( "Please write your final answer now without calling any more tools.", )); @@ -1270,6 +1273,10 @@ impl InsightChatService { final_message.ok_or_else(|| anyhow!("final stream ended without a Done event"))?; final_content = final_response.content.clone(); messages.push(final_response); + // Drop the synthetic prompt — internal scaffolding only. The + // model's final_response (now at the end) was generated with + // it in context and reads coherently without it on replay. + messages.remove(synthetic_idx); } Ok(AgenticLoopOutcome { @@ -1301,6 +1308,27 @@ fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String { .unwrap_or_else(|| BOOTSTRAP_DEFAULT_SYSTEM_PROMPT.to_string()) } +/// Compose the bootstrap system message: the persona on top, followed +/// by a photo-context block carrying the file path (and, in hybrid +/// mode, the local-vision visual description). Lives in the system +/// message — not the user turn — so the rendered transcript shows +/// only what the user typed. +fn build_bootstrap_system_message( + persona: &str, + normalized_path: &str, + visual_block: &str, +) -> String { + let mut out = persona.trim_end().to_string(); + out.push_str("\n\n--- PHOTO CONTEXT ---\n"); + out.push_str(&format!("Photo file path: {}\n", normalized_path)); + if !visual_block.is_empty() { + // visual_block already ends with a newline; no extra separator + // needed. + out.push_str(visual_block); + } + out +} + /// Pick the backend label for bootstrap. Bootstrap has no stored insight /// to defer to (that's continuation's behaviour), so the default is /// `"local"`. 
Returns an error if the supplied label is non-empty but @@ -1929,4 +1957,35 @@ mod tests { assert!(msg.contains("unknown backend")); assert!(msg.contains("openrouter")); } + + #[test] + fn bootstrap_system_message_includes_path_and_persona() { + let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", ""); + assert!(out.starts_with("you are helpful")); + assert!(out.contains("--- PHOTO CONTEXT ---")); + assert!(out.contains("Photo file path: pics/IMG.jpg")); + // No visual block — should not introduce a stray "Visual" line. + assert!(!out.contains("Visual description")); + } + + #[test] + fn bootstrap_system_message_includes_visual_block_when_supplied() { + let visual = "Visual description (from local vision model):\nA dog in a park.\n"; + let out = build_bootstrap_system_message("voice", "p.jpg", visual); + assert!(out.contains("Photo file path: p.jpg")); + assert!(out.contains("A dog in a park")); + // Path appears before visual. + let path_pos = out.find("Photo file path:").unwrap(); + let visual_pos = out.find("A dog in a park").unwrap(); + assert!(path_pos < visual_pos); + } + + #[test] + fn bootstrap_system_message_trims_persona_trailing_whitespace() { + // Two consecutive newlines before the photo-context divider — + // any trailing whitespace from the persona must be collapsed + // so we don't end up with `\n\n\n\n--- PHOTO CONTEXT ---`. + let out = build_bootstrap_system_message("voice \n\n\n", "p.jpg", ""); + assert!(out.contains("voice\n\n--- PHOTO CONTEXT ---")); + } }