From b711252c23e625a863d4c81edcfc43707ab1f874 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Tue, 9 Jun 2026 18:29:35 -0400
Subject: [PATCH] Resolve persona prompts server-side; drop synthetic prompt in
 chat_turn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A request carrying persona_id but no system_prompt used to fall back to
the neutral default voice. Both agentic generation
(generate_agentic_insight_handler) and chat bootstrap now resolve the
persona's stored prompt from the persona store, with precedence:
explicit non-blank client system_prompt > persona store lookup >
existing default ("default" persona id behaves the same — used if the
store has a row, neutral default otherwise). Resolution happens at the
handler / bootstrap entry where the DAO is reachable; internals are
unchanged. resolve_bootstrap_system_prompt takes the resolved persona
prompt as a second argument, with precedence tests.

Also in insight_chat:

- Sync chat_turn no longer persists the synthetic "Please write your
  final answer now without calling any more tools." user message pushed
  on iteration exhaustion — extracted both streaming variants'
  synthetic_idx pattern into push/remove_synthetic_final_prompt (the
  remove is a defensive no-op on index drift) and applied it to all
  three loops; round-trip test included.
- Strip leaked <think> blocks from the final content persisted as the
  reply in chat_turn and both streaming AgenticLoopOutcomes (mid-stream
  TextDeltas are untouched; the raw transcript keeps the block).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 src/ai/handlers.rs     |  17 ++++-
 src/ai/insight_chat.rs | 169 ++++++++++++++++++++++++++++++++++-------
 2 files changed, 158 insertions(+), 28 deletions(-)
diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs
index 9bcb048..5e46418 100644
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -809,6 +809,21 @@ pub async fn generate_agentic_insight_handler(
         .filter(|s| !s.trim().is_empty())
         .unwrap_or_else(|| "default".to_string());
 
+    // Server-side persona resolution: an explicit client `system_prompt`
+    // wins; otherwise the persona's stored prompt from the persona store;
+    // otherwise None and `build_system_content` applies its neutral
+    // default. Without the lookup, a request carrying only `persona_id`
+    // silently generated in the default voice.
+    let system_prompt = request
+        .system_prompt
+        .clone()
+        .filter(|s| !s.trim().is_empty())
+        .or_else(|| {
+            app_state
+                .insight_generator
+                .persona_system_prompt(user_id, &persona_id)
+        });
+
     let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
         .ok()
         .and_then(|v| v.parse().ok())
@@ -834,7 +849,7 @@ pub async fn generate_agentic_insight_handler(
                 generator_for_task.generate_agentic_insight_for_photo(
                     &path_for_task,
                     request.model.clone(),
-                    request.system_prompt.clone(),
+                    system_prompt,
                     request.num_ctx,
                     request.temperature,
                     request.top_p,
diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index 3c5eb26..7298296 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -33,6 +33,12 @@ const BYTES_PER_TOKEN: usize = 4;
 /// characters) must NOT be counted as text bytes — doing so dwarfs the entire
 /// text budget and forces spurious truncation on every turn.
 const IMAGE_TOKENS_EACH: usize = 1300;
+/// User prompt injected when the agentic loop exhausts its iteration budget
+/// without producing a tool-free reply. Internal scaffolding only — it is
+/// stripped from the transcript before persistence (see
+/// [`push_synthetic_final_prompt`] / [`remove_synthetic_final_prompt`]).
+const SYNTHETIC_FINAL_ANSWER_PROMPT: &str =
+    "Please write your final answer now without calling any more tools.";
 
 pub type ChatLockMap = Arc<TokioMutex<HashMap<(i32, String), Arc<TokioMutex<()>>>>>;
 
@@ -457,9 +463,7 @@ impl InsightChatService {
                 "Chat loop exhausted after {} iterations, requesting final answer",
                 iterations_used
             );
-            messages.push(ChatMessage::user(
-                "Please write your final answer now without calling any more tools.",
-            ));
+            let synthetic_idx = push_synthetic_final_prompt(&mut messages);
             let (final_response, prompt_tokens, eval_tokens) = backend
                 .chat()
                 .chat_with_tools(messages.clone(), vec![])
@@ -468,8 +472,15 @@ impl InsightChatService {
             last_eval_count = eval_tokens;
             final_content = final_response.content.clone();
             messages.push(final_response);
+            // Drop the synthetic prompt before persistence — internal
+            // scaffolding only (mirrors both streaming variants).
+            remove_synthetic_final_prompt(&mut messages, synthetic_idx);
         }
 
+        // Strip any leaked <think> reasoning block from the content we
+        // return / persist as the reply (the raw transcript keeps it).
+        let final_content = crate::ai::llm_client::strip_think_blocks(&final_content);
+
         loop_cx.span().set_status(Status::Ok);
 
         // Drop the per-turn iteration-budget note from the system message
@@ -1039,7 +1050,12 @@ impl InsightChatService {
         );
         let tools = InsightGenerator::build_tool_definitions(gate_opts);
 
-        let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
+        // Server-side persona resolution: explicit client system_prompt wins;
+        // else the active persona's stored prompt; else the neutral default.
+        let persona_prompt = self
+            .generator
+            .persona_system_prompt(req.user_id, &active_persona);
+        let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref(), persona_prompt);
         let system_content = build_bootstrap_system_message(
             &persona,
             &normalized,
@@ -1263,10 +1279,7 @@ impl InsightChatService {
 
         // No-tools fallback
         if final_content.is_empty() {
-            let synthetic_idx = messages.len();
-            messages.push(ChatMessage::user(
-                "Please write your final answer now without calling any more tools.",
-            ));
+            let synthetic_idx = push_synthetic_final_prompt(messages);
             let mut stream = backend
                 .chat()
                 .chat_with_tools_stream(messages.clone(), vec![])
@@ -1294,7 +1307,7 @@ impl InsightChatService {
                 final_message.ok_or_else(|| anyhow!("final stream ended without a Done event"))?;
             final_content = final_response.content.clone();
             messages.push(final_response);
-            messages.remove(synthetic_idx);
+            remove_synthetic_final_prompt(messages, synthetic_idx);
         }
 
         Ok(AgenticLoopOutcome {
@@ -1302,7 +1315,9 @@ impl InsightChatService {
             iterations_used,
             last_prompt_eval_count,
             last_eval_count,
-            final_content,
+            // Strip any leaked <think> reasoning block from the content the
+            // caller persists as title/summary (the raw transcript keeps it).
+            final_content: crate::ai::llm_client::strip_think_blocks(&final_content),
             cancelled: false,
         })
     }
@@ -1648,7 +1663,12 @@ impl InsightChatService {
         // get_sms_messages / reverse_geocode / get_personal_place_at
         // the args they need. In hybrid mode the visual description
         // belongs here for the same reason.
-        let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
+        // Server-side persona resolution: explicit client system_prompt wins;
+        // else the active persona's stored prompt; else the neutral default.
+        let persona_prompt = self
+            .generator
+            .persona_system_prompt(req.user_id, &active_persona);
+        let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref(), persona_prompt);
         let system_content = build_bootstrap_system_message(
             &persona,
             &normalized,
@@ -1866,10 +1886,7 @@ impl InsightChatService {
             // and load_history's user-turn handler doesn't reset
             // pending_tools at this position (wiping the prior tool
             // calls from the final assistant render).
-            let synthetic_idx = messages.len();
-            messages.push(ChatMessage::user(
-                "Please write your final answer now without calling any more tools.",
-            ));
+            let synthetic_idx = push_synthetic_final_prompt(messages);
             let mut stream = backend
                 .chat()
                 .chat_with_tools_stream(messages.clone(), vec![])
@@ -1900,7 +1917,7 @@ impl InsightChatService {
             // Drop the synthetic prompt — internal scaffolding only. The
             // model's final_response (now at the end) was generated with
             // it in context and reads coherently without it on replay.
-            messages.remove(synthetic_idx);
+            remove_synthetic_final_prompt(messages, synthetic_idx);
         }
 
         Ok(AgenticLoopOutcome {
@@ -1908,7 +1925,9 @@ impl InsightChatService {
             iterations_used,
             last_prompt_eval_count,
             last_eval_count,
-            final_content,
+            // Strip any leaked <think> reasoning block from the content the
+            // caller persists as title/summary (the raw transcript keeps it).
+            final_content: crate::ai::llm_client::strip_think_blocks(&final_content),
             cancelled: false,
         })
     }
@@ -1921,15 +1940,21 @@ const BOOTSTRAP_DEFAULT_SYSTEM_PROMPT: &str = "You are a helpful AI assistant an
      Use the available tools to gather context and answer their questions \
      in a conversational tone.";
 
-/// Pick the system prompt for bootstrap. Trimmed-non-empty supplied wins;
-/// otherwise fall back to [`BOOTSTRAP_DEFAULT_SYSTEM_PROMPT`]. Returns an
-/// owned `String` because the bootstrap caller persists it on the new
-/// insight row.
-fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String {
+/// Pick the system prompt for bootstrap. Precedence: trimmed-non-empty
+/// `supplied` (the client's explicit `system_prompt`) wins; else
+/// `persona_prompt` (the active persona's stored prompt, resolved
+/// server-side from the persona store); else
+/// [`BOOTSTRAP_DEFAULT_SYSTEM_PROMPT`]. Returns an owned `String` because
+/// the bootstrap caller persists it on the new insight row.
+fn resolve_bootstrap_system_prompt(
+    supplied: Option<&str>,
+    persona_prompt: Option<String>,
+) -> String {
     supplied
         .map(str::trim)
         .filter(|s| !s.is_empty())
         .map(str::to_string)
+        .or_else(|| persona_prompt.filter(|s| !s.trim().is_empty()))
         .unwrap_or_else(|| BOOTSTRAP_DEFAULT_SYSTEM_PROMPT.to_string())
 }
 
@@ -2200,6 +2225,30 @@ fn restore_system_content(messages: &mut [ChatMessage], original: Option<String>
     }
 }
 
+/// Append the synthetic "write your final answer" user prompt, returning the
+/// index the caller must later hand to [`remove_synthetic_final_prompt`].
+/// Used when the agentic loop exhausts its budget: the model gets one more
+/// (tool-free) request, but the nudge itself must never persist — it would
+/// render as a user bubble in the transcript and reset `load_history`'s
+/// pending-tools tracking at that position.
+fn push_synthetic_final_prompt(messages: &mut Vec<ChatMessage>) -> usize {
+    let idx = messages.len();
+    messages.push(ChatMessage::user(SYNTHETIC_FINAL_ANSWER_PROMPT));
+    idx
+}
+
+/// Remove the synthetic prompt inserted by [`push_synthetic_final_prompt`].
+/// Defensive no-op when the message at `idx` isn't the synthetic prompt —
+/// guards against index drift if the surrounding code is reordered.
+fn remove_synthetic_final_prompt(messages: &mut Vec<ChatMessage>, idx: usize) {
+    if messages
+        .get(idx)
+        .is_some_and(|m| m.role == "user" && m.content == SYNTHETIC_FINAL_ANSWER_PROMPT)
+    {
+        messages.remove(idx);
+    }
+}
+
 /// Receipt produced by [`apply_system_prompt_override`] so the caller can
 /// undo the override before persistence. Two variants because we either
 /// replaced an existing system message (need its original content) or
@@ -2643,26 +2692,26 @@ mod tests {
 
     #[test]
     fn bootstrap_system_prompt_falls_back_to_default_for_none() {
-        let out = resolve_bootstrap_system_prompt(None);
+        let out = resolve_bootstrap_system_prompt(None, None);
         assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
     }
 
     #[test]
     fn bootstrap_system_prompt_falls_back_to_default_for_empty_string() {
         // Apollo currently sends `''` when no persona is selected.
-        let out = resolve_bootstrap_system_prompt(Some(""));
+        let out = resolve_bootstrap_system_prompt(Some(""), None);
         assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
     }
 
     #[test]
     fn bootstrap_system_prompt_falls_back_to_default_for_whitespace() {
-        let out = resolve_bootstrap_system_prompt(Some("   \n\t  "));
+        let out = resolve_bootstrap_system_prompt(Some("   \n\t  "), None);
         assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
     }
 
     #[test]
     fn bootstrap_system_prompt_uses_supplied_when_non_empty() {
-        let out = resolve_bootstrap_system_prompt(Some("you are a journal"));
+        let out = resolve_bootstrap_system_prompt(Some("you are a journal"), None);
         assert_eq!(out, "you are a journal");
     }
 
@@ -2671,10 +2720,76 @@ mod tests {
         // Trim only happens at the edges — interior newlines and spacing
         // (which Apollo's persona uses for tool listings) must survive.
         let prompt = "line one\nline two\n  bullet";
-        let out = resolve_bootstrap_system_prompt(Some(prompt));
+        let out = resolve_bootstrap_system_prompt(Some(prompt), None);
         assert_eq!(out, prompt);
     }
 
+    #[test]
+    fn bootstrap_system_prompt_explicit_wins_over_persona_store() {
+        let out = resolve_bootstrap_system_prompt(
+            Some("explicit prompt"),
+            Some("stored persona prompt".to_string()),
+        );
+        assert_eq!(out, "explicit prompt");
+    }
+
+    #[test]
+    fn bootstrap_system_prompt_uses_persona_store_when_no_explicit() {
+        // Request carried persona_id but no system_prompt — the persona's
+        // stored prompt must be used, not the neutral default.
+        let out = resolve_bootstrap_system_prompt(None, Some("stored persona prompt".to_string()));
+        assert_eq!(out, "stored persona prompt");
+
+        // Empty explicit prompt behaves like None.
+        let out =
+            resolve_bootstrap_system_prompt(Some(""), Some("stored persona prompt".to_string()));
+        assert_eq!(out, "stored persona prompt");
+    }
+
+    #[test]
+    fn bootstrap_system_prompt_blank_persona_prompt_falls_to_default() {
+        let out = resolve_bootstrap_system_prompt(None, Some("   ".to_string()));
+        assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
+    }
+
+    // ── Synthetic final-answer prompt scaffolding ──────────────────────
+
+    #[test]
+    fn synthetic_final_prompt_round_trip_leaves_no_scaffolding() {
+        // Exhausted-loop fallback: nudge pushed, model reply appended, nudge
+        // removed — the persisted transcript must contain the reply but not
+        // the synthetic user prompt (all three loop variants rely on this).
+        let mut msgs = vec![
+            ChatMessage::system("sys"),
+            ChatMessage::user("q"),
+            assistant_with_tool_call("lookup"),
+            ChatMessage::tool_result("data"),
+        ];
+        let idx = push_synthetic_final_prompt(&mut msgs);
+        assert_eq!(msgs[idx].content, SYNTHETIC_FINAL_ANSWER_PROMPT);
+
+        msgs.push(assistant_text("final answer"));
+        remove_synthetic_final_prompt(&mut msgs, idx);
+
+        assert_eq!(msgs.len(), 5);
+        assert!(
+            msgs.iter()
+                .all(|m| m.content != SYNTHETIC_FINAL_ANSWER_PROMPT),
+            "synthetic prompt must not persist"
+        );
+        assert_eq!(msgs.last().unwrap().content, "final answer");
+    }
+
+    #[test]
+    fn remove_synthetic_final_prompt_is_noop_on_index_mismatch() {
+        // Defensive guard: if the message at idx isn't the synthetic prompt
+        // (index drift), nothing is removed.
+        let mut msgs = vec![ChatMessage::user("q"), assistant_text("a")];
+        remove_synthetic_final_prompt(&mut msgs, 0);
+        remove_synthetic_final_prompt(&mut msgs, 5);
+        assert_eq!(msgs.len(), 2);
+    }
+
     #[test]
     fn bootstrap_backend_defaults_to_local_when_none() {
         let out = resolve_bootstrap_backend(None).unwrap();