From b711252c23e625a863d4c81edcfc43707ab1f874 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Tue, 9 Jun 2026 18:29:35 -0400 Subject: [PATCH] Resolve persona prompts server-side; drop synthetic prompt in chat_turn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A request carrying persona_id but no system_prompt used to fall back to the neutral default voice. Both agentic generation (generate_agentic_insight_handler) and chat bootstrap now resolve the persona's stored prompt from the persona store, with precedence: explicit non-blank client system_prompt > persona store lookup > existing default ("default" persona id behaves the same — used if the store has a row, neutral default otherwise). Resolution happens at the handler / bootstrap entry where the DAO is reachable; internals are unchanged. resolve_bootstrap_system_prompt takes the resolved persona prompt as a second argument, with precedence tests. Also in insight_chat: - Sync chat_turn no longer persists the synthetic "Please write your final answer now without calling any more tools." user message pushed on iteration exhaustion — extracted both streaming variants' synthetic_idx pattern into push/remove_synthetic_final_prompt (the remove is a defensive no-op on index drift) and applied it to all three loops; round-trip test included. - Strip leaked blocks from the final content persisted as the reply in chat_turn and both streaming AgenticLoopOutcomes (mid-stream TextDeltas are untouched; the raw transcript keeps the block). Co-Authored-By: Claude Fable 5 --- src/ai/handlers.rs | 17 ++++- src/ai/insight_chat.rs | 169 ++++++++++++++++++++++++++++++++++------- 2 files changed, 158 insertions(+), 28 deletions(-) diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index 9bcb048..5e46418 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -809,6 +809,21 @@ pub async fn generate_agentic_insight_handler( .filter(|s| !s.trim().is_empty()) .unwrap_or_else(|| "default".to_string()); + // Server-side persona resolution: an explicit client `system_prompt` + // wins; otherwise the persona's stored prompt from the persona store; + // otherwise None and `build_system_content` applies its neutral + // default. Without the lookup, a request carrying only `persona_id` + // silently generated in the default voice. + let system_prompt = request + .system_prompt + .clone() + .filter(|s| !s.trim().is_empty()) + .or_else(|| { + app_state + .insight_generator + .persona_system_prompt(user_id, &persona_id) + }); + let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS") .ok() .and_then(|v| v.parse().ok()) @@ -834,7 +849,7 @@ pub async fn generate_agentic_insight_handler( generator_for_task.generate_agentic_insight_for_photo( &path_for_task, request.model.clone(), - request.system_prompt.clone(), + system_prompt, request.num_ctx, request.temperature, request.top_p, diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index 3c5eb26..7298296 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -33,6 +33,12 @@ const BYTES_PER_TOKEN: usize = 4; /// characters) must NOT be counted as text bytes — doing so dwarfs the entire /// text budget and forces spurious truncation on every turn. const IMAGE_TOKENS_EACH: usize = 1300; +/// User prompt injected when the agentic loop exhausts its iteration budget +/// without producing a tool-free reply. Internal scaffolding only — it is +/// stripped from the transcript before persistence (see +/// [`push_synthetic_final_prompt`] / [`remove_synthetic_final_prompt`]). +const SYNTHETIC_FINAL_ANSWER_PROMPT: &str = + "Please write your final answer now without calling any more tools."; pub type ChatLockMap = Arc>>>>; @@ -457,9 +463,7 @@ impl InsightChatService { "Chat loop exhausted after {} iterations, requesting final answer", iterations_used ); - messages.push(ChatMessage::user( - "Please write your final answer now without calling any more tools.", - )); + let synthetic_idx = push_synthetic_final_prompt(&mut messages); let (final_response, prompt_tokens, eval_tokens) = backend .chat() .chat_with_tools(messages.clone(), vec![]) @@ -468,8 +472,15 @@ impl InsightChatService { last_eval_count = eval_tokens; final_content = final_response.content.clone(); messages.push(final_response); + // Drop the synthetic prompt before persistence — internal + // scaffolding only (mirrors both streaming variants). + remove_synthetic_final_prompt(&mut messages, synthetic_idx); } + // Strip any leaked reasoning block from the content we + // return / persist as the reply (the raw transcript keeps it). + let final_content = crate::ai::llm_client::strip_think_blocks(&final_content); + loop_cx.span().set_status(Status::Ok); // Drop the per-turn iteration-budget note from the system message @@ -1039,7 +1050,12 @@ impl InsightChatService { ); let tools = InsightGenerator::build_tool_definitions(gate_opts); - let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref()); + // Server-side persona resolution: explicit client system_prompt wins; + // else the active persona's stored prompt; else the neutral default. + let persona_prompt = self + .generator + .persona_system_prompt(req.user_id, &active_persona); + let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref(), persona_prompt); let system_content = build_bootstrap_system_message( &persona, &normalized, @@ -1263,10 +1279,7 @@ impl InsightChatService { // No-tools fallback if final_content.is_empty() { - let synthetic_idx = messages.len(); - messages.push(ChatMessage::user( - "Please write your final answer now without calling any more tools.", - )); + let synthetic_idx = push_synthetic_final_prompt(messages); let mut stream = backend .chat() .chat_with_tools_stream(messages.clone(), vec![]) @@ -1294,7 +1307,7 @@ impl InsightChatService { final_message.ok_or_else(|| anyhow!("final stream ended without a Done event"))?; final_content = final_response.content.clone(); messages.push(final_response); - messages.remove(synthetic_idx); + remove_synthetic_final_prompt(messages, synthetic_idx); } Ok(AgenticLoopOutcome { @@ -1302,7 +1315,9 @@ impl InsightChatService { iterations_used, last_prompt_eval_count, last_eval_count, - final_content, + // Strip any leaked reasoning block from the content the + // caller persists as title/summary (the raw transcript keeps it). + final_content: crate::ai::llm_client::strip_think_blocks(&final_content), cancelled: false, }) } @@ -1648,7 +1663,12 @@ impl InsightChatService { // get_sms_messages / reverse_geocode / get_personal_place_at // the args they need. In hybrid mode the visual description // belongs here for the same reason. - let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref()); + // Server-side persona resolution: explicit client system_prompt wins; + // else the active persona's stored prompt; else the neutral default. + let persona_prompt = self + .generator + .persona_system_prompt(req.user_id, &active_persona); + let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref(), persona_prompt); let system_content = build_bootstrap_system_message( &persona, &normalized, @@ -1866,10 +1886,7 @@ impl InsightChatService { // and load_history's user-turn handler doesn't reset // pending_tools at this position (wiping the prior tool // calls from the final assistant render). - let synthetic_idx = messages.len(); - messages.push(ChatMessage::user( - "Please write your final answer now without calling any more tools.", - )); + let synthetic_idx = push_synthetic_final_prompt(messages); let mut stream = backend .chat() .chat_with_tools_stream(messages.clone(), vec![]) @@ -1900,7 +1917,7 @@ impl InsightChatService { // Drop the synthetic prompt — internal scaffolding only. The // model's final_response (now at the end) was generated with // it in context and reads coherently without it on replay. - messages.remove(synthetic_idx); + remove_synthetic_final_prompt(messages, synthetic_idx); } Ok(AgenticLoopOutcome { @@ -1908,7 +1925,9 @@ impl InsightChatService { iterations_used, last_prompt_eval_count, last_eval_count, - final_content, + // Strip any leaked reasoning block from the content the + // caller persists as title/summary (the raw transcript keeps it). + final_content: crate::ai::llm_client::strip_think_blocks(&final_content), cancelled: false, }) } @@ -1921,15 +1940,21 @@ const BOOTSTRAP_DEFAULT_SYSTEM_PROMPT: &str = "You are a helpful AI assistant an Use the available tools to gather context and answer their questions \ in a conversational tone."; -/// Pick the system prompt for bootstrap. Trimmed-non-empty supplied wins; -/// otherwise fall back to [`BOOTSTRAP_DEFAULT_SYSTEM_PROMPT`]. Returns an -/// owned `String` because the bootstrap caller persists it on the new -/// insight row. -fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String { +/// Pick the system prompt for bootstrap. Precedence: trimmed-non-empty +/// `supplied` (the client's explicit `system_prompt`) wins; else +/// `persona_prompt` (the active persona's stored prompt, resolved +/// server-side from the persona store); else +/// [`BOOTSTRAP_DEFAULT_SYSTEM_PROMPT`]. Returns an owned `String` because +/// the bootstrap caller persists it on the new insight row. +fn resolve_bootstrap_system_prompt( + supplied: Option<&str>, + persona_prompt: Option, +) -> String { supplied .map(str::trim) .filter(|s| !s.is_empty()) .map(str::to_string) + .or_else(|| persona_prompt.filter(|s| !s.trim().is_empty())) .unwrap_or_else(|| BOOTSTRAP_DEFAULT_SYSTEM_PROMPT.to_string()) } @@ -2200,6 +2225,30 @@ fn restore_system_content(messages: &mut [ChatMessage], original: Option } } +/// Append the synthetic "write your final answer" user prompt, returning the +/// index the caller must later hand to [`remove_synthetic_final_prompt`]. +/// Used when the agentic loop exhausts its budget: the model gets one more +/// (tool-free) request, but the nudge itself must never persist — it would +/// render as a user bubble in the transcript and reset `load_history`'s +/// pending-tools tracking at that position. +fn push_synthetic_final_prompt(messages: &mut Vec) -> usize { + let idx = messages.len(); + messages.push(ChatMessage::user(SYNTHETIC_FINAL_ANSWER_PROMPT)); + idx +} + +/// Remove the synthetic prompt inserted by [`push_synthetic_final_prompt`]. +/// Defensive no-op when the message at `idx` isn't the synthetic prompt — +/// guards against index drift if the surrounding code is reordered. +fn remove_synthetic_final_prompt(messages: &mut Vec, idx: usize) { + if messages + .get(idx) + .is_some_and(|m| m.role == "user" && m.content == SYNTHETIC_FINAL_ANSWER_PROMPT) + { + messages.remove(idx); + } +} + /// Receipt produced by [`apply_system_prompt_override`] so the caller can /// undo the override before persistence. Two variants because we either /// replaced an existing system message (need its original content) or @@ -2643,26 +2692,26 @@ mod tests { #[test] fn bootstrap_system_prompt_falls_back_to_default_for_none() { - let out = resolve_bootstrap_system_prompt(None); + let out = resolve_bootstrap_system_prompt(None, None); assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT); } #[test] fn bootstrap_system_prompt_falls_back_to_default_for_empty_string() { // Apollo currently sends `''` when no persona is selected. - let out = resolve_bootstrap_system_prompt(Some("")); + let out = resolve_bootstrap_system_prompt(Some(""), None); assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT); } #[test] fn bootstrap_system_prompt_falls_back_to_default_for_whitespace() { - let out = resolve_bootstrap_system_prompt(Some(" \n\t ")); + let out = resolve_bootstrap_system_prompt(Some(" \n\t "), None); assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT); } #[test] fn bootstrap_system_prompt_uses_supplied_when_non_empty() { - let out = resolve_bootstrap_system_prompt(Some("you are a journal")); + let out = resolve_bootstrap_system_prompt(Some("you are a journal"), None); assert_eq!(out, "you are a journal"); } @@ -2671,10 +2720,76 @@ mod tests { // Trim only happens at the edges — interior newlines and spacing // (which Apollo's persona uses for tool listings) must survive. let prompt = "line one\nline two\n bullet"; - let out = resolve_bootstrap_system_prompt(Some(prompt)); + let out = resolve_bootstrap_system_prompt(Some(prompt), None); assert_eq!(out, prompt); } + #[test] + fn bootstrap_system_prompt_explicit_wins_over_persona_store() { + let out = resolve_bootstrap_system_prompt( + Some("explicit prompt"), + Some("stored persona prompt".to_string()), + ); + assert_eq!(out, "explicit prompt"); + } + + #[test] + fn bootstrap_system_prompt_uses_persona_store_when_no_explicit() { + // Request carried persona_id but no system_prompt — the persona's + // stored prompt must be used, not the neutral default. + let out = resolve_bootstrap_system_prompt(None, Some("stored persona prompt".to_string())); + assert_eq!(out, "stored persona prompt"); + + // Empty explicit prompt behaves like None. + let out = + resolve_bootstrap_system_prompt(Some(""), Some("stored persona prompt".to_string())); + assert_eq!(out, "stored persona prompt"); + } + + #[test] + fn bootstrap_system_prompt_blank_persona_prompt_falls_to_default() { + let out = resolve_bootstrap_system_prompt(None, Some(" ".to_string())); + assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT); + } + + // ── Synthetic final-answer prompt scaffolding ────────────────────── + + #[test] + fn synthetic_final_prompt_round_trip_leaves_no_scaffolding() { + // Exhausted-loop fallback: nudge pushed, model reply appended, nudge + // removed — the persisted transcript must contain the reply but not + // the synthetic user prompt (all three loop variants rely on this). + let mut msgs = vec![ + ChatMessage::system("sys"), + ChatMessage::user("q"), + assistant_with_tool_call("lookup"), + ChatMessage::tool_result("data"), + ]; + let idx = push_synthetic_final_prompt(&mut msgs); + assert_eq!(msgs[idx].content, SYNTHETIC_FINAL_ANSWER_PROMPT); + + msgs.push(assistant_text("final answer")); + remove_synthetic_final_prompt(&mut msgs, idx); + + assert_eq!(msgs.len(), 5); + assert!( + msgs.iter() + .all(|m| m.content != SYNTHETIC_FINAL_ANSWER_PROMPT), + "synthetic prompt must not persist" + ); + assert_eq!(msgs.last().unwrap().content, "final answer"); + } + + #[test] + fn remove_synthetic_final_prompt_is_noop_on_index_mismatch() { + // Defensive guard: if the message at idx isn't the synthetic prompt + // (index drift), nothing is removed. + let mut msgs = vec![ChatMessage::user("q"), assistant_text("a")]; + remove_synthetic_final_prompt(&mut msgs, 0); + remove_synthetic_final_prompt(&mut msgs, 5); + assert_eq!(msgs.len(), 2); + } + #[test] fn bootstrap_backend_defaults_to_local_when_none() { let out = resolve_bootstrap_backend(None).unwrap();