Resolve persona prompts server-side; drop synthetic prompt in chat_turn
A request carrying persona_id but no system_prompt used to fall back to
the neutral default voice. Both agentic generation
(generate_agentic_insight_handler) and chat bootstrap now resolve the
persona's stored prompt from the persona store, with precedence:
explicit non-blank client system_prompt > persona store lookup >
existing neutral default. The "default" persona id follows the same
rule: its stored prompt is used if the store has a row for it, and the
neutral default applies otherwise. Resolution happens at the handler /
bootstrap entry, where the DAO is reachable; internals are unchanged.
resolve_bootstrap_system_prompt now takes the resolved persona prompt as
a second argument, and precedence tests are included.
Also in insight_chat:
- Sync chat_turn no longer persists the synthetic "Please write your
final answer now without calling any more tools." user message that is
pushed on iteration exhaustion. The synthetic_idx pattern used by both
streaming variants is extracted into push_synthetic_final_prompt /
remove_synthetic_final_prompt (the removal is a defensive no-op on
index drift) and applied to all three loops; a round-trip test is
included.
- Strip leaked <think> blocks from the final content persisted as the
reply in chat_turn and both streaming AgenticLoopOutcomes (mid-stream
TextDeltas are untouched; the raw transcript keeps the block).
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+16
-1
@@ -809,6 +809,21 @@ pub async fn generate_agentic_insight_handler(
|
|||||||
.filter(|s| !s.trim().is_empty())
|
.filter(|s| !s.trim().is_empty())
|
||||||
.unwrap_or_else(|| "default".to_string());
|
.unwrap_or_else(|| "default".to_string());
|
||||||
|
|
||||||
|
// Server-side persona resolution: an explicit client `system_prompt`
|
||||||
|
// wins; otherwise the persona's stored prompt from the persona store;
|
||||||
|
// otherwise None and `build_system_content` applies its neutral
|
||||||
|
// default. Without the lookup, a request carrying only `persona_id`
|
||||||
|
// silently generated in the default voice.
|
||||||
|
let system_prompt = request
|
||||||
|
.system_prompt
|
||||||
|
.clone()
|
||||||
|
.filter(|s| !s.trim().is_empty())
|
||||||
|
.or_else(|| {
|
||||||
|
app_state
|
||||||
|
.insight_generator
|
||||||
|
.persona_system_prompt(user_id, &persona_id)
|
||||||
|
});
|
||||||
|
|
||||||
let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
|
let max_iterations: usize = std::env::var("AGENTIC_MAX_ITERATIONS")
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|v| v.parse().ok())
|
.and_then(|v| v.parse().ok())
|
||||||
@@ -834,7 +849,7 @@ pub async fn generate_agentic_insight_handler(
|
|||||||
generator_for_task.generate_agentic_insight_for_photo(
|
generator_for_task.generate_agentic_insight_for_photo(
|
||||||
&path_for_task,
|
&path_for_task,
|
||||||
request.model.clone(),
|
request.model.clone(),
|
||||||
request.system_prompt.clone(),
|
system_prompt,
|
||||||
request.num_ctx,
|
request.num_ctx,
|
||||||
request.temperature,
|
request.temperature,
|
||||||
request.top_p,
|
request.top_p,
|
||||||
|
|||||||
+142
-27
@@ -33,6 +33,12 @@ const BYTES_PER_TOKEN: usize = 4;
|
|||||||
/// characters) must NOT be counted as text bytes — doing so dwarfs the entire
|
/// characters) must NOT be counted as text bytes — doing so dwarfs the entire
|
||||||
/// text budget and forces spurious truncation on every turn.
|
/// text budget and forces spurious truncation on every turn.
|
||||||
const IMAGE_TOKENS_EACH: usize = 1300;
|
const IMAGE_TOKENS_EACH: usize = 1300;
|
||||||
|
/// User prompt injected when the agentic loop exhausts its iteration budget
|
||||||
|
/// without producing a tool-free reply. Internal scaffolding only — it is
|
||||||
|
/// stripped from the transcript before persistence (see
|
||||||
|
/// [`push_synthetic_final_prompt`] / [`remove_synthetic_final_prompt`]).
|
||||||
|
const SYNTHETIC_FINAL_ANSWER_PROMPT: &str =
|
||||||
|
"Please write your final answer now without calling any more tools.";
|
||||||
|
|
||||||
pub type ChatLockMap = Arc<TokioMutex<HashMap<(i32, String), Arc<TokioMutex<()>>>>>;
|
pub type ChatLockMap = Arc<TokioMutex<HashMap<(i32, String), Arc<TokioMutex<()>>>>>;
|
||||||
|
|
||||||
@@ -457,9 +463,7 @@ impl InsightChatService {
|
|||||||
"Chat loop exhausted after {} iterations, requesting final answer",
|
"Chat loop exhausted after {} iterations, requesting final answer",
|
||||||
iterations_used
|
iterations_used
|
||||||
);
|
);
|
||||||
messages.push(ChatMessage::user(
|
let synthetic_idx = push_synthetic_final_prompt(&mut messages);
|
||||||
"Please write your final answer now without calling any more tools.",
|
|
||||||
));
|
|
||||||
let (final_response, prompt_tokens, eval_tokens) = backend
|
let (final_response, prompt_tokens, eval_tokens) = backend
|
||||||
.chat()
|
.chat()
|
||||||
.chat_with_tools(messages.clone(), vec![])
|
.chat_with_tools(messages.clone(), vec![])
|
||||||
@@ -468,8 +472,15 @@ impl InsightChatService {
|
|||||||
last_eval_count = eval_tokens;
|
last_eval_count = eval_tokens;
|
||||||
final_content = final_response.content.clone();
|
final_content = final_response.content.clone();
|
||||||
messages.push(final_response);
|
messages.push(final_response);
|
||||||
|
// Drop the synthetic prompt before persistence — internal
|
||||||
|
// scaffolding only (mirrors both streaming variants).
|
||||||
|
remove_synthetic_final_prompt(&mut messages, synthetic_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Strip any leaked <think> reasoning block from the content we
|
||||||
|
// return / persist as the reply (the raw transcript keeps it).
|
||||||
|
let final_content = crate::ai::llm_client::strip_think_blocks(&final_content);
|
||||||
|
|
||||||
loop_cx.span().set_status(Status::Ok);
|
loop_cx.span().set_status(Status::Ok);
|
||||||
|
|
||||||
// Drop the per-turn iteration-budget note from the system message
|
// Drop the per-turn iteration-budget note from the system message
|
||||||
@@ -1039,7 +1050,12 @@ impl InsightChatService {
|
|||||||
);
|
);
|
||||||
let tools = InsightGenerator::build_tool_definitions(gate_opts);
|
let tools = InsightGenerator::build_tool_definitions(gate_opts);
|
||||||
|
|
||||||
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
|
// Server-side persona resolution: explicit client system_prompt wins;
|
||||||
|
// else the active persona's stored prompt; else the neutral default.
|
||||||
|
let persona_prompt = self
|
||||||
|
.generator
|
||||||
|
.persona_system_prompt(req.user_id, &active_persona);
|
||||||
|
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref(), persona_prompt);
|
||||||
let system_content = build_bootstrap_system_message(
|
let system_content = build_bootstrap_system_message(
|
||||||
&persona,
|
&persona,
|
||||||
&normalized,
|
&normalized,
|
||||||
@@ -1263,10 +1279,7 @@ impl InsightChatService {
|
|||||||
|
|
||||||
// No-tools fallback
|
// No-tools fallback
|
||||||
if final_content.is_empty() {
|
if final_content.is_empty() {
|
||||||
let synthetic_idx = messages.len();
|
let synthetic_idx = push_synthetic_final_prompt(messages);
|
||||||
messages.push(ChatMessage::user(
|
|
||||||
"Please write your final answer now without calling any more tools.",
|
|
||||||
));
|
|
||||||
let mut stream = backend
|
let mut stream = backend
|
||||||
.chat()
|
.chat()
|
||||||
.chat_with_tools_stream(messages.clone(), vec![])
|
.chat_with_tools_stream(messages.clone(), vec![])
|
||||||
@@ -1294,7 +1307,7 @@ impl InsightChatService {
|
|||||||
final_message.ok_or_else(|| anyhow!("final stream ended without a Done event"))?;
|
final_message.ok_or_else(|| anyhow!("final stream ended without a Done event"))?;
|
||||||
final_content = final_response.content.clone();
|
final_content = final_response.content.clone();
|
||||||
messages.push(final_response);
|
messages.push(final_response);
|
||||||
messages.remove(synthetic_idx);
|
remove_synthetic_final_prompt(messages, synthetic_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(AgenticLoopOutcome {
|
Ok(AgenticLoopOutcome {
|
||||||
@@ -1302,7 +1315,9 @@ impl InsightChatService {
|
|||||||
iterations_used,
|
iterations_used,
|
||||||
last_prompt_eval_count,
|
last_prompt_eval_count,
|
||||||
last_eval_count,
|
last_eval_count,
|
||||||
final_content,
|
// Strip any leaked <think> reasoning block from the content the
|
||||||
|
// caller persists as title/summary (the raw transcript keeps it).
|
||||||
|
final_content: crate::ai::llm_client::strip_think_blocks(&final_content),
|
||||||
cancelled: false,
|
cancelled: false,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -1648,7 +1663,12 @@ impl InsightChatService {
|
|||||||
// get_sms_messages / reverse_geocode / get_personal_place_at
|
// get_sms_messages / reverse_geocode / get_personal_place_at
|
||||||
// the args they need. In hybrid mode the visual description
|
// the args they need. In hybrid mode the visual description
|
||||||
// belongs here for the same reason.
|
// belongs here for the same reason.
|
||||||
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
|
// Server-side persona resolution: explicit client system_prompt wins;
|
||||||
|
// else the active persona's stored prompt; else the neutral default.
|
||||||
|
let persona_prompt = self
|
||||||
|
.generator
|
||||||
|
.persona_system_prompt(req.user_id, &active_persona);
|
||||||
|
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref(), persona_prompt);
|
||||||
let system_content = build_bootstrap_system_message(
|
let system_content = build_bootstrap_system_message(
|
||||||
&persona,
|
&persona,
|
||||||
&normalized,
|
&normalized,
|
||||||
@@ -1866,10 +1886,7 @@ impl InsightChatService {
|
|||||||
// and load_history's user-turn handler doesn't reset
|
// and load_history's user-turn handler doesn't reset
|
||||||
// pending_tools at this position (wiping the prior tool
|
// pending_tools at this position (wiping the prior tool
|
||||||
// calls from the final assistant render).
|
// calls from the final assistant render).
|
||||||
let synthetic_idx = messages.len();
|
let synthetic_idx = push_synthetic_final_prompt(messages);
|
||||||
messages.push(ChatMessage::user(
|
|
||||||
"Please write your final answer now without calling any more tools.",
|
|
||||||
));
|
|
||||||
let mut stream = backend
|
let mut stream = backend
|
||||||
.chat()
|
.chat()
|
||||||
.chat_with_tools_stream(messages.clone(), vec![])
|
.chat_with_tools_stream(messages.clone(), vec![])
|
||||||
@@ -1900,7 +1917,7 @@ impl InsightChatService {
|
|||||||
// Drop the synthetic prompt — internal scaffolding only. The
|
// Drop the synthetic prompt — internal scaffolding only. The
|
||||||
// model's final_response (now at the end) was generated with
|
// model's final_response (now at the end) was generated with
|
||||||
// it in context and reads coherently without it on replay.
|
// it in context and reads coherently without it on replay.
|
||||||
messages.remove(synthetic_idx);
|
remove_synthetic_final_prompt(messages, synthetic_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(AgenticLoopOutcome {
|
Ok(AgenticLoopOutcome {
|
||||||
@@ -1908,7 +1925,9 @@ impl InsightChatService {
|
|||||||
iterations_used,
|
iterations_used,
|
||||||
last_prompt_eval_count,
|
last_prompt_eval_count,
|
||||||
last_eval_count,
|
last_eval_count,
|
||||||
final_content,
|
// Strip any leaked <think> reasoning block from the content the
|
||||||
|
// caller persists as title/summary (the raw transcript keeps it).
|
||||||
|
final_content: crate::ai::llm_client::strip_think_blocks(&final_content),
|
||||||
cancelled: false,
|
cancelled: false,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -1921,15 +1940,21 @@ const BOOTSTRAP_DEFAULT_SYSTEM_PROMPT: &str = "You are a helpful AI assistant an
|
|||||||
Use the available tools to gather context and answer their questions \
|
Use the available tools to gather context and answer their questions \
|
||||||
in a conversational tone.";
|
in a conversational tone.";
|
||||||
|
|
||||||
/// Pick the system prompt for bootstrap. Trimmed-non-empty supplied wins;
|
/// Pick the system prompt for bootstrap. Precedence: trimmed-non-empty
|
||||||
/// otherwise fall back to [`BOOTSTRAP_DEFAULT_SYSTEM_PROMPT`]. Returns an
|
/// `supplied` (the client's explicit `system_prompt`) wins; else
|
||||||
/// owned `String` because the bootstrap caller persists it on the new
|
/// `persona_prompt` (the active persona's stored prompt, resolved
|
||||||
/// insight row.
|
/// server-side from the persona store); else
|
||||||
fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String {
|
/// [`BOOTSTRAP_DEFAULT_SYSTEM_PROMPT`]. Returns an owned `String` because
|
||||||
|
/// the bootstrap caller persists it on the new insight row.
|
||||||
|
fn resolve_bootstrap_system_prompt(
|
||||||
|
supplied: Option<&str>,
|
||||||
|
persona_prompt: Option<String>,
|
||||||
|
) -> String {
|
||||||
supplied
|
supplied
|
||||||
.map(str::trim)
|
.map(str::trim)
|
||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
.map(str::to_string)
|
.map(str::to_string)
|
||||||
|
.or_else(|| persona_prompt.filter(|s| !s.trim().is_empty()))
|
||||||
.unwrap_or_else(|| BOOTSTRAP_DEFAULT_SYSTEM_PROMPT.to_string())
|
.unwrap_or_else(|| BOOTSTRAP_DEFAULT_SYSTEM_PROMPT.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2200,6 +2225,30 @@ fn restore_system_content(messages: &mut [ChatMessage], original: Option<String>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Append the synthetic "write your final answer" user prompt, returning the
|
||||||
|
/// index the caller must later hand to [`remove_synthetic_final_prompt`].
|
||||||
|
/// Used when the agentic loop exhausts its budget: the model gets one more
|
||||||
|
/// (tool-free) request, but the nudge itself must never persist — it would
|
||||||
|
/// render as a user bubble in the transcript and reset `load_history`'s
|
||||||
|
/// pending-tools tracking at that position.
|
||||||
|
fn push_synthetic_final_prompt(messages: &mut Vec<ChatMessage>) -> usize {
|
||||||
|
let idx = messages.len();
|
||||||
|
messages.push(ChatMessage::user(SYNTHETIC_FINAL_ANSWER_PROMPT));
|
||||||
|
idx
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove the synthetic prompt inserted by [`push_synthetic_final_prompt`].
|
||||||
|
/// Defensive no-op when the message at `idx` isn't the synthetic prompt —
|
||||||
|
/// guards against index drift if the surrounding code is reordered.
|
||||||
|
fn remove_synthetic_final_prompt(messages: &mut Vec<ChatMessage>, idx: usize) {
|
||||||
|
if messages
|
||||||
|
.get(idx)
|
||||||
|
.is_some_and(|m| m.role == "user" && m.content == SYNTHETIC_FINAL_ANSWER_PROMPT)
|
||||||
|
{
|
||||||
|
messages.remove(idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Receipt produced by [`apply_system_prompt_override`] so the caller can
|
/// Receipt produced by [`apply_system_prompt_override`] so the caller can
|
||||||
/// undo the override before persistence. Two variants because we either
|
/// undo the override before persistence. Two variants because we either
|
||||||
/// replaced an existing system message (need its original content) or
|
/// replaced an existing system message (need its original content) or
|
||||||
@@ -2643,26 +2692,26 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bootstrap_system_prompt_falls_back_to_default_for_none() {
|
fn bootstrap_system_prompt_falls_back_to_default_for_none() {
|
||||||
let out = resolve_bootstrap_system_prompt(None);
|
let out = resolve_bootstrap_system_prompt(None, None);
|
||||||
assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
|
assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bootstrap_system_prompt_falls_back_to_default_for_empty_string() {
|
fn bootstrap_system_prompt_falls_back_to_default_for_empty_string() {
|
||||||
// Apollo currently sends `''` when no persona is selected.
|
// Apollo currently sends `''` when no persona is selected.
|
||||||
let out = resolve_bootstrap_system_prompt(Some(""));
|
let out = resolve_bootstrap_system_prompt(Some(""), None);
|
||||||
assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
|
assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bootstrap_system_prompt_falls_back_to_default_for_whitespace() {
|
fn bootstrap_system_prompt_falls_back_to_default_for_whitespace() {
|
||||||
let out = resolve_bootstrap_system_prompt(Some(" \n\t "));
|
let out = resolve_bootstrap_system_prompt(Some(" \n\t "), None);
|
||||||
assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
|
assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bootstrap_system_prompt_uses_supplied_when_non_empty() {
|
fn bootstrap_system_prompt_uses_supplied_when_non_empty() {
|
||||||
let out = resolve_bootstrap_system_prompt(Some("you are a journal"));
|
let out = resolve_bootstrap_system_prompt(Some("you are a journal"), None);
|
||||||
assert_eq!(out, "you are a journal");
|
assert_eq!(out, "you are a journal");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2671,10 +2720,76 @@ mod tests {
|
|||||||
// Trim only happens at the edges — interior newlines and spacing
|
// Trim only happens at the edges — interior newlines and spacing
|
||||||
// (which Apollo's persona uses for tool listings) must survive.
|
// (which Apollo's persona uses for tool listings) must survive.
|
||||||
let prompt = "line one\nline two\n bullet";
|
let prompt = "line one\nline two\n bullet";
|
||||||
let out = resolve_bootstrap_system_prompt(Some(prompt));
|
let out = resolve_bootstrap_system_prompt(Some(prompt), None);
|
||||||
assert_eq!(out, prompt);
|
assert_eq!(out, prompt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bootstrap_system_prompt_explicit_wins_over_persona_store() {
|
||||||
|
let out = resolve_bootstrap_system_prompt(
|
||||||
|
Some("explicit prompt"),
|
||||||
|
Some("stored persona prompt".to_string()),
|
||||||
|
);
|
||||||
|
assert_eq!(out, "explicit prompt");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bootstrap_system_prompt_uses_persona_store_when_no_explicit() {
|
||||||
|
// Request carried persona_id but no system_prompt — the persona's
|
||||||
|
// stored prompt must be used, not the neutral default.
|
||||||
|
let out = resolve_bootstrap_system_prompt(None, Some("stored persona prompt".to_string()));
|
||||||
|
assert_eq!(out, "stored persona prompt");
|
||||||
|
|
||||||
|
// Empty explicit prompt behaves like None.
|
||||||
|
let out =
|
||||||
|
resolve_bootstrap_system_prompt(Some(""), Some("stored persona prompt".to_string()));
|
||||||
|
assert_eq!(out, "stored persona prompt");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bootstrap_system_prompt_blank_persona_prompt_falls_to_default() {
|
||||||
|
let out = resolve_bootstrap_system_prompt(None, Some(" ".to_string()));
|
||||||
|
assert_eq!(out, BOOTSTRAP_DEFAULT_SYSTEM_PROMPT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Synthetic final-answer prompt scaffolding ──────────────────────
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn synthetic_final_prompt_round_trip_leaves_no_scaffolding() {
|
||||||
|
// Exhausted-loop fallback: nudge pushed, model reply appended, nudge
|
||||||
|
// removed — the persisted transcript must contain the reply but not
|
||||||
|
// the synthetic user prompt (all three loop variants rely on this).
|
||||||
|
let mut msgs = vec![
|
||||||
|
ChatMessage::system("sys"),
|
||||||
|
ChatMessage::user("q"),
|
||||||
|
assistant_with_tool_call("lookup"),
|
||||||
|
ChatMessage::tool_result("data"),
|
||||||
|
];
|
||||||
|
let idx = push_synthetic_final_prompt(&mut msgs);
|
||||||
|
assert_eq!(msgs[idx].content, SYNTHETIC_FINAL_ANSWER_PROMPT);
|
||||||
|
|
||||||
|
msgs.push(assistant_text("final answer"));
|
||||||
|
remove_synthetic_final_prompt(&mut msgs, idx);
|
||||||
|
|
||||||
|
assert_eq!(msgs.len(), 5);
|
||||||
|
assert!(
|
||||||
|
msgs.iter()
|
||||||
|
.all(|m| m.content != SYNTHETIC_FINAL_ANSWER_PROMPT),
|
||||||
|
"synthetic prompt must not persist"
|
||||||
|
);
|
||||||
|
assert_eq!(msgs.last().unwrap().content, "final answer");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn remove_synthetic_final_prompt_is_noop_on_index_mismatch() {
|
||||||
|
// Defensive guard: if the message at idx isn't the synthetic prompt
|
||||||
|
// (index drift), nothing is removed.
|
||||||
|
let mut msgs = vec![ChatMessage::user("q"), assistant_text("a")];
|
||||||
|
remove_synthetic_final_prompt(&mut msgs, 0);
|
||||||
|
remove_synthetic_final_prompt(&mut msgs, 5);
|
||||||
|
assert_eq!(msgs.len(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bootstrap_backend_defaults_to_local_when_none() {
|
fn bootstrap_backend_defaults_to_local_when_none() {
|
||||||
let out = resolve_bootstrap_backend(None).unwrap();
|
let out = resolve_bootstrap_backend(None).unwrap();
|
||||||
|
|||||||
Reference in New Issue
Block a user