insight-chat: per-turn system_prompt override on chat continuation

Append mode: applied ephemerally — original system message restored
before persistence so re-opens see the baked persona. Amend mode:
override stays in place and becomes the new insight row's system
message. Pattern mirrors annotate_system_with_budget.

Adds system_prompt field on both ChatTurnHttpRequest and ChatTurnRequest;
plumbs through chat_turn and chat_turn_stream identically.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-07 14:34:08 -04:00
parent 177187f6a2
commit faa289882f
2 changed files with 192 additions and 0 deletions

View File

@@ -640,6 +640,10 @@ pub struct ChatTurnHttpRequest {
pub min_p: Option<f32>,
#[serde(default)]
pub max_iterations: Option<usize>,
/// Per-turn system-prompt override. Ephemeral in append mode,
/// persisted in amend mode. See ChatTurnRequest for semantics.
#[serde(default)]
pub system_prompt: Option<String>,
#[serde(default)]
pub amend: bool,
}
@@ -695,6 +699,7 @@ pub async fn chat_turn_handler(
top_k: request.top_k,
min_p: request.min_p,
max_iterations: request.max_iterations,
system_prompt: request.system_prompt.clone(),
amend: request.amend,
};
@@ -909,6 +914,7 @@ pub async fn chat_stream_handler(
top_k: request.top_k,
min_p: request.min_p,
max_iterations: request.max_iterations,
system_prompt: request.system_prompt.clone(),
amend: request.amend,
};

View File

@@ -45,6 +45,11 @@ pub struct ChatTurnRequest {
pub top_k: Option<i32>,
pub min_p: Option<f32>,
pub max_iterations: Option<usize>,
/// Per-turn system-prompt override. In append mode (default), applied
/// ephemerally — original system message restored before persistence.
/// In amend mode, persisted into the new insight row's system message.
/// None / empty = no change.
pub system_prompt: Option<String>,
/// When true, write a new insight row (regenerating title) instead of
/// updating training_messages on the existing row.
pub amend: bool,
@@ -385,6 +390,13 @@ impl InsightChatService {
// 7. Append the new user turn.
messages.push(ChatMessage::user(req.user_message.clone()));
// Apply per-turn system-prompt override BEFORE the budget annotation
// so the budget note attaches to the override, not the original.
// The stash is consumed below before persistence (append mode) or
// dropped (amend mode, where the override stays in place).
let override_stash =
apply_system_prompt_override(&mut messages, req.system_prompt.as_deref());
// Temporarily annotate the system message with this turn's iteration
// budget so the model knows how many tool-calling rounds it has. We
// restore the original content before persistence so the note doesn't
@@ -481,6 +493,14 @@ impl InsightChatService {
// before we persist so it doesn't snowball on each subsequent turn.
restore_system_content(&mut messages, original_system_content);
// Append mode: undo the per-turn system-prompt override so the
// stored transcript keeps the original baked persona. Amend mode:
// keep the override in place — it becomes the new insight row's
// system message.
if !req.amend {
restore_system_prompt_override(&mut messages, override_stash);
}
// 9. Persist. Append mode rewrites the JSON blob in place; amend
// mode regenerates the title and inserts a new insight row,
// relying on store_insight to flip prior rows' is_current=false.
@@ -812,6 +832,10 @@ impl InsightChatService {
messages.push(ChatMessage::user(req.user_message.clone()));
// Mirror chat_turn: per-turn override goes on first, budget note next.
let override_stash =
apply_system_prompt_override(&mut messages, req.system_prompt.as_deref());
let original_system_content = annotate_system_with_budget(&mut messages, max_iterations);
let mut tool_calls_made = 0usize;
@@ -946,6 +970,13 @@ impl InsightChatService {
// before we persist so it doesn't snowball on each subsequent turn.
restore_system_content(&mut messages, original_system_content);
// Append mode: undo the per-turn system-prompt override (mirrors
// chat_turn). Amend mode: keep the override — it becomes the new
// insight row's system message.
if !req.amend {
restore_system_prompt_override(&mut messages, override_stash);
}
// Persist.
let json = serde_json::to_string(&messages)
.map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
@@ -1153,6 +1184,64 @@ fn restore_system_content(messages: &mut [ChatMessage], original: Option<String>
}
}
/// Receipt produced by [`apply_system_prompt_override`] so the caller can
/// undo the override before persistence. Two variants because we either
/// replaced an existing system message (need its original content) or
/// prepended a synthetic one (need to pop it).
///
/// Hand the receipt to [`restore_system_prompt_override`] to reverse the
/// change.
#[derive(Debug)]
pub(crate) enum SystemPromptStash {
/// The first message was already a system message and its content was
/// swapped for the override; `original` is the displaced content, kept
/// so it can be written back.
Replaced { original: String },
/// The transcript did not start with a system message, so one carrying
/// the override was inserted at index 0; undoing removes that message.
Prepended,
}
/// Apply a per-turn `system_prompt` override to `messages` so the model
/// sees the requested persona for this turn.
///
/// The override text is trimmed. If the first message has role `"system"`
/// its content is swapped for the override; otherwise a new system message
/// carrying the override is inserted at index 0.
///
/// # Arguments
///
/// * `messages` - transcript to modify in place.
/// * `override_prompt` - requested system prompt; `None`, empty, or
///   whitespace-only means "no override".
///
/// # Returns
///
/// `None` when no override was applied (`messages` is left untouched).
/// Otherwise a [`SystemPromptStash`] that must be passed to
/// [`restore_system_prompt_override`] before persisting the transcript —
/// without that step, append-mode chat would silently rewrite the stored
/// persona.
#[must_use = "pass the stash to `restore_system_prompt_override` before persisting an append-mode transcript"]
pub(crate) fn apply_system_prompt_override(
    messages: &mut Vec<ChatMessage>,
    override_prompt: Option<&str>,
) -> Option<SystemPromptStash> {
    // Trim once; a blank override is treated exactly like an absent one.
    let prompt = override_prompt
        .map(str::trim)
        .filter(|text| !text.is_empty())?
        .to_owned();

    match messages.first_mut() {
        // Existing system message: swap content and keep what was displaced.
        Some(first) if first.role == "system" => {
            let original = std::mem::replace(&mut first.content, prompt);
            Some(SystemPromptStash::Replaced { original })
        }
        // Empty transcript or a non-system first message: add a synthetic
        // system message in front.
        _ => {
            messages.insert(0, ChatMessage::system(prompt));
            Some(SystemPromptStash::Prepended)
        }
    }
}
/// Reverse a change made by [`apply_system_prompt_override`], using the
/// stash that call returned.
///
/// * `None` — nothing was applied, so nothing is touched.
/// * `Replaced` — the saved content is written back into the first message.
/// * `Prepended` — the first message is removed.
///
/// In both non-`None` cases the transcript is only modified when its first
/// message currently has role `"system"`; otherwise the call leaves
/// `messages` as it is.
pub(crate) fn restore_system_prompt_override(
    messages: &mut Vec<ChatMessage>,
    stash: Option<SystemPromptStash>,
) {
    match stash {
        None => {}
        Some(SystemPromptStash::Replaced { original }) => {
            let head = messages.first_mut().filter(|msg| msg.role == "system");
            if let Some(system_msg) = head {
                system_msg.content = original;
            }
        }
        Some(SystemPromptStash::Prepended) => {
            let leads_with_system = matches!(messages.first(), Some(msg) if msg.role == "system");
            if leads_with_system {
                messages.remove(0);
            }
        }
    }
}
/// View returned to clients for chat-UI rendering.
#[derive(Debug)]
pub struct HistoryView {
@@ -1386,4 +1475,101 @@ mod tests {
let cut = find_raw_cut(&msgs, 2).expect("boundary cut should succeed");
assert_eq!(cut, msgs.len());
}
/// An override on a transcript that already starts with a system message
/// swaps that message's content and hands back the displaced text.
#[test]
fn apply_override_replaces_existing_system_message() {
    let mut history = vec![
        ChatMessage::system("original persona"),
        ChatMessage::user("hi"),
    ];

    let receipt = apply_system_prompt_override(&mut history, Some("new persona"));

    // The leading system message now carries the override text...
    assert_eq!(history[0].content, "new persona");
    // ...and the receipt remembers what it replaced.
    if let Some(SystemPromptStash::Replaced { original }) = &receipt {
        assert_eq!(original, "original persona");
    } else {
        panic!("expected Replaced, got {:?}", receipt);
    }
}
/// With no system message in the transcript, the override is inserted as
/// a new first message and the receipt is `Prepended`.
#[test]
fn apply_override_prepends_synthetic_when_missing() {
    let mut history = vec![ChatMessage::user("hi")];

    let receipt = apply_system_prompt_override(&mut history, Some("new persona"));

    assert_eq!(history.len(), 2);
    let head = &history[0];
    assert_eq!(head.role, "system");
    assert_eq!(head.content, "new persona");
    assert!(matches!(receipt, Some(SystemPromptStash::Prepended)));
}
/// Passing no override yields no receipt and leaves the system message alone.
#[test]
fn apply_override_no_op_when_none() {
    let mut history = vec![ChatMessage::system("sys"), ChatMessage::user("hi")];

    let receipt = apply_system_prompt_override(&mut history, None);

    assert!(receipt.is_none());
    assert_eq!(history[0].content, "sys");
}
/// Blank overrides are ignored. `apply_system_prompt_override` trims its
/// input before checking for emptiness, so whitespace-only strings must
/// behave exactly like `""`: no receipt, no change to the transcript.
#[test]
fn apply_override_no_op_for_empty_string() {
    for blank in ["", "   ", "\n\t "] {
        let mut msgs = vec![ChatMessage::system("sys")];

        let stash = apply_system_prompt_override(&mut msgs, Some(blank));

        assert!(stash.is_none(), "override {:?} should be ignored", blank);
        // Neither the content nor the message count may change.
        assert_eq!(msgs.len(), 1);
        assert_eq!(msgs[0].content, "sys");
    }
}
/// Restoring a `Replaced` receipt writes the saved content back into the
/// leading system message without changing the message count.
#[test]
fn restore_override_replaces_back() {
    let mut history = vec![ChatMessage::system("new"), ChatMessage::user("hi")];
    let receipt = SystemPromptStash::Replaced {
        original: String::from("original"),
    };

    restore_system_prompt_override(&mut history, Some(receipt));

    assert_eq!(history[0].content, "original");
    assert_eq!(history.len(), 2);
}
/// Restoring a `Prepended` receipt removes the leading system message,
/// leaving the user message first.
#[test]
fn restore_override_pops_synthetic() {
    let mut history = vec![ChatMessage::system("new"), ChatMessage::user("hi")];
    let receipt = Some(SystemPromptStash::Prepended);

    restore_system_prompt_override(&mut history, receipt);

    assert_eq!(history.len(), 1);
    assert_eq!(history[0].role, "user");
}
/// Apply followed by restore on a transcript with a system message puts
/// the original content back and leaves the other messages in place.
#[test]
fn override_round_trip_preserves_original_system_message() {
    let mut history = vec![
        ChatMessage::system("original persona"),
        ChatMessage::user("first user"),
        assistant_text("first reply"),
    ];

    // Forward: the override takes over the system slot.
    let receipt = apply_system_prompt_override(&mut history, Some("ephemeral persona"));
    assert_eq!(history[0].content, "ephemeral persona");

    // Backward: the receipt brings the original content back.
    restore_system_prompt_override(&mut history, receipt);
    assert_eq!(history[0].content, "original persona");

    // Message count and the roles after the system slot are as before.
    assert_eq!(history.len(), 3);
    assert_eq!(history[1].role, "user");
    assert_eq!(history[2].role, "assistant");
}
/// Apply followed by restore on a transcript without a system message
/// removes the synthetic message that apply inserted.
#[test]
fn override_with_synthetic_round_trip_drops_extra_message() {
    let mut history = vec![ChatMessage::user("first user")];

    // Forward: a system message appears in front of the user turn.
    let receipt = apply_system_prompt_override(&mut history, Some("ephemeral"));
    assert_eq!(history.len(), 2);
    assert_eq!(history[0].role, "system");

    // Backward: only the user turn is left.
    restore_system_prompt_override(&mut history, receipt);
    assert_eq!(history.len(), 1);
    assert_eq!(history[0].role, "user");
}
}