feature/insight-jobs #102

Merged
cameron merged 13 commits from feature/insight-jobs into master 2026-06-02 23:41:37 +00:00
Showing only changes of commit a542ea411b - Show all commits
+59 -2
View File
@@ -27,6 +27,12 @@ const RESPONSE_HEADROOM_TOKENS: usize = 2048;
/// tokenization is model-specific; this avoids carrying tiktoken just for a /// tokenization is model-specific; this avoids carrying tiktoken just for a
/// soft bound. /// soft bound.
const BYTES_PER_TOKEN: usize = 4; const BYTES_PER_TOKEN: usize = 4;
/// Flat token cost charged per inlined image in the truncation budget.
///
/// A 1024px-longest-edge JPEG (see `load_image_as_base64`) costs vision models
/// on the order of ~1.3K tokens, which is where this value comes from.
///
/// Crucially, the raw base64 (hundreds of KB of characters) must NOT be counted
/// as text bytes — doing so dwarfs the entire text budget and forces spurious
/// truncation on every turn. `estimate_bytes` multiplies this constant by
/// `BYTES_PER_TOKEN` to express the per-image charge in budget bytes.
const IMAGE_TOKENS_EACH: usize = 1300;
pub type ChatLockMap = Arc<TokioMutex<HashMap<(i32, String), Arc<TokioMutex<()>>>>>; pub type ChatLockMap = Arc<TokioMutex<HashMap<(i32, String), Arc<TokioMutex<()>>>>>;
@@ -2359,10 +2365,32 @@ pub(crate) fn apply_context_budget(messages: &mut Vec<ChatMessage>, budget_bytes
dropped_any dropped_any
} }
/// Estimate the serialized byte size of `messages` for the truncation budget,
/// EXCLUDING inlined base64 image payloads. Images are charged a flat
/// `IMAGE_TOKENS_EACH` instead: their base64 is hundreds of KB of characters
/// that have no relation to the text token pressure we're budgeting against,
/// and counting them verbatim makes a single photo exceed the entire budget,
/// spuriously trimming all history on every turn.
fn estimate_bytes(messages: &[ChatMessage]) -> usize { fn estimate_bytes(messages: &[ChatMessage]) -> usize {
serde_json::to_string(messages) let mut image_count = 0usize;
// Clone with image payloads stripped so they don't inflate the byte count.
// We still account for the (small) non-image fields verbatim.
let stripped: Vec<ChatMessage> = messages
.iter()
.map(|m| {
if let Some(imgs) = m.images.as_ref() {
image_count += imgs.len();
}
ChatMessage {
images: None,
..m.clone()
}
})
.collect();
let text_bytes = serde_json::to_string(&stripped)
.map(|s| s.len()) .map(|s| s.len())
.unwrap_or(0) .unwrap_or(0);
text_bytes + image_count * IMAGE_TOKENS_EACH * BYTES_PER_TOKEN
} }
#[cfg(test)] #[cfg(test)]
@@ -2422,6 +2450,35 @@ mod tests {
assert_eq!(msgs.len(), 2); assert_eq!(msgs.len(), 2);
} }
#[test]
fn image_payload_excluded_from_budget() {
    // Regression guard: a user turn with very little text but a ~400KB base64
    // image attached. If the base64 were counted verbatim (the old behavior) it
    // would dwarf the budget and force all tool history to be dropped on every
    // turn. With a flat per-image charge, this short conversation fits.
    let mut photo_turn = ChatMessage::user("describe this");
    photo_turn.images = Some(vec!["A".repeat(400_000)]);

    let mut history = vec![
        ChatMessage::system("sys"),
        photo_turn,
        assistant_with_tool_call("get_x"),
        ChatMessage::tool_result("small x result"),
        assistant_text("here is the answer"),
    ];
    let len_before = history.len();

    // Default budget: context window minus response headroom, in bytes
    // (about 24KB if DEFAULT_NUM_CTX is 8192). The text alone fits easily;
    // only the excluded image bytes could have exceeded it.
    let usable_tokens = DEFAULT_NUM_CTX as usize - RESPONSE_HEADROOM_TOKENS;
    let budget = usable_tokens * BYTES_PER_TOKEN;

    let truncated = apply_context_budget(&mut history, budget);

    assert!(!truncated, "short conversation with one image must not truncate");
    assert_eq!(history.len(), len_before, "no messages should be dropped");
    // Sanity: the flat image charge is included in the estimate, and the total
    // still does not exceed the budget.
    assert!(estimate_bytes(&history) <= budget);
}
#[test] #[test]
fn truncation_returns_false_with_no_droppable_pairs() { fn truncation_returns_false_with_no_droppable_pairs() {
// Only system + user, no tool-call turns to drop. // Only system + user, no tool-call turns to drop.