diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index 3622972..f4d478b 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -27,6 +27,12 @@ const RESPONSE_HEADROOM_TOKENS: usize = 2048;
 /// tokenization is model-specific; this avoids carrying tiktoken just for a
 /// soft bound.
 const BYTES_PER_TOKEN: usize = 4;
+/// Flat token cost charged per inlined image in the truncation budget
+/// (converted to bytes via `BYTES_PER_TOKEN`). A 1024px-longest-edge JPEG (see
+/// `load_image_as_base64`) costs vision models roughly 1.3K tokens. The raw
+/// base64 (hundreds of KB of characters) must NOT be counted as text bytes —
+/// doing so dwarfs the entire text budget and forces truncation on every turn.
+const IMAGE_TOKENS_EACH: usize = 1300;
 
 pub type ChatLockMap = Arc<TokioMutex<HashMap<(i32, String), Arc<TokioMutex<()>>>>>;
 
@@ -2359,10 +2365,32 @@ pub(crate) fn apply_context_budget(messages: &mut Vec<ChatMessage>, budget_bytes
     dropped_any
 }
 
+/// Estimate the byte size of `messages` for the truncation budget: the JSON
+/// length of the messages with `images` cleared, plus a flat
+/// `IMAGE_TOKENS_EACH * BYTES_PER_TOKEN` bytes per inlined image. Raw base64
+/// is hundreds of KB that bear no relation to the text token pressure being
+/// budgeted; counted verbatim, a single photo exceeds the entire budget and
+/// trims all history on every turn. A serialization error counts as 0 text bytes.
 fn estimate_bytes(messages: &[ChatMessage]) -> usize {
-    serde_json::to_string(messages)
+    let mut image_count = 0usize;
+    // Serialize a copy with `images` cleared so base64 never reaches the byte count.
+    // NOTE(review): `..m.clone()` appears to copy each payload before it is discarded.
+    let stripped: Vec<ChatMessage> = messages
+        .iter()
+        .map(|m| {
+            if let Some(imgs) = m.images.as_ref() {
+                image_count += imgs.len();
+            }
+            ChatMessage {
+                images: None,
+                ..m.clone()
+            }
+        })
+        .collect();
+    let text_bytes = serde_json::to_string(&stripped)
         .map(|s| s.len())
-        .unwrap_or(0)
+        .unwrap_or(0);
+    text_bytes + image_count * IMAGE_TOKENS_EACH * BYTES_PER_TOKEN
 }
 
 #[cfg(test)]
@@ -2422,6 +2450,35 @@ mod tests {
         assert_eq!(msgs.len(), 2);
     }
 
+    #[test]
+    fn image_payload_excluded_from_budget() {
+        // The user message carries a 400,000-character stand-in for a base64
+        // image but only a little text. Counting that payload verbatim (old
+        // behavior) dwarfs the budget and forces all tool history to be
+        // dropped on every turn. The image must instead be charged a flat
+        // per-image cost so a short conversation comfortably fits.
+        let mut user = ChatMessage::user("describe this");
+        user.images = Some(vec!["A".repeat(400_000)]);
+        let mut msgs = vec![
+            ChatMessage::system("sys"),
+            user,
+            assistant_with_tool_call("get_x"),
+            ChatMessage::tool_result("small x result"),
+            assistant_text("here is the answer"),
+        ];
+
+        // Default budget: (8192 - 2048) * 4 bytes ≈ 24KB. The text easily fits;
+        // only the image payload, if it were counted, could exceed it.
+        let budget_bytes = (DEFAULT_NUM_CTX as usize - RESPONSE_HEADROOM_TOKENS) * BYTES_PER_TOKEN;
+        let original_len = msgs.len();
+        let dropped = apply_context_budget(&mut msgs, budget_bytes);
+
+        assert!(!dropped, "short conversation with one image must not truncate");
+        assert_eq!(msgs.len(), original_len, "no messages should be dropped");
+        // Sanity: the estimate, including the flat per-image charge, stays within the budget.
+        assert!(estimate_bytes(&msgs) <= budget_bytes);
+    }
+
     #[test]
     fn truncation_returns_false_with_no_droppable_pairs() {
         // Only system + user, no tool-call turns to drop.