From 24ecf2abd462868d302e02f4869b53eac6b43f79 Mon Sep 17 00:00:00 2001
From: Cameron Cordes
Date: Fri, 8 May 2026 10:59:35 -0400
Subject: [PATCH] insight-chat: prepend "Photo file path:" to bootstrap user turn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug: the bootstrap user_content was just the user's typed message
(plus the hybrid visual description). Tools that take a file_path arg
(recall_facts_for_photo, get_file_tags, get_faces_in_photo) had no way
to learn the canonical path. Small models would invent placeholders
like "input_file_0.png" or call the tool with a name guessed from a
hidden multimodal input handle, neither of which matched any real
photo.

Fix: prepend a single-line "Photo file path: <path>\n\n" block to
user_content. This is the same shape generate_agentic_insight_for_photo
already uses for non-chat callers. The bootstrap stays minimal (no
date / GPS / tags pre-stuffing; the agentic loop can fetch those via
tools when needed). Hybrid mode still injects the visual description
block between the path block and the user message; local mode just
gets path + user text.
---
 src/ai/insight_chat.rs | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index b3126f4..f8a023f 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -934,12 +934,20 @@ impl InsightChatService {
         // discusses metadata-only is still useful.
         let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
 
+        // Photo path block. Several agentic tools (recall_facts_for_photo,
+        // get_file_tags, get_faces_in_photo, etc.) take a `file_path` arg
+        // that the model has no way to know unless we put it in the user
+        // turn. Without this block small models invent placeholders like
+        // "input_file_0.png" or refuse and ask the user. Mirrors the
+        // user_content layout `generate_agentic_insight_for_photo` uses.
+        let path_block = format!("Photo file path: {}\n\n", normalized);
+
         // Hybrid backend: pre-describe the image via local Ollama vision
         // and inline the description into the user turn. OpenRouter chat
         // models don't see images directly. Mirrors the same pre-describe
         // pass that `generate_agentic_insight_for_photo` does for hybrid.
-        let user_content = if is_hybrid {
-            let visual = match image_base64.as_deref() {
+        let visual_block = if is_hybrid {
+            match image_base64.as_deref() {
                 Some(b64) => match self.ollama.describe_image(b64).await {
                     Ok(desc) => format!(
                         "Visual description (from local vision model):\n{}\n\n",
                         desc
                     ),
                     Err(_) => {
                         String::new()
                     }
                 },
                 None => String::new(),
-            };
-            format!("{}{}", visual, req.user_message)
+            }
         } else {
-            req.user_message.clone()
+            String::new()
         };
 
+        let user_content = format!("{}{}{}", path_block, visual_block, req.user_message);
+
         // Tool gates. Local + image present → expose describe_photo so
         // the chat model can re-look at the photo on demand. Hybrid:
         // already inlined, no tool needed.
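
For a concrete sense of the resulting bootstrap turn, here is a minimal
standalone sketch of the assembly (the path and message literals are
made-up stand-ins; in insight_chat.rs the real values come from
`normalized`, the optional Ollama describe pass, and `req.user_message`):

    fn main() {
        // Stand-in for `normalized`, the canonical photo path.
        let path_block = format!("Photo file path: {}\n\n", "/photos/2026/beach.jpg");
        // Local mode leaves this empty; hybrid mode would carry the
        // "Visual description (from local vision model):\n...\n\n" text.
        let visual_block = String::new();
        // Stand-in for `req.user_message`, the user's typed text.
        let user_message = "who is in this photo?";

        // Same assembly order as the hunk above: path, optional visual, message.
        let user_content = format!("{}{}{}", path_block, visual_block, user_message);
        assert_eq!(
            user_content,
            "Photo file path: /photos/2026/beach.jpg\n\nwho is in this photo?"
        );
    }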