diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index b3126f4..f8a023f 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -934,12 +934,20 @@ impl InsightChatService { // discusses metadata-only is still useful. let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok(); + // Photo path block. Several agentic tools (recall_facts_for_photo, + // get_file_tags, get_faces_in_photo, etc.) take a `file_path` arg + // that the model has no way to know unless we put it in the user + // turn. Without this block small models invent placeholders like + // "input_file_0.png" or refuse and ask the user. Mirrors the + // user_content layout `generate_agentic_insight_for_photo` uses. + let path_block = format!("Photo file path: {}\n\n", normalized); + // Hybrid backend: pre-describe the image via local Ollama vision // and inline the description into the user turn. OpenRouter chat // models don't see images directly. Mirrors the same pre-describe // pass that `generate_agentic_insight_for_photo` does for hybrid. - let user_content = if is_hybrid { - let visual = match image_base64.as_deref() { + let visual_block = if is_hybrid { + match image_base64.as_deref() { Some(b64) => match self.ollama.describe_image(b64).await { Ok(desc) => format!( "Visual description (from local vision model):\n{}\n\n", desc ), Err(e) => { } }, None => String::new(), @@ -951,12 +959,13 @@ impl InsightChatService { } }, None => String::new(), - }; - format!("{}{}", visual, req.user_message) + } } else { - req.user_message.clone() + String::new() }; + let user_content = format!("{}{}{}", path_block, visual_block, req.user_message); + // Tool gates. Local + image present → expose describe_photo so // the chat model can re-look at the photo on demand. Hybrid: // already inlined, no tool needed.