diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 2b8a9d7..2a98498 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -3732,13 +3732,28 @@ Return ONLY the summary, nothing else."#, }; // 9. Build user message - // Compose a single Location: line with both the resolved name and the - // raw coordinates. Falls back to bare GPS when the geocoders failed, - // and to "Location: unknown" when there are no coordinates at all. - let location_info = match (resolved_location.as_deref(), exif.as_ref()) { + // The user message is restructured to lead with photo facts as a + // bulleted "## This photo" block (so small models can't skim past + // them), followed by an imperative "## What to do" recipe and a + // forcing line. Small models bail out of the agentic loop when the + // user message ends with "write a detailed insight" — they just + // write. The forcing line replaces the soft "aim to use 5 tools" + // floor with a hard "do not output text yet" gate. + + // Date with weekday + canonical-date source so the model can hedge + // on filename- or fs_time-derived dates. + let date_bullet = format!( + "- Date: {} (source: {})", + date_taken.format("%A, %B %d, %Y"), + date_taken_source + ); + + // Location: full resolved string + raw coordinates when GPS is + // present, falling back to "unknown" when not. 
+ let location_bullet = match (resolved_location.as_deref(), exif.as_ref()) { (Some(name), Some(e)) if e.gps_latitude.is_some() && e.gps_longitude.is_some() => { format!( - "Location: {} (GPS {:.4}, {:.4})", + "- Location: {} (GPS {:.4}, {:.4})", name, e.gps_latitude.unwrap(), e.gps_longitude.unwrap() @@ -3746,61 +3761,65 @@ Return ONLY the summary, nothing else."#, } (None, Some(e)) if e.gps_latitude.is_some() && e.gps_longitude.is_some() => { format!( - "Location: GPS {:.4}, {:.4} (geocoder unavailable)", + "- Location: GPS {:.4}, {:.4} (geocoder unavailable)", e.gps_latitude.unwrap(), e.gps_longitude.unwrap() ) } - _ => "Location: unknown".to_string(), + _ => "- Location: unknown".to_string(), }; - let tags_info = if tag_names.is_empty() { - "Tags: none".to_string() - } else { - format!("Tags: {}", tag_names.join(", ")) - }; - - let contact_info = contact + let contact_bullet = contact .as_ref() - .map(|c| format!("Contact/Person: {}", c)) - .unwrap_or_else(|| "Contact/Person: unknown".to_string()); + .map(|c| format!("- Contact/Person: {}", c)) + .unwrap_or_else(|| "- Contact/Person: unknown".to_string()); - // Hybrid mode: the chat model never receives the image bytes, so we - // inline the visual description as text and explicitly tell the model - // not to call describe_photo (the tool is gated off in hybrid anyway). - let visual_block = hybrid_visual_description + let tags_bullet = if tag_names.is_empty() { + "- Tags: none".to_string() + } else { + format!("- Tags: {}", tag_names.join(", ")) + }; + + let path_bullet = format!("- File path: {}", file_path); + + // Hybrid: visual description is inlined as a bullet (no image bytes + // reach the chat model). Local: the image is attached to this + // message, no inline description bullet — describe_photo is the tool. 
+ let visual_bullet = hybrid_visual_description .as_deref() .map(|d| { format!( - "Visual description (already generated for you — do not call describe_photo):\n{}\n\n", - d + "- Visual description (already generated — do not call describe_photo):\n {}", + d.lines().collect::<Vec<_>>().join("\n ") ) }) .unwrap_or_default(); - // Format date with weekday + the canonical-date source so the model - // can temper claims when the date is filename- or fs_time-derived. - let date_line = format!( - "Date taken: {} (source: {})", - date_taken.format("%A, %B %d, %Y"), - date_taken_source - ); + // Compose the photo block (omit empty visual bullet to avoid stray newline). + let photo_block = if visual_bullet.is_empty() { + format!( + "## This photo\n\n{}\n{}\n{}\n{}\n{}", + path_bullet, date_bullet, contact_bullet, location_bullet, tags_bullet + ) + } else { + format!( + "## This photo\n\n{}\n{}\n{}\n{}\n{}\n{}", + path_bullet, + date_bullet, + contact_bullet, + location_bullet, + tags_bullet, + visual_bullet + ) + }; let user_content = format!( - "{visual_block}Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\ - Photo file path: {}\n\ - {}\n\ - {}\n\ - {}\n\ - {}\n\n\ - Use the available tools to gather more context about this moment (messages, calendar events, location history, etc.), \ - then write a detailed insight with a title and summary.", - file_path, - date_line, - contact_info, - location_info, - tags_info, - visual_block = visual_block, + "{photo_block}\n\n\ + ## What to do\n\n\ + 1. First, call recall_facts_for_photo and recall_entities to load any prior knowledge about subjects in this photo.\n\ + 2. Then call at least 3 of: search_rag, get_sms_messages (try once with the contact filter and once without), get_calendar_events, get_location_history — pick the ones most relevant to this photo's date and context.\n\ + 3. 
Only after you have tool results, write the final insight with a title and a detailed summary that references specific facts from the metadata above and from your tool results. Generic narration is not acceptable.\n\n\ + YOUR FIRST RESPONSE MUST BE A TOOL CALL. Do not output any final answer text until you have called at least 5 tools." ); // 10. Define tools. Hybrid mode omits `describe_photo` since the