diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index e03b1af..d05cd4d 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -2943,6 +2943,58 @@ Return ONLY the summary, nothing else."#, } } + /// Assemble the chat system prompt from two named blocks: + /// + /// 1. **Identity / voice / format** — `custom_system_prompt` verbatim + /// when supplied, or a neutral default that doesn't fight a future + /// persona. The framework never asserts an identity that could + /// contradict the persona. + /// 2. **Procedural scaffolding** — tool-use guidance, iteration budget, + /// contact-filter rule. Identity-free; never asserts voice or shape. + /// + /// `owner_id_note` and `fewshot_block` are pre-rendered strings (they + /// already encode their own headers / blank lines). Pass empty / None + /// to skip. + pub(crate) fn build_system_content( + custom_system_prompt: Option<&str>, + owner_id_note: Option<&str>, + fewshot_block: &str, + max_iterations: usize, + ) -> String { + let identity = match custom_system_prompt { + Some(s) if !s.trim().is_empty() => s.trim().to_string(), + _ => String::from( + "You are reconstructing a memory from a photo. Use the gathered \ + context to write a thoughtful summary; you decide voice, length, and shape." + ), + }; + + let owner = owner_id_note.unwrap_or(""); + + let procedural = format!( + "Tool-use guidance:\n\ + - You have a budget of {max_iterations} tool-calling iterations.\n\ + - Call tools to gather context BEFORE writing your final answer; don't answer after one or two calls.\n\ + - When calling get_sms_messages or search_rag, make at least one call WITHOUT a contact filter \ + — surrounding events matter even when a contact is known.\n\ + - Use recall_facts_for_photo + recall_entities to load any prior knowledge about subjects in the photo.\n\ + - When you identify people / places / events / things, use store_entity + store_fact to grow the persistent memory.\n\ + - A tool returning no results is informative; continue with the others.", + max_iterations = max_iterations + ); + + let mut out = identity; + if !owner.is_empty() { + out.push_str(owner); + } + out.push_str("\n\n"); + if !fewshot_block.is_empty() { + out.push_str(fewshot_block); + } + out.push_str(&procedural); + out + } + pub async fn generate_agentic_insight_for_photo( &self, file_path: &str, @@ -3288,9 +3340,12 @@ Return ONLY the summary, nothing else."#, None }; - // 8. Build system message + // 8. Build system message via the two-block helper. Custom prompt + // (when supplied) is the authoritative identity — the framework + // never appends a competing "you are a personal photo memory + // assistant" line. The procedural block stays identity-free. let owner_id_note = match owner_entity_id { - Some(id) => format!( + Some(id) => Some(format!( "\n\nYour identity in the knowledge store: {name} (entity ID: {id}). \ When storing facts where you ({name}) are the object — for example, someone is your friend, \ sibling, or colleague — use subject_entity_id for the other person and set object_value to \ @@ -3298,32 +3353,16 @@ Return ONLY the summary, nothing else."#, {name} directly, use {id} as the subject_entity_id.", name = owner_name, id = id - ), - None => String::new(), + )), + None => None, }; let fewshot_block = Self::render_fewshot_examples(&fewshot_examples); - let base_system = format!( - "You are a personal photo memory assistant helping to reconstruct a memory from a photo.{owner_id_note}\n\n\ - {fewshot_block}\ - IMPORTANT INSTRUCTIONS:\n\ - 1. You MUST call multiple tools to gather context BEFORE writing any final insight. Do not produce a final answer after only one or two tool calls.\n\ - 2. When calling get_sms_messages and search_rag, always make at least one call WITHOUT a contact filter to capture what else was happening in {owner_name}'s life around this date — other conversations, events, and activities provide important wider context even when a specific contact is known.\n\ - 3. Use recall_facts_for_photo to load any previously stored knowledge about subjects in this photo.\n\ - 4. Use recall_entities to look up known people, places, or things that appear in this photo.\n\ - 5. When you identify people, places, events, or notable things in this photo: use store_entity to record them and store_fact to record key facts (relationships, roles, attributes). This builds a persistent memory for future insights.\n\ - 6. Only produce your final insight AFTER you have gathered context from at least 5 tool calls.\n\ - 7. If a tool returns no results, that is useful information — continue calling the remaining tools anyway.\n\ - 8. You have a hard budget of {max_iterations} tool-calling iterations before the loop ends. Plan your context gathering so you can write a complete final insight within that budget.", - owner_id_note = owner_id_note, - fewshot_block = fewshot_block, - owner_name = owner_name, - max_iterations = max_iterations + let system_content = Self::build_system_content( + custom_system_prompt.as_deref(), + owner_id_note.as_deref(), + &fewshot_block, + max_iterations, ); - let system_content = if let Some(ref custom) = custom_system_prompt { - format!("{}\n\n{}", custom, base_system) - } else { - base_system.to_string() - }; // 9. Build user message let gps_info = exif @@ -3353,15 +3392,17 @@ Return ONLY the summary, nothing else."#, .map(|d| format!("Visual description (from local vision model):\n{}\n\n", d)) .unwrap_or_default(); + // Context-only payload — no output-shape prescription. The persona / + // custom_system_prompt owns voice, length, and structure. The "title + // and summary" claim that used to live here was unused (the title is + // regenerated post-hoc from the summary by generate_photo_title). let user_content = format!( - "{visual_block}Please analyze this photo and gather any relevant context from the surrounding weeks.\n\n\ - Photo file path: {}\n\ + "{visual_block}Photo file path: {}\n\ Date taken: {}\n\ {}\n\ {}\n\ {}\n\n\ - Use the available tools to gather more context about this moment (messages, calendar events, location history, etc.), \ - then write a detailed insight with a title and summary.", + Gather context with the available tools, then respond.", file_path, date_taken.format("%B %d, %Y"), contact_info, @@ -3923,6 +3964,44 @@ mod tests { assert_eq!(out, "11 chars"); } + #[test] + fn build_system_content_uses_custom_prompt_verbatim_for_identity() { + let out = InsightGenerator::build_system_content( + Some("You are a journal writer in first person, warm and reflective."), + None, + "", + 6, + ); + assert!( + out.starts_with("You are a journal writer in first person, warm and reflective."), + "custom prompt must lead the system content; got: {}", + &out[..out.len().min(200)], + ); + assert!( + !out.contains("personal photo memory assistant"), + "framework identity must not leak when custom prompt is supplied" + ); + assert!(out.contains("Tool-use guidance")); + assert!(out.contains("budget of 6")); + } + + #[test] + fn build_system_content_uses_neutral_default_when_no_custom() { + let out = InsightGenerator::build_system_content(None, None, "", 6); + assert!(out.contains("reconstructing a memory from a photo")); + assert!(!out.contains("personal photo memory assistant")); + assert!(out.contains("Tool-use guidance")); + } + + #[test] + fn build_system_content_includes_fewshot_and_owner_id() { + let owner = "\n\nYour identity in the knowledge store: Alice (entity ID: 7)."; + let fewshot = "## Examples\n\n### Example 1\n...\n\n---\n\n"; + let out = InsightGenerator::build_system_content(None, Some(owner), fewshot, 6); + assert!(out.contains("Alice (entity ID: 7)")); + assert!(out.contains("## Examples")); + } + #[test] fn render_fewshot_empty_returns_empty_string() { assert!(InsightGenerator::render_fewshot_examples(&[]).is_empty());