From 3699e059a227f2cd30cac98aed657572e7a7dac7 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Fri, 8 May 2026 11:14:39 -0400 Subject: [PATCH] insight-chat: include Date taken + GPS in bootstrap photo context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bootstrap system message gave the model a file path and (in hybrid mode) a visual description, but no temporal anchor. Models defaulted to today's date when calling get_sms_messages — Nov 2014 photos were getting "2024-03-11" passed as `date`, missing every historical message and leading the model to confidently misreport context. This commit folds two more EXIF-sourced facts into the --- PHOTO CONTEXT --- block: Date taken: GPS: (omitted when no GPS) Resolution waterfall for date_taken matches the documented canonical date pipeline at the EXIF / filename steps, but intentionally stops short of the fs-time fallback `generate_agentic_insight_for_photo` uses — for chat we'd rather show "unknown" than mislead the model with an inode mtime. GPS is taken straight from EXIF when both lat/lon are populated; absent GPS suppresses the line entirely so the model doesn't hallucinate coordinates. InsightGenerator gains a `fetch_exif(file_path)` accessor (crate- visible) so the chat service doesn't need its own ExifDao plumbing. build_bootstrap_system_message picks up two new params (date, gps); existing tests updated and 5 new tests cover: - date present / absent / waterfall (EXIF wins, filename fallback, None when neither source has it) - GPS present / absent - ordering (path → date → visual) Total insight_chat unit tests: 33 (up from 27). --- src/ai/insight_chat.rs | 171 ++++++++++++++++++++++++++++++++---- src/ai/insight_generator.rs | 11 +++ 2 files changed, 166 insertions(+), 16 deletions(-) diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index c58dc4d..e4d8a56 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -934,6 +934,20 @@ impl InsightChatService { // discusses metadata-only is still useful. let image_base64: Option = self.generator.load_image_as_base64(&normalized).ok(); + // EXIF lookup once — date_taken and GPS go into the photo + // context block in the system message. Without these the model + // hallucinates dates / GPS-keyed tool args (`get_sms_messages` + // would otherwise default to today's date and miss every + // historical photo). + let exif = self.generator.fetch_exif(&normalized); + let date_taken_str = resolve_date_taken_for_context(&exif, &normalized); + let gps = exif + .as_ref() + .and_then(|e| match (e.gps_latitude, e.gps_longitude) { + (Some(lat), Some(lon)) => Some((lat as f64, lon as f64)), + _ => None, + }); + // Hybrid backend: pre-describe the image via local Ollama vision // so OpenRouter chat models (which can't see images directly) get // the visual description as text. Mirrors the same pre-describe @@ -967,10 +981,18 @@ impl InsightChatService { // bubble in the rendered transcript shows only what they typed. // Several agentic tools (recall_facts_for_photo, get_file_tags, // get_faces_in_photo, etc.) take a `file_path` arg the model - // can't know without being told; in hybrid mode the visual - // description belongs here for the same reason. + // can't know without being told. `Date taken:` and `GPS:` give + // get_sms_messages / reverse_geocode / get_personal_place_at + // the args they need. In hybrid mode the visual description + // belongs here for the same reason. let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref()); - let system_content = build_bootstrap_system_message(&persona, &normalized, &visual_block); + let system_content = build_bootstrap_system_message( + &persona, + &normalized, + date_taken_str.as_deref(), + gps, + &visual_block, + ); let system_msg = ChatMessage::system(system_content); let mut user_msg = ChatMessage::user(req.user_message.clone()); if !is_hybrid && let Some(ref img) = image_base64 { @@ -1309,18 +1331,29 @@ fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String { } /// Compose the bootstrap system message: the persona on top, followed -/// by a photo-context block carrying the file path (and, in hybrid -/// mode, the local-vision visual description). Lives in the system -/// message — not the user turn — so the rendered transcript shows -/// only what the user typed. +/// by a photo-context block carrying the file path, date taken (when +/// known), GPS (when present), and — in hybrid mode — the local-vision +/// visual description. Lives in the system message — not the user +/// turn — so the rendered transcript shows only what the user typed. fn build_bootstrap_system_message( persona: &str, normalized_path: &str, + date_taken: Option<&str>, + gps: Option<(f64, f64)>, visual_block: &str, ) -> String { let mut out = persona.trim_end().to_string(); out.push_str("\n\n--- PHOTO CONTEXT ---\n"); out.push_str(&format!("Photo file path: {}\n", normalized_path)); + out.push_str(&format!( + "Date taken: {}\n", + date_taken.unwrap_or("unknown") + )); + if let Some((lat, lon)) = gps { + // Four decimal places ≈ 11 m of precision — plenty for any + // place-lookup tool, and keeps the prompt short. + out.push_str(&format!("GPS: {:.4}, {:.4}\n", lat, lon)); + } if !visual_block.is_empty() { // visual_block already ends with a newline; no extra separator // needed. @@ -1329,6 +1362,27 @@ fn build_bootstrap_system_message( out } +/// Resolve a human-readable `YYYY-MM-DD` date string for the photo +/// context block. Waterfall: EXIF `date_taken` → filename pattern → +/// `None`. The fs-time fallback that `generate_agentic_insight_for_photo` +/// uses is intentionally NOT applied here — for chat we'd rather show +/// "unknown" than a misleading inode mtime as the photo's date. +fn resolve_date_taken_for_context( + exif: &Option, + file_path: &str, +) -> Option { + let from_exif = exif + .as_ref() + .and_then(|e| e.date_taken) + .and_then(|ts| chrono::DateTime::from_timestamp(ts, 0)) + .map(|dt| dt.format("%Y-%m-%d").to_string()); + if from_exif.is_some() { + return from_exif; + } + crate::memories::extract_date_from_filename(file_path) + .map(|dt| dt.format("%Y-%m-%d").to_string()) +} + /// Pick the backend label for bootstrap. Bootstrap has no stored insight /// to defer to (that's continuation's behaviour), so the default is /// `"local"`. Returns an error if the supplied label is non-empty but @@ -1960,32 +2014,117 @@ mod tests { #[test] fn bootstrap_system_message_includes_path_and_persona() { - let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", ""); + let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", None, None, ""); assert!(out.starts_with("you are helpful")); assert!(out.contains("--- PHOTO CONTEXT ---")); assert!(out.contains("Photo file path: pics/IMG.jpg")); - // No visual block — should not introduce a stray "Visual" line. + // No date supplied → "unknown" so the model doesn't guess. + assert!(out.contains("Date taken: unknown")); + assert!(!out.contains("GPS:")); assert!(!out.contains("Visual description")); } + #[test] + fn bootstrap_system_message_includes_date_when_supplied() { + let out = + build_bootstrap_system_message("voice", "pics/IMG.jpg", Some("2014-11-08"), None, ""); + assert!(out.contains("Date taken: 2014-11-08")); + assert!(!out.contains("Date taken: unknown")); + } + + #[test] + fn bootstrap_system_message_includes_gps_when_present() { + let out = build_bootstrap_system_message( + "voice", + "p.jpg", + Some("2020-01-01"), + Some((42.36123, -71.05789)), + "", + ); + // Four decimals — enough for place lookup, short enough to + // not bloat the system prompt. + assert!(out.contains("GPS: 42.3612, -71.0579")); + } + + #[test] + fn bootstrap_system_message_omits_gps_when_none() { + let out = build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), None, ""); + assert!(!out.contains("GPS:")); + } + #[test] fn bootstrap_system_message_includes_visual_block_when_supplied() { let visual = "Visual description (from local vision model):\nA dog in a park.\n"; - let out = build_bootstrap_system_message("voice", "p.jpg", visual); + let out = + build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), None, visual); assert!(out.contains("Photo file path: p.jpg")); assert!(out.contains("A dog in a park")); - // Path appears before visual. + // Path before date before visual. let path_pos = out.find("Photo file path:").unwrap(); + let date_pos = out.find("Date taken:").unwrap(); let visual_pos = out.find("A dog in a park").unwrap(); - assert!(path_pos < visual_pos); + assert!(path_pos < date_pos); + assert!(date_pos < visual_pos); } #[test] fn bootstrap_system_message_trims_persona_trailing_whitespace() { - // Two consecutive newlines before the photo-context divider — - // any trailing whitespace from the persona must be collapsed - // so we don't end up with `\n\n\n\n--- PHOTO CONTEXT ---`. - let out = build_bootstrap_system_message("voice \n\n\n", "p.jpg", ""); + let out = build_bootstrap_system_message("voice \n\n\n", "p.jpg", None, None, ""); assert!(out.contains("voice\n\n--- PHOTO CONTEXT ---")); } + + #[test] + fn date_taken_for_context_prefers_exif_over_filename() { + // EXIF wins when both are present (matches the canonical + // date_resolver waterfall — EXIF is more reliable than + // import-named filenames). + let exif = Some(crate::database::models::ImageExif { + id: 0, + library_id: 1, + file_path: "Screenshot_2014-06-01.png".to_string(), + camera_make: None, + camera_model: None, + lens_model: None, + width: None, + height: None, + orientation: None, + gps_latitude: None, + gps_longitude: None, + gps_altitude: None, + focal_length: None, + aperture: None, + shutter_speed: None, + iso: None, + // 2021-08-15 12:00:00 UTC + date_taken: Some(1_629_028_800), + created_time: 0, + last_modified: 0, + content_hash: None, + size_bytes: None, + phash_64: None, + dhash_64: None, + duplicate_of_hash: None, + duplicate_decided_at: None, + date_taken_source: None, + original_date_taken: None, + original_date_taken_source: None, + }); + let out = resolve_date_taken_for_context(&exif, "Screenshot_2014-06-01.png"); + assert_eq!(out.as_deref(), Some("2021-08-15")); + } + + #[test] + fn date_taken_for_context_falls_back_to_filename_when_no_exif() { + // memories::extract_date_from_filename requires date+time in + // the filename — date-only patterns aren't matched. Use the + // canonical screenshot pattern for the regression case. + let out = resolve_date_taken_for_context(&None, "Screenshot_2014-06-01-20-44-50.png"); + assert_eq!(out.as_deref(), Some("2014-06-01")); + } + + #[test] + fn date_taken_for_context_returns_none_when_neither_source() { + let out = resolve_date_taken_for_context(&None, "DSC_5171.JPG"); + assert!(out.is_none()); + } } diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 3e0d7fb..2df22fa 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -229,6 +229,17 @@ impl InsightGenerator { None } + /// Look up the EXIF row for a photo. Returns `None` when no row + /// exists yet (file watcher hasn't reached it) or the DAO call + /// fails. Used by callers — including the chat-bootstrap path — + /// that need a few specific fields (date_taken, GPS) without + /// duplicating DAO plumbing. + pub(crate) fn fetch_exif(&self, file_path: &str) -> Option { + let cx = opentelemetry::Context::current(); + let mut dao = self.exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_exif(&cx, file_path).ok().flatten() + } + /// Load image file, resize it, and encode as base64 for vision models /// Resizes to max 1024px on longest edge to reduce context usage pub(crate) fn load_image_as_base64(&self, file_path: &str) -> Result {