insight-chat: include Date taken + GPS in bootstrap photo context
The bootstrap system message gave the model a file path and (in hybrid mode) a visual description, but no temporal anchor. Models defaulted to today's date when calling get_sms_messages — Nov 2014 photos were getting "2024-03-11" passed as `date`, missing every historical message and leading the model to confidently misreport context. This commit folds two more EXIF-sourced facts into the --- PHOTO CONTEXT --- block: Date taken: <YYYY-MM-DD or "unknown"> GPS: <lat, lon to 4dp> (omitted when no GPS) Resolution waterfall for date_taken matches the documented canonical date pipeline at the EXIF / filename steps, but intentionally stops short of the fs-time fallback `generate_agentic_insight_for_photo` uses — for chat we'd rather show "unknown" than mislead the model with an inode mtime. GPS is taken straight from EXIF when both lat/lon are populated; absent GPS suppresses the line entirely so the model doesn't hallucinate coordinates. InsightGenerator gains a `fetch_exif(file_path)` accessor (crate- visible) so the chat service doesn't need its own ExifDao plumbing. build_bootstrap_system_message picks up two new params (date, gps); existing tests updated and 5 new tests cover: - date present / absent / waterfall (EXIF wins, filename fallback, None when neither source has it) - GPS present / absent - ordering (path → date → visual) Total insight_chat unit tests: 33 (up from 27).
This commit is contained in:
@@ -934,6 +934,20 @@ impl InsightChatService {
|
|||||||
// discusses metadata-only is still useful.
|
// discusses metadata-only is still useful.
|
||||||
let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
|
let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
|
||||||
|
|
||||||
|
// EXIF lookup once — date_taken and GPS go into the photo
|
||||||
|
// context block in the system message. Without these the model
|
||||||
|
// hallucinates dates / GPS-keyed tool args (`get_sms_messages`
|
||||||
|
// would otherwise default to today's date and miss every
|
||||||
|
// historical photo).
|
||||||
|
let exif = self.generator.fetch_exif(&normalized);
|
||||||
|
let date_taken_str = resolve_date_taken_for_context(&exif, &normalized);
|
||||||
|
let gps = exif
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|e| match (e.gps_latitude, e.gps_longitude) {
|
||||||
|
(Some(lat), Some(lon)) => Some((lat as f64, lon as f64)),
|
||||||
|
_ => None,
|
||||||
|
});
|
||||||
|
|
||||||
// Hybrid backend: pre-describe the image via local Ollama vision
|
// Hybrid backend: pre-describe the image via local Ollama vision
|
||||||
// so OpenRouter chat models (which can't see images directly) get
|
// so OpenRouter chat models (which can't see images directly) get
|
||||||
// the visual description as text. Mirrors the same pre-describe
|
// the visual description as text. Mirrors the same pre-describe
|
||||||
@@ -967,10 +981,18 @@ impl InsightChatService {
|
|||||||
// bubble in the rendered transcript shows only what they typed.
|
// bubble in the rendered transcript shows only what they typed.
|
||||||
// Several agentic tools (recall_facts_for_photo, get_file_tags,
|
// Several agentic tools (recall_facts_for_photo, get_file_tags,
|
||||||
// get_faces_in_photo, etc.) take a `file_path` arg the model
|
// get_faces_in_photo, etc.) take a `file_path` arg the model
|
||||||
// can't know without being told; in hybrid mode the visual
|
// can't know without being told. `Date taken:` and `GPS:` give
|
||||||
// description belongs here for the same reason.
|
// get_sms_messages / reverse_geocode / get_personal_place_at
|
||||||
|
// the args they need. In hybrid mode the visual description
|
||||||
|
// belongs here for the same reason.
|
||||||
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
|
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
|
||||||
let system_content = build_bootstrap_system_message(&persona, &normalized, &visual_block);
|
let system_content = build_bootstrap_system_message(
|
||||||
|
&persona,
|
||||||
|
&normalized,
|
||||||
|
date_taken_str.as_deref(),
|
||||||
|
gps,
|
||||||
|
&visual_block,
|
||||||
|
);
|
||||||
let system_msg = ChatMessage::system(system_content);
|
let system_msg = ChatMessage::system(system_content);
|
||||||
let mut user_msg = ChatMessage::user(req.user_message.clone());
|
let mut user_msg = ChatMessage::user(req.user_message.clone());
|
||||||
if !is_hybrid && let Some(ref img) = image_base64 {
|
if !is_hybrid && let Some(ref img) = image_base64 {
|
||||||
@@ -1309,18 +1331,29 @@ fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Compose the bootstrap system message: the persona on top, followed
|
/// Compose the bootstrap system message: the persona on top, followed
|
||||||
/// by a photo-context block carrying the file path (and, in hybrid
|
/// by a photo-context block carrying the file path, date taken (when
|
||||||
/// mode, the local-vision visual description). Lives in the system
|
/// known), GPS (when present), and — in hybrid mode — the local-vision
|
||||||
/// message — not the user turn — so the rendered transcript shows
|
/// visual description. Lives in the system message — not the user
|
||||||
/// only what the user typed.
|
/// turn — so the rendered transcript shows only what the user typed.
|
||||||
fn build_bootstrap_system_message(
|
fn build_bootstrap_system_message(
|
||||||
persona: &str,
|
persona: &str,
|
||||||
normalized_path: &str,
|
normalized_path: &str,
|
||||||
|
date_taken: Option<&str>,
|
||||||
|
gps: Option<(f64, f64)>,
|
||||||
visual_block: &str,
|
visual_block: &str,
|
||||||
) -> String {
|
) -> String {
|
||||||
let mut out = persona.trim_end().to_string();
|
let mut out = persona.trim_end().to_string();
|
||||||
out.push_str("\n\n--- PHOTO CONTEXT ---\n");
|
out.push_str("\n\n--- PHOTO CONTEXT ---\n");
|
||||||
out.push_str(&format!("Photo file path: {}\n", normalized_path));
|
out.push_str(&format!("Photo file path: {}\n", normalized_path));
|
||||||
|
out.push_str(&format!(
|
||||||
|
"Date taken: {}\n",
|
||||||
|
date_taken.unwrap_or("unknown")
|
||||||
|
));
|
||||||
|
if let Some((lat, lon)) = gps {
|
||||||
|
// Four decimal places ≈ 11 m of precision — plenty for any
|
||||||
|
// place-lookup tool, and keeps the prompt short.
|
||||||
|
out.push_str(&format!("GPS: {:.4}, {:.4}\n", lat, lon));
|
||||||
|
}
|
||||||
if !visual_block.is_empty() {
|
if !visual_block.is_empty() {
|
||||||
// visual_block already ends with a newline; no extra separator
|
// visual_block already ends with a newline; no extra separator
|
||||||
// needed.
|
// needed.
|
||||||
@@ -1329,6 +1362,27 @@ fn build_bootstrap_system_message(
|
|||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve a human-readable `YYYY-MM-DD` date string for the photo
|
||||||
|
/// context block. Waterfall: EXIF `date_taken` → filename pattern →
|
||||||
|
/// `None`. The fs-time fallback that `generate_agentic_insight_for_photo`
|
||||||
|
/// uses is intentionally NOT applied here — for chat we'd rather show
|
||||||
|
/// "unknown" than a misleading inode mtime as the photo's date.
|
||||||
|
fn resolve_date_taken_for_context(
|
||||||
|
exif: &Option<crate::database::models::ImageExif>,
|
||||||
|
file_path: &str,
|
||||||
|
) -> Option<String> {
|
||||||
|
let from_exif = exif
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|e| e.date_taken)
|
||||||
|
.and_then(|ts| chrono::DateTime::from_timestamp(ts, 0))
|
||||||
|
.map(|dt| dt.format("%Y-%m-%d").to_string());
|
||||||
|
if from_exif.is_some() {
|
||||||
|
return from_exif;
|
||||||
|
}
|
||||||
|
crate::memories::extract_date_from_filename(file_path)
|
||||||
|
.map(|dt| dt.format("%Y-%m-%d").to_string())
|
||||||
|
}
|
||||||
|
|
||||||
/// Pick the backend label for bootstrap. Bootstrap has no stored insight
|
/// Pick the backend label for bootstrap. Bootstrap has no stored insight
|
||||||
/// to defer to (that's continuation's behaviour), so the default is
|
/// to defer to (that's continuation's behaviour), so the default is
|
||||||
/// `"local"`. Returns an error if the supplied label is non-empty but
|
/// `"local"`. Returns an error if the supplied label is non-empty but
|
||||||
@@ -1960,32 +2014,117 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bootstrap_system_message_includes_path_and_persona() {
|
fn bootstrap_system_message_includes_path_and_persona() {
|
||||||
let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", "");
|
let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", None, None, "");
|
||||||
assert!(out.starts_with("you are helpful"));
|
assert!(out.starts_with("you are helpful"));
|
||||||
assert!(out.contains("--- PHOTO CONTEXT ---"));
|
assert!(out.contains("--- PHOTO CONTEXT ---"));
|
||||||
assert!(out.contains("Photo file path: pics/IMG.jpg"));
|
assert!(out.contains("Photo file path: pics/IMG.jpg"));
|
||||||
// No visual block — should not introduce a stray "Visual" line.
|
// No date supplied → "unknown" so the model doesn't guess.
|
||||||
|
assert!(out.contains("Date taken: unknown"));
|
||||||
|
assert!(!out.contains("GPS:"));
|
||||||
assert!(!out.contains("Visual description"));
|
assert!(!out.contains("Visual description"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bootstrap_system_message_includes_date_when_supplied() {
|
||||||
|
let out =
|
||||||
|
build_bootstrap_system_message("voice", "pics/IMG.jpg", Some("2014-11-08"), None, "");
|
||||||
|
assert!(out.contains("Date taken: 2014-11-08"));
|
||||||
|
assert!(!out.contains("Date taken: unknown"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bootstrap_system_message_includes_gps_when_present() {
|
||||||
|
let out = build_bootstrap_system_message(
|
||||||
|
"voice",
|
||||||
|
"p.jpg",
|
||||||
|
Some("2020-01-01"),
|
||||||
|
Some((42.36123, -71.05789)),
|
||||||
|
"",
|
||||||
|
);
|
||||||
|
// Four decimals — enough for place lookup, short enough to
|
||||||
|
// not bloat the system prompt.
|
||||||
|
assert!(out.contains("GPS: 42.3612, -71.0579"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bootstrap_system_message_omits_gps_when_none() {
|
||||||
|
let out = build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), None, "");
|
||||||
|
assert!(!out.contains("GPS:"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bootstrap_system_message_includes_visual_block_when_supplied() {
|
fn bootstrap_system_message_includes_visual_block_when_supplied() {
|
||||||
let visual = "Visual description (from local vision model):\nA dog in a park.\n";
|
let visual = "Visual description (from local vision model):\nA dog in a park.\n";
|
||||||
let out = build_bootstrap_system_message("voice", "p.jpg", visual);
|
let out =
|
||||||
|
build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), None, visual);
|
||||||
assert!(out.contains("Photo file path: p.jpg"));
|
assert!(out.contains("Photo file path: p.jpg"));
|
||||||
assert!(out.contains("A dog in a park"));
|
assert!(out.contains("A dog in a park"));
|
||||||
// Path appears before visual.
|
// Path before date before visual.
|
||||||
let path_pos = out.find("Photo file path:").unwrap();
|
let path_pos = out.find("Photo file path:").unwrap();
|
||||||
|
let date_pos = out.find("Date taken:").unwrap();
|
||||||
let visual_pos = out.find("A dog in a park").unwrap();
|
let visual_pos = out.find("A dog in a park").unwrap();
|
||||||
assert!(path_pos < visual_pos);
|
assert!(path_pos < date_pos);
|
||||||
|
assert!(date_pos < visual_pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn bootstrap_system_message_trims_persona_trailing_whitespace() {
|
fn bootstrap_system_message_trims_persona_trailing_whitespace() {
|
||||||
// Two consecutive newlines before the photo-context divider —
|
let out = build_bootstrap_system_message("voice \n\n\n", "p.jpg", None, None, "");
|
||||||
// any trailing whitespace from the persona must be collapsed
|
|
||||||
// so we don't end up with `\n\n\n\n--- PHOTO CONTEXT ---`.
|
|
||||||
let out = build_bootstrap_system_message("voice \n\n\n", "p.jpg", "");
|
|
||||||
assert!(out.contains("voice\n\n--- PHOTO CONTEXT ---"));
|
assert!(out.contains("voice\n\n--- PHOTO CONTEXT ---"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn date_taken_for_context_prefers_exif_over_filename() {
|
||||||
|
// EXIF wins when both are present (matches the canonical
|
||||||
|
// date_resolver waterfall — EXIF is more reliable than
|
||||||
|
// import-named filenames).
|
||||||
|
let exif = Some(crate::database::models::ImageExif {
|
||||||
|
id: 0,
|
||||||
|
library_id: 1,
|
||||||
|
file_path: "Screenshot_2014-06-01.png".to_string(),
|
||||||
|
camera_make: None,
|
||||||
|
camera_model: None,
|
||||||
|
lens_model: None,
|
||||||
|
width: None,
|
||||||
|
height: None,
|
||||||
|
orientation: None,
|
||||||
|
gps_latitude: None,
|
||||||
|
gps_longitude: None,
|
||||||
|
gps_altitude: None,
|
||||||
|
focal_length: None,
|
||||||
|
aperture: None,
|
||||||
|
shutter_speed: None,
|
||||||
|
iso: None,
|
||||||
|
// 2021-08-15 12:00:00 UTC
|
||||||
|
date_taken: Some(1_629_028_800),
|
||||||
|
created_time: 0,
|
||||||
|
last_modified: 0,
|
||||||
|
content_hash: None,
|
||||||
|
size_bytes: None,
|
||||||
|
phash_64: None,
|
||||||
|
dhash_64: None,
|
||||||
|
duplicate_of_hash: None,
|
||||||
|
duplicate_decided_at: None,
|
||||||
|
date_taken_source: None,
|
||||||
|
original_date_taken: None,
|
||||||
|
original_date_taken_source: None,
|
||||||
|
});
|
||||||
|
let out = resolve_date_taken_for_context(&exif, "Screenshot_2014-06-01.png");
|
||||||
|
assert_eq!(out.as_deref(), Some("2021-08-15"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn date_taken_for_context_falls_back_to_filename_when_no_exif() {
|
||||||
|
// memories::extract_date_from_filename requires date+time in
|
||||||
|
// the filename — date-only patterns aren't matched. Use the
|
||||||
|
// canonical screenshot pattern for the regression case.
|
||||||
|
let out = resolve_date_taken_for_context(&None, "Screenshot_2014-06-01-20-44-50.png");
|
||||||
|
assert_eq!(out.as_deref(), Some("2014-06-01"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn date_taken_for_context_returns_none_when_neither_source() {
|
||||||
|
let out = resolve_date_taken_for_context(&None, "DSC_5171.JPG");
|
||||||
|
assert!(out.is_none());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -229,6 +229,17 @@ impl InsightGenerator {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Look up the EXIF row for a photo. Returns `None` when no row
|
||||||
|
/// exists yet (file watcher hasn't reached it) or the DAO call
|
||||||
|
/// fails. Used by callers — including the chat-bootstrap path —
|
||||||
|
/// that need a few specific fields (date_taken, GPS) without
|
||||||
|
/// duplicating DAO plumbing.
|
||||||
|
pub(crate) fn fetch_exif(&self, file_path: &str) -> Option<crate::database::models::ImageExif> {
|
||||||
|
let cx = opentelemetry::Context::current();
|
||||||
|
let mut dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
|
||||||
|
dao.get_exif(&cx, file_path).ok().flatten()
|
||||||
|
}
|
||||||
|
|
||||||
/// Load image file, resize it, and encode as base64 for vision models
|
/// Load image file, resize it, and encode as base64 for vision models
|
||||||
/// Resizes to max 1024px on longest edge to reduce context usage
|
/// Resizes to max 1024px on longest edge to reduce context usage
|
||||||
pub(crate) fn load_image_as_base64(&self, file_path: &str) -> Result<String> {
|
pub(crate) fn load_image_as_base64(&self, file_path: &str) -> Result<String> {
|
||||||
|
|||||||
Reference in New Issue
Block a user