feature/streaming-insights #85
@@ -934,6 +934,20 @@ impl InsightChatService {
|
||||
// discusses metadata-only is still useful.
|
||||
let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
|
||||
|
||||
// EXIF lookup once — date_taken and GPS go into the photo
|
||||
// context block in the system message. Without these the model
|
||||
// hallucinates dates / GPS-keyed tool args (`get_sms_messages`
|
||||
// would otherwise default to today's date and miss every
|
||||
// historical photo).
|
||||
let exif = self.generator.fetch_exif(&normalized);
|
||||
let date_taken_str = resolve_date_taken_for_context(&exif, &normalized);
|
||||
let gps = exif
|
||||
.as_ref()
|
||||
.and_then(|e| match (e.gps_latitude, e.gps_longitude) {
|
||||
(Some(lat), Some(lon)) => Some((lat as f64, lon as f64)),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
// Hybrid backend: pre-describe the image via local Ollama vision
|
||||
// so OpenRouter chat models (which can't see images directly) get
|
||||
// the visual description as text. Mirrors the same pre-describe
|
||||
@@ -967,10 +981,18 @@ impl InsightChatService {
|
||||
// bubble in the rendered transcript shows only what they typed.
|
||||
// Several agentic tools (recall_facts_for_photo, get_file_tags,
|
||||
// get_faces_in_photo, etc.) take a `file_path` arg the model
|
||||
// can't know without being told; in hybrid mode the visual
|
||||
// description belongs here for the same reason.
|
||||
// can't know without being told. `Date taken:` and `GPS:` give
|
||||
// get_sms_messages / reverse_geocode / get_personal_place_at
|
||||
// the args they need. In hybrid mode the visual description
|
||||
// belongs here for the same reason.
|
||||
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
|
||||
let system_content = build_bootstrap_system_message(&persona, &normalized, &visual_block);
|
||||
let system_content = build_bootstrap_system_message(
|
||||
&persona,
|
||||
&normalized,
|
||||
date_taken_str.as_deref(),
|
||||
gps,
|
||||
&visual_block,
|
||||
);
|
||||
let system_msg = ChatMessage::system(system_content);
|
||||
let mut user_msg = ChatMessage::user(req.user_message.clone());
|
||||
if !is_hybrid && let Some(ref img) = image_base64 {
|
||||
@@ -1309,18 +1331,29 @@ fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String {
|
||||
}
|
||||
|
||||
/// Compose the bootstrap system message: the persona on top, followed
|
||||
/// by a photo-context block carrying the file path (and, in hybrid
|
||||
/// mode, the local-vision visual description). Lives in the system
|
||||
/// message — not the user turn — so the rendered transcript shows
|
||||
/// only what the user typed.
|
||||
/// by a photo-context block carrying the file path, date taken (when
|
||||
/// known), GPS (when present), and — in hybrid mode — the local-vision
|
||||
/// visual description. Lives in the system message — not the user
|
||||
/// turn — so the rendered transcript shows only what the user typed.
|
||||
fn build_bootstrap_system_message(
|
||||
persona: &str,
|
||||
normalized_path: &str,
|
||||
date_taken: Option<&str>,
|
||||
gps: Option<(f64, f64)>,
|
||||
visual_block: &str,
|
||||
) -> String {
|
||||
let mut out = persona.trim_end().to_string();
|
||||
out.push_str("\n\n--- PHOTO CONTEXT ---\n");
|
||||
out.push_str(&format!("Photo file path: {}\n", normalized_path));
|
||||
out.push_str(&format!(
|
||||
"Date taken: {}\n",
|
||||
date_taken.unwrap_or("unknown")
|
||||
));
|
||||
if let Some((lat, lon)) = gps {
|
||||
// Four decimal places ≈ 11 m of precision — plenty for any
|
||||
// place-lookup tool, and keeps the prompt short.
|
||||
out.push_str(&format!("GPS: {:.4}, {:.4}\n", lat, lon));
|
||||
}
|
||||
if !visual_block.is_empty() {
|
||||
// visual_block already ends with a newline; no extra separator
|
||||
// needed.
|
||||
@@ -1329,6 +1362,27 @@ fn build_bootstrap_system_message(
|
||||
out
|
||||
}
|
||||
|
||||
/// Resolve a human-readable `YYYY-MM-DD` date string for the photo
|
||||
/// context block. Waterfall: EXIF `date_taken` → filename pattern →
|
||||
/// `None`. The fs-time fallback that `generate_agentic_insight_for_photo`
|
||||
/// uses is intentionally NOT applied here — for chat we'd rather show
|
||||
/// "unknown" than a misleading inode mtime as the photo's date.
|
||||
fn resolve_date_taken_for_context(
|
||||
exif: &Option<crate::database::models::ImageExif>,
|
||||
file_path: &str,
|
||||
) -> Option<String> {
|
||||
let from_exif = exif
|
||||
.as_ref()
|
||||
.and_then(|e| e.date_taken)
|
||||
.and_then(|ts| chrono::DateTime::from_timestamp(ts, 0))
|
||||
.map(|dt| dt.format("%Y-%m-%d").to_string());
|
||||
if from_exif.is_some() {
|
||||
return from_exif;
|
||||
}
|
||||
crate::memories::extract_date_from_filename(file_path)
|
||||
.map(|dt| dt.format("%Y-%m-%d").to_string())
|
||||
}
|
||||
|
||||
/// Pick the backend label for bootstrap. Bootstrap has no stored insight
|
||||
/// to defer to (that's continuation's behaviour), so the default is
|
||||
/// `"local"`. Returns an error if the supplied label is non-empty but
|
||||
@@ -1960,32 +2014,117 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn bootstrap_system_message_includes_path_and_persona() {
|
||||
let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", "");
|
||||
let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", None, None, "");
|
||||
assert!(out.starts_with("you are helpful"));
|
||||
assert!(out.contains("--- PHOTO CONTEXT ---"));
|
||||
assert!(out.contains("Photo file path: pics/IMG.jpg"));
|
||||
// No visual block — should not introduce a stray "Visual" line.
|
||||
// No date supplied → "unknown" so the model doesn't guess.
|
||||
assert!(out.contains("Date taken: unknown"));
|
||||
assert!(!out.contains("GPS:"));
|
||||
assert!(!out.contains("Visual description"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bootstrap_system_message_includes_date_when_supplied() {
|
||||
let out =
|
||||
build_bootstrap_system_message("voice", "pics/IMG.jpg", Some("2014-11-08"), None, "");
|
||||
assert!(out.contains("Date taken: 2014-11-08"));
|
||||
assert!(!out.contains("Date taken: unknown"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bootstrap_system_message_includes_gps_when_present() {
|
||||
let out = build_bootstrap_system_message(
|
||||
"voice",
|
||||
"p.jpg",
|
||||
Some("2020-01-01"),
|
||||
Some((42.36123, -71.05789)),
|
||||
"",
|
||||
);
|
||||
// Four decimals — enough for place lookup, short enough to
|
||||
// not bloat the system prompt.
|
||||
assert!(out.contains("GPS: 42.3612, -71.0579"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bootstrap_system_message_omits_gps_when_none() {
|
||||
let out = build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), None, "");
|
||||
assert!(!out.contains("GPS:"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bootstrap_system_message_includes_visual_block_when_supplied() {
|
||||
let visual = "Visual description (from local vision model):\nA dog in a park.\n";
|
||||
let out = build_bootstrap_system_message("voice", "p.jpg", visual);
|
||||
let out =
|
||||
build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), None, visual);
|
||||
assert!(out.contains("Photo file path: p.jpg"));
|
||||
assert!(out.contains("A dog in a park"));
|
||||
// Path appears before visual.
|
||||
// Path before date before visual.
|
||||
let path_pos = out.find("Photo file path:").unwrap();
|
||||
let date_pos = out.find("Date taken:").unwrap();
|
||||
let visual_pos = out.find("A dog in a park").unwrap();
|
||||
assert!(path_pos < visual_pos);
|
||||
assert!(path_pos < date_pos);
|
||||
assert!(date_pos < visual_pos);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bootstrap_system_message_trims_persona_trailing_whitespace() {
|
||||
// Two consecutive newlines before the photo-context divider —
|
||||
// any trailing whitespace from the persona must be collapsed
|
||||
// so we don't end up with `\n\n\n\n--- PHOTO CONTEXT ---`.
|
||||
let out = build_bootstrap_system_message("voice \n\n\n", "p.jpg", "");
|
||||
let out = build_bootstrap_system_message("voice \n\n\n", "p.jpg", None, None, "");
|
||||
assert!(out.contains("voice\n\n--- PHOTO CONTEXT ---"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_taken_for_context_prefers_exif_over_filename() {
|
||||
// EXIF wins when both are present (matches the canonical
|
||||
// date_resolver waterfall — EXIF is more reliable than
|
||||
// import-named filenames).
|
||||
let exif = Some(crate::database::models::ImageExif {
|
||||
id: 0,
|
||||
library_id: 1,
|
||||
file_path: "Screenshot_2014-06-01.png".to_string(),
|
||||
camera_make: None,
|
||||
camera_model: None,
|
||||
lens_model: None,
|
||||
width: None,
|
||||
height: None,
|
||||
orientation: None,
|
||||
gps_latitude: None,
|
||||
gps_longitude: None,
|
||||
gps_altitude: None,
|
||||
focal_length: None,
|
||||
aperture: None,
|
||||
shutter_speed: None,
|
||||
iso: None,
|
||||
// 2021-08-15 12:00:00 UTC
|
||||
date_taken: Some(1_629_028_800),
|
||||
created_time: 0,
|
||||
last_modified: 0,
|
||||
content_hash: None,
|
||||
size_bytes: None,
|
||||
phash_64: None,
|
||||
dhash_64: None,
|
||||
duplicate_of_hash: None,
|
||||
duplicate_decided_at: None,
|
||||
date_taken_source: None,
|
||||
original_date_taken: None,
|
||||
original_date_taken_source: None,
|
||||
});
|
||||
let out = resolve_date_taken_for_context(&exif, "Screenshot_2014-06-01.png");
|
||||
assert_eq!(out.as_deref(), Some("2021-08-15"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_taken_for_context_falls_back_to_filename_when_no_exif() {
|
||||
// memories::extract_date_from_filename requires date+time in
|
||||
// the filename — date-only patterns aren't matched. Use the
|
||||
// canonical screenshot pattern for the regression case.
|
||||
let out = resolve_date_taken_for_context(&None, "Screenshot_2014-06-01-20-44-50.png");
|
||||
assert_eq!(out.as_deref(), Some("2014-06-01"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_taken_for_context_returns_none_when_neither_source() {
|
||||
let out = resolve_date_taken_for_context(&None, "DSC_5171.JPG");
|
||||
assert!(out.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -229,6 +229,17 @@ impl InsightGenerator {
|
||||
None
|
||||
}
|
||||
|
||||
/// Look up the EXIF row for a photo. Returns `None` when no row
|
||||
/// exists yet (file watcher hasn't reached it) or the DAO call
|
||||
/// fails. Used by callers — including the chat-bootstrap path —
|
||||
/// that need a few specific fields (date_taken, GPS) without
|
||||
/// duplicating DAO plumbing.
|
||||
pub(crate) fn fetch_exif(&self, file_path: &str) -> Option<crate::database::models::ImageExif> {
|
||||
let cx = opentelemetry::Context::current();
|
||||
let mut dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
|
||||
dao.get_exif(&cx, file_path).ok().flatten()
|
||||
}
|
||||
|
||||
/// Load image file, resize it, and encode as base64 for vision models
|
||||
/// Resizes to max 1024px on longest edge to reduce context usage
|
||||
pub(crate) fn load_image_as_base64(&self, file_path: &str) -> Result<String> {
|
||||
|
||||
Reference in New Issue
Block a user