feature/streaming-insights #85

Merged
cameron merged 5 commits from feature/streaming-insights into master 2026-05-09 20:57:19 +00:00
2 changed files with 166 additions and 16 deletions
Showing only changes of commit 3699e059a2 - Show all commits

View File

@@ -934,6 +934,20 @@ impl InsightChatService {
// discusses metadata-only is still useful.
let image_base64: Option<String> = self.generator.load_image_as_base64(&normalized).ok();
// EXIF lookup once — date_taken and GPS go into the photo
// context block in the system message. Without these the model
// hallucinates dates / GPS-keyed tool args (`get_sms_messages`
// would otherwise default to today's date and miss every
// historical photo).
let exif = self.generator.fetch_exif(&normalized);
let date_taken_str = resolve_date_taken_for_context(&exif, &normalized);
let gps = exif
.as_ref()
.and_then(|e| match (e.gps_latitude, e.gps_longitude) {
(Some(lat), Some(lon)) => Some((lat as f64, lon as f64)),
_ => None,
});
// Hybrid backend: pre-describe the image via local Ollama vision
// so OpenRouter chat models (which can't see images directly) get
// the visual description as text. Mirrors the same pre-describe
@@ -967,10 +981,18 @@ impl InsightChatService {
// bubble in the rendered transcript shows only what they typed.
// Several agentic tools (recall_facts_for_photo, get_file_tags,
// get_faces_in_photo, etc.) take a `file_path` arg the model
// can't know without being told; in hybrid mode the visual
// description belongs here for the same reason.
// can't know without being told. `Date taken:` and `GPS:` give
// get_sms_messages / reverse_geocode / get_personal_place_at
// the args they need. In hybrid mode the visual description
// belongs here for the same reason.
let persona = resolve_bootstrap_system_prompt(req.system_prompt.as_deref());
let system_content = build_bootstrap_system_message(&persona, &normalized, &visual_block);
let system_content = build_bootstrap_system_message(
&persona,
&normalized,
date_taken_str.as_deref(),
gps,
&visual_block,
);
let system_msg = ChatMessage::system(system_content);
let mut user_msg = ChatMessage::user(req.user_message.clone());
if !is_hybrid && let Some(ref img) = image_base64 {
@@ -1309,18 +1331,29 @@ fn resolve_bootstrap_system_prompt(supplied: Option<&str>) -> String {
}
/// Compose the bootstrap system message: the persona on top, followed
/// by a photo-context block carrying the file path (and, in hybrid
/// mode, the local-vision visual description). Lives in the system
/// message — not the user turn — so the rendered transcript shows
/// only what the user typed.
/// by a photo-context block carrying the file path, date taken (when
/// known), GPS (when present), and — in hybrid mode — the local-vision
/// visual description. Lives in the system message — not the user
/// turn — so the rendered transcript shows only what the user typed.
fn build_bootstrap_system_message(
persona: &str,
normalized_path: &str,
date_taken: Option<&str>,
gps: Option<(f64, f64)>,
visual_block: &str,
) -> String {
let mut out = persona.trim_end().to_string();
out.push_str("\n\n--- PHOTO CONTEXT ---\n");
out.push_str(&format!("Photo file path: {}\n", normalized_path));
out.push_str(&format!(
"Date taken: {}\n",
date_taken.unwrap_or("unknown")
));
if let Some((lat, lon)) = gps {
// Four decimal places ≈ 11 m of precision — plenty for any
// place-lookup tool, and keeps the prompt short.
out.push_str(&format!("GPS: {:.4}, {:.4}\n", lat, lon));
}
if !visual_block.is_empty() {
// visual_block already ends with a newline; no extra separator
// needed.
@@ -1329,6 +1362,27 @@ fn build_bootstrap_system_message(
out
}
/// Produce the `YYYY-MM-DD` string shown as "Date taken" in the photo
/// context block.
///
/// Sources are tried in order: the EXIF row's `date_taken` (a Unix
/// timestamp in seconds, rendered in UTC), then a date parsed out of the
/// file name. `None` means neither source yielded a date.
///
/// The fs-time fallback that `generate_agentic_insight_for_photo` uses is
/// deliberately left out — for chat, showing "unknown" beats presenting a
/// misleading inode mtime as the photo's date.
fn resolve_date_taken_for_context(
    exif: &Option<crate::database::models::ImageExif>,
    file_path: &str,
) -> Option<String> {
    const DATE_FORMAT: &str = "%Y-%m-%d";

    // EXIF takes priority. A missing row, a missing `date_taken`, or a
    // timestamp chrono cannot represent all fall through to the file name.
    if let Some(taken) = exif
        .as_ref()
        .and_then(|row| row.date_taken)
        .and_then(|secs| chrono::DateTime::from_timestamp(secs, 0))
    {
        return Some(taken.format(DATE_FORMAT).to_string());
    }

    let from_name = crate::memories::extract_date_from_filename(file_path)?;
    Some(from_name.format(DATE_FORMAT).to_string())
}
/// Pick the backend label for bootstrap. Bootstrap has no stored insight
/// to defer to (that's continuation's behaviour), so the default is
/// `"local"`. Returns an error if the supplied label is non-empty but
@@ -1960,32 +2014,117 @@ mod tests {
// Baseline case: no date, no GPS, no visual block. The message must
// start with the persona, carry the photo-context divider and the file
// path, show the "unknown" date placeholder, and contain no GPS or
// visual-description line.
#[test]
fn bootstrap_system_message_includes_path_and_persona() {
// NOTE(review): the 3-argument call below does not match the
// 5-parameter signature shown in this change; it looks like the
// pre-change line kept by the diff view, superseded by the call
// after it — confirm.
let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", "");
let out = build_bootstrap_system_message("you are helpful", "pics/IMG.jpg", None, None, "");
assert!(out.starts_with("you are helpful"));
assert!(out.contains("--- PHOTO CONTEXT ---"));
assert!(out.contains("Photo file path: pics/IMG.jpg"));
// No visual block — should not introduce a stray "Visual" line.
// No date supplied → "unknown" so the model doesn't guess.
assert!(out.contains("Date taken: unknown"));
assert!(!out.contains("GPS:"));
assert!(!out.contains("Visual description"));
}
#[test]
fn bootstrap_system_message_includes_date_when_supplied() {
    // A supplied date must be rendered verbatim, and the "unknown"
    // placeholder must not appear alongside it.
    let supplied_date = Some("2014-11-08");
    let message = build_bootstrap_system_message("voice", "pics/IMG.jpg", supplied_date, None, "");

    let has_date = message.contains("Date taken: 2014-11-08");
    let has_placeholder = message.contains("Date taken: unknown");
    assert!(has_date);
    assert!(!has_placeholder);
}
#[test]
fn bootstrap_system_message_includes_gps_when_present() {
    // The coordinates go in with five decimals; the context block is
    // expected to render them rounded to four — enough for place
    // lookup without bloating the system prompt.
    let coords = (42.36123, -71.05789);
    let message =
        build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), Some(coords), "");
    assert!(message.contains("GPS: 42.3612, -71.0579"));
}
#[test]
fn bootstrap_system_message_omits_gps_when_none() {
    // Without coordinates there must be no "GPS:" line at all.
    let gps: Option<(f64, f64)> = None;
    let message = build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), gps, "");
    assert_eq!(message.find("GPS:"), None);
}
// With a visual block supplied, the message must contain the file path
// and the description text, ordered path → date → visual description.
#[test]
fn bootstrap_system_message_includes_visual_block_when_supplied() {
let visual = "Visual description (from local vision model):\nA dog in a park.\n";
// NOTE(review): the 3-argument call below does not match the
// 5-parameter signature shown in this change; it looks like the
// pre-change line kept by the diff view, superseded by the call
// after it — confirm.
let out = build_bootstrap_system_message("voice", "p.jpg", visual);
let out =
build_bootstrap_system_message("voice", "p.jpg", Some("2020-01-01"), None, visual);
assert!(out.contains("Photo file path: p.jpg"));
assert!(out.contains("A dog in a park"));
// Path before date before visual.
let path_pos = out.find("Photo file path:").unwrap();
let date_pos = out.find("Date taken:").unwrap();
let visual_pos = out.find("A dog in a park").unwrap();
// Implied by the two ordering assertions that follow it.
assert!(path_pos < visual_pos);
assert!(path_pos < date_pos);
assert!(date_pos < visual_pos);
}
// Trailing whitespace on the persona must not leak into the separator
// that precedes the photo-context divider.
#[test]
fn bootstrap_system_message_trims_persona_trailing_whitespace() {
// Two consecutive newlines before the photo-context divider —
// any trailing whitespace from the persona must be collapsed
// so we don't end up with `\n\n\n\n--- PHOTO CONTEXT ---`.
// NOTE(review): the 3-argument call below does not match the
// 5-parameter signature shown in this change; it looks like the
// pre-change line kept by the diff view, superseded by the call
// after it — confirm.
let out = build_bootstrap_system_message("voice \n\n\n", "p.jpg", "");
let out = build_bootstrap_system_message("voice \n\n\n", "p.jpg", None, None, "");
assert!(out.contains("voice\n\n--- PHOTO CONTEXT ---"));
}
#[test]
fn date_taken_for_context_prefers_exif_over_filename() {
    // When the EXIF row carries a `date_taken`, the resolved date must
    // come from it rather than from the file name (matches the
    // canonical date_resolver waterfall — EXIF is more reliable than
    // import-named filenames).
    let file_name = "Screenshot_2014-06-01.png";
    // 2021-08-15 12:00:00 UTC
    let exif_timestamp = 1_629_028_800;

    let row = crate::database::models::ImageExif {
        id: 0,
        library_id: 1,
        file_path: file_name.to_string(),
        camera_make: None,
        camera_model: None,
        lens_model: None,
        width: None,
        height: None,
        orientation: None,
        gps_latitude: None,
        gps_longitude: None,
        gps_altitude: None,
        focal_length: None,
        aperture: None,
        shutter_speed: None,
        iso: None,
        date_taken: Some(exif_timestamp),
        created_time: 0,
        last_modified: 0,
        content_hash: None,
        size_bytes: None,
        phash_64: None,
        dhash_64: None,
        duplicate_of_hash: None,
        duplicate_decided_at: None,
        date_taken_source: None,
        original_date_taken: None,
        original_date_taken_source: None,
    };

    let resolved = resolve_date_taken_for_context(&Some(row), file_name);
    assert_eq!(resolved, Some("2021-08-15".to_string()));
}
#[test]
fn date_taken_for_context_falls_back_to_filename_when_no_exif() {
    // With no EXIF row the date has to come from the file name.
    // memories::extract_date_from_filename only matches names that
    // carry both date and time, so the canonical screenshot pattern
    // is used for this regression case.
    let file_name = "Screenshot_2014-06-01-20-44-50.png";
    let resolved = resolve_date_taken_for_context(&None, file_name);
    assert_eq!(resolved, Some(String::from("2014-06-01")));
}
#[test]
fn date_taken_for_context_returns_none_when_neither_source() {
    // No EXIF row and a file name with no embedded date: nothing to
    // resolve, so the caller gets `None`.
    let resolved = resolve_date_taken_for_context(&None, "DSC_5171.JPG");
    assert_eq!(resolved, None);
}
}

View File

@@ -229,6 +229,17 @@ impl InsightGenerator {
None
}
/// Fetch the stored EXIF row for `file_path`.
///
/// Yields `None` both when no row exists yet (the file watcher hasn't
/// reached the photo) and when the DAO call fails — callers cannot tell
/// the two apart. Intended for callers, such as the chat-bootstrap path,
/// that only need a few fields (date_taken, GPS) and shouldn't have to
/// repeat the DAO plumbing.
///
/// # Panics
///
/// Panics if the `exif_dao` lock cannot be acquired.
pub(crate) fn fetch_exif(&self, file_path: &str) -> Option<crate::database::models::ImageExif> {
    let otel_cx = opentelemetry::Context::current();
    let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao");
    // A DAO error is treated the same as "no row".
    exif_dao.get_exif(&otel_cx, file_path).unwrap_or(None)
}
/// Load image file, resize it, and encode as base64 for vision models
/// Resizes to max 1024px on longest edge to reduce context usage
pub(crate) fn load_image_as_base64(&self, file_path: &str) -> Result<String> {