feat: photo-first RAG enrichment — early vision description + tags in RAG and search context

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Author: Cameron
Date:   2026-03-18 17:23:49 -04:00
Commit: 8196ef94a0
Parent: e58b8fe743

View File

@@ -153,6 +153,7 @@ impl InsightGenerator {
contact: Option<&str>,
topics: Option<&[String]>,
limit: usize,
extra_context: Option<&str>,
) -> Result<Vec<String>> {
let tracer = global_tracer();
let span = tracer.start_with_context("ai.rag.filter_historical", parent_cx);
@@ -174,7 +175,7 @@ impl InsightGenerator {
}
let query_results = self
.find_relevant_messages_rag(date, location, contact, topics, limit * 2)
.find_relevant_messages_rag(date, location, contact, topics, limit * 2, extra_context)
.await?;
filter_cx.span().set_attribute(KeyValue::new(
@@ -236,6 +237,7 @@ impl InsightGenerator {
contact: Option<&str>,
topics: Option<&[String]>,
limit: usize,
extra_context: Option<&str>,
) -> Result<Vec<String>> {
let tracer = global_tracer();
let current_cx = opentelemetry::Context::current();
@@ -250,7 +252,7 @@ impl InsightGenerator {
}
// Build query string - prioritize topics if available (semantically meaningful)
let query = if let Some(topics) = topics {
let base_query = if let Some(topics) = topics {
if !topics.is_empty() {
// Use topics for semantic search - these are actual content keywords
let topic_str = topics.join(", ");
@@ -268,6 +270,12 @@ impl InsightGenerator {
Self::build_metadata_query(date, location, contact)
};
let query = if let Some(extra) = extra_context {
format!("{}. {}", base_query, extra)
} else {
base_query
};
span.set_attribute(KeyValue::new("query", query.clone()));
// Create context with this span for child operations
@@ -718,6 +726,20 @@ impl InsightGenerator {
.set_attribute(KeyValue::new("contact", c.clone()));
}
// Fetch file tags (used to enrich RAG and final context)
let tag_names: Vec<String> = {
let mut dao = self.tag_dao.lock().expect("Unable to lock TagDao");
dao.get_tags_for_path(&insight_cx, &file_path)
.unwrap_or_else(|e| {
log::warn!("Failed to fetch tags for insight {}: {}", file_path, e);
Vec::new()
})
.into_iter()
.map(|t| t.name)
.collect()
};
log::info!("Fetched {} tags for photo: {:?}", tag_names.len(), tag_names);
// 4. Get location name from GPS coordinates (needed for RAG query)
let location = match exif {
Some(ref exif) => {
@@ -744,6 +766,90 @@ impl InsightGenerator {
None => None,
};
// Check if the model has vision capabilities
let model_to_check = ollama_client.primary_model.clone();
let has_vision = match OllamaClient::check_model_capabilities(
&ollama_client.primary_url,
&model_to_check,
)
.await
{
Ok(capabilities) => {
log::info!(
"Model '{}' vision capability: {}",
model_to_check,
capabilities.has_vision
);
capabilities.has_vision
}
Err(e) => {
log::warn!(
"Failed to check vision capabilities for model '{}', assuming no vision support: {}",
model_to_check,
e
);
false
}
};
insight_cx
.span()
.set_attribute(KeyValue::new("model_has_vision", has_vision));
// Load image and encode as base64 only if model supports vision
let image_base64 = if has_vision {
match self.load_image_as_base64(&file_path) {
Ok(b64) => {
log::info!(
"Successfully loaded image for vision-capable model '{}'",
model_to_check
);
Some(b64)
}
Err(e) => {
log::warn!("Failed to load image for vision model: {}", e);
None
}
}
} else {
log::info!(
"Model '{}' does not support vision, skipping image processing",
model_to_check
);
None
};
// Generate brief photo description for RAG enrichment (vision models only)
let photo_description: Option<String> = if let Some(ref img_b64) = image_base64 {
match ollama_client.generate_photo_description(img_b64).await {
Ok(desc) => {
log::info!("Photo description for RAG enrichment: {}", desc);
Some(desc)
}
Err(e) => {
log::warn!("Failed to generate photo description for RAG enrichment: {}", e);
None
}
}
} else {
None
};
// Build enriched context string for RAG: photo description + tags
// (SMS topics are passed separately to RAG functions)
let enriched_query: Option<String> = {
let mut parts: Vec<String> = Vec::new();
if let Some(ref desc) = photo_description {
parts.push(desc.clone());
}
if !tag_names.is_empty() {
parts.push(format!("tags: {}", tag_names.join(", ")));
}
if parts.is_empty() { None } else { Some(parts.join(". ")) }
};
let mut search_enrichment: Option<String> = enriched_query.clone();
// 5. Intelligent retrieval: Hybrid approach for better context
let mut sms_summary = None;
let mut used_rag = false;
@@ -782,6 +888,21 @@ impl InsightGenerator {
log::info!("Extracted topics for query enrichment: {:?}", topics);
// Build full search enrichment: SMS topics + photo description + tag names
search_enrichment = {
let mut parts: Vec<String> = Vec::new();
if !topics.is_empty() {
parts.push(topics.join(", "));
}
if let Some(ref desc) = photo_description {
parts.push(desc.clone());
}
if !tag_names.is_empty() {
parts.push(format!("tags: {}", tag_names.join(", ")));
}
if parts.is_empty() { None } else { Some(parts.join(". ")) }
};
// Step 3: Try historical RAG (>30 days ago) using extracted topics
let topics_slice = if topics.is_empty() {
None
@@ -796,6 +917,7 @@ impl InsightGenerator {
contact.as_deref(),
topics_slice,
10, // Top 10 historical matches
enriched_query.as_deref(),
)
.await
{
@@ -858,7 +980,7 @@ impl InsightGenerator {
log::info!("No immediate messages found, trying basic RAG as fallback");
// Fallback to basic RAG even without strong query
match self
.find_relevant_messages_rag(date_taken, None, contact.as_deref(), None, 20)
.find_relevant_messages_rag(date_taken, None, contact.as_deref(), None, 20, enriched_query.as_deref())
.await
{
Ok(rag_messages) if !rag_messages.is_empty() => {
@@ -955,19 +1077,25 @@ impl InsightGenerator {
timestamp,
location.as_deref(),
contact.as_deref(),
None, // enrichment — wired up in Task 5
search_enrichment.as_deref(),
)
.await
.ok()
.flatten();
// 7. Combine all context sources with equal weight
let tags_context = if tag_names.is_empty() {
None
} else {
Some(tag_names.join(", "))
};
let combined_context = Self::combine_contexts(
sms_summary,
calendar_context,
location_context,
search_context,
None, // tags — wired up in Task 5
tags_context,
);
log::info!(
@@ -975,59 +1103,6 @@ impl InsightGenerator {
combined_context.len()
);
// 8. Check if the model has vision capabilities
let model_to_check = ollama_client.primary_model.clone();
let has_vision = match OllamaClient::check_model_capabilities(
&ollama_client.primary_url,
&model_to_check,
)
.await
{
Ok(capabilities) => {
log::info!(
"Model '{}' vision capability: {}",
model_to_check,
capabilities.has_vision
);
capabilities.has_vision
}
Err(e) => {
log::warn!(
"Failed to check vision capabilities for model '{}', assuming no vision support: {}",
model_to_check,
e
);
false
}
};
insight_cx
.span()
.set_attribute(KeyValue::new("model_has_vision", has_vision));
// 9. Load image and encode as base64 only if model supports vision
let image_base64 = if has_vision {
match self.load_image_as_base64(&file_path) {
Ok(b64) => {
log::info!(
"Successfully loaded image for vision-capable model '{}'",
model_to_check
);
Some(b64)
}
Err(e) => {
log::warn!("Failed to load image for vision model: {}", e);
None
}
}
} else {
log::info!(
"Model '{}' does not support vision, skipping image processing",
model_to_check
);
None
};
// 10. Generate summary first, then derive title from the summary
let summary = ollama_client
.generate_photo_summary(
@@ -1036,7 +1111,7 @@ impl InsightGenerator {
contact.as_deref(),
Some(&combined_context),
custom_system_prompt.as_deref(),
image_base64,
image_base64.clone(),
)
.await?;