feat: photo-first RAG enrichment — early vision description + tags in RAG and search context
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -153,6 +153,7 @@ impl InsightGenerator {
|
||||
contact: Option<&str>,
|
||||
topics: Option<&[String]>,
|
||||
limit: usize,
|
||||
extra_context: Option<&str>,
|
||||
) -> Result<Vec<String>> {
|
||||
let tracer = global_tracer();
|
||||
let span = tracer.start_with_context("ai.rag.filter_historical", parent_cx);
|
||||
@@ -174,7 +175,7 @@ impl InsightGenerator {
|
||||
}
|
||||
|
||||
let query_results = self
|
||||
.find_relevant_messages_rag(date, location, contact, topics, limit * 2)
|
||||
.find_relevant_messages_rag(date, location, contact, topics, limit * 2, extra_context)
|
||||
.await?;
|
||||
|
||||
filter_cx.span().set_attribute(KeyValue::new(
|
||||
@@ -236,6 +237,7 @@ impl InsightGenerator {
|
||||
contact: Option<&str>,
|
||||
topics: Option<&[String]>,
|
||||
limit: usize,
|
||||
extra_context: Option<&str>,
|
||||
) -> Result<Vec<String>> {
|
||||
let tracer = global_tracer();
|
||||
let current_cx = opentelemetry::Context::current();
|
||||
@@ -250,7 +252,7 @@ impl InsightGenerator {
|
||||
}
|
||||
|
||||
// Build query string - prioritize topics if available (semantically meaningful)
|
||||
let query = if let Some(topics) = topics {
|
||||
let base_query = if let Some(topics) = topics {
|
||||
if !topics.is_empty() {
|
||||
// Use topics for semantic search - these are actual content keywords
|
||||
let topic_str = topics.join(", ");
|
||||
@@ -268,6 +270,12 @@ impl InsightGenerator {
|
||||
Self::build_metadata_query(date, location, contact)
|
||||
};
|
||||
|
||||
let query = if let Some(extra) = extra_context {
|
||||
format!("{}. {}", base_query, extra)
|
||||
} else {
|
||||
base_query
|
||||
};
|
||||
|
||||
span.set_attribute(KeyValue::new("query", query.clone()));
|
||||
|
||||
// Create context with this span for child operations
|
||||
@@ -718,6 +726,20 @@ impl InsightGenerator {
|
||||
.set_attribute(KeyValue::new("contact", c.clone()));
|
||||
}
|
||||
|
||||
// Fetch file tags (used to enrich RAG and final context)
|
||||
let tag_names: Vec<String> = {
|
||||
let mut dao = self.tag_dao.lock().expect("Unable to lock TagDao");
|
||||
dao.get_tags_for_path(&insight_cx, &file_path)
|
||||
.unwrap_or_else(|e| {
|
||||
log::warn!("Failed to fetch tags for insight {}: {}", file_path, e);
|
||||
Vec::new()
|
||||
})
|
||||
.into_iter()
|
||||
.map(|t| t.name)
|
||||
.collect()
|
||||
};
|
||||
log::info!("Fetched {} tags for photo: {:?}", tag_names.len(), tag_names);
|
||||
|
||||
// 4. Get location name from GPS coordinates (needed for RAG query)
|
||||
let location = match exif {
|
||||
Some(ref exif) => {
|
||||
@@ -744,6 +766,90 @@ impl InsightGenerator {
|
||||
None => None,
|
||||
};
|
||||
|
||||
// Check if the model has vision capabilities
|
||||
let model_to_check = ollama_client.primary_model.clone();
|
||||
let has_vision = match OllamaClient::check_model_capabilities(
|
||||
&ollama_client.primary_url,
|
||||
&model_to_check,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(capabilities) => {
|
||||
log::info!(
|
||||
"Model '{}' vision capability: {}",
|
||||
model_to_check,
|
||||
capabilities.has_vision
|
||||
);
|
||||
capabilities.has_vision
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!(
|
||||
"Failed to check vision capabilities for model '{}', assuming no vision support: {}",
|
||||
model_to_check,
|
||||
e
|
||||
);
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
insight_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("model_has_vision", has_vision));
|
||||
|
||||
// Load image and encode as base64 only if model supports vision
|
||||
let image_base64 = if has_vision {
|
||||
match self.load_image_as_base64(&file_path) {
|
||||
Ok(b64) => {
|
||||
log::info!(
|
||||
"Successfully loaded image for vision-capable model '{}'",
|
||||
model_to_check
|
||||
);
|
||||
Some(b64)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Failed to load image for vision model: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log::info!(
|
||||
"Model '{}' does not support vision, skipping image processing",
|
||||
model_to_check
|
||||
);
|
||||
None
|
||||
};
|
||||
|
||||
// Generate brief photo description for RAG enrichment (vision models only)
|
||||
let photo_description: Option<String> = if let Some(ref img_b64) = image_base64 {
|
||||
match ollama_client.generate_photo_description(img_b64).await {
|
||||
Ok(desc) => {
|
||||
log::info!("Photo description for RAG enrichment: {}", desc);
|
||||
Some(desc)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Failed to generate photo description for RAG enrichment: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Build enriched context string for RAG: photo description + tags
|
||||
// (SMS topics are passed separately to RAG functions)
|
||||
let enriched_query: Option<String> = {
|
||||
let mut parts: Vec<String> = Vec::new();
|
||||
if let Some(ref desc) = photo_description {
|
||||
parts.push(desc.clone());
|
||||
}
|
||||
if !tag_names.is_empty() {
|
||||
parts.push(format!("tags: {}", tag_names.join(", ")));
|
||||
}
|
||||
if parts.is_empty() { None } else { Some(parts.join(". ")) }
|
||||
};
|
||||
|
||||
let mut search_enrichment: Option<String> = enriched_query.clone();
|
||||
|
||||
// 5. Intelligent retrieval: Hybrid approach for better context
|
||||
let mut sms_summary = None;
|
||||
let mut used_rag = false;
|
||||
@@ -782,6 +888,21 @@ impl InsightGenerator {
|
||||
|
||||
log::info!("Extracted topics for query enrichment: {:?}", topics);
|
||||
|
||||
// Build full search enrichment: SMS topics + photo description + tag names
|
||||
search_enrichment = {
|
||||
let mut parts: Vec<String> = Vec::new();
|
||||
if !topics.is_empty() {
|
||||
parts.push(topics.join(", "));
|
||||
}
|
||||
if let Some(ref desc) = photo_description {
|
||||
parts.push(desc.clone());
|
||||
}
|
||||
if !tag_names.is_empty() {
|
||||
parts.push(format!("tags: {}", tag_names.join(", ")));
|
||||
}
|
||||
if parts.is_empty() { None } else { Some(parts.join(". ")) }
|
||||
};
|
||||
|
||||
// Step 3: Try historical RAG (>30 days ago) using extracted topics
|
||||
let topics_slice = if topics.is_empty() {
|
||||
None
|
||||
@@ -796,6 +917,7 @@ impl InsightGenerator {
|
||||
contact.as_deref(),
|
||||
topics_slice,
|
||||
10, // Top 10 historical matches
|
||||
enriched_query.as_deref(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -858,7 +980,7 @@ impl InsightGenerator {
|
||||
log::info!("No immediate messages found, trying basic RAG as fallback");
|
||||
// Fallback to basic RAG even without strong query
|
||||
match self
|
||||
.find_relevant_messages_rag(date_taken, None, contact.as_deref(), None, 20)
|
||||
.find_relevant_messages_rag(date_taken, None, contact.as_deref(), None, 20, enriched_query.as_deref())
|
||||
.await
|
||||
{
|
||||
Ok(rag_messages) if !rag_messages.is_empty() => {
|
||||
@@ -955,19 +1077,25 @@ impl InsightGenerator {
|
||||
timestamp,
|
||||
location.as_deref(),
|
||||
contact.as_deref(),
|
||||
None, // enrichment — wired up in Task 5
|
||||
search_enrichment.as_deref(),
|
||||
)
|
||||
.await
|
||||
.ok()
|
||||
.flatten();
|
||||
|
||||
// 7. Combine all context sources with equal weight
|
||||
let tags_context = if tag_names.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(tag_names.join(", "))
|
||||
};
|
||||
|
||||
let combined_context = Self::combine_contexts(
|
||||
sms_summary,
|
||||
calendar_context,
|
||||
location_context,
|
||||
search_context,
|
||||
None, // tags — wired up in Task 5
|
||||
tags_context,
|
||||
);
|
||||
|
||||
log::info!(
|
||||
@@ -975,59 +1103,6 @@ impl InsightGenerator {
|
||||
combined_context.len()
|
||||
);
|
||||
|
||||
// 8. Check if the model has vision capabilities
|
||||
let model_to_check = ollama_client.primary_model.clone();
|
||||
let has_vision = match OllamaClient::check_model_capabilities(
|
||||
&ollama_client.primary_url,
|
||||
&model_to_check,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(capabilities) => {
|
||||
log::info!(
|
||||
"Model '{}' vision capability: {}",
|
||||
model_to_check,
|
||||
capabilities.has_vision
|
||||
);
|
||||
capabilities.has_vision
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!(
|
||||
"Failed to check vision capabilities for model '{}', assuming no vision support: {}",
|
||||
model_to_check,
|
||||
e
|
||||
);
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
insight_cx
|
||||
.span()
|
||||
.set_attribute(KeyValue::new("model_has_vision", has_vision));
|
||||
|
||||
// 9. Load image and encode as base64 only if model supports vision
|
||||
let image_base64 = if has_vision {
|
||||
match self.load_image_as_base64(&file_path) {
|
||||
Ok(b64) => {
|
||||
log::info!(
|
||||
"Successfully loaded image for vision-capable model '{}'",
|
||||
model_to_check
|
||||
);
|
||||
Some(b64)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Failed to load image for vision model: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log::info!(
|
||||
"Model '{}' does not support vision, skipping image processing",
|
||||
model_to_check
|
||||
);
|
||||
None
|
||||
};
|
||||
|
||||
// 10. Generate summary first, then derive title from the summary
|
||||
let summary = ollama_client
|
||||
.generate_photo_summary(
|
||||
@@ -1036,7 +1111,7 @@ impl InsightGenerator {
|
||||
contact.as_deref(),
|
||||
Some(&combined_context),
|
||||
custom_system_prompt.as_deref(),
|
||||
image_base64,
|
||||
image_base64.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user