feat: photo-first RAG enrichment — early vision description + tags in RAG and search context
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -153,6 +153,7 @@ impl InsightGenerator {
|
|||||||
contact: Option<&str>,
|
contact: Option<&str>,
|
||||||
topics: Option<&[String]>,
|
topics: Option<&[String]>,
|
||||||
limit: usize,
|
limit: usize,
|
||||||
|
extra_context: Option<&str>,
|
||||||
) -> Result<Vec<String>> {
|
) -> Result<Vec<String>> {
|
||||||
let tracer = global_tracer();
|
let tracer = global_tracer();
|
||||||
let span = tracer.start_with_context("ai.rag.filter_historical", parent_cx);
|
let span = tracer.start_with_context("ai.rag.filter_historical", parent_cx);
|
||||||
@@ -174,7 +175,7 @@ impl InsightGenerator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let query_results = self
|
let query_results = self
|
||||||
.find_relevant_messages_rag(date, location, contact, topics, limit * 2)
|
.find_relevant_messages_rag(date, location, contact, topics, limit * 2, extra_context)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
filter_cx.span().set_attribute(KeyValue::new(
|
filter_cx.span().set_attribute(KeyValue::new(
|
||||||
@@ -236,6 +237,7 @@ impl InsightGenerator {
|
|||||||
contact: Option<&str>,
|
contact: Option<&str>,
|
||||||
topics: Option<&[String]>,
|
topics: Option<&[String]>,
|
||||||
limit: usize,
|
limit: usize,
|
||||||
|
extra_context: Option<&str>,
|
||||||
) -> Result<Vec<String>> {
|
) -> Result<Vec<String>> {
|
||||||
let tracer = global_tracer();
|
let tracer = global_tracer();
|
||||||
let current_cx = opentelemetry::Context::current();
|
let current_cx = opentelemetry::Context::current();
|
||||||
@@ -250,7 +252,7 @@ impl InsightGenerator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Build query string - prioritize topics if available (semantically meaningful)
|
// Build query string - prioritize topics if available (semantically meaningful)
|
||||||
let query = if let Some(topics) = topics {
|
let base_query = if let Some(topics) = topics {
|
||||||
if !topics.is_empty() {
|
if !topics.is_empty() {
|
||||||
// Use topics for semantic search - these are actual content keywords
|
// Use topics for semantic search - these are actual content keywords
|
||||||
let topic_str = topics.join(", ");
|
let topic_str = topics.join(", ");
|
||||||
@@ -268,6 +270,12 @@ impl InsightGenerator {
|
|||||||
Self::build_metadata_query(date, location, contact)
|
Self::build_metadata_query(date, location, contact)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let query = if let Some(extra) = extra_context {
|
||||||
|
format!("{}. {}", base_query, extra)
|
||||||
|
} else {
|
||||||
|
base_query
|
||||||
|
};
|
||||||
|
|
||||||
span.set_attribute(KeyValue::new("query", query.clone()));
|
span.set_attribute(KeyValue::new("query", query.clone()));
|
||||||
|
|
||||||
// Create context with this span for child operations
|
// Create context with this span for child operations
|
||||||
@@ -718,6 +726,20 @@ impl InsightGenerator {
|
|||||||
.set_attribute(KeyValue::new("contact", c.clone()));
|
.set_attribute(KeyValue::new("contact", c.clone()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fetch file tags (used to enrich RAG and final context)
|
||||||
|
let tag_names: Vec<String> = {
|
||||||
|
let mut dao = self.tag_dao.lock().expect("Unable to lock TagDao");
|
||||||
|
dao.get_tags_for_path(&insight_cx, &file_path)
|
||||||
|
.unwrap_or_else(|e| {
|
||||||
|
log::warn!("Failed to fetch tags for insight {}: {}", file_path, e);
|
||||||
|
Vec::new()
|
||||||
|
})
|
||||||
|
.into_iter()
|
||||||
|
.map(|t| t.name)
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
log::info!("Fetched {} tags for photo: {:?}", tag_names.len(), tag_names);
|
||||||
|
|
||||||
// 4. Get location name from GPS coordinates (needed for RAG query)
|
// 4. Get location name from GPS coordinates (needed for RAG query)
|
||||||
let location = match exif {
|
let location = match exif {
|
||||||
Some(ref exif) => {
|
Some(ref exif) => {
|
||||||
@@ -744,6 +766,90 @@ impl InsightGenerator {
|
|||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Check if the model has vision capabilities
|
||||||
|
let model_to_check = ollama_client.primary_model.clone();
|
||||||
|
let has_vision = match OllamaClient::check_model_capabilities(
|
||||||
|
&ollama_client.primary_url,
|
||||||
|
&model_to_check,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(capabilities) => {
|
||||||
|
log::info!(
|
||||||
|
"Model '{}' vision capability: {}",
|
||||||
|
model_to_check,
|
||||||
|
capabilities.has_vision
|
||||||
|
);
|
||||||
|
capabilities.has_vision
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!(
|
||||||
|
"Failed to check vision capabilities for model '{}', assuming no vision support: {}",
|
||||||
|
model_to_check,
|
||||||
|
e
|
||||||
|
);
|
||||||
|
false
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
insight_cx
|
||||||
|
.span()
|
||||||
|
.set_attribute(KeyValue::new("model_has_vision", has_vision));
|
||||||
|
|
||||||
|
// Load image and encode as base64 only if model supports vision
|
||||||
|
let image_base64 = if has_vision {
|
||||||
|
match self.load_image_as_base64(&file_path) {
|
||||||
|
Ok(b64) => {
|
||||||
|
log::info!(
|
||||||
|
"Successfully loaded image for vision-capable model '{}'",
|
||||||
|
model_to_check
|
||||||
|
);
|
||||||
|
Some(b64)
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("Failed to load image for vision model: {}", e);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log::info!(
|
||||||
|
"Model '{}' does not support vision, skipping image processing",
|
||||||
|
model_to_check
|
||||||
|
);
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
// Generate brief photo description for RAG enrichment (vision models only)
|
||||||
|
let photo_description: Option<String> = if let Some(ref img_b64) = image_base64 {
|
||||||
|
match ollama_client.generate_photo_description(img_b64).await {
|
||||||
|
Ok(desc) => {
|
||||||
|
log::info!("Photo description for RAG enrichment: {}", desc);
|
||||||
|
Some(desc)
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("Failed to generate photo description for RAG enrichment: {}", e);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
// Build enriched context string for RAG: photo description + tags
|
||||||
|
// (SMS topics are passed separately to RAG functions)
|
||||||
|
let enriched_query: Option<String> = {
|
||||||
|
let mut parts: Vec<String> = Vec::new();
|
||||||
|
if let Some(ref desc) = photo_description {
|
||||||
|
parts.push(desc.clone());
|
||||||
|
}
|
||||||
|
if !tag_names.is_empty() {
|
||||||
|
parts.push(format!("tags: {}", tag_names.join(", ")));
|
||||||
|
}
|
||||||
|
if parts.is_empty() { None } else { Some(parts.join(". ")) }
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut search_enrichment: Option<String> = enriched_query.clone();
|
||||||
|
|
||||||
// 5. Intelligent retrieval: Hybrid approach for better context
|
// 5. Intelligent retrieval: Hybrid approach for better context
|
||||||
let mut sms_summary = None;
|
let mut sms_summary = None;
|
||||||
let mut used_rag = false;
|
let mut used_rag = false;
|
||||||
@@ -782,6 +888,21 @@ impl InsightGenerator {
|
|||||||
|
|
||||||
log::info!("Extracted topics for query enrichment: {:?}", topics);
|
log::info!("Extracted topics for query enrichment: {:?}", topics);
|
||||||
|
|
||||||
|
// Build full search enrichment: SMS topics + photo description + tag names
|
||||||
|
search_enrichment = {
|
||||||
|
let mut parts: Vec<String> = Vec::new();
|
||||||
|
if !topics.is_empty() {
|
||||||
|
parts.push(topics.join(", "));
|
||||||
|
}
|
||||||
|
if let Some(ref desc) = photo_description {
|
||||||
|
parts.push(desc.clone());
|
||||||
|
}
|
||||||
|
if !tag_names.is_empty() {
|
||||||
|
parts.push(format!("tags: {}", tag_names.join(", ")));
|
||||||
|
}
|
||||||
|
if parts.is_empty() { None } else { Some(parts.join(". ")) }
|
||||||
|
};
|
||||||
|
|
||||||
// Step 3: Try historical RAG (>30 days ago) using extracted topics
|
// Step 3: Try historical RAG (>30 days ago) using extracted topics
|
||||||
let topics_slice = if topics.is_empty() {
|
let topics_slice = if topics.is_empty() {
|
||||||
None
|
None
|
||||||
@@ -796,6 +917,7 @@ impl InsightGenerator {
|
|||||||
contact.as_deref(),
|
contact.as_deref(),
|
||||||
topics_slice,
|
topics_slice,
|
||||||
10, // Top 10 historical matches
|
10, // Top 10 historical matches
|
||||||
|
enriched_query.as_deref(),
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
@@ -858,7 +980,7 @@ impl InsightGenerator {
|
|||||||
log::info!("No immediate messages found, trying basic RAG as fallback");
|
log::info!("No immediate messages found, trying basic RAG as fallback");
|
||||||
// Fallback to basic RAG even without strong query
|
// Fallback to basic RAG even without strong query
|
||||||
match self
|
match self
|
||||||
.find_relevant_messages_rag(date_taken, None, contact.as_deref(), None, 20)
|
.find_relevant_messages_rag(date_taken, None, contact.as_deref(), None, 20, enriched_query.as_deref())
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(rag_messages) if !rag_messages.is_empty() => {
|
Ok(rag_messages) if !rag_messages.is_empty() => {
|
||||||
@@ -955,19 +1077,25 @@ impl InsightGenerator {
|
|||||||
timestamp,
|
timestamp,
|
||||||
location.as_deref(),
|
location.as_deref(),
|
||||||
contact.as_deref(),
|
contact.as_deref(),
|
||||||
None, // enrichment — wired up in Task 5
|
search_enrichment.as_deref(),
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.ok()
|
.ok()
|
||||||
.flatten();
|
.flatten();
|
||||||
|
|
||||||
// 7. Combine all context sources with equal weight
|
// 7. Combine all context sources with equal weight
|
||||||
|
let tags_context = if tag_names.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(tag_names.join(", "))
|
||||||
|
};
|
||||||
|
|
||||||
let combined_context = Self::combine_contexts(
|
let combined_context = Self::combine_contexts(
|
||||||
sms_summary,
|
sms_summary,
|
||||||
calendar_context,
|
calendar_context,
|
||||||
location_context,
|
location_context,
|
||||||
search_context,
|
search_context,
|
||||||
None, // tags — wired up in Task 5
|
tags_context,
|
||||||
);
|
);
|
||||||
|
|
||||||
log::info!(
|
log::info!(
|
||||||
@@ -975,59 +1103,6 @@ impl InsightGenerator {
|
|||||||
combined_context.len()
|
combined_context.len()
|
||||||
);
|
);
|
||||||
|
|
||||||
// 8. Check if the model has vision capabilities
|
|
||||||
let model_to_check = ollama_client.primary_model.clone();
|
|
||||||
let has_vision = match OllamaClient::check_model_capabilities(
|
|
||||||
&ollama_client.primary_url,
|
|
||||||
&model_to_check,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
Ok(capabilities) => {
|
|
||||||
log::info!(
|
|
||||||
"Model '{}' vision capability: {}",
|
|
||||||
model_to_check,
|
|
||||||
capabilities.has_vision
|
|
||||||
);
|
|
||||||
capabilities.has_vision
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
log::warn!(
|
|
||||||
"Failed to check vision capabilities for model '{}', assuming no vision support: {}",
|
|
||||||
model_to_check,
|
|
||||||
e
|
|
||||||
);
|
|
||||||
false
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
insight_cx
|
|
||||||
.span()
|
|
||||||
.set_attribute(KeyValue::new("model_has_vision", has_vision));
|
|
||||||
|
|
||||||
// 9. Load image and encode as base64 only if model supports vision
|
|
||||||
let image_base64 = if has_vision {
|
|
||||||
match self.load_image_as_base64(&file_path) {
|
|
||||||
Ok(b64) => {
|
|
||||||
log::info!(
|
|
||||||
"Successfully loaded image for vision-capable model '{}'",
|
|
||||||
model_to_check
|
|
||||||
);
|
|
||||||
Some(b64)
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
log::warn!("Failed to load image for vision model: {}", e);
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log::info!(
|
|
||||||
"Model '{}' does not support vision, skipping image processing",
|
|
||||||
model_to_check
|
|
||||||
);
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
// 10. Generate summary first, then derive title from the summary
|
// 10. Generate summary first, then derive title from the summary
|
||||||
let summary = ollama_client
|
let summary = ollama_client
|
||||||
.generate_photo_summary(
|
.generate_photo_summary(
|
||||||
@@ -1036,7 +1111,7 @@ impl InsightGenerator {
|
|||||||
contact.as_deref(),
|
contact.as_deref(),
|
||||||
Some(&combined_context),
|
Some(&combined_context),
|
||||||
custom_system_prompt.as_deref(),
|
custom_system_prompt.as_deref(),
|
||||||
image_base64,
|
image_base64.clone(),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user