Daily Summary Embedding Testing
@@ -10,6 +10,75 @@ use crate::ai::{OllamaClient, SmsApiClient, SmsMessage};
use crate::database::{DailySummaryDao, InsertDailySummary};
use crate::otel::global_tracer;

/// Strip boilerplate prefixes and common phrases from summaries before embedding.
/// This improves embedding diversity by removing structural similarity.
pub fn strip_summary_boilerplate(summary: &str) -> String {
    let mut text = summary.trim().to_string();

    // Remove markdown headers
    while text.starts_with('#') {
        if let Some(pos) = text.find('\n') {
            text = text[pos..].trim_start().to_string();
        } else {
            // Single line with just headers, try to extract content after #s
            text = text.trim_start_matches('#').trim().to_string();
            break;
        }
    }

    // Remove "Summary:" prefix variations (with optional markdown bold)
    let prefixes = [
        "**Summary:**",
        "**Summary**:",
        "*Summary:*",
        "Summary:",
        "**summary:**",
        "summary:",
    ];
    for prefix in prefixes {
        if text.to_lowercase().starts_with(&prefix.to_lowercase()) {
            text = text[prefix.len()..].trim_start().to_string();
            break;
        }
    }

    // Remove common opening phrases that add no semantic value
    let opening_phrases = [
        "Today, Melissa and I discussed",
        "Today, Amanda and I discussed",
        "Today Melissa and I discussed",
        "Today Amanda and I discussed",
        "Melissa and I discussed",
        "Amanda and I discussed",
        "Today, I discussed",
        "Today I discussed",
        "The conversation covered",
        "This conversation covered",
        "In this conversation,",
        "During this conversation,",
    ];

    for phrase in opening_phrases {
        if text.to_lowercase().starts_with(&phrase.to_lowercase()) {
            text = text[phrase.len()..].trim_start().to_string();
            // Remove leading punctuation/articles after stripping phrase
            text = text.trim_start_matches(|c| c == ',' || c == ':' || c == '-').trim_start().to_string();
            break;
        }
    }

    // Remove any remaining leading markdown bold markers
    if text.starts_with("**") {
        if let Some(end) = text[2..].find("**") {
            // Keep the content between ** but remove the markers
            let bold_content = &text[2..2 + end];
            text = format!("{}{}", bold_content, &text[4 + end..]);
        }
    }

    text.trim().to_string()
}
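As an illustration of what the stripping buys (a hypothetical test, not part of this commit), a summary that opens with a markdown header, a bold "Summary:" prefix, and one of the listed opening phrases reduces to just its distinctive content:

```rust
#[cfg(test)]
mod strip_summary_boilerplate_tests {
    use super::strip_summary_boilerplate;

    #[test]
    fn strips_header_prefix_and_opening_phrase() {
        // Hypothetical input; wording chosen to exercise each stripping stage.
        let raw = "# Daily Summary\n**Summary:** Today, Melissa and I discussed the drum corps audition and booked flights to Denver.";
        let stripped = strip_summary_boilerplate(raw);
        // Only the distinctive content remains for embedding.
        assert_eq!(
            stripped,
            "the drum corps audition and booked flights to Denver."
        );
    }
}
```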

/// Generate and embed daily conversation summaries for a date range
/// Default: August 2024 ±30 days (July 1 - September 30, 2024)
pub async fn generate_daily_summaries(
@@ -238,22 +307,34 @@ async fn generate_and_store_daily_summary(
    let weekday = date.format("%A");

    let prompt = format!(
r#"Summarize this day's conversation in 3-5 sentences. Focus on:
- Key topics, activities, and events discussed
- Places, people, or organizations mentioned
- Plans made or decisions discussed
- Overall mood or themes of the day
r#"Summarize this day's conversation between me and {}.

IMPORTANT: Clearly distinguish between what "I" or "Me" did versus what {} did.
Always explicitly attribute actions, plans, and activities to the correct person.
Use "I" or "Me" for my actions and "{}" for their actions.
CRITICAL FORMAT RULES:
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
- Do NOT repeat the date at the beginning
- Start DIRECTLY with the content - begin with a person's name or action
- Write in past tense, as if recording what happened

NARRATIVE (3-5 sentences):
- What specific topics, activities, or events were discussed?
- What places, people, or organizations were mentioned?
- What plans were made or decisions discussed?
- Clearly distinguish between what "I" did versus what {} did

KEYWORDS (comma-separated):
5-10 specific keywords that capture this conversation's unique content:
- Proper nouns (people, places, brands)
- Specific activities ("drum corps audition" not just "music")
- Distinctive terms that make this day unique

Date: {} ({})
Messages:
{}

Write a natural, informative summary with clear subject attribution.
Summary:"#,
YOUR RESPONSE (follow this format EXACTLY):
Summary: [Start directly with content, NO preamble]

Keywords: [specific, unique terms]"#,
        contact,
        contact,
        date.format("%B %d, %Y"),
@@ -265,7 +346,7 @@ Summary:"#,
    let summary = ollama
        .generate(
            &prompt,
            Some("You are a conversation summarizer. Create clear, factual summaries that maintain precise subject attribution - clearly distinguishing who said or did what."),
            Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
        )
        .await?;

@@ -277,8 +358,15 @@ Summary:"#,

    span.set_attribute(KeyValue::new("summary_length", summary.len() as i64));

    // Embed the summary
    let embedding = ollama.generate_embedding(&summary).await?;
    // Strip boilerplate before embedding to improve vector diversity
    let stripped_summary = strip_summary_boilerplate(&summary);
    log::debug!(
        "Stripped summary for embedding: {}",
        stripped_summary.chars().take(100).collect::<String>()
    );

    // Embed the stripped summary (store original summary in DB)
    let embedding = ollama.generate_embedding(&stripped_summary).await?;

    span.set_attribute(KeyValue::new(
        "embedding_dimensions",
@@ -293,7 +381,8 @@ Summary:"#,
        message_count: messages.len() as i32,
        embedding,
        created_at: Utc::now().timestamp(),
        model_version: "nomic-embed-text:v1.5".to_string(),
        // model_version: "nomic-embed-text:v1.5".to_string(),
        model_version: "mxbai-embed-large:335m".to_string(),
    };

    // Create context from current span for DB operation

@@ -9,7 +9,9 @@ use std::sync::{Arc, Mutex};
use crate::ai::ollama::OllamaClient;
use crate::ai::sms_client::SmsApiClient;
use crate::database::models::InsertPhotoInsight;
use crate::database::{CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, LocationHistoryDao, SearchHistoryDao};
use crate::database::{
    CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, LocationHistoryDao, SearchHistoryDao,
};
use crate::memories::extract_date_from_filename;
use crate::otel::global_tracer;
use crate::utils::normalize_path;
@@ -98,6 +100,7 @@ impl InsightGenerator {
        date: chrono::NaiveDate,
        location: Option<&str>,
        contact: Option<&str>,
        topics: Option<&[String]>,
        limit: usize,
    ) -> Result<Vec<String>> {
        let tracer = global_tracer();
@@ -113,9 +116,14 @@ impl InsightGenerator {
        filter_cx
            .span()
            .set_attribute(KeyValue::new("exclusion_window_days", 30));
        if let Some(t) = topics {
            filter_cx
                .span()
                .set_attribute(KeyValue::new("topics", t.join(", ")));
        }

        let query_results = self
            .find_relevant_messages_rag(date, location, contact, limit * 2)
            .find_relevant_messages_rag(date, location, contact, topics, limit * 2)
            .await?;

        filter_cx.span().set_attribute(KeyValue::new(
@@ -177,6 +185,7 @@ impl InsightGenerator {
        date: chrono::NaiveDate,
        location: Option<&str>,
        contact: Option<&str>,
        topics: Option<&[String]>,
        limit: usize,
    ) -> Result<Vec<String>> {
        let tracer = global_tracer();
@@ -191,27 +200,24 @@ impl InsightGenerator {
            span.set_attribute(KeyValue::new("contact", c.to_string()));
        }

        // Build more detailed query string from photo context
        let mut query_parts = Vec::new();

        // Add temporal context
        query_parts.push(format!("On {}", date.format("%B %d, %Y")));

        // Add location if available
        if let Some(loc) = location {
            query_parts.push(format!("at {}", loc));
        }

        // Add contact context if available
        if let Some(c) = contact {
            query_parts.push(format!("conversation with {}", c));
        }

        // Add day of week for temporal context
        let weekday = date.format("%A");
        query_parts.push(format!("it was a {}", weekday));

        let query = query_parts.join(", ");
        // Build query string - prioritize topics if available (semantically meaningful)
        let query = if let Some(topics) = topics {
            if !topics.is_empty() {
                // Use topics for semantic search - these are actual content keywords
                let topic_str = topics.join(", ");
                if let Some(c) = contact {
                    format!("Conversations about {} with {}", topic_str, c)
                } else {
                    format!("Conversations about {}", topic_str)
                }
            } else {
                // Fallback to metadata-based query
                Self::build_metadata_query(date, location, contact)
            }
        } else {
            // Fallback to metadata-based query
            Self::build_metadata_query(date, location, contact)
        };

        span.set_attribute(KeyValue::new("query", query.clone()));

@@ -225,14 +231,16 @@ impl InsightGenerator {
        // Generate embedding for the query
        let query_embedding = self.ollama.generate_embedding(&query).await?;

        // Search for similar daily summaries
        // Search for similar daily summaries with time-based weighting
        // This prioritizes summaries temporally close to the query date
        let mut summary_dao = self
            .daily_summary_dao
            .lock()
            .expect("Unable to lock DailySummaryDao");

        let date_str = date.format("%Y-%m-%d").to_string();
        let similar_summaries = summary_dao
            .find_similar_summaries(&search_cx, &query_embedding, limit)
            .find_similar_summaries_with_time_weight(&search_cx, &query_embedding, &date_str, limit)
            .map_err(|e| anyhow::anyhow!("Failed to find similar summaries: {:?}", e))?;
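The body of find_similar_summaries_with_time_weight is not part of this diff; as a rough sketch of the idea it names (semantic similarity discounted by temporal distance from the query date — the decay curve and blend weights below are assumptions, not the DAO's actual code):

```rust
// Hypothetical scoring helper; the real DAO method and its weighting may differ.
fn time_weighted_score(
    cosine_similarity: f32,
    summary_date: chrono::NaiveDate,
    query_date: chrono::NaiveDate,
) -> f32 {
    // Exponential decay with an assumed 90-day half-life.
    let days_apart = (summary_date - query_date).num_days().abs() as f32;
    let time_weight = 0.5f32.powf(days_apart / 90.0);
    // Blend semantic similarity with temporal proximity (illustrative weights).
    0.7 * cosine_similarity + 0.3 * time_weight
}
```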

        log::info!(
@@ -261,6 +269,34 @@ impl InsightGenerator {
        Ok(formatted)
    }

    /// Build a metadata-based query (fallback when no topics available)
    fn build_metadata_query(
        date: chrono::NaiveDate,
        location: Option<&str>,
        contact: Option<&str>,
    ) -> String {
        let mut query_parts = Vec::new();

        // Add temporal context
        query_parts.push(format!("On {}", date.format("%B %d, %Y")));

        // Add location if available
        if let Some(loc) = location {
            query_parts.push(format!("at {}", loc));
        }

        // Add contact context if available
        if let Some(c) = contact {
            query_parts.push(format!("conversation with {}", c));
        }

        // Add day of week for temporal context
        let weekday = date.format("%A");
        query_parts.push(format!("it was a {}", weekday));

        query_parts.join(", ")
    }

    /// Haversine distance calculation for GPS proximity (in kilometers)
    fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
        const R: f64 = 6371.0; // Earth radius in km
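The rest of the haversine body is cut off by the hunk; for reference, a standalone sketch of the standard formula the doc comment describes (textbook haversine, not necessarily line-for-line what this repo implements):

```rust
// Great-circle distance in kilometers via the standard haversine formula (illustrative).
fn haversine_km(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
    const R: f64 = 6371.0; // Earth radius in km
    let (phi1, phi2) = (lat1.to_radians(), lat2.to_radians());
    let d_phi = (lat2 - lat1).to_radians();
    let d_lambda = (lon2 - lon1).to_radians();
    // a = sin^2(dphi/2) + cos(phi1) * cos(phi2) * sin^2(dlambda/2)
    let a = (d_phi / 2.0).sin().powi(2) + phi1.cos() * phi2.cos() * (d_lambda / 2.0).sin().powi(2);
    // distance = 2R * atan2(sqrt(a), sqrt(1 - a))
    2.0 * R * a.sqrt().atan2((1.0 - a).sqrt())
}
```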
@@ -296,7 +332,10 @@ impl InsightGenerator {
        };

        let events = {
            let mut dao = self.calendar_dao.lock().expect("Unable to lock CalendarEventDao");
            let mut dao = self
                .calendar_dao
                .lock()
                .expect("Unable to lock CalendarEventDao");
            dao.find_relevant_events_hybrid(
                &calendar_cx,
                timestamp,
@@ -321,7 +360,8 @@ impl InsightGenerator {
                .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
                .unwrap_or_else(|| "unknown".to_string());

            let attendees = e.attendees
            let attendees = e
                .attendees
                .as_ref()
                .and_then(|a| serde_json::from_str::<Vec<String>>(a).ok())
                .map(|list| format!(" (with {})", list.join(", ")))
@@ -351,11 +391,14 @@ impl InsightGenerator {
        let location_cx = parent_cx.with_span(span);

        let nearest = {
            let mut dao = self.location_dao.lock().expect("Unable to lock LocationHistoryDao");
            let mut dao = self
                .location_dao
                .lock()
                .expect("Unable to lock LocationHistoryDao");
            dao.find_nearest_location(
                &location_cx,
                timestamp,
                1800, // ±30 minutes
                10800, // ±3 hours (more realistic for photo timing)
            )
            .ok()
            .flatten()
@@ -366,26 +409,33 @@ impl InsightGenerator {
        if let Some(loc) = nearest {
            // Check if this adds NEW information compared to EXIF
            if let Some((exif_lat, exif_lon)) = exif_gps {
                let distance = Self::haversine_distance(
                    exif_lat,
                    exif_lon,
                    loc.latitude,
                    loc.longitude,
                );
                let distance =
                    Self::haversine_distance(exif_lat, exif_lon, loc.latitude, loc.longitude);

                // Only use if it's significantly different (>100m) or EXIF lacks GPS
                if distance < 0.1 {
                    log::info!("Location history matches EXIF GPS ({}m), skipping", (distance * 1000.0) as i32);
                // Skip only if very close AND no useful activity/place info
                // Allow activity context even if coordinates match
                if distance < 0.5 && loc.activity.is_none() && loc.place_name.is_none() {
                    log::debug!(
                        "Location history matches EXIF GPS ({}m) with no extra context, skipping",
                        (distance * 1000.0) as i32
                    );
                    return Ok(None);
                } else if distance < 0.5 {
                    log::debug!(
                        "Location history close to EXIF ({}m) but has activity/place info",
                        (distance * 1000.0) as i32
                    );
                }
            }

            let activity = loc.activity
            let activity = loc
                .activity
                .as_ref()
                .map(|a| format!(" ({})", a))
                .unwrap_or_default();

            let place = loc.place_name
            let place = loc
                .place_name
                .as_ref()
                .map(|p| format!(" at {}", p))
                .unwrap_or_default();
@@ -425,7 +475,9 @@ impl InsightGenerator {
                .map(|dt| dt.format("%B %Y").to_string())
                .unwrap_or_else(|| "".to_string()),
            location.unwrap_or(""),
            contact.map(|c| format!("involving {}", c)).unwrap_or_default()
            contact
                .map(|c| format!("involving {}", c))
                .unwrap_or_default()
        );

        let query_embedding = match self.ollama.generate_embedding(&query_text).await {
@@ -440,7 +492,10 @@ impl InsightGenerator {
        };

        let searches = {
            let mut dao = self.search_dao.lock().expect("Unable to lock SearchHistoryDao");
            let mut dao = self
                .search_dao
                .lock()
                .expect("Unable to lock SearchHistoryDao");
            dao.find_relevant_searches_hybrid(
                &search_cx,
                timestamp,
@@ -455,6 +510,10 @@ impl InsightGenerator {

        if let Some(searches) = searches {
            if searches.is_empty() {
                log::warn!(
                    "No relevant searches found for photo timestamp {}",
                    timestamp
                );
                return Ok(None);
            }

@@ -599,8 +658,16 @@ impl InsightGenerator {
                insight_cx
                    .span()
                    .set_attribute(KeyValue::new("location", l.clone()));
                Some(l.clone())
            } else {
                // Fallback: If reverse geocoding fails, use coordinates
                log::warn!(
                    "Reverse geocoding failed for {}, {}, using coordinates as fallback",
                    lat,
                    lon
                );
                Some(format!("{:.4}, {:.4}", lat, lon))
            }
            loc
        } else {
            None
        }
@@ -615,31 +682,15 @@ impl InsightGenerator {
        // TEMPORARY: Set to true to disable RAG and use only time-based retrieval for testing
        let disable_rag_for_testing = false;

        // Decide strategy based on available metadata
        let has_strong_query = location.is_some();

        if disable_rag_for_testing {
            log::warn!("RAG DISABLED FOR TESTING - Using only time-based retrieval (±1 day)");
            log::warn!("RAG DISABLED FOR TESTING - Using only time-based retrieval (±2 days)");
            // Skip directly to fallback
        } else if has_strong_query {
            // Strategy A: Pure RAG (we have location for good semantic matching)
            log::info!("Using RAG with location-based query");
            match self
                .find_relevant_messages_rag(date_taken, location.as_deref(), contact.as_deref(), 20)
                .await
            {
                Ok(rag_messages) if !rag_messages.is_empty() => {
                    used_rag = true;
                    sms_summary = self.summarize_messages(&rag_messages, &ollama_client).await;
                }
                Ok(_) => log::info!("RAG returned no messages"),
                Err(e) => log::warn!("RAG failed: {}", e),
            }
        } else {
            // Strategy B: Expanded immediate context + historical RAG
            // ALWAYS use Strategy B: Expanded immediate context + historical RAG
            // This is more reliable than pure semantic search which can match irrelevant messages
            log::info!("Using expanded immediate context + historical RAG approach");

            // Step 1: Get FULL immediate temporal context (±1 day, ALL messages)
            // Step 1: Get FULL immediate temporal context (±2 days, ALL messages)
            let immediate_messages = self
                .sms_client
                .fetch_messages_for_contact(contact.as_deref(), timestamp)
@@ -650,7 +701,7 @@ impl InsightGenerator {
            });

            log::info!(
                "Fetched {} messages from ±1 day window (using ALL for immediate context)",
                "Fetched {} messages from ±2 days window (using ALL for immediate context)",
                immediate_messages.len()
            );

@@ -662,13 +713,19 @@ impl InsightGenerator {

            log::info!("Extracted topics for query enrichment: {:?}", topics);

            // Step 3: Try historical RAG (>30 days ago)
            // Step 3: Try historical RAG (>30 days ago) using extracted topics
            let topics_slice = if topics.is_empty() {
                None
            } else {
                Some(topics.as_slice())
            };
            match self
                .find_relevant_messages_rag_historical(
                    &insight_cx,
                    date_taken,
                    None,
                    contact.as_deref(),
                    topics_slice,
                    10, // Top 10 historical matches
                )
                .await
@@ -694,7 +751,7 @@ impl InsightGenerator {

                // Combine summaries
                sms_summary = Some(format!(
                    "Immediate context (±1 day): {}\n\nSimilar moments from the past: {}",
                    "Immediate context (±2 days): {}\n\nSimilar moments from the past: {}",
                    immediate_summary, historical_summary
                ));
            }
@@ -716,7 +773,7 @@ impl InsightGenerator {
            log::info!("No immediate messages found, trying basic RAG as fallback");
            // Fallback to basic RAG even without strong query
            match self
                .find_relevant_messages_rag(date_taken, None, contact.as_deref(), 20)
                .find_relevant_messages_rag(date_taken, None, contact.as_deref(), None, 20)
                .await
            {
                Ok(rag_messages) if !rag_messages.is_empty() => {
@@ -730,7 +787,7 @@ impl InsightGenerator {

        // 6. Fallback to traditional time-based message retrieval if RAG didn't work
        if !used_rag {
            log::info!("Using traditional time-based message retrieval (±1 day)");
            log::info!("Using traditional time-based message retrieval (±2 days)");
            let sms_messages = self
                .sms_client
                .fetch_messages_for_contact(contact.as_deref(), timestamp)
@@ -802,7 +859,12 @@ impl InsightGenerator {
            .flatten();

        let search_context = self
            .gather_search_context(&insight_cx, timestamp, location.as_deref(), contact.as_deref())
            .gather_search_context(
                &insight_cx,
                timestamp,
                location.as_deref(),
                contact.as_deref(),
            )
            .await
            .ok()
            .flatten();
@@ -815,7 +877,10 @@ impl InsightGenerator {
            search_context,
        );

        log::info!("Combined context from all sources ({} chars)", combined_context.len());
        log::info!(
            "Combined context from all sources ({} chars)",
            combined_context.len()
        );

        // 8. Generate title and summary with Ollama (using multi-source context)
        let title = ollama_client
@@ -905,13 +970,23 @@ Return ONLY the comma-separated list, nothing else."#,
            .await
        {
            Ok(response) => {
                log::debug!("Topic extraction raw response: {}", response);

                // Parse comma-separated topics
                response
                let topics: Vec<String> = response
                    .split(',')
                    .map(|s| s.trim().to_string())
                    .filter(|s| !s.is_empty() && s.len() > 1) // Filter out single chars
                    .take(7) // Increased from 5 to 7
                    .collect()
                    .collect();

                if topics.is_empty() {
                    log::warn!("Topic extraction returned empty list from {} messages", messages.len());
                } else {
                    log::info!("Extracted {} topics from {} messages: {}", topics.len(), messages.len(), topics.join(", "));
                }

                topics
            }
            Err(e) => {
                log::warn!("Failed to extract topics from messages: {}", e);
@@ -953,7 +1028,7 @@ Return ONLY the comma-separated list, nothing else."#,
        log::info!("========================================");

        // Use existing RAG method with enriched query
        self.find_relevant_messages_rag(date, None, contact, limit)
        self.find_relevant_messages_rag(date, None, contact, None, limit)
            .await
    }

@@ -995,7 +1070,7 @@ Return ONLY the summary, nothing else."#,
    }

    /// Convert SmsMessage objects to formatted strings and summarize with more detail
    /// This is used for immediate context (±1 day) to preserve conversation details
    /// This is used for immediate context (±2 days) to preserve conversation details
    async fn summarize_context_from_messages(
        &self,
        messages: &[crate::ai::SmsMessage],
@@ -1058,17 +1133,25 @@ Return ONLY the summary, nothing else."#,
            lat, lon
        );

        log::debug!("Reverse geocoding {}, {} via Nominatim", lat, lon);

        let client = reqwest::Client::new();
        let response = client
        let response = match client
            .get(&url)
            .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent
            .send()
            .await
            .ok()?;
        {
            Ok(resp) => resp,
            Err(e) => {
                log::warn!("Geocoding network error for {}, {}: {}", lat, lon, e);
                return None;
            }
        };

        if !response.status().is_success() {
            log::warn!(
                "Geocoding failed for {}, {}: {}",
                "Geocoding HTTP error for {}, {}: {}",
                lat,
                lon,
                response.status()
@@ -1076,7 +1159,13 @@ Return ONLY the summary, nothing else."#,
            return None;
        }

        let data: NominatimResponse = response.json().await.ok()?;
        let data: NominatimResponse = match response.json().await {
            Ok(d) => d,
            Err(e) => {
                log::warn!("Geocoding JSON parse error for {}, {}: {}", lat, lon, e);
                return None;
            }
        };

        // Try to build a concise location name
        if let Some(addr) = data.address {
@@ -1093,11 +1182,22 @@ Return ONLY the summary, nothing else."#,
            }

            if !parts.is_empty() {
                log::info!("Reverse geocoded {}, {} -> {}", lat, lon, parts.join(", "));
                return Some(parts.join(", "));
            }
        }

        // Fallback to display_name if structured address not available
        if let Some(ref display_name) = data.display_name {
            log::info!(
                "Reverse geocoded {}, {} -> {} (display_name)",
                lat,
                lon,
                display_name
            );
        } else {
            log::warn!("Geocoding returned no address data for {}, {}", lat, lon);
        }
        data.display_name
    }
}

@@ -5,7 +5,7 @@ pub mod insight_generator;
pub mod ollama;
pub mod sms_client;

pub use daily_summary_job::generate_daily_summaries;
pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate};
pub use handlers::{
    delete_insight_handler, generate_insight_handler, get_all_insights_handler,
    get_available_models_handler, get_insight_handler,

@@ -46,12 +46,12 @@ impl SmsApiClient {
    ) -> Result<Vec<SmsMessage>> {
        use chrono::Duration;

        // Calculate ±1 day range around the center timestamp
        // Calculate ±2 days range around the center timestamp
        let center_dt = chrono::DateTime::from_timestamp(center_timestamp, 0)
            .ok_or_else(|| anyhow::anyhow!("Invalid timestamp"))?;

        let start_dt = center_dt - Duration::days(1);
        let end_dt = center_dt + Duration::days(1);
        let start_dt = center_dt - Duration::days(2);
        let end_dt = center_dt + Duration::days(2);

        let start_ts = start_dt.timestamp();
        let end_ts = end_dt.timestamp();
@@ -59,7 +59,7 @@ impl SmsApiClient {
        // If contact specified, try fetching for that contact first
        if let Some(contact_name) = contact {
            log::info!(
                "Fetching SMS for contact: {} (±1 day from {})",
                "Fetching SMS for contact: {} (±2 days from {})",
                contact_name,
                center_dt.format("%Y-%m-%d %H:%M:%S")
            );