diff --git a/src/ai/daily_summary_job.rs b/src/ai/daily_summary_job.rs index b587750..182b96e 100644 --- a/src/ai/daily_summary_job.rs +++ b/src/ai/daily_summary_job.rs @@ -10,6 +10,75 @@ use crate::ai::{OllamaClient, SmsApiClient, SmsMessage}; use crate::database::{DailySummaryDao, InsertDailySummary}; use crate::otel::global_tracer; +/// Strip boilerplate prefixes and common phrases from summaries before embedding. +/// This improves embedding diversity by removing structural similarity. +pub fn strip_summary_boilerplate(summary: &str) -> String { + let mut text = summary.trim().to_string(); + + // Remove markdown headers + while text.starts_with('#') { + if let Some(pos) = text.find('\n') { + text = text[pos..].trim_start().to_string(); + } else { + // Single line with just headers, try to extract content after #s + text = text.trim_start_matches('#').trim().to_string(); + break; + } + } + + // Remove "Summary:" prefix variations (with optional markdown bold) + let prefixes = [ + "**Summary:**", + "**Summary**:", + "*Summary:*", + "Summary:", + "**summary:**", + "summary:", + ]; + for prefix in prefixes { + if text.to_lowercase().starts_with(&prefix.to_lowercase()) { + text = text[prefix.len()..].trim_start().to_string(); + break; + } + } + + // Remove common opening phrases that add no semantic value + let opening_phrases = [ + "Today, Melissa and I discussed", + "Today, Amanda and I discussed", + "Today Melissa and I discussed", + "Today Amanda and I discussed", + "Melissa and I discussed", + "Amanda and I discussed", + "Today, I discussed", + "Today I discussed", + "The conversation covered", + "This conversation covered", + "In this conversation,", + "During this conversation,", + ]; + + for phrase in opening_phrases { + if text.to_lowercase().starts_with(&phrase.to_lowercase()) { + text = text[phrase.len()..].trim_start().to_string(); + // Remove leading punctuation/articles after stripping phrase + text = text.trim_start_matches(|c| c == ',' || c == ':' || c == '-').trim_start().to_string(); + break; + } + } + + // Remove any remaining leading markdown bold markers + if text.starts_with("**") { + if let Some(end) = text[2..].find("**") { + // Keep the content between ** but remove the markers + let bold_content = &text[2..2 + end]; + text = format!("{}{}", bold_content, &text[4 + end..]); + } + } + + text.trim().to_string() +} + /// Generate and embed daily conversation summaries for a date range /// Default: August 2024 ±30 days (July 1 - September 30, 2024) pub async fn generate_daily_summaries( @@ -238,22 +307,34 @@ async fn generate_and_store_daily_summary( let weekday = date.format("%A"); let prompt = format!( - r#"Summarize this day's conversation in 3-5 sentences. Focus on: -- Key topics, activities, and events discussed -- Places, people, or organizations mentioned -- Plans made or decisions discussed -- Overall mood or themes of the day + r#"Summarize this day's conversation between me and {}. -IMPORTANT: Clearly distinguish between what "I" or "Me" did versus what {} did. -Always explicitly attribute actions, plans, and activities to the correct person. -Use "I" or "Me" for my actions and "{}" for their actions. +CRITICAL FORMAT RULES: +- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles +- Do NOT repeat the date at the beginning +- Start DIRECTLY with the content - begin with a person's name or action +- Write in past tense, as if recording what happened + +NARRATIVE (3-5 sentences): +- What specific topics, activities, or events were discussed? +- What places, people, or organizations were mentioned? +- What plans were made or decisions discussed? +- Clearly distinguish between what "I" did versus what {} did + +KEYWORDS (comma-separated): +5-10 specific keywords that capture this conversation's unique content: +- Proper nouns (people, places, brands) +- Specific activities ("drum corps audition" not just "music") +- Distinctive terms that make this day unique Date: {} ({}) Messages: {} -Write a natural, informative summary with clear subject attribution. -Summary:"#, +YOUR RESPONSE (follow this format EXACTLY): +Summary: [Start directly with content, NO preamble] + +Keywords: [specific, unique terms]"#, contact, contact, date.format("%B %d, %Y"), @@ -265,7 +346,7 @@ Summary:"#, let summary = ollama .generate( &prompt, - Some("You are a conversation summarizer. Create clear, factual summaries that maintain precise subject attribution - clearly distinguishing who said or did what."), + Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."), ) .await?; @@ -277,8 +358,15 @@ Summary:"#, span.set_attribute(KeyValue::new("summary_length", summary.len() as i64)); - // Embed the summary - let embedding = ollama.generate_embedding(&summary).await?; + // Strip boilerplate before embedding to improve vector diversity + let stripped_summary = strip_summary_boilerplate(&summary); + log::debug!( + "Stripped summary for embedding: {}", + stripped_summary.chars().take(100).collect::() + ); + + // Embed the stripped summary (store original summary in DB) + let embedding = ollama.generate_embedding(&stripped_summary).await?; span.set_attribute(KeyValue::new( "embedding_dimensions", @@ -293,7 +381,8 @@ Summary:"#, message_count: messages.len() as i32, embedding, created_at: Utc::now().timestamp(), - model_version: "nomic-embed-text:v1.5".to_string(), + // model_version: "nomic-embed-text:v1.5".to_string(), + model_version: "mxbai-embed-large:335m".to_string(), }; // Create context from current span for DB operation diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index e33d262..7d8acf3 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -9,7 +9,9 @@ use std::sync::{Arc, Mutex}; use crate::ai::ollama::OllamaClient; use crate::ai::sms_client::SmsApiClient; use crate::database::models::InsertPhotoInsight; -use crate::database::{CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, LocationHistoryDao, SearchHistoryDao}; +use crate::database::{ + CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, LocationHistoryDao, SearchHistoryDao, +}; use crate::memories::extract_date_from_filename; use crate::otel::global_tracer; use crate::utils::normalize_path; @@ -98,6 +100,7 @@ impl InsightGenerator { date: chrono::NaiveDate, location: Option<&str>, contact: Option<&str>, + topics: Option<&[String]>, limit: usize, ) -> Result> { let tracer = global_tracer(); @@ -113,9 +116,14 @@ impl InsightGenerator { filter_cx .span() .set_attribute(KeyValue::new("exclusion_window_days", 30)); + if let Some(t) = topics { + filter_cx + .span() + .set_attribute(KeyValue::new("topics", t.join(", "))); + } let query_results = self - .find_relevant_messages_rag(date, location, contact, limit * 2) + .find_relevant_messages_rag(date, location, contact, topics, limit * 2) .await?; filter_cx.span().set_attribute(KeyValue::new( @@ -177,6 +185,7 @@ impl InsightGenerator { date: chrono::NaiveDate, location: Option<&str>, contact: Option<&str>, + topics: Option<&[String]>, limit: usize, ) -> Result> { let tracer = global_tracer(); @@ -191,27 +200,24 @@ impl InsightGenerator { span.set_attribute(KeyValue::new("contact", c.to_string())); } - // Build more detailed query string from photo context - let mut query_parts = Vec::new(); - - // Add temporal context - query_parts.push(format!("On {}", date.format("%B %d, %Y"))); - - // Add location if available - if let Some(loc) = location { - query_parts.push(format!("at {}", loc)); - } - - // Add contact context if available - if let Some(c) = contact { - query_parts.push(format!("conversation with {}", c)); - } - - // Add day of week for temporal context - let weekday = date.format("%A"); - query_parts.push(format!("it was a {}", weekday)); - - let query = query_parts.join(", "); + // Build query string - prioritize topics if available (semantically meaningful) + let query = if let Some(topics) = topics { + if !topics.is_empty() { + // Use topics for semantic search - these are actual content keywords + let topic_str = topics.join(", "); + if let Some(c) = contact { + format!("Conversations about {} with {}", topic_str, c) + } else { + format!("Conversations about {}", topic_str) + } + } else { + // Fallback to metadata-based query + Self::build_metadata_query(date, location, contact) + } + } else { + // Fallback to metadata-based query + Self::build_metadata_query(date, location, contact) + }; span.set_attribute(KeyValue::new("query", query.clone())); @@ -225,14 +231,16 @@ impl InsightGenerator { // Generate embedding for the query let query_embedding = self.ollama.generate_embedding(&query).await?; - // Search for similar daily summaries + // Search for similar daily summaries with time-based weighting + // This prioritizes summaries temporally close to the query date let mut summary_dao = self .daily_summary_dao .lock() .expect("Unable to lock DailySummaryDao"); + let date_str = date.format("%Y-%m-%d").to_string(); let similar_summaries = summary_dao - .find_similar_summaries(&search_cx, &query_embedding, limit) + .find_similar_summaries_with_time_weight(&search_cx, &query_embedding, &date_str, limit) .map_err(|e| anyhow::anyhow!("Failed to find similar summaries: {:?}", e))?; log::info!( @@ -261,6 +269,34 @@ impl InsightGenerator { Ok(formatted) } + /// Build a metadata-based query (fallback when no topics available) + fn build_metadata_query( + date: chrono::NaiveDate, + location: Option<&str>, + contact: Option<&str>, + ) -> String { + let mut query_parts = Vec::new(); + + // Add temporal context + query_parts.push(format!("On {}", date.format("%B %d, %Y"))); + + // Add location if available + if let Some(loc) = location { + query_parts.push(format!("at {}", loc)); + } + + // Add contact context if available + if let Some(c) = contact { + query_parts.push(format!("conversation with {}", c)); + } + + // Add day of week for temporal context + let weekday = date.format("%A"); + query_parts.push(format!("it was a {}", weekday)); + + query_parts.join(", ") + } + /// Haversine distance calculation for GPS proximity (in kilometers) fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 { const R: f64 = 6371.0; // Earth radius in km @@ -296,7 +332,10 @@ impl InsightGenerator { }; let events = { - let mut dao = self.calendar_dao.lock().expect("Unable to lock CalendarEventDao"); + let mut dao = self + .calendar_dao + .lock() + .expect("Unable to lock CalendarEventDao"); dao.find_relevant_events_hybrid( &calendar_cx, timestamp, @@ -321,7 +360,8 @@ impl InsightGenerator { .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string()) .unwrap_or_else(|| "unknown".to_string()); - let attendees = e.attendees + let attendees = e + .attendees .as_ref() .and_then(|a| serde_json::from_str::>(a).ok()) .map(|list| format!(" (with {})", list.join(", "))) @@ -351,11 +391,14 @@ impl InsightGenerator { let location_cx = parent_cx.with_span(span); let nearest = { - let mut dao = self.location_dao.lock().expect("Unable to lock LocationHistoryDao"); + let mut dao = self + .location_dao + .lock() + .expect("Unable to lock LocationHistoryDao"); dao.find_nearest_location( &location_cx, timestamp, - 1800, // ±30 minutes + 10800, // ±3 hours (more realistic for photo timing) ) .ok() .flatten() @@ -366,26 +409,33 @@ impl InsightGenerator { if let Some(loc) = nearest { // Check if this adds NEW information compared to EXIF if let Some((exif_lat, exif_lon)) = exif_gps { - let distance = Self::haversine_distance( - exif_lat, - exif_lon, - loc.latitude, - loc.longitude, - ); + let distance = + Self::haversine_distance(exif_lat, exif_lon, loc.latitude, loc.longitude); - // Only use if it's significantly different (>100m) or EXIF lacks GPS - if distance < 0.1 { - log::info!("Location history matches EXIF GPS ({}m), skipping", (distance * 1000.0) as i32); + // Skip only if very close AND no useful activity/place info + // Allow activity context even if coordinates match + if distance < 0.5 && loc.activity.is_none() && loc.place_name.is_none() { + log::debug!( + "Location history matches EXIF GPS ({}m) with no extra context, skipping", + (distance * 1000.0) as i32 + ); return Ok(None); + } else if distance < 0.5 { + log::debug!( + "Location history close to EXIF ({}m) but has activity/place info", + (distance * 1000.0) as i32 + ); } } - let activity = loc.activity + let activity = loc + .activity .as_ref() .map(|a| format!(" ({})", a)) .unwrap_or_default(); - let place = loc.place_name + let place = loc + .place_name .as_ref() .map(|p| format!(" at {}", p)) .unwrap_or_default(); @@ -425,7 +475,9 @@ impl InsightGenerator { .map(|dt| dt.format("%B %Y").to_string()) .unwrap_or_else(|| "".to_string()), location.unwrap_or(""), - contact.map(|c| format!("involving {}", c)).unwrap_or_default() + contact + .map(|c| format!("involving {}", c)) + .unwrap_or_default() ); let query_embedding = match self.ollama.generate_embedding(&query_text).await { @@ -440,7 +492,10 @@ impl InsightGenerator { }; let searches = { - let mut dao = self.search_dao.lock().expect("Unable to lock SearchHistoryDao"); + let mut dao = self + .search_dao + .lock() + .expect("Unable to lock SearchHistoryDao"); dao.find_relevant_searches_hybrid( &search_cx, timestamp, @@ -455,6 +510,10 @@ impl InsightGenerator { if let Some(searches) = searches { if searches.is_empty() { + log::warn!( + "No relevant searches found for photo timestamp {}", + timestamp + ); return Ok(None); } @@ -599,8 +658,16 @@ impl InsightGenerator { insight_cx .span() .set_attribute(KeyValue::new("location", l.clone())); + Some(l.clone()) + } else { + // Fallback: If reverse geocoding fails, use coordinates + log::warn!( + "Reverse geocoding failed for {}, {}, using coordinates as fallback", + lat, + lon + ); + Some(format!("{:.4}, {:.4}", lat, lon)) } - loc } else { None } @@ -615,31 +682,15 @@ impl InsightGenerator { // TEMPORARY: Set to true to disable RAG and use only time-based retrieval for testing let disable_rag_for_testing = false; - // Decide strategy based on available metadata - let has_strong_query = location.is_some(); - if disable_rag_for_testing { - log::warn!("RAG DISABLED FOR TESTING - Using only time-based retrieval (±1 day)"); + log::warn!("RAG DISABLED FOR TESTING - Using only time-based retrieval (±2 days)"); // Skip directly to fallback - } else if has_strong_query { - // Strategy A: Pure RAG (we have location for good semantic matching) - log::info!("Using RAG with location-based query"); - match self - .find_relevant_messages_rag(date_taken, location.as_deref(), contact.as_deref(), 20) - .await - { - Ok(rag_messages) if !rag_messages.is_empty() => { - used_rag = true; - sms_summary = self.summarize_messages(&rag_messages, &ollama_client).await; - } - Ok(_) => log::info!("RAG returned no messages"), - Err(e) => log::warn!("RAG failed: {}", e), - } } else { - // Strategy B: Expanded immediate context + historical RAG + // ALWAYS use Strategy B: Expanded immediate context + historical RAG + // This is more reliable than pure semantic search which can match irrelevant messages log::info!("Using expanded immediate context + historical RAG approach"); - // Step 1: Get FULL immediate temporal context (±1 day, ALL messages) + // Step 1: Get FULL immediate temporal context (±2 days, ALL messages) let immediate_messages = self .sms_client .fetch_messages_for_contact(contact.as_deref(), timestamp) @@ -650,7 +701,7 @@ impl InsightGenerator { }); log::info!( - "Fetched {} messages from ±1 day window (using ALL for immediate context)", + "Fetched {} messages from ±2 days window (using ALL for immediate context)", immediate_messages.len() ); @@ -662,13 +713,19 @@ impl InsightGenerator { log::info!("Extracted topics for query enrichment: {:?}", topics); - // Step 3: Try historical RAG (>30 days ago) + // Step 3: Try historical RAG (>30 days ago) using extracted topics + let topics_slice = if topics.is_empty() { + None + } else { + Some(topics.as_slice()) + }; match self .find_relevant_messages_rag_historical( &insight_cx, date_taken, None, contact.as_deref(), + topics_slice, 10, // Top 10 historical matches ) .await @@ -694,7 +751,7 @@ impl InsightGenerator { // Combine summaries sms_summary = Some(format!( - "Immediate context (±1 day): {}\n\nSimilar moments from the past: {}", + "Immediate context (±2 days): {}\n\nSimilar moments from the past: {}", immediate_summary, historical_summary )); } @@ -716,7 +773,7 @@ impl InsightGenerator { log::info!("No immediate messages found, trying basic RAG as fallback"); // Fallback to basic RAG even without strong query match self - .find_relevant_messages_rag(date_taken, None, contact.as_deref(), 20) + .find_relevant_messages_rag(date_taken, None, contact.as_deref(), None, 20) .await { Ok(rag_messages) if !rag_messages.is_empty() => { @@ -730,7 +787,7 @@ impl InsightGenerator { // 6. Fallback to traditional time-based message retrieval if RAG didn't work if !used_rag { - log::info!("Using traditional time-based message retrieval (±1 day)"); + log::info!("Using traditional time-based message retrieval (±2 days)"); let sms_messages = self .sms_client .fetch_messages_for_contact(contact.as_deref(), timestamp) @@ -802,7 +859,12 @@ impl InsightGenerator { .flatten(); let search_context = self - .gather_search_context(&insight_cx, timestamp, location.as_deref(), contact.as_deref()) + .gather_search_context( + &insight_cx, + timestamp, + location.as_deref(), + contact.as_deref(), + ) .await .ok() .flatten(); @@ -815,7 +877,10 @@ impl InsightGenerator { search_context, ); - log::info!("Combined context from all sources ({} chars)", combined_context.len()); + log::info!( + "Combined context from all sources ({} chars)", + combined_context.len() + ); // 8. Generate title and summary with Ollama (using multi-source context) let title = ollama_client @@ -905,13 +970,23 @@ Return ONLY the comma-separated list, nothing else."#, .await { Ok(response) => { + log::debug!("Topic extraction raw response: {}", response); + // Parse comma-separated topics - response + let topics: Vec = response .split(',') .map(|s| s.trim().to_string()) .filter(|s| !s.is_empty() && s.len() > 1) // Filter out single chars .take(7) // Increased from 5 to 7 - .collect() + .collect(); + + if topics.is_empty() { + log::warn!("Topic extraction returned empty list from {} messages", messages.len()); + } else { + log::info!("Extracted {} topics from {} messages: {}", topics.len(), messages.len(), topics.join(", ")); + } + + topics } Err(e) => { log::warn!("Failed to extract topics from messages: {}", e); @@ -953,7 +1028,7 @@ Return ONLY the comma-separated list, nothing else."#, log::info!("========================================"); // Use existing RAG method with enriched query - self.find_relevant_messages_rag(date, None, contact, limit) + self.find_relevant_messages_rag(date, None, contact, None, limit) .await } @@ -995,7 +1070,7 @@ Return ONLY the summary, nothing else."#, } /// Convert SmsMessage objects to formatted strings and summarize with more detail - /// This is used for immediate context (±1 day) to preserve conversation details + /// This is used for immediate context (±2 days) to preserve conversation details async fn summarize_context_from_messages( &self, messages: &[crate::ai::SmsMessage], @@ -1058,17 +1133,25 @@ Return ONLY the summary, nothing else."#, lat, lon ); + log::debug!("Reverse geocoding {}, {} via Nominatim", lat, lon); + let client = reqwest::Client::new(); - let response = client + let response = match client .get(&url) .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent .send() .await - .ok()?; + { + Ok(resp) => resp, + Err(e) => { + log::warn!("Geocoding network error for {}, {}: {}", lat, lon, e); + return None; + } + }; if !response.status().is_success() { log::warn!( - "Geocoding failed for {}, {}: {}", + "Geocoding HTTP error for {}, {}: {}", lat, lon, response.status() @@ -1076,7 +1159,13 @@ Return ONLY the summary, nothing else."#, return None; } - let data: NominatimResponse = response.json().await.ok()?; + let data: NominatimResponse = match response.json().await { + Ok(d) => d, + Err(e) => { + log::warn!("Geocoding JSON parse error for {}, {}: {}", lat, lon, e); + return None; + } + }; // Try to build a concise location name if let Some(addr) = data.address { @@ -1093,11 +1182,22 @@ Return ONLY the summary, nothing else."#, } if !parts.is_empty() { + log::info!("Reverse geocoded {}, {} -> {}", lat, lon, parts.join(", ")); return Some(parts.join(", ")); } } // Fallback to display_name if structured address not available + if let Some(ref display_name) = data.display_name { + log::info!( + "Reverse geocoded {}, {} -> {} (display_name)", + lat, + lon, + display_name + ); + } else { + log::warn!("Geocoding returned no address data for {}, {}", lat, lon); + } data.display_name } } diff --git a/src/ai/mod.rs b/src/ai/mod.rs index fe4f1d2..fbee000 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -5,7 +5,7 @@ pub mod insight_generator; pub mod ollama; pub mod sms_client; -pub use daily_summary_job::generate_daily_summaries; +pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate}; pub use handlers::{ delete_insight_handler, generate_insight_handler, get_all_insights_handler, get_available_models_handler, get_insight_handler, diff --git a/src/ai/sms_client.rs b/src/ai/sms_client.rs index 0043452..ea91ae1 100644 --- a/src/ai/sms_client.rs +++ b/src/ai/sms_client.rs @@ -46,12 +46,12 @@ impl SmsApiClient { ) -> Result> { use chrono::Duration; - // Calculate ±1 day range around the center timestamp + // Calculate ±2 days range around the center timestamp let center_dt = chrono::DateTime::from_timestamp(center_timestamp, 0) .ok_or_else(|| anyhow::anyhow!("Invalid timestamp"))?; - let start_dt = center_dt - Duration::days(1); - let end_dt = center_dt + Duration::days(1); + let start_dt = center_dt - Duration::days(2); + let end_dt = center_dt + Duration::days(2); let start_ts = start_dt.timestamp(); let end_ts = end_dt.timestamp(); @@ -59,7 +59,7 @@ impl SmsApiClient { // If contact specified, try fetching for that contact first if let Some(contact_name) = contact { log::info!( - "Fetching SMS for contact: {} (±1 day from {})", + "Fetching SMS for contact: {} (±2 days from {})", contact_name, center_dt.format("%Y-%m-%d %H:%M:%S") ); diff --git a/src/bin/diagnose_embeddings.rs b/src/bin/diagnose_embeddings.rs new file mode 100644 index 0000000..1348cf3 --- /dev/null +++ b/src/bin/diagnose_embeddings.rs @@ -0,0 +1,282 @@ +use anyhow::Result; +use clap::Parser; +use diesel::prelude::*; +use diesel::sql_query; +use diesel::sqlite::SqliteConnection; +use std::env; + +#[derive(Parser, Debug)] +#[command(author, version, about = "Diagnose embedding distribution and identify problematic summaries", long_about = None)] +struct Args { + /// Show detailed per-summary statistics + #[arg(short, long, default_value_t = false)] + verbose: bool, + + /// Number of top "central" summaries to show (ones that match everything) + #[arg(short, long, default_value_t = 10)] + top: usize, + + /// Test a specific query to see what matches + #[arg(short, long)] + query: Option, +} + +#[derive(QueryableByName, Debug)] +struct EmbeddingRow { + #[diesel(sql_type = diesel::sql_types::Integer)] + id: i32, + #[diesel(sql_type = diesel::sql_types::Text)] + date: String, + #[diesel(sql_type = diesel::sql_types::Text)] + contact: String, + #[diesel(sql_type = diesel::sql_types::Text)] + summary: String, + #[diesel(sql_type = diesel::sql_types::Binary)] + embedding: Vec, +} + +fn deserialize_embedding(bytes: &[u8]) -> Result> { + if bytes.len() % 4 != 0 { + return Err(anyhow::anyhow!("Invalid embedding byte length")); + } + + let count = bytes.len() / 4; + let mut vec = Vec::with_capacity(count); + + for chunk in bytes.chunks_exact(4) { + let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]); + vec.push(float); + } + + Ok(vec) +} + +fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { + if a.len() != b.len() { + return 0.0; + } + + let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); + let magnitude_a: f32 = a.iter().map(|x| x * x).sum::().sqrt(); + let magnitude_b: f32 = b.iter().map(|x| x * x).sum::().sqrt(); + + if magnitude_a == 0.0 || magnitude_b == 0.0 { + return 0.0; + } + + dot_product / (magnitude_a * magnitude_b) +} + +fn main() -> Result<()> { + dotenv::dotenv().ok(); + let args = Args::parse(); + + let database_url = env::var("DATABASE_URL").unwrap_or_else(|_| "auth.db".to_string()); + println!("Connecting to database: {}", database_url); + + let mut conn = SqliteConnection::establish(&database_url)?; + + // Load all embeddings + println!("\nLoading embeddings from daily_conversation_summaries..."); + let rows: Vec = sql_query( + "SELECT id, date, contact, summary, embedding FROM daily_conversation_summaries ORDER BY date" + ) + .load(&mut conn)?; + + println!("Found {} summaries with embeddings\n", rows.len()); + + if rows.is_empty() { + println!("No summaries found!"); + return Ok(()); + } + + // Parse all embeddings + let mut embeddings: Vec<(i32, String, String, String, Vec)> = Vec::new(); + for row in &rows { + match deserialize_embedding(&row.embedding) { + Ok(emb) => { + embeddings.push(( + row.id, + row.date.clone(), + row.contact.clone(), + row.summary.clone(), + emb, + )); + } + Err(e) => { + println!("Warning: Failed to parse embedding for id {}: {}", row.id, e); + } + } + } + + println!("Successfully parsed {} embeddings\n", embeddings.len()); + + // Compute embedding statistics + println!("========================================"); + println!("EMBEDDING STATISTICS"); + println!("========================================\n"); + + // Check embedding variance (are values clustered or spread out?) + let first_emb = &embeddings[0].4; + let dim = first_emb.len(); + println!("Embedding dimensions: {}", dim); + + // Calculate mean and std dev per dimension + let mut dim_means: Vec = vec![0.0; dim]; + let mut dim_vars: Vec = vec![0.0; dim]; + + for (_, _, _, _, emb) in &embeddings { + for (i, &val) in emb.iter().enumerate() { + dim_means[i] += val; + } + } + for m in &mut dim_means { + *m /= embeddings.len() as f32; + } + + for (_, _, _, _, emb) in &embeddings { + for (i, &val) in emb.iter().enumerate() { + let diff = val - dim_means[i]; + dim_vars[i] += diff * diff; + } + } + for v in &mut dim_vars { + *v = (*v / embeddings.len() as f32).sqrt(); + } + + let avg_std_dev: f32 = dim_vars.iter().sum::() / dim as f32; + let min_std_dev: f32 = dim_vars.iter().cloned().fold(f32::INFINITY, f32::min); + let max_std_dev: f32 = dim_vars.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + + println!("Per-dimension standard deviation:"); + println!(" Average: {:.6}", avg_std_dev); + println!(" Min: {:.6}", min_std_dev); + println!(" Max: {:.6}", max_std_dev); + println!(); + + // Compute pairwise similarities + println!("Computing pairwise similarities (this may take a moment)...\n"); + + let mut all_similarities: Vec = Vec::new(); + let mut per_embedding_avg: Vec<(usize, f32)> = Vec::new(); + + for i in 0..embeddings.len() { + let mut sum = 0.0; + let mut count = 0; + for j in 0..embeddings.len() { + if i != j { + let sim = cosine_similarity(&embeddings[i].4, &embeddings[j].4); + all_similarities.push(sim); + sum += sim; + count += 1; + } + } + per_embedding_avg.push((i, sum / count as f32)); + } + + // Sort similarities for percentile analysis + all_similarities.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + let min_sim = all_similarities.first().copied().unwrap_or(0.0); + let max_sim = all_similarities.last().copied().unwrap_or(0.0); + let median_sim = all_similarities[all_similarities.len() / 2]; + let p25 = all_similarities[all_similarities.len() / 4]; + let p75 = all_similarities[3 * all_similarities.len() / 4]; + let mean_sim: f32 = all_similarities.iter().sum::() / all_similarities.len() as f32; + + println!("========================================"); + println!("PAIRWISE SIMILARITY DISTRIBUTION"); + println!("========================================\n"); + println!("Total pairs analyzed: {}", all_similarities.len()); + println!(); + println!("Min similarity: {:.4}", min_sim); + println!("25th percentile: {:.4}", p25); + println!("Median similarity: {:.4}", median_sim); + println!("Mean similarity: {:.4}", mean_sim); + println!("75th percentile: {:.4}", p75); + println!("Max similarity: {:.4}", max_sim); + println!(); + + // Analyze distribution + let count_above_08 = all_similarities.iter().filter(|&&s| s > 0.8).count(); + let count_above_07 = all_similarities.iter().filter(|&&s| s > 0.7).count(); + let count_above_06 = all_similarities.iter().filter(|&&s| s > 0.6).count(); + let count_above_05 = all_similarities.iter().filter(|&&s| s > 0.5).count(); + let count_below_03 = all_similarities.iter().filter(|&&s| s < 0.3).count(); + + println!("Similarity distribution:"); + println!(" > 0.8: {} ({:.1}%)", count_above_08, 100.0 * count_above_08 as f32 / all_similarities.len() as f32); + println!(" > 0.7: {} ({:.1}%)", count_above_07, 100.0 * count_above_07 as f32 / all_similarities.len() as f32); + println!(" > 0.6: {} ({:.1}%)", count_above_06, 100.0 * count_above_06 as f32 / all_similarities.len() as f32); + println!(" > 0.5: {} ({:.1}%)", count_above_05, 100.0 * count_above_05 as f32 / all_similarities.len() as f32); + println!(" < 0.3: {} ({:.1}%)", count_below_03, 100.0 * count_below_03 as f32 / all_similarities.len() as f32); + println!(); + + // Identify "central" embeddings (high average similarity to all others) + per_embedding_avg.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + + println!("========================================"); + println!("TOP {} MOST 'CENTRAL' SUMMARIES", args.top); + println!("(These match everything with high similarity)"); + println!("========================================\n"); + + for (rank, (idx, avg_sim)) in per_embedding_avg.iter().take(args.top).enumerate() { + let (id, date, contact, summary, _) = &embeddings[*idx]; + let preview: String = summary.chars().take(80).collect(); + println!("{}. [id={}, avg_sim={:.4}]", rank + 1, id, avg_sim); + println!(" Date: {}, Contact: {}", date, contact); + println!(" Preview: {}...", preview.replace('\n', " ")); + println!(); + } + + // Also show the least central (most unique) + println!("========================================"); + println!("TOP {} MOST UNIQUE SUMMARIES", args.top); + println!("(These are most different from others)"); + println!("========================================\n"); + + for (rank, (idx, avg_sim)) in per_embedding_avg.iter().rev().take(args.top).enumerate() { + let (id, date, contact, summary, _) = &embeddings[*idx]; + let preview: String = summary.chars().take(80).collect(); + println!("{}. [id={}, avg_sim={:.4}]", rank + 1, id, avg_sim); + println!(" Date: {}, Contact: {}", date, contact); + println!(" Preview: {}...", preview.replace('\n', " ")); + println!(); + } + + // Diagnosis + println!("========================================"); + println!("DIAGNOSIS"); + println!("========================================\n"); + + if mean_sim > 0.7 { + println!("⚠️ HIGH AVERAGE SIMILARITY ({:.4})", mean_sim); + println!(" All embeddings are very similar to each other."); + println!(" This explains why the same summaries always match."); + println!(); + println!(" Possible causes:"); + println!(" 1. Summaries have similar structure/phrasing (e.g., all start with 'Summary:')"); + println!(" 2. Embedding model isn't capturing semantic differences well"); + println!(" 3. Daily conversations have similar topics (e.g., 'good morning', plans)"); + println!(); + println!(" Recommendations:"); + println!(" 1. Try a different embedding model (mxbai-embed-large, bge-large)"); + println!(" 2. Improve summary diversity by varying the prompt"); + println!(" 3. Extract and embed only keywords/entities, not full summaries"); + } else if mean_sim > 0.5 { + println!("⚡ MODERATE AVERAGE SIMILARITY ({:.4})", mean_sim); + println!(" Some clustering in embeddings, but some differentiation exists."); + println!(); + println!(" The 'central' summaries above are likely dominating search results."); + println!(" Consider:"); + println!(" 1. Filtering out summaries with very high centrality"); + println!(" 2. Adding time-based weighting to prefer recent/relevant dates"); + println!(" 3. Increasing the similarity threshold from 0.3 to 0.5"); + } else { + println!("✅ GOOD EMBEDDING DIVERSITY ({:.4})", mean_sim); + println!(" Embeddings are well-differentiated."); + println!(" If same results keep appearing, the issue may be elsewhere."); + } + + Ok(()) +} diff --git a/src/bin/test_daily_summary.rs b/src/bin/test_daily_summary.rs new file mode 100644 index 0000000..5e5e679 --- /dev/null +++ b/src/bin/test_daily_summary.rs @@ -0,0 +1,285 @@ +use anyhow::Result; +use chrono::NaiveDate; +use clap::Parser; +use image_api::ai::{strip_summary_boilerplate, OllamaClient, SmsApiClient}; +use image_api::database::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao}; +use std::env; +use std::sync::{Arc, Mutex}; + +#[derive(Parser, Debug)] +#[command(author, version, about = "Test daily summary generation with different models and prompts", long_about = None)] +struct Args { + /// Contact name to generate summaries for + #[arg(short, long)] + contact: String, + + /// Start date (YYYY-MM-DD) + #[arg(short, long)] + start: String, + + /// End date (YYYY-MM-DD) + #[arg(short, long)] + end: String, + + /// Optional: Override the model to use (e.g., "qwen2.5:32b", "llama3.1:30b") + #[arg(short, long)] + model: Option, + + /// Test mode: Generate but don't save to database (shows output only) + #[arg(short = 't', long, default_value_t = false)] + test_mode: bool, + + /// Show message count and preview + #[arg(short, long, default_value_t = false)] + verbose: bool, +} + +#[tokio::main] +async fn main() -> Result<()> { + // Load .env file + dotenv::dotenv().ok(); + + // Initialize logging + env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); + + let args = Args::parse(); + + // Parse dates + let start_date = NaiveDate::parse_from_str(&args.start, "%Y-%m-%d") + .expect("Invalid start date format. Use YYYY-MM-DD"); + let end_date = NaiveDate::parse_from_str(&args.end, "%Y-%m-%d") + .expect("Invalid end date format. Use YYYY-MM-DD"); + + println!("========================================"); + println!("Daily Summary Generation Test Tool"); + println!("========================================"); + println!("Contact: {}", args.contact); + println!("Date range: {} to {}", start_date, end_date); + println!("Days: {}", (end_date - start_date).num_days() + 1); + if let Some(ref model) = args.model { + println!("Model: {}", model); + } else { + println!( + "Model: {} (from env)", + env::var("OLLAMA_PRIMARY_MODEL") + .or_else(|_| env::var("OLLAMA_MODEL")) + .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string()) + ); + } + if args.test_mode { + println!("⚠ TEST MODE: Results will NOT be saved to database"); + } + println!("========================================"); + println!(); + + // Initialize AI clients + let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL") + .or_else(|_| env::var("OLLAMA_URL")) + .unwrap_or_else(|_| "http://localhost:11434".to_string()); + + let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok(); + + // Use provided model or fallback to env + let model_to_use = args.model.clone().unwrap_or_else(|| { + env::var("OLLAMA_PRIMARY_MODEL") + .or_else(|_| env::var("OLLAMA_MODEL")) + .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string()) + }); + + let ollama = OllamaClient::new( + ollama_primary_url, + ollama_fallback_url.clone(), + model_to_use.clone(), + Some(model_to_use), // Use same model for fallback + ); + + let sms_api_url = + env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string()); + let sms_api_token = env::var("SMS_API_TOKEN").ok(); + let sms_client = SmsApiClient::new(sms_api_url, sms_api_token); + + // Initialize DAO + let summary_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteDailySummaryDao::new()))); + + // Fetch messages for contact + println!("Fetching messages for {}...", args.contact); + let all_messages = sms_client + .fetch_all_messages_for_contact(&args.contact) + .await?; + + println!( + "Found {} total messages for {}", + all_messages.len(), + args.contact + ); + println!(); + + // Filter to date range and group by date + let mut messages_by_date = std::collections::HashMap::new(); + + for msg in all_messages { + if let Some(dt) = chrono::DateTime::from_timestamp(msg.timestamp, 0) { + let date = dt.date_naive(); + if date >= start_date && date <= end_date { + messages_by_date + .entry(date) + .or_insert_with(Vec::new) + .push(msg); + } + } + } + + if messages_by_date.is_empty() { + println!("⚠ No messages found in date range"); + return Ok(()); + } + + println!("Found {} days with messages", messages_by_date.len()); + println!(); + + // Sort dates + let mut dates: Vec = messages_by_date.keys().cloned().collect(); + dates.sort(); + + // Process each day + for (idx, date) in dates.iter().enumerate() { + let messages = messages_by_date.get(date).unwrap(); + let date_str = date.format("%Y-%m-%d").to_string(); + let weekday = date.format("%A"); + + println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); + println!( + "Day {}/{}: {} ({}) - {} messages", + idx + 1, + dates.len(), + date_str, + weekday, + messages.len() + ); + println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); + + if args.verbose { + println!("\nMessage preview:"); + for (i, msg) in messages.iter().take(3).enumerate() { + let sender = if msg.is_sent { "Me" } else { &msg.contact }; + let preview = msg.body.chars().take(60).collect::(); + println!(" {}. {}: {}...", i + 1, sender, preview); + } + if messages.len() > 3 { + println!(" ... and {} more", messages.len() - 3); + } + println!(); + } + + // Format messages for LLM + let messages_text: String = messages + .iter() + .take(200) + .map(|m| { + if m.is_sent { + format!("Me: {}", m.body) + } else { + format!("{}: {}", m.contact, m.body) + } + }) + .collect::>() + .join("\n"); + + let prompt = format!( + r#"Summarize this day's conversation between me and {}. + +CRITICAL FORMAT RULES: +- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles +- Do NOT repeat the date at the beginning +- Start DIRECTLY with the content - begin with a person's name or action +- Write in past tense, as if recording what happened + +NARRATIVE (3-5 sentences): +- What specific topics, activities, or events were discussed? +- What places, people, or organizations were mentioned? +- What plans were made or decisions discussed? +- Clearly distinguish between what "I" did versus what {} did + +KEYWORDS (comma-separated): +5-10 specific keywords that capture this conversation's unique content: +- Proper nouns (people, places, brands) +- Specific activities ("drum corps audition" not just "music") +- Distinctive terms that make this day unique + +Date: {} ({}) +Messages: +{} + +YOUR RESPONSE (follow this format EXACTLY): +Summary: [Start directly with content, NO preamble] + +Keywords: [specific, unique terms]"#, + args.contact, + args.contact, + date.format("%B %d, %Y"), + weekday, + messages_text + ); + + println!("Generating summary..."); + + let summary = ollama + .generate( + &prompt, + Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."), + ) + .await?; + + println!("\n📝 GENERATED SUMMARY:"); + println!("─────────────────────────────────────────"); + println!("{}", summary.trim()); + println!("─────────────────────────────────────────"); + + if !args.test_mode { + println!("\nStripping boilerplate for embedding..."); + let stripped = strip_summary_boilerplate(&summary); + println!("Stripped: {}...", stripped.chars().take(80).collect::()); + + println!("\nGenerating embedding..."); + let embedding = ollama.generate_embedding(&stripped).await?; + println!("✓ Embedding generated ({} dimensions)", embedding.len()); + + println!("Saving to database..."); + let insert = InsertDailySummary { + date: date_str.clone(), + contact: args.contact.clone(), + summary: summary.trim().to_string(), + message_count: messages.len() as i32, + embedding, + created_at: chrono::Utc::now().timestamp(), + // model_version: "nomic-embed-text:v1.5".to_string(), + model_version: "mxbai-embed-large:335m".to_string(), + }; + + let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao"); + let context = opentelemetry::Context::new(); + + match dao.store_summary(&context, insert) { + Ok(_) => println!("✓ Saved to database"), + Err(e) => println!("✗ Database error: {:?}", e), + } + } else { + println!("\n⚠ TEST MODE: Not saved to database"); + } + + println!(); + + // Rate limiting between days + if idx < dates.len() - 1 { + tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; + } + } + + println!("========================================"); + println!("✓ Complete!"); + println!("Processed {} days", dates.len()); + println!("========================================"); + + Ok(()) +} diff --git a/src/database/daily_summary_dao.rs b/src/database/daily_summary_dao.rs index 343abd4..93d77b4 100644 --- a/src/database/daily_summary_dao.rs +++ b/src/database/daily_summary_dao.rs @@ -1,3 +1,4 @@ +use chrono::NaiveDate; use diesel::prelude::*; use diesel::sqlite::SqliteConnection; use serde::Serialize; @@ -47,6 +48,17 @@ pub trait DailySummaryDao: Sync + Send { limit: usize, ) -> Result, DbError>; + /// Find semantically similar daily summaries with time-based weighting + /// Combines cosine similarity with temporal proximity to target_date + /// Final score = similarity * time_weight, where time_weight decays with distance from target_date + fn find_similar_summaries_with_time_weight( + &mut self, + context: &opentelemetry::Context, + query_embedding: &[f32], + target_date: &str, + limit: usize, + ) -> Result, DbError>; + /// Check if a summary exists for a given date and contact fn summary_exists( &mut self, @@ -231,14 +243,22 @@ impl DailySummaryDao for SqliteDailySummaryDao { // Sort by similarity (highest first) scored_summaries.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); + // Filter out poor matches (similarity < 0.3 is likely noise) + scored_summaries.retain(|(similarity, _)| *similarity >= 0.3); + // Log similarity distribution if !scored_summaries.is_empty() { + let top_score = scored_summaries.first().map(|(s, _)| *s).unwrap_or(0.0); + let median_score = scored_summaries.get(scored_summaries.len() / 2).map(|(s, _)| *s).unwrap_or(0.0); + log::info!( - "Daily summary similarity - Top: {:.3}, Median: {:.3}, Count: {}", - scored_summaries.first().map(|(s, _)| *s).unwrap_or(0.0), - scored_summaries.get(scored_summaries.len() / 2).map(|(s, _)| *s).unwrap_or(0.0), + "Daily summary similarity - Top: {:.3}, Median: {:.3}, Count: {} (after 0.3 threshold)", + top_score, + median_score, scored_summaries.len() ); + } else { + log::warn!("No daily summaries met the 0.3 similarity threshold"); } // Take top N and log matches @@ -262,6 +282,128 @@ impl DailySummaryDao for SqliteDailySummaryDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + fn find_similar_summaries_with_time_weight( + &mut self, + context: &opentelemetry::Context, + query_embedding: &[f32], + target_date: &str, + limit: usize, + ) -> Result, DbError> { + trace_db_call(context, "query", "find_similar_summaries_with_time_weight", |_span| { + let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao"); + + if query_embedding.len() != 768 { + return Err(anyhow::anyhow!( + "Invalid query embedding dimensions: {} (expected 768)", + query_embedding.len() + )); + } + + // Parse target date + let target = NaiveDate::parse_from_str(target_date, "%Y-%m-%d") + .map_err(|e| anyhow::anyhow!("Invalid target date: {}", e))?; + + // Load all summaries with embeddings + let results = diesel::sql_query( + "SELECT id, date, contact, summary, message_count, embedding, created_at, model_version + FROM daily_conversation_summaries" + ) + .load::(conn.deref_mut()) + .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?; + + log::info!("Loaded {} daily summaries for time-weighted similarity (target: {})", results.len(), target_date); + + // Compute time-weighted similarity for each summary + // Score = cosine_similarity * time_weight + // time_weight = 1 / (1 + days_distance/30) - decays with ~30 day half-life + let mut scored_summaries: Vec<(f32, f32, i64, DailySummary)> = results + .into_iter() + .filter_map(|row| { + match Self::deserialize_vector(&row.embedding) { + Ok(embedding) => { + let similarity = Self::cosine_similarity(query_embedding, &embedding); + + // Calculate time weight + let summary_date = NaiveDate::parse_from_str(&row.date, "%Y-%m-%d").ok()?; + let days_distance = (target - summary_date).num_days().abs(); + + // Exponential decay with 30-day half-life + // At 0 days: weight = 1.0 + // At 30 days: weight = 0.5 + // At 60 days: weight = 0.25 + // At 365 days: weight ~= 0.0001 + let time_weight = 0.5_f32.powf(days_distance as f32 / 30.0); + + // Combined score - but ensure semantic similarity still matters + // We use sqrt to soften the time weight's impact + let combined_score = similarity * time_weight.sqrt(); + + Some(( + combined_score, + similarity, + days_distance, + DailySummary { + id: row.id, + date: row.date, + contact: row.contact, + summary: row.summary, + message_count: row.message_count, + created_at: row.created_at, + model_version: row.model_version, + }, + )) + } + Err(e) => { + log::warn!("Failed to deserialize embedding for summary {}: {:?}", row.id, e); + None + } + } + }) + .collect(); + + // Sort by combined score (highest first) + scored_summaries.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); + + // Filter out poor matches (base similarity < 0.5 - stricter than before since we have time weighting) + scored_summaries.retain(|(_, similarity, _, _)| *similarity >= 0.5); + + // Log similarity distribution + if !scored_summaries.is_empty() { + let (top_combined, top_sim, top_days, _) = &scored_summaries[0]; + log::info!( + "Time-weighted similarity - Top: combined={:.3} (sim={:.3}, days={}), Count: {} matches", + top_combined, + top_sim, + top_days, + scored_summaries.len() + ); + } else { + log::warn!("No daily summaries met the 0.5 similarity threshold"); + } + + // Take top N and log matches + let top_results: Vec = scored_summaries + .into_iter() + .take(limit) + .map(|(combined, similarity, days, summary)| { + log::info!( + "Summary match: combined={:.3} (sim={:.3}, days={}), date={}, contact={}, summary=\"{}\"", + combined, + similarity, + days, + summary.date, + summary.contact, + summary.summary.chars().take(80).collect::() + ); + summary + }) + .collect(); + + Ok(top_results) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + fn summary_exists( &mut self, context: &opentelemetry::Context, diff --git a/src/database/mod.rs b/src/database/mod.rs index cec38bb..bbdf33f 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -18,15 +18,11 @@ pub mod models; pub mod schema; pub mod search_dao; -pub use calendar_dao::{ - CalendarEventDao, SqliteCalendarEventDao, -}; +pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao}; pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao}; pub use embeddings_dao::{EmbeddingDao, InsertMessageEmbedding}; pub use insights_dao::{InsightDao, SqliteInsightDao}; -pub use location_dao::{ - LocationHistoryDao, SqliteLocationHistoryDao, -}; +pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao}; pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao}; pub trait UserDao {