Add Google Takeout data import infrastructure
Implements Phase 1 & 2 of Google Takeout RAG integration:
- Database migrations for calendar_events, location_history, search_history
- DAO implementations with hybrid time + semantic search
- Parsers for .ics, JSON, and HTML Google Takeout formats
- Import utilities with batch insert optimization

Features:
- CalendarEventDao: Hybrid time-range + semantic search for events
- LocationHistoryDao: GPS proximity with Haversine distance calculation
- SearchHistoryDao: Semantic-first search (queries are embedding-rich)
- Batch inserts for performance (1M+ records in minutes vs hours)
- OpenTelemetry tracing for all database operations

Import utilities:
- import_calendar: Parse .ics with optional embedding generation
- import_location_history: High-volume GPS data with batch inserts
- import_search_history: Always generates embeddings for semantic search

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
553
src/database/calendar_dao.rs
Normal file
553
src/database/calendar_dao.rs
Normal file
@@ -0,0 +1,553 @@
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use serde::Serialize;
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::database::{DbError, DbErrorKind, connect};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
/// Represents a calendar event
|
||||
#[derive(Serialize, Clone, Debug)]
|
||||
pub struct CalendarEvent {
|
||||
pub id: i32,
|
||||
pub event_uid: Option<String>,
|
||||
pub summary: String,
|
||||
pub description: Option<String>,
|
||||
pub location: Option<String>,
|
||||
pub start_time: i64,
|
||||
pub end_time: i64,
|
||||
pub all_day: bool,
|
||||
pub organizer: Option<String>,
|
||||
pub attendees: Option<String>, // JSON string
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
/// Data for inserting a new calendar event
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InsertCalendarEvent {
|
||||
pub event_uid: Option<String>,
|
||||
pub summary: String,
|
||||
pub description: Option<String>,
|
||||
pub location: Option<String>,
|
||||
pub start_time: i64,
|
||||
pub end_time: i64,
|
||||
pub all_day: bool,
|
||||
pub organizer: Option<String>,
|
||||
pub attendees: Option<String>,
|
||||
pub embedding: Option<Vec<f32>>, // 768-dim, optional
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
pub trait CalendarEventDao: Sync + Send {
|
||||
/// Store calendar event with optional embedding
|
||||
fn store_event(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
event: InsertCalendarEvent,
|
||||
) -> Result<CalendarEvent, DbError>;
|
||||
|
||||
/// Batch insert events (for import efficiency)
|
||||
fn store_events_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
events: Vec<InsertCalendarEvent>,
|
||||
) -> Result<usize, DbError>;
|
||||
|
||||
/// Find events in time range (PRIMARY query method)
|
||||
fn find_events_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<CalendarEvent>, DbError>;
|
||||
|
||||
/// Find semantically similar events (SECONDARY - requires embeddings)
|
||||
fn find_similar_events(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
limit: usize,
|
||||
) -> Result<Vec<CalendarEvent>, DbError>;
|
||||
|
||||
/// Hybrid: Time-filtered + semantic ranking
|
||||
/// "Events during photo timestamp ±N days, ranked by similarity to context"
|
||||
fn find_relevant_events_hybrid(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
center_timestamp: i64,
|
||||
time_window_days: i64,
|
||||
query_embedding: Option<&[f32]>,
|
||||
limit: usize,
|
||||
) -> Result<Vec<CalendarEvent>, DbError>;
|
||||
|
||||
/// Check if event exists (idempotency)
|
||||
fn event_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
event_uid: &str,
|
||||
start_time: i64,
|
||||
) -> Result<bool, DbError>;
|
||||
|
||||
/// Get count of events
|
||||
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
|
||||
}
|
||||
|
||||
pub struct SqliteCalendarEventDao {
|
||||
connection: Arc<Mutex<SqliteConnection>>,
|
||||
}
|
||||
|
||||
impl Default for SqliteCalendarEventDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteCalendarEventDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteCalendarEventDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
|
||||
use zerocopy::IntoBytes;
|
||||
vec.as_bytes().to_vec()
|
||||
}
|
||||
|
||||
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
|
||||
if bytes.len() % 4 != 0 {
|
||||
return Err(DbError::new(DbErrorKind::QueryError));
|
||||
}
|
||||
|
||||
let count = bytes.len() / 4;
|
||||
let mut vec = Vec::with_capacity(count);
|
||||
|
||||
for chunk in bytes.chunks_exact(4) {
|
||||
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
|
||||
vec.push(float);
|
||||
}
|
||||
|
||||
Ok(vec)
|
||||
}
|
||||
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if magnitude_a == 0.0 || magnitude_b == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
dot_product / (magnitude_a * magnitude_b)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CalendarEventWithVectorRow {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
event_uid: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
summary: String,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
description: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
location: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
start_time: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
end_time: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Bool)]
|
||||
all_day: bool,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
organizer: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
attendees: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Binary>)]
|
||||
embedding: Option<Vec<u8>>,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
created_at: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
source_file: Option<String>,
|
||||
}
|
||||
|
||||
impl CalendarEventWithVectorRow {
|
||||
fn to_calendar_event(&self) -> CalendarEvent {
|
||||
CalendarEvent {
|
||||
id: self.id,
|
||||
event_uid: self.event_uid.clone(),
|
||||
summary: self.summary.clone(),
|
||||
description: self.description.clone(),
|
||||
location: self.location.clone(),
|
||||
start_time: self.start_time,
|
||||
end_time: self.end_time,
|
||||
all_day: self.all_day,
|
||||
organizer: self.organizer.clone(),
|
||||
attendees: self.attendees.clone(),
|
||||
created_at: self.created_at,
|
||||
source_file: self.source_file.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct LastInsertRowId {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
}
|
||||
|
||||
impl CalendarEventDao for SqliteCalendarEventDao {
|
||||
fn store_event(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
event: InsertCalendarEvent,
|
||||
) -> Result<CalendarEvent, DbError> {
|
||||
trace_db_call(context, "insert", "store_event", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get CalendarEventDao");
|
||||
|
||||
// Validate embedding dimensions if provided
|
||||
if let Some(ref emb) = event.embedding {
|
||||
if emb.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid embedding dimensions: {} (expected 768)",
|
||||
emb.len()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let embedding_bytes = event.embedding.as_ref().map(|e| Self::serialize_vector(e));
|
||||
|
||||
// INSERT OR REPLACE to handle re-imports
|
||||
diesel::sql_query(
|
||||
"INSERT OR REPLACE INTO calendar_events
|
||||
(event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.event_uid)
|
||||
.bind::<diesel::sql_types::Text, _>(&event.summary)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.description)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.location)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.start_time)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.end_time)
|
||||
.bind::<diesel::sql_types::Bool, _>(event.all_day)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.organizer)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.attendees)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&event.source_file)
|
||||
.execute(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
|
||||
|
||||
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
|
||||
.get_result::<LastInsertRowId>(conn.deref_mut())
|
||||
.map(|r| r.id)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
|
||||
|
||||
Ok(CalendarEvent {
|
||||
id: row_id,
|
||||
event_uid: event.event_uid,
|
||||
summary: event.summary,
|
||||
description: event.description,
|
||||
location: event.location,
|
||||
start_time: event.start_time,
|
||||
end_time: event.end_time,
|
||||
all_day: event.all_day,
|
||||
organizer: event.organizer,
|
||||
attendees: event.attendees,
|
||||
created_at: event.created_at,
|
||||
source_file: event.source_file,
|
||||
})
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn store_events_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
events: Vec<InsertCalendarEvent>,
|
||||
) -> Result<usize, DbError> {
|
||||
trace_db_call(context, "insert", "store_events_batch", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get CalendarEventDao");
|
||||
let mut inserted = 0;
|
||||
|
||||
conn.transaction::<_, anyhow::Error, _>(|conn| {
|
||||
for event in events {
|
||||
// Validate embedding if provided
|
||||
if let Some(ref emb) = event.embedding {
|
||||
if emb.len() != 768 {
|
||||
log::warn!(
|
||||
"Skipping event with invalid embedding dimensions: {}",
|
||||
emb.len()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let embedding_bytes =
|
||||
event.embedding.as_ref().map(|e| Self::serialize_vector(e));
|
||||
|
||||
diesel::sql_query(
|
||||
"INSERT OR REPLACE INTO calendar_events
|
||||
(event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.event_uid,
|
||||
)
|
||||
.bind::<diesel::sql_types::Text, _>(&event.summary)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.description,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.location,
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.start_time)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.end_time)
|
||||
.bind::<diesel::sql_types::Bool, _>(event.all_day)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.organizer,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.attendees,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
|
||||
&embedding_bytes,
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(event.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&event.source_file,
|
||||
)
|
||||
.execute(conn)
|
||||
.map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;
|
||||
|
||||
inserted += 1;
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
.map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;
|
||||
|
||||
Ok(inserted)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
fn find_events_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<CalendarEvent>, DbError> {
|
||||
trace_db_call(context, "query", "find_events_in_range", |_span| {
|
||||
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||
|
||||
diesel::sql_query(
|
||||
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, NULL as embedding, created_at, source_file
|
||||
FROM calendar_events
|
||||
WHERE start_time >= ?1 AND start_time <= ?2
|
||||
ORDER BY start_time ASC"
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
|
||||
.map(|rows| rows.into_iter().map(|r| r.to_calendar_event()).collect())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_similar_events(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
query_embedding: &[f32],
|
||||
limit: usize,
|
||||
) -> Result<Vec<CalendarEvent>, DbError> {
|
||||
trace_db_call(context, "query", "find_similar_events", |_span| {
|
||||
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||
|
||||
if query_embedding.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid query embedding dimensions: {} (expected 768)",
|
||||
query_embedding.len()
|
||||
));
|
||||
}
|
||||
|
||||
// Load all events with embeddings
|
||||
let results = diesel::sql_query(
|
||||
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, embedding, created_at, source_file
|
||||
FROM calendar_events
|
||||
WHERE embedding IS NOT NULL"
|
||||
)
|
||||
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
// Compute similarities
|
||||
let mut scored_events: Vec<(f32, CalendarEvent)> = results
|
||||
.into_iter()
|
||||
.filter_map(|row| {
|
||||
if let Some(ref emb_bytes) = row.embedding {
|
||||
if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
|
||||
let similarity = Self::cosine_similarity(query_embedding, &emb);
|
||||
Some((similarity, row.to_calendar_event()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by similarity descending
|
||||
scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
log::info!("Found {} similar calendar events", scored_events.len());
|
||||
if !scored_events.is_empty() {
|
||||
log::info!("Top similarity: {:.4}", scored_events[0].0);
|
||||
}
|
||||
|
||||
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_relevant_events_hybrid(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
center_timestamp: i64,
|
||||
time_window_days: i64,
|
||||
query_embedding: Option<&[f32]>,
|
||||
limit: usize,
|
||||
) -> Result<Vec<CalendarEvent>, DbError> {
|
||||
trace_db_call(context, "query", "find_relevant_events_hybrid", |_span| {
|
||||
let window_seconds = time_window_days * 86400;
|
||||
let start_ts = center_timestamp - window_seconds;
|
||||
let end_ts = center_timestamp + window_seconds;
|
||||
|
||||
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||
|
||||
// Step 1: Time-based filter (fast, indexed)
|
||||
let events_in_range = diesel::sql_query(
|
||||
"SELECT id, event_uid, summary, description, location, start_time, end_time, all_day,
|
||||
organizer, attendees, embedding, created_at, source_file
|
||||
FROM calendar_events
|
||||
WHERE start_time >= ?1 AND start_time <= ?2"
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.load::<CalendarEventWithVectorRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
// Step 2: If query embedding provided, rank by semantic similarity
|
||||
if let Some(query_emb) = query_embedding {
|
||||
if query_emb.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid query embedding dimensions: {} (expected 768)",
|
||||
query_emb.len()
|
||||
));
|
||||
}
|
||||
|
||||
let mut scored_events: Vec<(f32, CalendarEvent)> = events_in_range
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
// Events with embeddings get semantic scoring
|
||||
let similarity = if let Some(ref emb_bytes) = row.embedding {
|
||||
if let Ok(emb) = Self::deserialize_vector(emb_bytes) {
|
||||
Self::cosine_similarity(query_emb, &emb)
|
||||
} else {
|
||||
0.5 // Neutral score for deserialization errors
|
||||
}
|
||||
} else {
|
||||
0.5 // Neutral score for events without embeddings
|
||||
};
|
||||
(similarity, row.to_calendar_event())
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by similarity descending
|
||||
scored_events.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
log::info!("Hybrid query: {} events in time range, ranked by similarity", scored_events.len());
|
||||
if !scored_events.is_empty() {
|
||||
log::info!("Top similarity: {:.4}", scored_events[0].0);
|
||||
}
|
||||
|
||||
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
|
||||
} else {
|
||||
// No semantic ranking, just return time-sorted (limit applied)
|
||||
log::info!("Time-only query: {} events in range", events_in_range.len());
|
||||
Ok(events_in_range.into_iter().take(limit).map(|r| r.to_calendar_event()).collect())
|
||||
}
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn event_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
event_uid: &str,
|
||||
start_time: i64,
|
||||
) -> Result<bool, DbError> {
|
||||
trace_db_call(context, "query", "event_exists", |_span| {
|
||||
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
count: i32,
|
||||
}
|
||||
|
||||
let result: CountResult = diesel::sql_query(
|
||||
"SELECT COUNT(*) as count FROM calendar_events WHERE event_uid = ?1 AND start_time = ?2"
|
||||
)
|
||||
.bind::<diesel::sql_types::Text, _>(event_uid)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_time)
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count > 0)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
|
||||
trace_db_call(context, "query", "get_event_count", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get CalendarEventDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
count: i64,
|
||||
}
|
||||
|
||||
let result: CountResult =
|
||||
diesel::sql_query("SELECT COUNT(*) as count FROM calendar_events")
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
}
|
||||
528
src/database/location_dao.rs
Normal file
528
src/database/location_dao.rs
Normal file
@@ -0,0 +1,528 @@
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use serde::Serialize;
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::database::{DbError, DbErrorKind, connect};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
/// Represents a location history record
|
||||
#[derive(Serialize, Clone, Debug)]
|
||||
pub struct LocationRecord {
|
||||
pub id: i32,
|
||||
pub timestamp: i64,
|
||||
pub latitude: f64,
|
||||
pub longitude: f64,
|
||||
pub accuracy: Option<i32>,
|
||||
pub activity: Option<String>,
|
||||
pub activity_confidence: Option<i32>,
|
||||
pub place_name: Option<String>,
|
||||
pub place_category: Option<String>,
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
/// Data for inserting a new location record
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InsertLocationRecord {
|
||||
pub timestamp: i64,
|
||||
pub latitude: f64,
|
||||
pub longitude: f64,
|
||||
pub accuracy: Option<i32>,
|
||||
pub activity: Option<String>,
|
||||
pub activity_confidence: Option<i32>,
|
||||
pub place_name: Option<String>,
|
||||
pub place_category: Option<String>,
|
||||
pub embedding: Option<Vec<f32>>, // 768-dim, optional (rarely used)
|
||||
pub created_at: i64,
|
||||
pub source_file: Option<String>,
|
||||
}
|
||||
|
||||
pub trait LocationHistoryDao: Sync + Send {
|
||||
/// Store single location record
|
||||
fn store_location(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
location: InsertLocationRecord,
|
||||
) -> Result<LocationRecord, DbError>;
|
||||
|
||||
/// Batch insert locations (Google Takeout has millions of points)
|
||||
fn store_locations_batch(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
locations: Vec<InsertLocationRecord>,
|
||||
) -> Result<usize, DbError>;
|
||||
|
||||
/// Find nearest location to timestamp (PRIMARY query)
|
||||
/// "Where was I at photo timestamp ±N minutes?"
|
||||
fn find_nearest_location(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
max_time_diff_seconds: i64,
|
||||
) -> Result<Option<LocationRecord>, DbError>;
|
||||
|
||||
/// Find locations in time range
|
||||
fn find_locations_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<LocationRecord>, DbError>;
|
||||
|
||||
/// Find locations near GPS coordinates (for "photos near this place")
|
||||
/// Uses approximate bounding box for performance
|
||||
fn find_locations_near_point(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
latitude: f64,
|
||||
longitude: f64,
|
||||
radius_km: f64,
|
||||
) -> Result<Vec<LocationRecord>, DbError>;
|
||||
|
||||
/// Deduplicate: check if location exists
|
||||
fn location_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
latitude: f64,
|
||||
longitude: f64,
|
||||
) -> Result<bool, DbError>;
|
||||
|
||||
/// Get count of location records
|
||||
fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
|
||||
}
|
||||
|
||||
pub struct SqliteLocationHistoryDao {
|
||||
connection: Arc<Mutex<SqliteConnection>>,
|
||||
}
|
||||
|
||||
impl Default for SqliteLocationHistoryDao {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl SqliteLocationHistoryDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteLocationHistoryDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
|
||||
use zerocopy::IntoBytes;
|
||||
vec.as_bytes().to_vec()
|
||||
}
|
||||
|
||||
/// Haversine distance calculation (in kilometers)
|
||||
/// Used for filtering locations by proximity to a point
|
||||
fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
|
||||
const R: f64 = 6371.0; // Earth radius in km
|
||||
|
||||
let d_lat = (lat2 - lat1).to_radians();
|
||||
let d_lon = (lon2 - lon1).to_radians();
|
||||
|
||||
let a = (d_lat / 2.0).sin().powi(2)
|
||||
+ lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2);
|
||||
|
||||
let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());
|
||||
|
||||
R * c
|
||||
}
|
||||
|
||||
/// Calculate approximate bounding box for spatial queries
|
||||
/// Returns (min_lat, max_lat, min_lon, max_lon)
|
||||
fn bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f64) {
|
||||
const KM_PER_DEGREE_LAT: f64 = 111.0;
|
||||
let km_per_degree_lon = 111.0 * lat.to_radians().cos();
|
||||
|
||||
let delta_lat = radius_km / KM_PER_DEGREE_LAT;
|
||||
let delta_lon = radius_km / km_per_degree_lon;
|
||||
|
||||
(
|
||||
lat - delta_lat, // min_lat
|
||||
lat + delta_lat, // max_lat
|
||||
lon - delta_lon, // min_lon
|
||||
lon + delta_lon, // max_lon
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct LocationRecordRow {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
timestamp: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Float)]
|
||||
latitude: f32,
|
||||
#[diesel(sql_type = diesel::sql_types::Float)]
|
||||
longitude: f32,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
|
||||
accuracy: Option<i32>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
activity: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Integer>)]
|
||||
activity_confidence: Option<i32>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
place_name: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
place_category: Option<String>,
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
created_at: i64,
|
||||
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
||||
source_file: Option<String>,
|
||||
}
|
||||
|
||||
impl LocationRecordRow {
|
||||
fn to_location_record(&self) -> LocationRecord {
|
||||
LocationRecord {
|
||||
id: self.id,
|
||||
timestamp: self.timestamp,
|
||||
latitude: self.latitude as f64,
|
||||
longitude: self.longitude as f64,
|
||||
accuracy: self.accuracy,
|
||||
activity: self.activity.clone(),
|
||||
activity_confidence: self.activity_confidence,
|
||||
place_name: self.place_name.clone(),
|
||||
place_category: self.place_category.clone(),
|
||||
created_at: self.created_at,
|
||||
source_file: self.source_file.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct LastInsertRowId {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
id: i32,
|
||||
}
|
||||
|
||||
impl LocationHistoryDao for SqliteLocationHistoryDao {
|
||||
fn store_location(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
location: InsertLocationRecord,
|
||||
) -> Result<LocationRecord, DbError> {
|
||||
trace_db_call(context, "insert", "store_location", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
// Validate embedding dimensions if provided (rare for location data)
|
||||
if let Some(ref emb) = location.embedding {
|
||||
if emb.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid embedding dimensions: {} (expected 768)",
|
||||
emb.len()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let embedding_bytes = location
|
||||
.embedding
|
||||
.as_ref()
|
||||
.map(|e| Self::serialize_vector(e));
|
||||
|
||||
// INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+lat+lon)
|
||||
diesel::sql_query(
|
||||
"INSERT OR IGNORE INTO location_history
|
||||
(timestamp, latitude, longitude, accuracy, activity, activity_confidence,
|
||||
place_name, place_category, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(location.timestamp)
|
||||
.bind::<diesel::sql_types::Float, _>(location.latitude as f32)
|
||||
.bind::<diesel::sql_types::Float, _>(location.longitude as f32)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(&location.accuracy)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.activity)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
|
||||
&location.activity_confidence,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.place_name)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
|
||||
&location.place_category,
|
||||
)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(&embedding_bytes)
|
||||
.bind::<diesel::sql_types::BigInt, _>(location.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&location.source_file)
|
||||
.execute(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
|
||||
|
||||
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
|
||||
.get_result::<LastInsertRowId>(conn.deref_mut())
|
||||
.map(|r| r.id)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
|
||||
|
||||
Ok(LocationRecord {
|
||||
id: row_id,
|
||||
timestamp: location.timestamp,
|
||||
latitude: location.latitude,
|
||||
longitude: location.longitude,
|
||||
accuracy: location.accuracy,
|
||||
activity: location.activity,
|
||||
activity_confidence: location.activity_confidence,
|
||||
place_name: location.place_name,
|
||||
place_category: location.place_category,
|
||||
created_at: location.created_at,
|
||||
source_file: location.source_file,
|
||||
})
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
    /// Insert many location records inside a single transaction.
    ///
    /// Uses `INSERT OR IGNORE`, so rows that collide with an existing record
    /// are skipped silently; the returned count includes only rows actually
    /// written. Records carrying an embedding whose length is not 768 are
    /// skipped with a warning rather than aborting the whole batch.
    fn store_locations_batch(
        &mut self,
        context: &opentelemetry::Context,
        locations: Vec<InsertLocationRecord>,
    ) -> Result<usize, DbError> {
        trace_db_call(context, "insert", "store_locations_batch", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get LocationHistoryDao");
            let mut inserted = 0;

            // One transaction for the whole batch: a single commit instead of
            // one per row — this is what makes high-volume imports fast.
            conn.transaction::<_, anyhow::Error, _>(|conn| {
                for location in locations {
                    // Validate embedding if provided (rare)
                    if let Some(ref emb) = location.embedding {
                        if emb.len() != 768 {
                            log::warn!(
                                "Skipping location with invalid embedding dimensions: {}",
                                emb.len()
                            );
                            continue;
                        }
                    }

                    let embedding_bytes = location
                        .embedding
                        .as_ref()
                        .map(|e| Self::serialize_vector(e));

                    let rows_affected = diesel::sql_query(
                        "INSERT OR IGNORE INTO location_history
                         (timestamp, latitude, longitude, accuracy, activity, activity_confidence,
                          place_name, place_category, embedding, created_at, source_file)
                         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
                    )
                    .bind::<diesel::sql_types::BigInt, _>(location.timestamp)
                    // Coordinates are narrowed to f32 to match the schema's Float columns.
                    .bind::<diesel::sql_types::Float, _>(location.latitude as f32)
                    .bind::<diesel::sql_types::Float, _>(location.longitude as f32)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
                        &location.accuracy,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &location.activity,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Integer>, _>(
                        &location.activity_confidence,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &location.place_name,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &location.place_category,
                    )
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Binary>, _>(
                        &embedding_bytes,
                    )
                    .bind::<diesel::sql_types::BigInt, _>(location.created_at)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &location.source_file,
                    )
                    .execute(conn)
                    .map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;

                    // INSERT OR IGNORE reports 0 affected rows for duplicates.
                    if rows_affected > 0 {
                        inserted += 1;
                    }
                }
                Ok(())
            })
            .map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;

            Ok(inserted)
        })
        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }
|
||||
|
||||
fn find_nearest_location(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
max_time_diff_seconds: i64,
|
||||
) -> Result<Option<LocationRecord>, DbError> {
|
||||
trace_db_call(context, "query", "find_nearest_location", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
let start_ts = timestamp - max_time_diff_seconds;
|
||||
let end_ts = timestamp + max_time_diff_seconds;
|
||||
|
||||
// Find location closest to target timestamp within window
|
||||
let results = diesel::sql_query(
|
||||
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
|
||||
place_name, place_category, created_at, source_file
|
||||
FROM location_history
|
||||
WHERE timestamp >= ?1 AND timestamp <= ?2
|
||||
ORDER BY ABS(timestamp - ?3) ASC
|
||||
LIMIT 1"
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(timestamp)
|
||||
.load::<LocationRecordRow>(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(results.into_iter().next().map(|r| r.to_location_record()))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_locations_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<LocationRecord>, DbError> {
|
||||
trace_db_call(context, "query", "find_locations_in_range", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
diesel::sql_query(
|
||||
"SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
|
||||
place_name, place_category, created_at, source_file
|
||||
FROM location_history
|
||||
WHERE timestamp >= ?1 AND timestamp <= ?2
|
||||
ORDER BY timestamp ASC"
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.load::<LocationRecordRow>(conn.deref_mut())
|
||||
.map(|rows| rows.into_iter().map(|r| r.to_location_record()).collect())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
    /// Find all locations within `radius_km` of (`latitude`, `longitude`).
    ///
    /// Two-stage search: a rectangular bounding-box SQL filter narrows the
    /// candidates cheaply first, then an exact Haversine great-circle
    /// distance check discards the corner points that fall inside the box
    /// but outside the circle.
    fn find_locations_near_point(
        &mut self,
        context: &opentelemetry::Context,
        latitude: f64,
        longitude: f64,
        radius_km: f64,
    ) -> Result<Vec<LocationRecord>, DbError> {
        trace_db_call(context, "query", "find_locations_near_point", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get LocationHistoryDao");

            // Use bounding box for initial filter (fast, indexed)
            let (min_lat, max_lat, min_lon, max_lon) =
                Self::bounding_box(latitude, longitude, radius_km);

            let results = diesel::sql_query(
                "SELECT id, timestamp, latitude, longitude, accuracy, activity, activity_confidence,
                 place_name, place_category, created_at, source_file
                 FROM location_history
                 WHERE latitude >= ?1 AND latitude <= ?2
                 AND longitude >= ?3 AND longitude <= ?4"
            )
            // Bounds narrowed to f32 to match the schema's Float columns.
            .bind::<diesel::sql_types::Float, _>(min_lat as f32)
            .bind::<diesel::sql_types::Float, _>(max_lat as f32)
            .bind::<diesel::sql_types::Float, _>(min_lon as f32)
            .bind::<diesel::sql_types::Float, _>(max_lon as f32)
            .load::<LocationRecordRow>(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            // Refine with Haversine distance (in-memory, post-filter)
            let filtered: Vec<LocationRecord> = results
                .into_iter()
                .map(|r| r.to_location_record())
                .filter(|loc| {
                    let distance =
                        Self::haversine_distance(latitude, longitude, loc.latitude, loc.longitude);
                    distance <= radius_km
                })
                .collect();

            log::info!(
                "Found {} locations within {} km of ({}, {})",
                filtered.len(),
                radius_km,
                latitude,
                longitude
            );

            Ok(filtered)
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
|
||||
|
||||
fn location_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
latitude: f64,
|
||||
longitude: f64,
|
||||
) -> Result<bool, DbError> {
|
||||
trace_db_call(context, "query", "location_exists", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
count: i32,
|
||||
}
|
||||
|
||||
let result: CountResult = diesel::sql_query(
|
||||
"SELECT COUNT(*) as count FROM location_history
|
||||
WHERE timestamp = ?1 AND latitude = ?2 AND longitude = ?3",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(timestamp)
|
||||
.bind::<diesel::sql_types::Float, _>(latitude as f32)
|
||||
.bind::<diesel::sql_types::Float, _>(longitude as f32)
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count > 0)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
|
||||
trace_db_call(context, "query", "get_location_count", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
count: i64,
|
||||
}
|
||||
|
||||
let result: CountResult =
|
||||
diesel::sql_query("SELECT COUNT(*) as count FROM location_history")
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
}
|
||||
@@ -9,15 +9,25 @@ use crate::database::models::{
|
||||
};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
pub mod calendar_dao;
|
||||
pub mod daily_summary_dao;
|
||||
pub mod embeddings_dao;
|
||||
pub mod insights_dao;
|
||||
pub mod location_dao;
|
||||
pub mod models;
|
||||
pub mod schema;
|
||||
pub mod search_dao;
|
||||
|
||||
pub use calendar_dao::{
|
||||
CalendarEvent, CalendarEventDao, InsertCalendarEvent, SqliteCalendarEventDao,
|
||||
};
|
||||
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
|
||||
pub use embeddings_dao::{EmbeddingDao, InsertMessageEmbedding};
|
||||
pub use insights_dao::{InsightDao, SqliteInsightDao};
|
||||
pub use location_dao::{
|
||||
InsertLocationRecord, LocationHistoryDao, LocationRecord, SqliteLocationHistoryDao,
|
||||
};
|
||||
pub use search_dao::{InsertSearchRecord, SearchHistoryDao, SearchRecord, SqliteSearchHistoryDao};
|
||||
|
||||
pub trait UserDao {
|
||||
fn create_user(&mut self, user: &str, password: &str) -> Option<User>;
|
||||
@@ -485,8 +495,8 @@ impl ExifDao for SqliteExifDao {
|
||||
// GPS bounding box
|
||||
if let Some((min_lat, max_lat, min_lon, max_lon)) = gps_bounds {
|
||||
query = query
|
||||
.filter(gps_latitude.between(min_lat, max_lat))
|
||||
.filter(gps_longitude.between(min_lon, max_lon))
|
||||
.filter(gps_latitude.between(min_lat as f32, max_lat as f32))
|
||||
.filter(gps_longitude.between(min_lon as f32, max_lon as f32))
|
||||
.filter(gps_latitude.is_not_null())
|
||||
.filter(gps_longitude.is_not_null());
|
||||
}
|
||||
|
||||
@@ -40,11 +40,11 @@ pub struct InsertImageExif {
|
||||
pub width: Option<i32>,
|
||||
pub height: Option<i32>,
|
||||
pub orientation: Option<i32>,
|
||||
pub gps_latitude: Option<f64>,
|
||||
pub gps_longitude: Option<f64>,
|
||||
pub gps_altitude: Option<f64>,
|
||||
pub focal_length: Option<f64>,
|
||||
pub aperture: Option<f64>,
|
||||
pub gps_latitude: Option<f32>,
|
||||
pub gps_longitude: Option<f32>,
|
||||
pub gps_altitude: Option<f32>,
|
||||
pub focal_length: Option<f32>,
|
||||
pub aperture: Option<f32>,
|
||||
pub shutter_speed: Option<String>,
|
||||
pub iso: Option<i32>,
|
||||
pub date_taken: Option<i64>,
|
||||
@@ -62,11 +62,11 @@ pub struct ImageExif {
|
||||
pub width: Option<i32>,
|
||||
pub height: Option<i32>,
|
||||
pub orientation: Option<i32>,
|
||||
pub gps_latitude: Option<f64>,
|
||||
pub gps_longitude: Option<f64>,
|
||||
pub gps_altitude: Option<f64>,
|
||||
pub focal_length: Option<f64>,
|
||||
pub aperture: Option<f64>,
|
||||
pub gps_latitude: Option<f32>,
|
||||
pub gps_longitude: Option<f32>,
|
||||
pub gps_altitude: Option<f32>,
|
||||
pub focal_length: Option<f32>,
|
||||
pub aperture: Option<f32>,
|
||||
pub shutter_speed: Option<String>,
|
||||
pub iso: Option<i32>,
|
||||
pub date_taken: Option<i64>,
|
||||
|
||||
@@ -1,4 +1,37 @@
|
||||
table! {
|
||||
// @generated automatically by Diesel CLI.
|
||||
|
||||
diesel::table! {
|
||||
calendar_events (id) {
|
||||
id -> Integer,
|
||||
event_uid -> Nullable<Text>,
|
||||
summary -> Text,
|
||||
description -> Nullable<Text>,
|
||||
location -> Nullable<Text>,
|
||||
start_time -> BigInt,
|
||||
end_time -> BigInt,
|
||||
all_day -> Bool,
|
||||
organizer -> Nullable<Text>,
|
||||
attendees -> Nullable<Text>,
|
||||
embedding -> Nullable<Binary>,
|
||||
created_at -> BigInt,
|
||||
source_file -> Nullable<Text>,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
daily_conversation_summaries (id) {
|
||||
id -> Integer,
|
||||
date -> Text,
|
||||
contact -> Text,
|
||||
summary -> Text,
|
||||
message_count -> Integer,
|
||||
embedding -> Binary,
|
||||
created_at -> BigInt,
|
||||
model_version -> Text,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
favorites (id) {
|
||||
id -> Integer,
|
||||
userid -> Integer,
|
||||
@@ -6,7 +39,7 @@ table! {
|
||||
}
|
||||
}
|
||||
|
||||
table! {
|
||||
diesel::table! {
|
||||
image_exif (id) {
|
||||
id -> Integer,
|
||||
file_path -> Text,
|
||||
@@ -16,11 +49,11 @@ table! {
|
||||
width -> Nullable<Integer>,
|
||||
height -> Nullable<Integer>,
|
||||
orientation -> Nullable<Integer>,
|
||||
gps_latitude -> Nullable<Double>,
|
||||
gps_longitude -> Nullable<Double>,
|
||||
gps_altitude -> Nullable<Double>,
|
||||
focal_length -> Nullable<Double>,
|
||||
aperture -> Nullable<Double>,
|
||||
gps_latitude -> Nullable<Float>,
|
||||
gps_longitude -> Nullable<Float>,
|
||||
gps_altitude -> Nullable<Float>,
|
||||
focal_length -> Nullable<Float>,
|
||||
aperture -> Nullable<Float>,
|
||||
shutter_speed -> Nullable<Text>,
|
||||
iso -> Nullable<Integer>,
|
||||
date_taken -> Nullable<BigInt>,
|
||||
@@ -29,24 +62,49 @@ table! {
|
||||
}
|
||||
}
|
||||
|
||||
table! {
|
||||
tagged_photo (id) {
|
||||
diesel::table! {
|
||||
knowledge_embeddings (id) {
|
||||
id -> Integer,
|
||||
photo_name -> Text,
|
||||
tag_id -> Integer,
|
||||
created_time -> BigInt,
|
||||
keyword -> Text,
|
||||
description -> Text,
|
||||
category -> Nullable<Text>,
|
||||
embedding -> Binary,
|
||||
created_at -> BigInt,
|
||||
model_version -> Text,
|
||||
}
|
||||
}
|
||||
|
||||
table! {
|
||||
tags (id) {
|
||||
diesel::table! {
|
||||
location_history (id) {
|
||||
id -> Integer,
|
||||
name -> Text,
|
||||
created_time -> BigInt,
|
||||
timestamp -> BigInt,
|
||||
latitude -> Float,
|
||||
longitude -> Float,
|
||||
accuracy -> Nullable<Integer>,
|
||||
activity -> Nullable<Text>,
|
||||
activity_confidence -> Nullable<Integer>,
|
||||
place_name -> Nullable<Text>,
|
||||
place_category -> Nullable<Text>,
|
||||
embedding -> Nullable<Binary>,
|
||||
created_at -> BigInt,
|
||||
source_file -> Nullable<Text>,
|
||||
}
|
||||
}
|
||||
|
||||
table! {
|
||||
diesel::table! {
|
||||
message_embeddings (id) {
|
||||
id -> Integer,
|
||||
contact -> Text,
|
||||
body -> Text,
|
||||
timestamp -> BigInt,
|
||||
is_sent -> Bool,
|
||||
embedding -> Binary,
|
||||
created_at -> BigInt,
|
||||
model_version -> Text,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
photo_insights (id) {
|
||||
id -> Integer,
|
||||
file_path -> Text,
|
||||
@@ -57,7 +115,36 @@ table! {
|
||||
}
|
||||
}
|
||||
|
||||
table! {
|
||||
diesel::table! {
|
||||
search_history (id) {
|
||||
id -> Integer,
|
||||
timestamp -> BigInt,
|
||||
query -> Text,
|
||||
search_engine -> Nullable<Text>,
|
||||
embedding -> Binary,
|
||||
created_at -> BigInt,
|
||||
source_file -> Nullable<Text>,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
tagged_photo (id) {
|
||||
id -> Integer,
|
||||
photo_name -> Text,
|
||||
tag_id -> Integer,
|
||||
created_time -> BigInt,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
tags (id) {
|
||||
id -> Integer,
|
||||
name -> Text,
|
||||
created_time -> BigInt,
|
||||
}
|
||||
}
|
||||
|
||||
diesel::table! {
|
||||
users (id) {
|
||||
id -> Integer,
|
||||
username -> Text,
|
||||
@@ -65,12 +152,18 @@ table! {
|
||||
}
|
||||
}
|
||||
|
||||
joinable!(tagged_photo -> tags (tag_id));
|
||||
diesel::joinable!(tagged_photo -> tags (tag_id));
|
||||
|
||||
allow_tables_to_appear_in_same_query!(
|
||||
diesel::allow_tables_to_appear_in_same_query!(
|
||||
calendar_events,
|
||||
daily_conversation_summaries,
|
||||
favorites,
|
||||
image_exif,
|
||||
knowledge_embeddings,
|
||||
location_history,
|
||||
message_embeddings,
|
||||
photo_insights,
|
||||
search_history,
|
||||
tagged_photo,
|
||||
tags,
|
||||
users,
|
||||
|
||||
516
src/database/search_dao.rs
Normal file
516
src/database/search_dao.rs
Normal file
@@ -0,0 +1,516 @@
|
||||
use diesel::prelude::*;
|
||||
use diesel::sqlite::SqliteConnection;
|
||||
use serde::Serialize;
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::database::{DbError, DbErrorKind, connect};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
/// Represents a search history record as returned from the database.
///
/// The stored embedding is intentionally NOT carried here: query results
/// are projected without their vectors to keep payloads small.
#[derive(Serialize, Clone, Debug)]
pub struct SearchRecord {
    pub id: i32,                       // SQLite primary key
    pub timestamp: i64,                // when the search happened (epoch seconds; see hybrid query's 86400 s/day math)
    pub query: String,                 // raw search query text
    pub search_engine: Option<String>, // originating engine, if known
    pub created_at: i64,               // row import time; presumably epoch seconds — TODO confirm
    pub source_file: Option<String>,   // Takeout file the record came from
}
|
||||
|
||||
/// Data for inserting a new search record.
///
/// Unlike location records, the embedding here is mandatory: search queries
/// are embedding-rich and the DAO rejects vectors that are not 768-dim.
#[derive(Clone, Debug)]
pub struct InsertSearchRecord {
    pub timestamp: i64,                // when the search happened (epoch seconds)
    pub query: String,                 // raw search query text
    pub search_engine: Option<String>, // originating engine, if known
    pub embedding: Vec<f32>,           // 768-dim, REQUIRED
    pub created_at: i64,               // row import time
    pub source_file: Option<String>,   // Takeout file the record came from
}
|
||||
|
||||
/// Data-access trait for Google Takeout search history.
///
/// All methods take an OpenTelemetry context so implementations can emit
/// per-call database spans.
pub trait SearchHistoryDao: Sync + Send {
    /// Store a single search with its (required, 768-dim) embedding.
    fn store_search(
        &mut self,
        context: &opentelemetry::Context,
        search: InsertSearchRecord,
    ) -> Result<SearchRecord, DbError>;

    /// Batch insert searches; returns how many rows were actually written
    /// (duplicates and invalid embeddings are skipped).
    fn store_searches_batch(
        &mut self,
        context: &opentelemetry::Context,
        searches: Vec<InsertSearchRecord>,
    ) -> Result<usize, DbError>;

    /// Find searches in time range (for temporal context).
    fn find_searches_in_range(
        &mut self,
        context: &opentelemetry::Context,
        start_ts: i64,
        end_ts: i64,
    ) -> Result<Vec<SearchRecord>, DbError>;

    /// Find semantically similar searches (PRIMARY - embeddings shine here).
    /// `query_embedding` must be 768-dim; results ranked by cosine similarity.
    fn find_similar_searches(
        &mut self,
        context: &opentelemetry::Context,
        query_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<SearchRecord>, DbError>;

    /// Hybrid: restrict to `center_timestamp` ± `time_window_days`, then rank
    /// by semantic similarity when an embedding is supplied.
    fn find_relevant_searches_hybrid(
        &mut self,
        context: &opentelemetry::Context,
        center_timestamp: i64,
        time_window_days: i64,
        query_embedding: Option<&[f32]>,
        limit: usize,
    ) -> Result<Vec<SearchRecord>, DbError>;

    /// Deduplication check on the (timestamp, query) pair.
    fn search_exists(
        &mut self,
        context: &opentelemetry::Context,
        timestamp: i64,
        query: &str,
    ) -> Result<bool, DbError>;

    /// Get count of search records.
    fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError>;
}
|
||||
|
||||
/// SQLite-backed implementation of `SearchHistoryDao`.
///
/// The connection is wrapped in `Arc<Mutex<_>>` so the DAO can be shared
/// across threads; each method holds the lock for the duration of its call.
pub struct SqliteSearchHistoryDao {
    connection: Arc<Mutex<SqliteConnection>>,
}
|
||||
|
||||
impl Default for SqliteSearchHistoryDao {
    /// Equivalent to `SqliteSearchHistoryDao::new()`: opens a fresh
    /// connection via `connect()`.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl SqliteSearchHistoryDao {
|
||||
pub fn new() -> Self {
|
||||
SqliteSearchHistoryDao {
|
||||
connection: Arc::new(Mutex::new(connect())),
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
|
||||
use zerocopy::IntoBytes;
|
||||
vec.as_bytes().to_vec()
|
||||
}
|
||||
|
||||
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
|
||||
if bytes.len() % 4 != 0 {
|
||||
return Err(DbError::new(DbErrorKind::QueryError));
|
||||
}
|
||||
|
||||
let count = bytes.len() / 4;
|
||||
let mut vec = Vec::with_capacity(count);
|
||||
|
||||
for chunk in bytes.chunks_exact(4) {
|
||||
let float = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
|
||||
vec.push(float);
|
||||
}
|
||||
|
||||
Ok(vec)
|
||||
}
|
||||
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let magnitude_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let magnitude_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if magnitude_a == 0.0 || magnitude_b == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
dot_product / (magnitude_a * magnitude_b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Raw row shape for `sql_query` loads from `search_history`, including the
/// stored embedding BLOB (dropped when projecting to `SearchRecord`).
#[derive(QueryableByName)]
struct SearchRecordWithVectorRow {
    #[diesel(sql_type = diesel::sql_types::Integer)]
    id: i32,
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    timestamp: i64,
    #[diesel(sql_type = diesel::sql_types::Text)]
    query: String,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    search_engine: Option<String>,
    // Serialized little-endian f32 vector; see deserialize_vector.
    #[diesel(sql_type = diesel::sql_types::Binary)]
    embedding: Vec<u8>,
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    created_at: i64,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    source_file: Option<String>,
}
|
||||
|
||||
impl SearchRecordWithVectorRow {
|
||||
fn to_search_record(&self) -> SearchRecord {
|
||||
SearchRecord {
|
||||
id: self.id,
|
||||
timestamp: self.timestamp,
|
||||
query: self.query.clone(),
|
||||
search_engine: self.search_engine.clone(),
|
||||
created_at: self.created_at,
|
||||
source_file: self.source_file.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Row shape for `SELECT last_insert_rowid() as id` (and other single-id
/// lookups).
#[derive(QueryableByName)]
struct LastInsertRowId {
    #[diesel(sql_type = diesel::sql_types::Integer)]
    id: i32,
}
|
||||
|
||||
impl SearchHistoryDao for SqliteSearchHistoryDao {
|
||||
fn store_search(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
search: InsertSearchRecord,
|
||||
) -> Result<SearchRecord, DbError> {
|
||||
trace_db_call(context, "insert", "store_search", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get SearchHistoryDao");
|
||||
|
||||
// Validate embedding dimensions (REQUIRED for searches)
|
||||
if search.embedding.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid embedding dimensions: {} (expected 768)",
|
||||
search.embedding.len()
|
||||
));
|
||||
}
|
||||
|
||||
let embedding_bytes = Self::serialize_vector(&search.embedding);
|
||||
|
||||
// INSERT OR IGNORE to handle re-imports (UNIQUE constraint on timestamp+query)
|
||||
diesel::sql_query(
|
||||
"INSERT OR IGNORE INTO search_history
|
||||
(timestamp, query, search_engine, embedding, created_at, source_file)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(search.timestamp)
|
||||
.bind::<diesel::sql_types::Text, _>(&search.query)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.search_engine)
|
||||
.bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
|
||||
.bind::<diesel::sql_types::BigInt, _>(search.created_at)
|
||||
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(&search.source_file)
|
||||
.execute(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Insert error: {:?}", e))?;
|
||||
|
||||
let row_id: i32 = diesel::sql_query("SELECT last_insert_rowid() as id")
|
||||
.get_result::<LastInsertRowId>(conn.deref_mut())
|
||||
.map(|r| r.id)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to get last insert ID: {:?}", e))?;
|
||||
|
||||
Ok(SearchRecord {
|
||||
id: row_id,
|
||||
timestamp: search.timestamp,
|
||||
query: search.query,
|
||||
search_engine: search.search_engine,
|
||||
created_at: search.created_at,
|
||||
source_file: search.source_file,
|
||||
})
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::InsertError))
|
||||
}
|
||||
|
||||
    /// Insert many search records inside a single transaction.
    ///
    /// `INSERT OR IGNORE` makes re-imports idempotent; the returned count
    /// covers only rows actually written. Records whose embedding is not
    /// 768-dim are skipped with a warning rather than failing the batch.
    fn store_searches_batch(
        &mut self,
        context: &opentelemetry::Context,
        searches: Vec<InsertSearchRecord>,
    ) -> Result<usize, DbError> {
        trace_db_call(context, "insert", "store_searches_batch", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");
            let mut inserted = 0;

            // One transaction for the whole batch: one commit total instead
            // of one per row.
            conn.transaction::<_, anyhow::Error, _>(|conn| {
                for search in searches {
                    // Validate embedding (REQUIRED)
                    if search.embedding.len() != 768 {
                        log::warn!(
                            "Skipping search with invalid embedding dimensions: {}",
                            search.embedding.len()
                        );
                        continue;
                    }

                    let embedding_bytes = Self::serialize_vector(&search.embedding);

                    let rows_affected = diesel::sql_query(
                        "INSERT OR IGNORE INTO search_history
                         (timestamp, query, search_engine, embedding, created_at, source_file)
                         VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
                    )
                    .bind::<diesel::sql_types::BigInt, _>(search.timestamp)
                    .bind::<diesel::sql_types::Text, _>(&search.query)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &search.search_engine,
                    )
                    .bind::<diesel::sql_types::Binary, _>(&embedding_bytes)
                    .bind::<diesel::sql_types::BigInt, _>(search.created_at)
                    .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(
                        &search.source_file,
                    )
                    .execute(conn)
                    .map_err(|e| anyhow::anyhow!("Batch insert error: {:?}", e))?;

                    // 0 affected rows means the UNIQUE constraint suppressed
                    // a duplicate.
                    if rows_affected > 0 {
                        inserted += 1;
                    }
                }
                Ok(())
            })
            .map_err(|e| anyhow::anyhow!("Transaction error: {:?}", e))?;

            Ok(inserted)
        })
        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }
|
||||
|
||||
fn find_searches_in_range(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
start_ts: i64,
|
||||
end_ts: i64,
|
||||
) -> Result<Vec<SearchRecord>, DbError> {
|
||||
trace_db_call(context, "query", "find_searches_in_range", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get SearchHistoryDao");
|
||||
|
||||
diesel::sql_query(
|
||||
"SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
|
||||
FROM search_history
|
||||
WHERE timestamp >= ?1 AND timestamp <= ?2
|
||||
ORDER BY timestamp DESC",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(start_ts)
|
||||
.bind::<diesel::sql_types::BigInt, _>(end_ts)
|
||||
.load::<SearchRecordWithVectorRow>(conn.deref_mut())
|
||||
.map(|rows| rows.into_iter().map(|r| r.to_search_record()).collect())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
    /// Rank all stored searches by cosine similarity to `query_embedding`
    /// (must be 768-dim) and return the top `limit`.
    ///
    /// NOTE: this loads the ENTIRE search_history table into memory and
    /// scores it in-process — O(rows) per call. Fine for personal-scale
    /// data; revisit if the table grows large.
    fn find_similar_searches(
        &mut self,
        context: &opentelemetry::Context,
        query_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<SearchRecord>, DbError> {
        trace_db_call(context, "query", "find_similar_searches", |_span| {
            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            if query_embedding.len() != 768 {
                return Err(anyhow::anyhow!(
                    "Invalid query embedding dimensions: {} (expected 768)",
                    query_embedding.len()
                ));
            }

            // Load all searches with embeddings
            let results = diesel::sql_query(
                "SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
                 FROM search_history",
            )
            .load::<SearchRecordWithVectorRow>(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            // Compute similarities; rows whose BLOB fails to deserialize are
            // silently dropped rather than failing the whole query.
            let mut scored_searches: Vec<(f32, SearchRecord)> = results
                .into_iter()
                .filter_map(|row| {
                    if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
                        let similarity = Self::cosine_similarity(query_embedding, &emb);
                        Some((similarity, row.to_search_record()))
                    } else {
                        None
                    }
                })
                .collect();

            // Sort by similarity descending
            scored_searches
                .sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

            log::info!("Found {} similar searches", scored_searches.len());
            if !scored_searches.is_empty() {
                log::info!(
                    "Top similarity: {:.4} for query: '{}'",
                    scored_searches[0].0,
                    scored_searches[0].1.query
                );
            }

            Ok(scored_searches
                .into_iter()
                .take(limit)
                .map(|(_, search)| search)
                .collect())
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
|
||||
|
||||
    /// Hybrid retrieval: restrict to `center_timestamp` ± `time_window_days`
    /// with a fast SQL time filter, then (optionally) re-rank the survivors
    /// by cosine similarity against `query_embedding`.
    ///
    /// Without an embedding the window's rows are returned in SQL order
    /// (no explicit ORDER BY), truncated to `limit`.
    fn find_relevant_searches_hybrid(
        &mut self,
        context: &opentelemetry::Context,
        center_timestamp: i64,
        time_window_days: i64,
        query_embedding: Option<&[f32]>,
        limit: usize,
    ) -> Result<Vec<SearchRecord>, DbError> {
        trace_db_call(context, "query", "find_relevant_searches_hybrid", |_span| {
            // Timestamps are epoch seconds: 86400 seconds per day.
            let window_seconds = time_window_days * 86400;
            let start_ts = center_timestamp - window_seconds;
            let end_ts = center_timestamp + window_seconds;

            let mut conn = self
                .connection
                .lock()
                .expect("Unable to get SearchHistoryDao");

            // Step 1: Time-based filter (fast, indexed)
            let searches_in_range = diesel::sql_query(
                "SELECT id, timestamp, query, search_engine, embedding, created_at, source_file
                 FROM search_history
                 WHERE timestamp >= ?1 AND timestamp <= ?2",
            )
            .bind::<diesel::sql_types::BigInt, _>(start_ts)
            .bind::<diesel::sql_types::BigInt, _>(end_ts)
            .load::<SearchRecordWithVectorRow>(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;

            // Step 2: If query embedding provided, rank by semantic similarity
            if let Some(query_emb) = query_embedding {
                if query_emb.len() != 768 {
                    return Err(anyhow::anyhow!(
                        "Invalid query embedding dimensions: {} (expected 768)",
                        query_emb.len()
                    ));
                }

                // Rows whose BLOB fails to deserialize are dropped silently.
                let mut scored_searches: Vec<(f32, SearchRecord)> = searches_in_range
                    .into_iter()
                    .filter_map(|row| {
                        if let Ok(emb) = Self::deserialize_vector(&row.embedding) {
                            let similarity = Self::cosine_similarity(query_emb, &emb);
                            Some((similarity, row.to_search_record()))
                        } else {
                            None
                        }
                    })
                    .collect();

                // Sort by similarity descending
                scored_searches
                    .sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));

                log::info!(
                    "Hybrid query: {} searches in time range, ranked by similarity",
                    scored_searches.len()
                );
                if !scored_searches.is_empty() {
                    log::info!(
                        "Top similarity: {:.4} for '{}'",
                        scored_searches[0].0,
                        scored_searches[0].1.query
                    );
                }

                Ok(scored_searches
                    .into_iter()
                    .take(limit)
                    .map(|(_, search)| search)
                    .collect())
            } else {
                // No semantic ranking, just return time-sorted (most recent first)
                log::info!(
                    "Time-only query: {} searches in range",
                    searches_in_range.len()
                );
                Ok(searches_in_range
                    .into_iter()
                    .take(limit)
                    .map(|r| r.to_search_record())
                    .collect())
            }
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
|
||||
|
||||
fn search_exists(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
timestamp: i64,
|
||||
query: &str,
|
||||
) -> Result<bool, DbError> {
|
||||
trace_db_call(context, "query", "search_exists", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get SearchHistoryDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::Integer)]
|
||||
count: i32,
|
||||
}
|
||||
|
||||
let result: CountResult = diesel::sql_query(
|
||||
"SELECT COUNT(*) as count FROM search_history WHERE timestamp = ?1 AND query = ?2",
|
||||
)
|
||||
.bind::<diesel::sql_types::BigInt, _>(timestamp)
|
||||
.bind::<diesel::sql_types::Text, _>(query)
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count > 0)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
|
||||
trace_db_call(context, "query", "get_search_count", |_span| {
|
||||
let mut conn = self
|
||||
.connection
|
||||
.lock()
|
||||
.expect("Unable to get SearchHistoryDao");
|
||||
|
||||
#[derive(QueryableByName)]
|
||||
struct CountResult {
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
count: i64,
|
||||
}
|
||||
|
||||
let result: CountResult =
|
||||
diesel::sql_query("SELECT COUNT(*) as count FROM search_history")
|
||||
.get_result(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e))?;
|
||||
|
||||
Ok(result.count)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user