Add Google Takeout data import infrastructure
Implements Phase 1 & 2 of Google Takeout RAG integration: - Database migrations for calendar_events, location_history, search_history - DAO implementations with hybrid time + semantic search - Parsers for .ics, JSON, and HTML Google Takeout formats - Import utilities with batch insert optimization Features: - CalendarEventDao: Hybrid time-range + semantic search for events - LocationHistoryDao: GPS proximity with Haversine distance calculation - SearchHistoryDao: Semantic-first search (queries are embedding-rich) - Batch inserts for performance (1M+ records in minutes vs hours) - OpenTelemetry tracing for all database operations Import utilities: - import_calendar: Parse .ics with optional embedding generation - import_location_history: High-volume GPS data with batch inserts - import_search_history: Always generates embeddings for semantic search 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
133
src/parsers/location_json_parser.rs
Normal file
133
src/parsers/location_json_parser.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::DateTime;
|
||||
use serde::Deserialize;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ParsedLocationRecord {
|
||||
pub timestamp: i64,
|
||||
pub latitude: f64,
|
||||
pub longitude: f64,
|
||||
pub accuracy: Option<i32>,
|
||||
pub activity: Option<String>,
|
||||
pub activity_confidence: Option<i32>,
|
||||
}
|
||||
|
||||
// Google Takeout Location History JSON structures
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct LocationHistory {
|
||||
locations: Vec<LocationPoint>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct LocationPoint {
|
||||
timestamp_ms: Option<String>, // Older format
|
||||
timestamp: Option<String>, // Newer format (ISO8601)
|
||||
latitude_e7: Option<i64>,
|
||||
longitude_e7: Option<i64>,
|
||||
accuracy: Option<i32>,
|
||||
activity: Option<Vec<ActivityRecord>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ActivityRecord {
|
||||
activity: Vec<ActivityType>,
|
||||
timestamp_ms: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ActivityType {
|
||||
#[serde(rename = "type")]
|
||||
activity_type: String,
|
||||
confidence: i32,
|
||||
}
|
||||
|
||||
pub fn parse_location_json(path: &str) -> Result<Vec<ParsedLocationRecord>> {
|
||||
let file = File::open(path).context("Failed to open location JSON file")?;
|
||||
let reader = BufReader::new(file);
|
||||
|
||||
let history: LocationHistory =
|
||||
serde_json::from_reader(reader).context("Failed to parse location history JSON")?;
|
||||
|
||||
let mut records = Vec::new();
|
||||
|
||||
for point in history.locations {
|
||||
// Parse timestamp (try both formats)
|
||||
let timestamp = if let Some(ts_ms) = point.timestamp_ms {
|
||||
// Milliseconds since epoch
|
||||
ts_ms
|
||||
.parse::<i64>()
|
||||
.context("Failed to parse timestamp_ms")?
|
||||
/ 1000
|
||||
} else if let Some(ts_iso) = point.timestamp {
|
||||
// ISO8601 format
|
||||
DateTime::parse_from_rfc3339(&ts_iso)
|
||||
.context("Failed to parse ISO8601 timestamp")?
|
||||
.timestamp()
|
||||
} else {
|
||||
continue; // Skip points without timestamp
|
||||
};
|
||||
|
||||
// Convert E7 format to decimal degrees
|
||||
let latitude = point.latitude_e7.map(|e7| e7 as f64 / 10_000_000.0);
|
||||
let longitude = point.longitude_e7.map(|e7| e7 as f64 / 10_000_000.0);
|
||||
|
||||
// Extract highest-confidence activity
|
||||
let (activity, activity_confidence) = point
|
||||
.activity
|
||||
.as_ref()
|
||||
.and_then(|activities| activities.first())
|
||||
.and_then(|record| {
|
||||
record
|
||||
.activity
|
||||
.iter()
|
||||
.max_by_key(|a| a.confidence)
|
||||
.map(|a| (a.activity_type.clone(), a.confidence))
|
||||
})
|
||||
.unzip();
|
||||
|
||||
if let (Some(lat), Some(lon)) = (latitude, longitude) {
|
||||
records.push(ParsedLocationRecord {
|
||||
timestamp,
|
||||
latitude: lat,
|
||||
longitude: lon,
|
||||
accuracy: point.accuracy,
|
||||
activity,
|
||||
activity_confidence,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(records)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_e7_conversion() {
|
||||
let lat_e7 = 374228300_i64;
|
||||
let lat = lat_e7 as f64 / 10_000_000.0;
|
||||
assert!((lat - 37.42283).abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_sample_json() {
|
||||
let json = r#"{
|
||||
"locations": [
|
||||
{
|
||||
"latitudeE7": 374228300,
|
||||
"longitudeE7": -1221086100,
|
||||
"accuracy": 20,
|
||||
"timestampMs": "1692115200000"
|
||||
}
|
||||
]
|
||||
}"#;
|
||||
|
||||
let history: LocationHistory = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(history.locations.len(), 1);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user