Run clippy fix
This commit is contained in:
@@ -63,7 +63,7 @@ pub fn strip_summary_boilerplate(summary: &str) -> String {
|
||||
text = text[phrase.len()..].trim_start().to_string();
|
||||
// Remove leading punctuation/articles after stripping phrase
|
||||
text = text
|
||||
.trim_start_matches(|c| c == ',' || c == ':' || c == '-')
|
||||
.trim_start_matches([',', ':', '-'])
|
||||
.trim_start()
|
||||
.to_string();
|
||||
break;
|
||||
@@ -71,13 +71,12 @@ pub fn strip_summary_boilerplate(summary: &str) -> String {
|
||||
}
|
||||
|
||||
// Remove any remaining leading markdown bold markers
|
||||
if text.starts_with("**") {
|
||||
if let Some(end) = text[2..].find("**") {
|
||||
if text.starts_with("**")
|
||||
&& let Some(end) = text[2..].find("**") {
|
||||
// Keep the content between ** but remove the markers
|
||||
let bold_content = &text[2..2 + end];
|
||||
text = format!("{}{}", bold_content, &text[4 + end..]);
|
||||
}
|
||||
}
|
||||
|
||||
text.trim().to_string()
|
||||
}
|
||||
@@ -144,7 +143,7 @@ pub async fn generate_daily_summaries(
|
||||
if date >= start && date <= end {
|
||||
messages_by_date
|
||||
.entry(date)
|
||||
.or_insert_with(Vec::new)
|
||||
.or_default()
|
||||
.push(msg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,7 +106,7 @@ pub async fn embed_contact_messages(
|
||||
log::info!(
|
||||
"Processing batch {}/{}: messages {}-{} ({:.1}% complete)",
|
||||
batch_idx + 1,
|
||||
(to_embed + batch_size - 1) / batch_size,
|
||||
to_embed.div_ceil(batch_size),
|
||||
batch_start + 1,
|
||||
batch_end,
|
||||
(batch_end as f64 / to_embed as f64) * 100.0
|
||||
|
||||
@@ -84,13 +84,11 @@ impl InsightGenerator {
|
||||
let components: Vec<_> = path.components().collect();
|
||||
|
||||
// If path has at least 2 components (directory + file), extract first directory
|
||||
if components.len() >= 2 {
|
||||
if let Some(component) = components.first() {
|
||||
if let Some(os_str) = component.as_os_str().to_str() {
|
||||
if components.len() >= 2
|
||||
&& let Some(component) = components.first()
|
||||
&& let Some(os_str) = component.as_os_str().to_str() {
|
||||
return Some(os_str.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
@@ -191,8 +189,8 @@ impl InsightGenerator {
|
||||
.into_iter()
|
||||
.filter(|msg| {
|
||||
// Extract date from formatted daily summary "[2024-08-15] Contact ..."
|
||||
if let Some(bracket_end) = msg.find(']') {
|
||||
if let Some(date_str) = msg.get(1..bracket_end) {
|
||||
if let Some(bracket_end) = msg.find(']')
|
||||
&& let Some(date_str) = msg.get(1..bracket_end) {
|
||||
// Parse just the date (daily summaries don't have time)
|
||||
if let Ok(msg_date) =
|
||||
chrono::NaiveDate::parse_from_str(date_str, "%Y-%m-%d")
|
||||
@@ -206,7 +204,6 @@ impl InsightGenerator {
|
||||
return time_diff > exclusion_window;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
})
|
||||
.take(limit)
|
||||
@@ -521,7 +518,7 @@ impl InsightGenerator {
|
||||
"searches about {} {} {}",
|
||||
DateTime::from_timestamp(timestamp, 0)
|
||||
.map(|dt| dt.format("%B %Y").to_string())
|
||||
.unwrap_or_else(|| "".to_string()),
|
||||
.unwrap_or_default(),
|
||||
location.unwrap_or(""),
|
||||
contact
|
||||
.map(|c| format!("involving {}", c))
|
||||
|
||||
132
src/ai/ollama.rs
132
src/ai/ollama.rs
@@ -78,12 +78,11 @@ impl OllamaClient {
|
||||
// Check cache first
|
||||
{
|
||||
let cache = MODEL_LIST_CACHE.lock().unwrap();
|
||||
if let Some(entry) = cache.get(url) {
|
||||
if !entry.is_expired() {
|
||||
if let Some(entry) = cache.get(url)
|
||||
&& !entry.is_expired() {
|
||||
log::debug!("Returning cached model list for {}", url);
|
||||
return Ok(entry.data.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log::debug!("Fetching fresh model list from {}", url);
|
||||
@@ -93,7 +92,7 @@ impl OllamaClient {
|
||||
.timeout(Duration::from_secs(10))
|
||||
.build()?;
|
||||
|
||||
let response = client.get(&format!("{}/api/tags", url)).send().await?;
|
||||
let response = client.get(format!("{}/api/tags", url)).send().await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(anyhow::anyhow!("Failed to list models from {}", url));
|
||||
@@ -157,7 +156,7 @@ impl OllamaClient {
|
||||
}
|
||||
|
||||
let response = client
|
||||
.post(&format!("{}/api/show", url))
|
||||
.post(format!("{}/api/show", url))
|
||||
.json(&ShowRequest {
|
||||
model: model_name.to_string(),
|
||||
})
|
||||
@@ -188,12 +187,11 @@ impl OllamaClient {
|
||||
// Check cache first
|
||||
{
|
||||
let cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
|
||||
if let Some(entry) = cache.get(url) {
|
||||
if !entry.is_expired() {
|
||||
if let Some(entry) = cache.get(url)
|
||||
&& !entry.is_expired() {
|
||||
log::debug!("Returning cached model capabilities for {}", url);
|
||||
return Ok(entry.data.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log::debug!("Fetching fresh model capabilities from {}", url);
|
||||
@@ -260,7 +258,7 @@ impl OllamaClient {
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&format!("{}/api/generate", url))
|
||||
.post(format!("{}/api/generate", url))
|
||||
.json(&request)
|
||||
.send()
|
||||
.await?;
|
||||
@@ -421,42 +419,40 @@ Return ONLY the title, nothing else."#,
|
||||
sms_str
|
||||
)
|
||||
}
|
||||
} else {
|
||||
if let Some(contact_name) = contact {
|
||||
format!(
|
||||
r#"Create a short title (maximum 8 words) about this moment:
|
||||
} else if let Some(contact_name) = contact {
|
||||
format!(
|
||||
r#"Create a short title (maximum 8 words) about this moment:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Person/Contact: {}
|
||||
Messages: {}
|
||||
Date: {}
|
||||
Location: {}
|
||||
Person/Contact: {}
|
||||
Messages: {}
|
||||
|
||||
Use specific details from the context above. The photo is from a folder for {}, so they are likely related to this moment. If no specific details are available, use a simple descriptive title.
|
||||
Use specific details from the context above. The photo is from a folder for {}, so they are likely related to this moment. If no specific details are available, use a simple descriptive title.
|
||||
|
||||
Return ONLY the title, nothing else."#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
contact_name,
|
||||
sms_str,
|
||||
contact_name
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"Create a short title (maximum 8 words) about this moment:
|
||||
Return ONLY the title, nothing else."#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
contact_name,
|
||||
sms_str,
|
||||
contact_name
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"Create a short title (maximum 8 words) about this moment:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Messages: {}
|
||||
Date: {}
|
||||
Location: {}
|
||||
Messages: {}
|
||||
|
||||
Use specific details from the context above. If no specific details are available, use a simple descriptive title.
|
||||
Use specific details from the context above. If no specific details are available, use a simple descriptive title.
|
||||
|
||||
Return ONLY the title, nothing else."#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
sms_str
|
||||
)
|
||||
}
|
||||
};
|
||||
Return ONLY the title, nothing else."#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
sms_str
|
||||
)
|
||||
};
|
||||
|
||||
let system = custom_system.unwrap_or("You are my long term memory assistant. Use only the information provided. Do not invent details.");
|
||||
|
||||
@@ -512,39 +508,37 @@ Analyze the image and use specific details from both the visual content and the
|
||||
sms_str
|
||||
)
|
||||
}
|
||||
} else {
|
||||
if let Some(contact_name) = contact {
|
||||
format!(
|
||||
r#"Write a 1-3 paragraph description of this moment based on the available information:
|
||||
} else if let Some(contact_name) = contact {
|
||||
format!(
|
||||
r#"Write a 1-3 paragraph description of this moment based on the available information:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Person/Contact: {}
|
||||
Messages: {}
|
||||
Date: {}
|
||||
Location: {}
|
||||
Person/Contact: {}
|
||||
Messages: {}
|
||||
|
||||
Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
contact_name,
|
||||
sms_str,
|
||||
contact_name,
|
||||
contact_name
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"Write a 1-3 paragraph description of this moment based on the available information:
|
||||
Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
contact_name,
|
||||
sms_str,
|
||||
contact_name,
|
||||
contact_name
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"Write a 1-3 paragraph description of this moment based on the available information:
|
||||
|
||||
Date: {}
|
||||
Location: {}
|
||||
Messages: {}
|
||||
Date: {}
|
||||
Location: {}
|
||||
Messages: {}
|
||||
|
||||
Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
sms_str
|
||||
)
|
||||
}
|
||||
};
|
||||
Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
|
||||
date.format("%B %d, %Y"),
|
||||
location_str,
|
||||
sms_str
|
||||
)
|
||||
};
|
||||
|
||||
let system = custom_system.unwrap_or("You are a memory refreshing assistant who is able to provide insights through analyzing past conversations. Use only the information provided. Do not invent details.");
|
||||
|
||||
@@ -671,7 +665,7 @@ Use only the specific details provided above. Mention people's names, places, or
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&format!("{}/api/embed", url))
|
||||
.post(format!("{}/api/embed", url))
|
||||
.json(&request)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
@@ -36,7 +36,7 @@ struct EmbeddingRow {
|
||||
}
|
||||
|
||||
fn deserialize_embedding(bytes: &[u8]) -> Result<Vec<f32>> {
|
||||
if bytes.len() % 4 != 0 {
|
||||
if !bytes.len().is_multiple_of(4) {
|
||||
return Err(anyhow::anyhow!("Invalid embedding byte length"));
|
||||
}
|
||||
|
||||
|
||||
@@ -74,18 +74,16 @@ async fn main() -> Result<()> {
|
||||
let mut dao_instance = SqliteCalendarEventDao::new();
|
||||
|
||||
// Check if event exists
|
||||
if args.skip_existing {
|
||||
if let Ok(exists) = dao_instance.event_exists(
|
||||
if args.skip_existing
|
||||
&& let Ok(exists) = dao_instance.event_exists(
|
||||
&context,
|
||||
event.event_uid.as_deref().unwrap_or(""),
|
||||
event.start_time,
|
||||
) {
|
||||
if exists {
|
||||
)
|
||||
&& exists {
|
||||
*skipped_count.lock().unwrap() += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generate embedding if requested (blocking call)
|
||||
let embedding = if let Some(ref ollama_client) = ollama {
|
||||
|
||||
@@ -58,19 +58,17 @@ async fn main() -> Result<()> {
|
||||
|
||||
for location in chunk {
|
||||
// Skip existing check if requested (makes import much slower)
|
||||
if args.skip_existing {
|
||||
if let Ok(exists) = dao_instance.location_exists(
|
||||
if args.skip_existing
|
||||
&& let Ok(exists) = dao_instance.location_exists(
|
||||
&context,
|
||||
location.timestamp,
|
||||
location.latitude,
|
||||
location.longitude,
|
||||
) {
|
||||
if exists {
|
||||
)
|
||||
&& exists {
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
batch_inserts.push(InsertLocationRecord {
|
||||
timestamp: location.timestamp,
|
||||
|
||||
@@ -92,16 +92,13 @@ async fn main() -> Result<()> {
|
||||
|
||||
for (search, embedding_opt) in chunk.iter().zip(embeddings_result.iter()) {
|
||||
// Check if search exists (optional for speed)
|
||||
if args.skip_existing {
|
||||
if let Ok(exists) =
|
||||
if args.skip_existing
|
||||
&& let Ok(exists) =
|
||||
dao_instance.search_exists(&context, search.timestamp, &search.query)
|
||||
{
|
||||
if exists {
|
||||
&& exists {
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only insert if we have an embedding
|
||||
if let Some(embedding) = embedding_opt {
|
||||
|
||||
@@ -3,7 +3,6 @@ use std::sync::{Arc, Mutex};
|
||||
|
||||
use chrono::Utc;
|
||||
use clap::Parser;
|
||||
use opentelemetry;
|
||||
use rayon::prelude::*;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
|
||||
@@ -118,7 +118,7 @@ impl SqliteCalendarEventDao {
|
||||
}
|
||||
|
||||
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
|
||||
if bytes.len() % 4 != 0 {
|
||||
if !bytes.len().is_multiple_of(4) {
|
||||
return Err(DbError::new(DbErrorKind::QueryError));
|
||||
}
|
||||
|
||||
@@ -218,14 +218,13 @@ impl CalendarEventDao for SqliteCalendarEventDao {
|
||||
.expect("Unable to get CalendarEventDao");
|
||||
|
||||
// Validate embedding dimensions if provided
|
||||
if let Some(ref emb) = event.embedding {
|
||||
if emb.len() != 768 {
|
||||
if let Some(ref emb) = event.embedding
|
||||
&& emb.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid embedding dimensions: {} (expected 768)",
|
||||
emb.len()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let embedding_bytes = event.embedding.as_ref().map(|e| Self::serialize_vector(e));
|
||||
|
||||
@@ -289,15 +288,14 @@ impl CalendarEventDao for SqliteCalendarEventDao {
|
||||
conn.transaction::<_, anyhow::Error, _>(|conn| {
|
||||
for event in events {
|
||||
// Validate embedding if provided
|
||||
if let Some(ref emb) = event.embedding {
|
||||
if emb.len() != 768 {
|
||||
if let Some(ref emb) = event.embedding
|
||||
&& emb.len() != 768 {
|
||||
log::warn!(
|
||||
"Skipping event with invalid embedding dimensions: {}",
|
||||
emb.len()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let embedding_bytes =
|
||||
event.embedding.as_ref().map(|e| Self::serialize_vector(e));
|
||||
|
||||
@@ -98,7 +98,7 @@ impl SqliteDailySummaryDao {
|
||||
}
|
||||
|
||||
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
|
||||
if bytes.len() % 4 != 0 {
|
||||
if !bytes.len().is_multiple_of(4) {
|
||||
return Err(DbError::new(DbErrorKind::QueryError));
|
||||
}
|
||||
|
||||
@@ -448,7 +448,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
|
||||
.bind::<diesel::sql_types::Text, _>(contact)
|
||||
.get_result::<CountResult>(conn.deref_mut())
|
||||
.map(|r| r.count)
|
||||
.map_err(|e| anyhow::anyhow!("Count query error: {:?}", e).into())
|
||||
.map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
@@ -109,7 +109,7 @@ impl SqliteEmbeddingDao {
|
||||
|
||||
/// Deserialize bytes from BLOB back to f32 vector
|
||||
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
|
||||
if bytes.len() % 4 != 0 {
|
||||
if !bytes.len().is_multiple_of(4) {
|
||||
return Err(DbError::new(DbErrorKind::QueryError));
|
||||
}
|
||||
|
||||
|
||||
@@ -213,14 +213,13 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
|
||||
.expect("Unable to get LocationHistoryDao");
|
||||
|
||||
// Validate embedding dimensions if provided (rare for location data)
|
||||
if let Some(ref emb) = location.embedding {
|
||||
if emb.len() != 768 {
|
||||
if let Some(ref emb) = location.embedding
|
||||
&& emb.len() != 768 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid embedding dimensions: {} (expected 768)",
|
||||
emb.len()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let embedding_bytes = location
|
||||
.embedding
|
||||
@@ -289,15 +288,14 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
|
||||
conn.transaction::<_, anyhow::Error, _>(|conn| {
|
||||
for location in locations {
|
||||
// Validate embedding if provided (rare)
|
||||
if let Some(ref emb) = location.embedding {
|
||||
if emb.len() != 768 {
|
||||
if let Some(ref emb) = location.embedding
|
||||
&& emb.len() != 768 {
|
||||
log::warn!(
|
||||
"Skipping location with invalid embedding dimensions: {}",
|
||||
emb.len()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let embedding_bytes = location
|
||||
.embedding
|
||||
|
||||
@@ -105,7 +105,7 @@ impl SqliteSearchHistoryDao {
|
||||
}
|
||||
|
||||
fn deserialize_vector(bytes: &[u8]) -> Result<Vec<f32>, DbError> {
|
||||
if bytes.len() % 4 != 0 {
|
||||
if !bytes.len().is_multiple_of(4) {
|
||||
return Err(DbError::new(DbErrorKind::QueryError));
|
||||
}
|
||||
|
||||
|
||||
@@ -36,17 +36,17 @@ pub fn is_media_file(path: &Path) -> bool {
|
||||
|
||||
/// Check if a DirEntry is an image file (for walkdir usage)
|
||||
pub fn direntry_is_image(entry: &DirEntry) -> bool {
|
||||
is_image_file(&entry.path())
|
||||
is_image_file(entry.path())
|
||||
}
|
||||
|
||||
/// Check if a DirEntry is a video file (for walkdir usage)
|
||||
pub fn direntry_is_video(entry: &DirEntry) -> bool {
|
||||
is_video_file(&entry.path())
|
||||
is_video_file(entry.path())
|
||||
}
|
||||
|
||||
/// Check if a DirEntry is a media file (for walkdir usage)
|
||||
pub fn direntry_is_media(entry: &DirEntry) -> bool {
|
||||
is_media_file(&entry.path())
|
||||
is_media_file(entry.path())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
17
src/files.rs
17
src/files.rs
@@ -234,8 +234,8 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
|
||||
(exif.gps_latitude, exif.gps_longitude)
|
||||
{
|
||||
let distance = haversine_distance(
|
||||
lat as f64,
|
||||
lon as f64,
|
||||
lat,
|
||||
lon,
|
||||
photo_lat as f64,
|
||||
photo_lon as f64,
|
||||
);
|
||||
@@ -344,7 +344,7 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
|
||||
sort_type,
|
||||
&mut exif_dao_guard,
|
||||
&span_context,
|
||||
(&app_state.base_path).as_ref(),
|
||||
app_state.base_path.as_ref(),
|
||||
);
|
||||
drop(exif_dao_guard);
|
||||
result
|
||||
@@ -410,14 +410,9 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
|
||||
)
|
||||
})
|
||||
.map(|path: &PathBuf| {
|
||||
let relative = path.strip_prefix(&app_state.base_path).expect(
|
||||
format!(
|
||||
"Unable to strip base path {} from file path {}",
|
||||
let relative = path.strip_prefix(&app_state.base_path).unwrap_or_else(|_| panic!("Unable to strip base path {} from file path {}",
|
||||
&app_state.base_path.path(),
|
||||
path.display()
|
||||
)
|
||||
.as_str(),
|
||||
);
|
||||
path.display()));
|
||||
relative.to_path_buf()
|
||||
})
|
||||
.map(|f| f.to_str().unwrap().to_string())
|
||||
@@ -493,7 +488,7 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
|
||||
sort_type,
|
||||
&mut exif_dao_guard,
|
||||
&span_context,
|
||||
(&app_state.base_path).as_ref(),
|
||||
app_state.base_path.as_ref(),
|
||||
);
|
||||
drop(exif_dao_guard);
|
||||
result
|
||||
|
||||
@@ -229,8 +229,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
||||
|
||||
// For 14-16 digits, treat first 10 digits as seconds to avoid far future dates
|
||||
// Examples: att_1422489664680106 (16 digits), att_142248967186928 (15 digits)
|
||||
if len >= 14
|
||||
&& len <= 16
|
||||
if (14..=16).contains(&len)
|
||||
&& let Some(date_time) = timestamp_str[0..10]
|
||||
.parse::<i64>()
|
||||
.ok()
|
||||
|
||||
@@ -142,12 +142,12 @@ fn parse_ical_datetime(value: &str, property: &Property) -> Result<Option<i64>>
|
||||
}
|
||||
|
||||
fn extract_email_from_mailto(value: Option<&str>) -> Option<String> {
|
||||
value.and_then(|v| {
|
||||
value.map(|v| {
|
||||
// ORGANIZER and ATTENDEE often have format: mailto:user@example.com
|
||||
if v.starts_with("mailto:") {
|
||||
Some(v.trim_start_matches("mailto:").to_string())
|
||||
v.trim_start_matches("mailto:").to_string()
|
||||
} else {
|
||||
Some(v.to_string())
|
||||
v.to_string()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -29,24 +29,23 @@ pub fn parse_search_html(path: &str) -> Result<Vec<ParsedSearchRecord>> {
|
||||
}
|
||||
|
||||
// Strategy 2: Look for outer-cell structure (older format)
|
||||
if records.is_empty() {
|
||||
if let Ok(outer_selector) = Selector::parse("div.outer-cell") {
|
||||
if records.is_empty()
|
||||
&& let Ok(outer_selector) = Selector::parse("div.outer-cell") {
|
||||
for cell in document.select(&outer_selector) {
|
||||
if let Some(record) = parse_outer_cell(&cell) {
|
||||
records.push(record);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Strategy 3: Generic approach - look for links and timestamps
|
||||
if records.is_empty() {
|
||||
if let Ok(link_selector) = Selector::parse("a") {
|
||||
if records.is_empty()
|
||||
&& let Ok(link_selector) = Selector::parse("a") {
|
||||
for link in document.select(&link_selector) {
|
||||
if let Some(href) = link.value().attr("href") {
|
||||
// Check if it's a search URL
|
||||
if href.contains("google.com/search?q=") || href.contains("search?q=") {
|
||||
if let Some(query) = extract_query_from_url(href) {
|
||||
if (href.contains("google.com/search?q=") || href.contains("search?q="))
|
||||
&& let Some(query) = extract_query_from_url(href) {
|
||||
// Try to find nearby timestamp
|
||||
let timestamp = find_nearby_timestamp(&link);
|
||||
|
||||
@@ -56,11 +55,9 @@ pub fn parse_search_html(path: &str) -> Result<Vec<ParsedSearchRecord>> {
|
||||
search_engine: Some("Google".to_string()),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(records)
|
||||
}
|
||||
@@ -120,13 +117,12 @@ fn extract_query_from_url(url: &str) -> Option<String> {
|
||||
|
||||
fn find_nearby_timestamp(element: &scraper::ElementRef) -> Option<i64> {
|
||||
// Look for timestamp in parent or sibling elements
|
||||
if let Some(parent) = element.parent() {
|
||||
if parent.value().as_element().is_some() {
|
||||
if let Some(parent) = element.parent()
|
||||
&& parent.value().as_element().is_some() {
|
||||
let parent_ref = scraper::ElementRef::wrap(parent)?;
|
||||
let text = parent_ref.text().collect::<Vec<_>>().join(" ");
|
||||
return parse_timestamp_from_text(&text);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
@@ -139,11 +135,9 @@ fn parse_timestamp_from_text(text: &str) -> Option<i64> {
|
||||
if let Some(iso_match) = text
|
||||
.split_whitespace()
|
||||
.find(|s| s.contains('T') && s.contains('-'))
|
||||
{
|
||||
if let Ok(dt) = DateTime::parse_from_rfc3339(iso_match) {
|
||||
&& let Ok(dt) = DateTime::parse_from_rfc3339(iso_match) {
|
||||
return Some(dt.timestamp());
|
||||
}
|
||||
}
|
||||
|
||||
// Try common date patterns
|
||||
let patterns = [
|
||||
@@ -154,11 +148,10 @@ fn parse_timestamp_from_text(text: &str) -> Option<i64> {
|
||||
|
||||
for pattern in patterns {
|
||||
// Extract potential date string
|
||||
if let Some(date_part) = extract_date_substring(text) {
|
||||
if let Ok(dt) = NaiveDateTime::parse_from_str(&date_part, pattern) {
|
||||
if let Some(date_part) = extract_date_substring(text)
|
||||
&& let Ok(dt) = NaiveDateTime::parse_from_str(&date_part, pattern) {
|
||||
return Some(dt.and_utc().timestamp());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
|
||||
Reference in New Issue
Block a user