From 85093ff0c771a21ef54c3a6294501867eb501472 Mon Sep 17 00:00:00 2001 From: Cameron Date: Tue, 12 Aug 2025 20:55:22 -0400 Subject: [PATCH] Add parsing date from filename for memories --- Cargo.lock | 1 + Cargo.toml | 3 +- src/files.rs | 1 - src/memories.rs | 389 +++++++++++++++++++++++++++++++++++------------- 4 files changed, 289 insertions(+), 105 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fa96ad5..9105ad1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1642,6 +1642,7 @@ dependencies = [ "prometheus", "rand", "rayon", + "regex", "serde", "serde_json", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index a3c12e6..89afd12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,4 +42,5 @@ opentelemetry_sdk = { version = "0.28.0", features = ["default", "rt-tokio-curre opentelemetry-otlp = { version = "0.28.0", features = ["default", "metrics", "tracing", "grpc-tonic"] } opentelemetry-stdout = "0.28.0" opentelemetry-appender-log = "0.28.0" -tempfile = "3.20.0" \ No newline at end of file +tempfile = "3.20.0" +regex = "1.11.1" \ No newline at end of file diff --git a/src/files.rs b/src/files.rs index 72b9393..44285df 100644 --- a/src/files.rs +++ b/src/files.rs @@ -505,7 +505,6 @@ mod tests { mod api { use super::*; - use actix::Actor; use actix_web::{web::Query, HttpResponse}; use crate::{ diff --git a/src/memories.rs b/src/memories.rs index e6365f5..5d3189b 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -1,6 +1,8 @@ use actix_web::web::Data; use actix_web::{get, web, HttpRequest, HttpResponse, Responder}; -use chrono::{DateTime, Datelike, FixedOffset, Local, NaiveDate, TimeZone, Utc}; +use chrono::LocalResult::{Ambiguous, Single}; +use chrono::{DateTime, Datelike, FixedOffset, Local, LocalResult, NaiveDate, TimeZone, Utc}; +use log::{debug, info, trace, warn}; use opentelemetry::trace::{Span, Status, Tracer}; use opentelemetry::KeyValue; use serde::{Deserialize, Serialize}; @@ -39,6 +41,175 @@ pub struct MemoriesResponse { pub items: Vec, } +fn get_file_date_info( + path: &Path, + client_timezone: &Option, +) -> Option<(NaiveDate, Option, Option)> { + // Read file metadata once + let meta = std::fs::metadata(path).ok()?; + + // Extract metadata timestamps + let metadata_created = meta.created().ok().map(|t| { + let utc: DateTime = t.into(); + if let Some(tz) = client_timezone { + utc.with_timezone(tz).timestamp() + } else { + utc.timestamp() + } + }); + + let metadata_modified = meta.modified().ok().map(|t| { + let utc: DateTime = t.into(); + if let Some(tz) = client_timezone { + utc.with_timezone(tz).timestamp() + } else { + utc.timestamp() + } + }); + + // Try to get date from filename + if let Some(date_time) = path + .file_name() + .and_then(|filename| filename.to_str()) + .and_then(extract_date_from_filename) + { + // Convert to client timezone if specified + let date_in_timezone = if let Some(tz) = client_timezone { + date_time.with_timezone(tz) + } else { + date_time.with_timezone(&Local).fixed_offset() + }; + + // Use the timestamp from the filename date + let created_ts = date_in_timezone.timestamp(); + + debug!( + "File date from file {:?} > {:?} = {:?}", + path.file_name(), + date_time, + date_in_timezone + ); + return Some(( + date_in_timezone.date_naive(), + Some(created_ts), + metadata_modified, + )); + } + + // Fall back to metadata if no date in filename + let system_time = meta.created().ok().or_else(|| meta.modified().ok())?; + let dt_utc: DateTime = system_time.into(); + + let date_in_timezone = if let Some(tz) = client_timezone { + dt_utc.with_timezone(tz).date_naive() + } else { + dt_utc.with_timezone(&Local).date_naive() + }; + + trace!("Fallback metadata create date = {:?}", date_in_timezone); + Some((date_in_timezone, metadata_created, metadata_modified)) +} + +fn extract_date_from_filename(filename: &str) -> Option> { + // 1. Screenshot format: Screenshot_2014-06-01-20-44-50.png + if let Some(captures) = + regex::Regex::new(r"Screenshot_(\d{4})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{2})") + .ok()? + .captures(filename) + { + let year = captures.get(1)?.as_str().parse::().ok()?; + let month = captures.get(2)?.as_str().parse::().ok()?; + let day = captures.get(3)?.as_str().parse::().ok()?; + let hour = captures.get(4)?.as_str().parse::().ok()?; + let min = captures.get(5)?.as_str().parse::().ok()?; + let sec = captures.get(6)?.as_str().parse::().ok()?; + + return match Local.from_local_datetime( + &NaiveDate::from_ymd_opt(year, month, day)?.and_hms_opt(hour, min, sec)?, + ) { + Single(dt) => Some(dt.fixed_offset()), + Ambiguous(early_dt, _) => Some(early_dt.fixed_offset()), + LocalResult::None => { + warn!("Weird local date: {:?}", filename); + return None; + } + }; + } + + // 2. Dash format: 2015-01-09_02-15-15.jpg + if let Some(captures) = regex::Regex::new(r"(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})") + .ok()? + .captures(filename) + { + let year = captures.get(1)?.as_str().parse::().ok()?; + let month = captures.get(2)?.as_str().parse::().ok()?; + let day = captures.get(3)?.as_str().parse::().ok()?; + let hour = captures.get(4)?.as_str().parse::().ok()?; + let min = captures.get(5)?.as_str().parse::().ok()?; + let sec = captures.get(6)?.as_str().parse::().ok()?; + + return match Local.from_local_datetime( + &NaiveDate::from_ymd_opt(year, month, day)?.and_hms_opt(hour, min, sec)?, + ) { + Single(dt) => Some(dt.fixed_offset()), + Ambiguous(early_dt, _) => Some(early_dt.fixed_offset()), + LocalResult::None => { + warn!("Weird local date: {:?}", filename); + return None; + } + }; + } + + // 3. Compact format: 20140927101712.jpg + if let Some(captures) = regex::Regex::new(r"(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})") + .ok()? + .captures(filename) + { + let year = captures.get(1)?.as_str().parse::().ok()?; + let month = captures.get(2)?.as_str().parse::().ok()?; + let day = captures.get(3)?.as_str().parse::().ok()?; + let hour = captures.get(4)?.as_str().parse::().ok()?; + let min = captures.get(5)?.as_str().parse::().ok()?; + let sec = captures.get(6)?.as_str().parse::().ok()?; + + return match Local.from_local_datetime( + &NaiveDate::from_ymd_opt(year, month, day)?.and_hms_opt(hour, min, sec)?, + ) { + Single(dt) => Some(dt.fixed_offset()), + Ambiguous(early_dt, _) => Some(early_dt.fixed_offset()), + LocalResult::None => { + warn!("Weird local date: {:?}", filename); + return None; + } + }; + } + + // 4. Timestamp format: xyz_1401638400.jpeg + if let Some(captures) = regex::Regex::new(r"_(\d{10,})\.").ok()?.captures(filename) { + let timestamp_str = captures.get(1)?.as_str(); + + // Millisecond timestamp (13 digits) + if timestamp_str.len() >= 13 { + if let Ok(ts_millis) = timestamp_str[0..13].parse::() { + if let Some(naive_dt) = DateTime::from_timestamp_millis(ts_millis) { + return Some(naive_dt.fixed_offset()); + } + } + } + + // Second timestamp (10 digits) + if timestamp_str.len() >= 10 { + if let Ok(ts_secs) = timestamp_str[0..10].parse::() { + if let Some(naive_dt) = DateTime::from_timestamp(ts_secs, 0) { + return Some(naive_dt.fixed_offset()); + } + } + } + } + + None +} + #[get("/memories")] pub async fn list_memories( _claims: Claims, @@ -86,14 +257,16 @@ pub async fn list_memories( continue; } - // Use created date if available, otherwise modified date for matching - let file_date = match file_best_date(path, &client_timezone) { - Some(d) => d, - None => continue, + // Get file date and timestamps in one operation + let (file_date, created, modified) = match get_file_date_info(path, &client_timezone) { + Some(info) => info, + None => { + warn!("No date info found for file: {:?}", path); + continue; + } }; if is_memories_match(file_date, now, span_mode, years_back) { - let (created, modified) = file_times_epoch_secs(path); if let Ok(rel) = path.strip_prefix(base) { memories_with_dates.push(( MemoryItem { @@ -103,6 +276,8 @@ pub async fn list_memories( }, file_date, )); + } else { + warn!("Failed to strip prefix from path: {:?}", path); } } } @@ -132,40 +307,6 @@ pub async fn list_memories( HttpResponse::Ok().json(MemoriesResponse { items }) } -fn file_best_date(path: &Path, client_timezone: &Option) -> Option { - let meta = std::fs::metadata(path).ok()?; - let system_time = meta.created().ok().or_else(|| meta.modified().ok())?; - - let dt = if let Some(tz) = client_timezone { - let utc_dt: DateTime = system_time.into(); - utc_dt.with_timezone(tz).date_naive() - } else { - let local_dt = chrono::DateTime::::from(system_time); - local_dt.date_naive() - }; - - Some(dt) -} - -fn file_times_epoch_secs(path: &Path) -> (Option, Option) { - let meta = match std::fs::metadata(path) { - Ok(m) => m, - Err(_) => return (None, None), - }; - - let created = meta.created().ok().map(|t| { - let utc: DateTime = t.into(); - utc.timestamp() - }); - - let modified = meta.modified().ok().map(|t| { - let utc: DateTime = t.into(); - utc.timestamp() - }); - - (created, modified) -} - fn is_memories_match( file_date: NaiveDate, today: NaiveDate, @@ -177,6 +318,10 @@ fn is_memories_match( } let years_diff = (today.year() - file_date.year()).unsigned_abs(); if years_diff > years_back { + info!( + "File date is too far in the past: {:?} vs {:?}", + file_date, today + ); return false; } @@ -193,9 +338,6 @@ fn same_month_day_any_year(a: NaiveDate, b: NaiveDate) -> bool { // Match same ISO week number and same weekday (ignoring year) fn same_week_any_year(a: NaiveDate, b: NaiveDate) -> bool { - // let (_ay, aw, _) = a.iso_week().year_week(); - // let (_by, bw, _) = b.iso_week().year_week(); - // aw == bw && a.weekday() == b.weekday() a.iso_week().week().eq(&b.iso_week().week()) } @@ -207,86 +349,127 @@ fn same_month_any_year(a: NaiveDate, b: NaiveDate) -> bool { #[cfg(test)] mod tests { use super::*; - use chrono::NaiveDate; + use chrono::Timelike; + use std::fs::File; + use tempfile::tempdir; + + // Add new tests for our date extraction functionality #[test] - fn test_same_month_day_any_year() { - let today = NaiveDate::from_ymd_opt(2025, 8, 8).unwrap(); - assert!(same_month_day_any_year( - NaiveDate::from_ymd_opt(2019, 8, 8).unwrap(), - today - )); - assert!(!same_month_day_any_year( - NaiveDate::from_ymd_opt(2019, 8, 7).unwrap(), - today - )); + fn test_extract_date_from_filename_screenshot_format() { + let filename = "Screenshot_2014-06-01-20-44-50.png"; + let date_time = extract_date_from_filename(filename).unwrap(); + + assert_eq!(date_time.year(), 2014); + assert_eq!(date_time.month(), 6); + assert_eq!(date_time.day(), 1); + assert_eq!(date_time.hour(), 20); + assert_eq!(date_time.minute(), 44); + assert_eq!(date_time.second(), 50); } #[test] - fn test_same_week_any_year() { - let b = NaiveDate::from_ymd_opt(2025, 8, 8).unwrap(); // Friday - let a = NaiveDate::from_ymd_opt(2024, 8, 9).unwrap(); // Friday, same ISO week number - assert_eq!(b.weekday(), a.weekday()); - assert_eq!(b.iso_week().week(), a.iso_week().week()); - assert!(same_week_any_year(a, b)); + fn test_extract_date_from_filename_dash_format() { + let filename = "2015-01-09_02-15-15.jpg"; + let date_time = extract_date_from_filename(filename).unwrap(); + + assert_eq!(date_time.year(), 2015); + assert_eq!(date_time.month(), 1); + assert_eq!(date_time.day(), 9); + assert_eq!(date_time.hour(), 2); + assert_eq!(date_time.minute(), 15); + assert_eq!(date_time.second(), 15); } #[test] - fn test_same_month_any_year() { - let today = NaiveDate::from_ymd_opt(2025, 8, 8).unwrap(); - // Same month, different year and day - should match - assert!(same_month_any_year( - NaiveDate::from_ymd_opt(2019, 8, 1).unwrap(), - today - )); - // Different month - should not match - assert!(!same_month_any_year( - NaiveDate::from_ymd_opt(2019, 9, 8).unwrap(), - today - )); - // Same month, same year, different day - should match - assert!(same_month_any_year( - NaiveDate::from_ymd_opt(2025, 8, 15).unwrap(), - today - )); - // Test January vs December - assert!(!same_month_any_year( - NaiveDate::from_ymd_opt(2020, 1, 1).unwrap(), - NaiveDate::from_ymd_opt(2025, 12, 31).unwrap() - )); + fn test_extract_date_from_filename_compact_format() { + let filename = "20140927101712.jpg"; + let date_time = extract_date_from_filename(filename).unwrap(); + + assert_eq!(date_time.year(), 2014); + assert_eq!(date_time.month(), 9); + assert_eq!(date_time.day(), 27); + assert_eq!(date_time.hour(), 10); + assert_eq!(date_time.minute(), 17); + assert_eq!(date_time.second(), 12); } #[test] - fn test_years_back_limit() { - let today = NaiveDate::from_ymd_opt(2025, 8, 8).unwrap(); - let file_date = NaiveDate::from_ymd_opt(2010, 8, 8).unwrap(); - assert!(!is_memories_match(file_date, today, MemoriesSpan::Day, 10)); - assert!(is_memories_match(file_date, today, MemoriesSpan::Day, 20)); + fn test_extract_date_from_filename_timestamp_format() { + let filename = "xyz_1401638400.jpeg"; // Unix timestamp for 2014-06-01 16:00:00 UTC + // Timestamps are already in UTC, so timezone doesn't matter for this test + let date_time = extract_date_from_filename(filename).unwrap(); + + assert_eq!(date_time.year(), 2014); + assert_eq!(date_time.month(), 6); + assert_eq!(date_time.day(), 1); + assert_eq!(date_time.hour(), 16); + assert_eq!(date_time.minute(), 0); + assert_eq!(date_time.second(), 0); } #[test] - fn test_timezone_conversion() { - // Test file_best_date with different timezones - use std::fs::File; - use tempfile::tempdir; + fn test_extract_date_from_filename_timestamp_millis_format() { + let filename = "xyz_1401638400000.jpeg"; // Unix timestamp in milliseconds + let date_time = extract_date_from_filename(filename).unwrap(); + assert_eq!(date_time.year(), 2014); + assert_eq!(date_time.month(), 6); + assert_eq!(date_time.day(), 1); + assert_eq!(date_time.hour(), 16); + assert_eq!(date_time.minute(), 0); + assert_eq!(date_time.second(), 0); + } + + #[test] + fn test_get_file_date_info_from_filename() { let temp_dir = tempdir().unwrap(); - let temp_file = temp_dir.path().join("test_file.jpg"); + let temp_file = temp_dir.path().join("Screenshot_2014-06-01-20-44-50.png"); File::create(&temp_file).unwrap(); - // Test with PST (-8 hours = -480 minutes) - let pst_offset = FixedOffset::west_opt(8 * 3600).unwrap(); - let client_tz = Some(pst_offset); + let (date, created, _) = + get_file_date_info(&temp_file, &Some(*Local::now().fixed_offset().offset())).unwrap(); - let date = file_best_date(&temp_file, &client_tz); - assert!(date.is_some()); + // Check that date is from filename + assert_eq!(date.year(), 2014); + assert_eq!(date.month(), 6); + assert_eq!(date.day(), 1); - // Test with no timezone (local) - let date_local = file_best_date(&temp_file, &None); - assert!(date_local.is_some()); + // Check that created timestamp matches the date from filename + assert!(created.is_some()); + let ts = created.unwrap(); + // The timestamp should be for 2014-06-01 20:44:50 in the LOCAL timezone + let dt_from_ts = Local.timestamp_opt(ts, 0).unwrap(); + assert_eq!(dt_from_ts.year(), 2014); + assert_eq!(dt_from_ts.month(), 6); + assert_eq!(dt_from_ts.day(), 1); + assert_eq!(dt_from_ts.hour(), 20); + assert_eq!(dt_from_ts.minute(), 44); + assert_eq!(dt_from_ts.second(), 50); + } - // Both should return valid dates - assert!(date.unwrap().year() >= 2020); - assert!(date_local.unwrap().year() >= 2020); + #[test] + fn test_get_file_date_info_from_metadata() { + let temp_dir = tempdir().unwrap(); + let temp_file = temp_dir.path().join("regular_image.jpg"); + File::create(&temp_file).unwrap(); + + let (date, created, modified) = get_file_date_info(&temp_file, &None).unwrap(); + + // Both date and timestamps should be from metadata (recent) + let today = Local::now().date_naive(); + assert_eq!(date.year(), today.year()); + assert_eq!(date.month(), today.month()); + + // Both timestamps should be valid + assert!(created.is_some()); + assert!(modified.is_some()); + + // Check that timestamps are recent + let dt_created = DateTime::::from_timestamp(created.unwrap(), 0).unwrap(); + assert_eq!(dt_created.year(), today.year()); + + let dt_modified = DateTime::::from_timestamp(modified.unwrap(), 0).unwrap(); + assert_eq!(dt_modified.year(), today.year()); } }