diff --git a/CLAUDE.md b/CLAUDE.md index 984febd..3567915 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -364,6 +364,53 @@ Runs in background thread with two-tier strategy: - Batch queries EXIF DB to detect new files - Configurable via `WATCH_QUICK_INTERVAL_SECONDS` and `WATCH_FULL_INTERVAL_SECONDS` +**Canonical date_taken pipeline (`src/date_resolver.rs`).** Every row's +`image_exif.date_taken` is populated at ingest by a four-step waterfall; +which step won is recorded in `image_exif.date_taken_source` so the +per-tick drain can re-resolve weak entries when better tools become +available, and so the UI/debug surface can answer "why did this photo +land on this date?". Order: + +1. **`exif`** — kamadak-exif `DateTime` / `DateTimeOriginal`. Fast, + in-process, image-only. +2. **`exiftool`** — shell-out fallback for tags kamadak can't reach: + QuickTime/MP4 (`MediaCreateDate`, `TrackCreateDate`, `CreateDate`), + Apple's `ContentCreateDate`, MakerNote sub-IFDs. Required for + videos to land a real date. Single-file at ingest; the per-tick + drain feeds the whole batch through one `exiftool -@ -` subprocess. + Degrades silently when `exiftool` isn't on PATH (resolver caches the + "available" check via `OnceLock`). +3. **`filename`** — `extract_date_from_filename` in `memories.rs` + matches screenshot, chat-export, and timestamp-named patterns. +4. **`fs_time`** — `earliest_fs_time(metadata)` (earlier of created / + modified). Last resort. + +Notable behavior change vs. the pre-2026-05 request-time logic: +**EXIF beats filename when both are present.** A photo named +`Screenshot_2014-06-01.png` whose EXIF `DateTime` is 2021 now appears +under 2021, not 2014 — on the theory that EXIF is more reliable than +import-named filenames. The reverse case (no EXIF, filename has a +date) is unchanged. + +The `backfill_missing_date_taken` drain (`src/main.rs`) runs every +watcher tick alongside `backfill_unhashed_backlog`. It loads up to +`DATE_BACKFILL_MAX_PER_TICK` rows (default 500) where +`date_taken IS NULL OR date_taken_source = 'fs_time'` (backed by the +`idx_image_exif_date_backfill` partial index), runs the waterfall +batch via `resolve_dates_batch`, and writes results via the +`backfill_date_taken` DAO method (touches only `date_taken` + +`date_taken_source` so EXIF / hash / perceptual columns are +preserved). `filename`-sourced rows are intentionally not re-resolved +— the regex is authoritative when it matches, and re-running exiftool +won't change the answer. + +`/memories` is a single SQL query against this column +(`get_memories_in_window` in `src/database/mod.rs`), using +`strftime('%m-%d' | '%W' | '%m', date_taken, 'unixepoch', tz)` for +calendar matching with the client's timezone offset. The pre-rewrite +version stat'd every row and walked the entire library tree — at +~14k photos this took 10–15 s; the rewrite is single-digit ms. + **EXIF Extraction:** - Uses `kamadak-exif` crate - Supports: JPEG, TIFF, RAW (NEF, CR2, CR3), HEIF/HEIC, PNG, WebP @@ -534,6 +581,7 @@ Optional: ```bash WATCH_QUICK_INTERVAL_SECONDS=60 # Quick scan interval WATCH_FULL_INTERVAL_SECONDS=3600 # Full scan interval +DATE_BACKFILL_MAX_PER_TICK=500 # Cap on canonical-date drain per watcher tick OTLP_OTLS_ENDPOINT=http://... # OpenTelemetry collector (release builds) # AI Insights Configuration diff --git a/src/database/mod.rs b/src/database/mod.rs index 0754bea..32127cb 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -9,6 +9,21 @@ use crate::database::models::{ }; use crate::otel::trace_db_call; +/// Decoded shape for `get_memories_in_window`'s raw `sql_query`. Diesel's +/// query DSL doesn't expose strftime, so the memories filter is hand- +/// written SQL — but the returned columns are simple enough that a small +/// `QueryableByName` struct suffices, kept private to this module. +#[derive(diesel::QueryableByName)] +#[allow(dead_code)] // fields read via Diesel's QueryableByName derive +struct MemoriesWindowRow { + #[diesel(sql_type = diesel::sql_types::Text)] + rel_path: String, + #[diesel(sql_type = diesel::sql_types::BigInt)] + date_taken: i64, + #[diesel(sql_type = diesel::sql_types::BigInt)] + last_modified: i64, +} + /// Wire shape for a single member of a duplicate group, returned by /// `list_duplicates_*` and `lookup_duplicate_row`. Carries everything /// the Apollo modal needs to render a member tile and its meta line — @@ -424,6 +439,35 @@ pub trait ExifDao: Sync + Send { source: &str, ) -> Result<(), DbError>; + /// Single-query backend for `/memories`. Returns + /// `(rel_path, date_taken, last_modified)` for rows in `library_id` + /// whose `date_taken` falls within `[now - years_back y, now]` and + /// whose calendar position matches the request's span: + /// - `"day"` — same month + day-of-month (any year) + /// - `"week"` — same week-of-year (SQLite `%W`, Monday-anchored — + /// close to but not exactly ISO week 8601; the + /// boundary cases at year-start/end can shift by ±1 + /// vs the prior request-time `iso_week()` filter) + /// - `"month"` — same month (any year) + /// + /// `tz_offset_minutes` is applied to both sides of the strftime + /// comparison so the calendar match is in the user's local time. + /// Backed by the `(library_id, date_taken)` index. + /// + /// This is the single-SQL replacement for the EXIF-loop + + /// WalkDir-fallback that powered `/memories` previously; it's + /// correct only because the canonical-date waterfall at ingest + /// (`crate::date_resolver`) populates `date_taken` for every row + /// it can resolve. + fn get_memories_in_window( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + span_token: &str, + years_back: i32, + tz_offset_minutes: i32, + ) -> Result, DbError>; + /// Return image rows that have a `content_hash` but no `phash_64`, /// oldest first. Used by the `backfill_perceptual_hash` binary. /// Filters by image extension at the DB layer to avoid ever asking @@ -1090,23 +1134,28 @@ impl ExifDao for SqliteExifDao { library_id_val: i32, limit: i64, ) -> Result, DbError> { - trace_db_call(context, "query", "get_rows_needing_date_backfill", |_span| { - use schema::image_exif::dsl::*; + trace_db_call( + context, + "query", + "get_rows_needing_date_backfill", + |_span| { + use schema::image_exif::dsl::*; - let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); - // The partial index is on `(library_id, id) WHERE date_taken - // IS NULL OR date_taken_source = 'fs_time'`, so the planner - // hits it directly when both predicates are present. - image_exif - .filter(library_id.eq(library_id_val)) - .filter(date_taken.is_null().or(date_taken_source.eq("fs_time"))) - .select((library_id, rel_path)) - .order(id.asc()) - .limit(limit) - .load::<(i32, String)>(connection.deref_mut()) - .map_err(|_| anyhow::anyhow!("Query error")) - }) + // The partial index is on `(library_id, id) WHERE date_taken + // IS NULL OR date_taken_source = 'fs_time'`, so the planner + // hits it directly when both predicates are present. + image_exif + .filter(library_id.eq(library_id_val)) + .filter(date_taken.is_null().or(date_taken_source.eq("fs_time"))) + .select((library_id, rel_path)) + .order(id.asc()) + .limit(limit) + .load::<(i32, String)>(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }, + ) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } @@ -1128,10 +1177,7 @@ impl ExifDao for SqliteExifDao { .filter(library_id.eq(library_id_val)) .filter(rel_path.eq(rel_path_val)), ) - .set(( - date_taken.eq(date_taken_val), - date_taken_source.eq(source), - )) + .set((date_taken.eq(date_taken_val), date_taken_source.eq(source))) .execute(connection.deref_mut()) .map(|_| ()) .map_err(|_| anyhow::anyhow!("Update error")) @@ -1139,6 +1185,60 @@ impl ExifDao for SqliteExifDao { .map_err(|_| DbError::new(DbErrorKind::UpdateError)) } + fn get_memories_in_window( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + span_token: &str, + years_back: i32, + tz_offset_minutes: i32, + ) -> Result, DbError> { + trace_db_call(context, "query", "get_memories_in_window", |_span| { + // strftime pattern is span-dependent; the rest of the WHERE + // clause is shared. Only `%m-%d`, `%W`, `%m` are accepted — + // anything else is a programmer error. + let pattern = match span_token { + "day" => "%m-%d", + "week" => "%W", + "month" => "%m", + _ => return Err(anyhow::anyhow!("invalid span token: {}", span_token)), + }; + + // SQLite's date modifiers want a string like `'-480 minutes'` + // (signed) or `'-15 years'`. Use the `+` flag so positive + // offsets render as `+480 minutes`. + let tz_modifier = format!("{:+} minutes", tz_offset_minutes); + let years_modifier = format!("-{} years", years_back); + + let sql = format!( + "SELECT rel_path, date_taken, last_modified \ + FROM image_exif \ + WHERE library_id = ?1 \ + AND date_taken IS NOT NULL \ + AND date_taken <= unixepoch('now') \ + AND date_taken >= unixepoch('now', ?2) \ + AND strftime('{p}', date_taken, 'unixepoch', ?3) \ + = strftime('{p}', 'now', ?3)", + p = pattern, + ); + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + diesel::sql_query(sql) + .bind::(library_id) + .bind::(years_modifier) + .bind::(tz_modifier) + .load::(connection.deref_mut()) + .map(|rows| { + rows.into_iter() + .map(|r| (r.rel_path, r.date_taken, r.last_modified)) + .collect() + }) + .map_err(|e| anyhow::anyhow!("Query error: {}", e)) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + fn find_by_content_hash( &mut self, context: &opentelemetry::Context, @@ -2069,9 +2169,7 @@ mod exif_dao_tests { // Other library — never returned even when eligible. insert_row_with_source(&mut dao, 2, "archive/null.jpg", None, None); - let rows = dao - .get_rows_needing_date_backfill(&ctx(), 1, 100) - .unwrap(); + let rows = dao.get_rows_needing_date_backfill(&ctx(), 1, 100).unwrap(); let paths: Vec = rows.into_iter().map(|(_, p)| p).collect(); assert_eq!(paths.len(), 2, "expected null + fs_time eligible only"); assert!(paths.contains(&"main/null.jpg".to_string())); @@ -2098,4 +2196,125 @@ mod exif_dao_tests { assert_eq!(row.content_hash, Some("deadbeef".to_string())); assert_eq!(row.size_bytes, Some(1024)); } + + #[test] + fn get_memories_in_window_day_matches_only_same_md_in_year_window() { + let mut dao = setup_two_libraries(); + + // Anchor on a known date so the test is timezone-stable: insert + // rows whose date_taken IS the same wall-clock time as `now()` + // would have been some N years ago, and verify the day-span + // filter returns them. We can't bind 'now' from Rust, so instead + // we insert rows for the *current* day (offset by 365 days * N + // years) and rely on SQLite computing the same `%m-%d` for both + // sides of the equality. Using the unix-now-minus-365*N seconds + // approximation is good enough — leap years drift by ~one day + // every four years, but the test only checks day-of-year match + // for rows inserted "today minus N years (no leap correction)". + // To dodge the leap-year drift entirely, we use rows whose + // calendar date is read back from SQLite and we just check + // membership. + + // 1y, 5y, 10y, 21y back from 'now': + let now_ts = chrono::Utc::now().timestamp(); + let year_secs: i64 = 365 * 86_400; + insert_row_with_source( + &mut dao, + 1, + "y1.jpg", + Some(now_ts - year_secs), + Some("exif"), + ); + insert_row_with_source( + &mut dao, + 1, + "y5.jpg", + Some(now_ts - 5 * year_secs), + Some("exif"), + ); + insert_row_with_source( + &mut dao, + 1, + "y10.jpg", + Some(now_ts - 10 * year_secs), + Some("exif"), + ); + // Outside the 20-year window: + insert_row_with_source( + &mut dao, + 1, + "y21.jpg", + Some(now_ts - 21 * year_secs), + Some("exif"), + ); + // Future row: must be excluded by the `<= now` clause. + insert_row_with_source( + &mut dao, + 1, + "future.jpg", + Some(now_ts + 86_400), + Some("exif"), + ); + // No date — never returned regardless of source. + insert_row_with_source(&mut dao, 1, "nodate.jpg", None, None); + + // Month span returns rows from the same calendar month over the + // window — y1, y5, y10 should all qualify (same month any year), + // y21 trims (out of years_back), future trims (> now), nodate + // never qualifies. Day-of-month leap drift means even with 365- + // day approximation a row may shift by one in either direction; + // month is the safer assertion under that approximation. + let rows = dao + .get_memories_in_window(&ctx(), 1, "month", 20, 0) + .unwrap(); + let paths: std::collections::HashSet = + rows.into_iter().map(|(p, _, _)| p).collect(); + assert!( + paths.contains("y1.jpg") && paths.contains("y5.jpg") && paths.contains("y10.jpg"), + "month span should include all in-window rows: {:?}", + paths + ); + assert!( + !paths.contains("y21.jpg"), + "21-year-old row should fall outside the years_back window" + ); + assert!(!paths.contains("future.jpg"), "future row must be excluded"); + assert!( + !paths.contains("nodate.jpg"), + "row without date must never appear" + ); + } + + #[test] + fn get_memories_in_window_scopes_by_library_id() { + let mut dao = setup_two_libraries(); + let now_ts = chrono::Utc::now().timestamp(); + let year = 365 * 86_400i64; + insert_row_with_source(&mut dao, 1, "main/x.jpg", Some(now_ts - year), Some("exif")); + insert_row_with_source( + &mut dao, + 2, + "archive/x.jpg", + Some(now_ts - year), + Some("exif"), + ); + + let lib1 = dao + .get_memories_in_window(&ctx(), 1, "month", 20, 0) + .unwrap(); + let lib2 = dao + .get_memories_in_window(&ctx(), 2, "month", 20, 0) + .unwrap(); + assert_eq!(lib1.len(), 1); + assert_eq!(lib1[0].0, "main/x.jpg"); + assert_eq!(lib2.len(), 1); + assert_eq!(lib2[0].0, "archive/x.jpg"); + } + + #[test] + fn get_memories_in_window_rejects_unknown_span_token() { + let mut dao = setup_two_libraries(); + let err = dao.get_memories_in_window(&ctx(), 1, "decade", 20, 0); + assert!(err.is_err()); + } } diff --git a/src/files.rs b/src/files.rs index b798330..e4fb407 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1666,6 +1666,17 @@ mod tests { Ok(()) } + fn get_memories_in_window( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + _span_token: &str, + _years_back: i32, + _tz_offset_minutes: i32, + ) -> Result, DbError> { + Ok(Vec::new()) + } + fn find_by_content_hash( &mut self, _context: &opentelemetry::Context, diff --git a/src/memories.rs b/src/memories.rs index 54ae188..524e2a0 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -1,25 +1,19 @@ use actix_web::web::Data; use actix_web::{HttpRequest, HttpResponse, Responder, get, web}; use chrono::LocalResult::{Ambiguous, Single}; -use chrono::{DateTime, Datelike, FixedOffset, Local, LocalResult, NaiveDate, TimeZone, Utc}; +use chrono::{DateTime, FixedOffset, Local, LocalResult, NaiveDate, TimeZone}; use log::{debug, trace, warn}; use opentelemetry::KeyValue; use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer}; -use rayon::prelude::*; use serde::{Deserialize, Serialize}; -use std::collections::HashSet; use std::path::Path; use std::path::PathBuf; use std::sync::Mutex; -use walkdir::WalkDir; use crate::data::Claims; use crate::database::ExifDao; -use crate::files::is_image_or_video; -use crate::libraries::Library; use crate::otel::{extract_context_from_request, global_tracer}; use crate::state::AppState; -use crate::utils::earliest_fs_time; // Helper that encapsulates path-exclusion semantics #[derive(Debug)] @@ -139,22 +133,6 @@ pub struct MemoriesResponse { pub items: Vec, } -/// Convert Unix timestamp to NaiveDate in client timezone -fn timestamp_to_naive_date( - timestamp: i64, - client_timezone: &Option, -) -> Option { - let dt_utc = DateTime::::from_timestamp(timestamp, 0)?; - - let date = if let Some(tz) = client_timezone { - dt_utc.with_timezone(tz).date_naive() - } else { - dt_utc.with_timezone(&Local).date_naive() - }; - - Some(date) -} - pub fn extract_date_from_filename(filename: &str) -> Option> { let build_date_from_ymd_capture = |captures: ®ex::Captures| -> Option> { @@ -283,232 +261,21 @@ pub fn extract_date_from_filename(filename: &str) -> Option, - client_timezone: &Option, -) -> Option<(NaiveDate, Option, Option)> { - // Read file metadata once - let meta = std::fs::metadata(path).ok()?; - - // Priority 1: Try to extract date from filename - if let Some(filename_date) = path - .file_name() - .and_then(|f| f.to_str()) - .and_then(extract_date_from_filename) - { - // Convert to client timezone if specified - let date_in_timezone = if let Some(tz) = client_timezone { - filename_date.with_timezone(tz) - } else { - filename_date.with_timezone(&Local).fixed_offset() - }; - - let timestamp = if let Some(tz) = client_timezone { - filename_date.with_timezone(tz).timestamp() - } else { - filename_date.timestamp() - }; - - let modified = meta.modified().ok().map(|t| { - let utc: DateTime = t.into(); - if let Some(tz) = client_timezone { - utc.with_timezone(tz).timestamp() - } else { - utc.timestamp() - } - }); - - debug!( - "Memory date from filename {:?} > {:?} = {:?}", - path.file_name(), - filename_date, - date_in_timezone - ); - return Some((date_in_timezone.date_naive(), Some(timestamp), modified)); - } - - // Priority 2: Use EXIF date_taken if available - if let Some(exif_timestamp) = exif_date_taken { - let date = timestamp_to_naive_date(exif_timestamp, client_timezone)?; - - let modified = meta.modified().ok().map(|t| { - let utc: DateTime = t.into(); - if let Some(tz) = client_timezone { - utc.with_timezone(tz).timestamp() - } else { - utc.timestamp() - } - }); - - debug!("Memory date from EXIF {:?} = {:?}", path.file_name(), date); - return Some((date, Some(exif_timestamp), modified)); - } - - // Priority 3: Fall back to metadata (earlier of created/modified — see utils::earliest_fs_time) - let system_time = earliest_fs_time(&meta)?; - let dt_utc: DateTime = system_time.into(); - - let date_in_timezone = if let Some(tz) = client_timezone { - dt_utc.with_timezone(tz).date_naive() - } else { - dt_utc.with_timezone(&Local).date_naive() - }; - - let created_timestamp = if let Some(tz) = client_timezone { - dt_utc.with_timezone(tz).timestamp() - } else { - dt_utc.timestamp() - }; - - let modified = meta.modified().ok().map(|t| { - let utc: DateTime = t.into(); - if let Some(tz) = client_timezone { - utc.with_timezone(tz).timestamp() - } else { - utc.timestamp() - } - }); - - trace!("Fallback metadata create date = {:?}", date_in_timezone); - Some((date_in_timezone, Some(created_timestamp), modified)) +/// Convert a `date_taken` Unix-seconds value to a `NaiveDate` in the +/// client's local time. Falls back to server-local when the client didn't +/// send a tz hint. +fn date_in_client_tz(timestamp: i64, client_timezone: Option) -> Option { + let dt = DateTime::from_timestamp(timestamp, 0)?; + Some(match client_timezone { + Some(tz) => dt.with_timezone(&tz).date_naive(), + None => dt.with_timezone(&Local).date_naive(), + }) } -/// Collect memories from EXIF database -fn collect_exif_memories( - exif_dao: &Data>>, - context: &opentelemetry::Context, - base_path: &str, - library_id: i32, - now: NaiveDate, - span_mode: MemoriesSpan, - years_back: u32, - client_timezone: &Option, - path_excluder: &PathExcluder, -) -> Vec<(MemoryItem, NaiveDate)> { - // Query database for all files with date_taken - let exif_records = match exif_dao.lock() { - Ok(mut dao) => match dao.get_all_with_date_taken(context, Some(library_id)) { - Ok(records) => records, - Err(e) => { - warn!("Failed to query EXIF database: {:?}", e); - return Vec::new(); // Graceful fallback - } - }, - Err(e) => { - warn!("Failed to lock EXIF DAO: {:?}", e); - return Vec::new(); - } - }; - - // Parallel processing with Rayon - exif_records - .par_iter() - .filter_map(|(file_path, date_taken_ts)| { - // Build full path - let full_path = Path::new(base_path).join(file_path); - - // Check exclusions - if path_excluder.is_excluded(&full_path) { - return None; - } - - // Verify file exists - if !full_path.exists() || !full_path.is_file() { - warn!("EXIF record exists but file not found: {:?}", full_path); - return None; - } - - // Get date with priority: filename → EXIF → metadata - // This ensures sorting and display use the same date source - let (file_date, created, modified) = - get_memory_date_with_priority(&full_path, Some(*date_taken_ts), client_timezone)?; - - // Check if matches memory criteria - if !is_memories_match(file_path, file_date, now, span_mode, years_back) { - return None; - } - - Some(( - MemoryItem { - path: file_path.clone(), - created, - modified, - library_id, - }, - file_date, - )) - }) - .collect() -} - -/// Collect memories from file system scan (for files not in EXIF DB) -fn collect_filesystem_memories( - base_path: &str, - library_id: i32, - path_excluder: &PathExcluder, - skip_paths: &HashSet, - now: NaiveDate, - span_mode: MemoriesSpan, - years_back: u32, - client_timezone: &Option, -) -> Vec<(MemoryItem, NaiveDate)> { - let base = Path::new(base_path); - - let entries: Vec<_> = WalkDir::new(base) - .into_iter() - .filter_map(|e| e.ok()) - .filter(|e| { - let path = e.path(); - - // Skip if already processed by EXIF query - if skip_paths.contains(path) { - return false; - } - - // Check exclusions - if path_excluder.is_excluded(path) { - return false; - } - - // Only process image/video files - e.file_type().is_file() && is_image_or_video(path) - }) - .collect(); - - entries - .par_iter() - .filter_map(|entry| { - // Use unified date priority function (no EXIF for filesystem scan) - let (file_date, created, modified) = - get_memory_date_with_priority(entry.path(), None, client_timezone)?; - - if is_memories_match( - entry.path().to_str().unwrap_or("Unknown"), - file_date, - now, - span_mode, - years_back, - ) { - let path_relative = entry.path().strip_prefix(base).ok()?.to_str()?.to_string(); - - Some(( - MemoryItem { - path: path_relative, - created, - modified, - library_id, - }, - file_date, - )) - } else { - None - } - }) - .collect() -} +/// Default lookback for `/memories`. The original 15-year cap pre-dated +/// most of the imported libraries; bumped to 20 so users with deeper +/// archives see those photos surface on the matching anniversary too. +pub const DEFAULT_YEARS_BACK: i32 = 20; #[get("/memories")] pub async fn list_memories( @@ -525,32 +292,28 @@ pub async fn list_memories( opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); let span_mode = q.span.unwrap_or(MemoriesSpan::Day); - let years_back: u32 = 15; - - // Create timezone from client offset, default to local timezone if not provided - let client_timezone = match q.timezone_offset_minutes { - Some(offset_mins) => { - let offset_secs = offset_mins * 60; - Some( - FixedOffset::east_opt(offset_secs) - .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap()), - ) - } - None => None, + let span_token = match span_mode { + MemoriesSpan::Day => "day", + MemoriesSpan::Week => "week", + MemoriesSpan::Month => "month", }; + let years_back: i32 = DEFAULT_YEARS_BACK; - let now = if let Some(tz) = client_timezone { - debug!("Client timezone: {:?}", tz); - Utc::now().with_timezone(&tz).date_naive() - } else { - Local::now().date_naive() - }; + // The SQL filter expects a signed offset in minutes from UTC; default + // 0 (UTC) when the client didn't send a hint. We also keep a chrono + // `FixedOffset` for sorting/secondary-key date math in Rust below — + // anchoring both sides on the same value keeps "what SQL matched" and + // "what we sort by" consistent. + let tz_offset_minutes = q.timezone_offset_minutes.unwrap_or(0); + let client_timezone = q + .timezone_offset_minutes + .and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60)); - debug!("Now: {:?}", now); + debug!( + "list_memories: span={:?} tz_offset_min={} years_back={}", + span_mode, tz_offset_minutes, years_back + ); - // Resolve the optional library filter. Unknown values are a 400; None - // means "all libraries" — currently equivalent to the primary library - // while only one is configured. let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { Ok(lib) => lib, Err(msg) => { @@ -558,13 +321,13 @@ pub async fn list_memories( return HttpResponse::BadRequest().body(msg); } }; - // When `library` is `Some`, scope to that one library; otherwise union - // across every configured library and let the results interleave. - let libraries_to_scan: Vec<&Library> = match library { + let libraries_to_scan: Vec<&crate::libraries::Library> = match library { Some(lib) => vec![lib], None => app_state.libraries.iter().collect(), }; + // (item, date) tuples — `date` is the canonical NaiveDate of the + // memory in the client's tz, used as the primary sort key. let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = Vec::new(); for lib in &libraries_to_scan { @@ -572,78 +335,82 @@ pub async fn list_memories( let effective = lib.effective_excluded_dirs(&app_state.excluded_dirs); let path_excluder = PathExcluder::new(base, &effective); - let exif_memories = collect_exif_memories( - &exif_dao, - &span_context, - &lib.root_path, - lib.id, - now, - span_mode, - years_back, - &client_timezone, - &path_excluder, - ); + let rows = match exif_dao.lock() { + Ok(mut dao) => match dao.get_memories_in_window( + &span_context, + lib.id, + span_token, + years_back, + tz_offset_minutes, + ) { + Ok(rows) => rows, + Err(e) => { + warn!( + "Failed to query memories for library '{}': {:?}", + lib.name, e + ); + continue; + } + }, + Err(e) => { + warn!("Failed to lock EXIF DAO: {:?}", e); + continue; + } + }; - let exif_paths: HashSet = exif_memories - .iter() - .map(|(item, _)| PathBuf::from(&lib.root_path).join(&item.path)) - .collect(); + for (rel_path, date_taken_ts, last_modified_ts) in rows { + // Apply per-library exclusions in Rust — they're a small + // set and pushing them into the SQL WHERE adds bind-param + // gymnastics with no measurable win at this scale. + let full_path = base.join(&rel_path); + if path_excluder.is_excluded(&full_path) { + trace!("Memory excluded by PathExcluder: {:?}", full_path); + continue; + } - let fs_memories = collect_filesystem_memories( - &lib.root_path, - lib.id, - &path_excluder, - &exif_paths, - now, - span_mode, - years_back, - &client_timezone, - ); + let Some(file_date) = date_in_client_tz(date_taken_ts, client_timezone) else { + continue; + }; - memories_with_dates.extend(exif_memories); - memories_with_dates.extend(fs_memories); + memories_with_dates.push(( + MemoryItem { + path: rel_path, + created: Some(date_taken_ts), + modified: Some(last_modified_ts), + library_id: lib.id, + }, + file_date, + )); + } } + // Sort once over the merged result set. The SQL filter handles the + // matching; sort order is purely UI concern. match span_mode { - // Sort by absolute time for a more 'overview' + // Month: chronological — gives an "overview" feel. MemoriesSpan::Month => memories_with_dates.sort_by(|a, b| a.1.cmp(&b.1)), - // For week span, sort by full date + timestamp (chronological) + // Week: full date then timestamp (oldest → newest). MemoriesSpan::Week => { memories_with_dates.sort_by(|a, b| { - // First, sort by full date (year, month, day) - let date_cmp = a.1.cmp(&b.1); - if date_cmp != std::cmp::Ordering::Equal { - return date_cmp; - } - - // Then sort by full created timestamp (oldest to newest) - match (a.0.created, b.0.created) { - (Some(a_time), Some(b_time)) => a_time.cmp(&b_time), - (Some(_), None) => std::cmp::Ordering::Less, - (None, Some(_)) => std::cmp::Ordering::Greater, - (None, None) => std::cmp::Ordering::Equal, - } - }); - } - // For day span, sort by day of month then by time - MemoriesSpan::Day => { - memories_with_dates.sort_by(|a, b| { - let day_comparison = a.1.day().cmp(&b.1.day()); - - if day_comparison == std::cmp::Ordering::Equal { - match (a.0.created, b.0.created) { - (Some(a_time), Some(b_time)) => a_time.cmp(&b_time), + a.1.cmp(&b.1) + .then_with(|| match (a.0.created, b.0.created) { + (Some(at), Some(bt)) => at.cmp(&bt), (Some(_), None) => std::cmp::Ordering::Less, (None, Some(_)) => std::cmp::Ordering::Greater, (None, None) => std::cmp::Ordering::Equal, - } - } else { - day_comparison - } + }) + }); + } + // Day: same calendar day across years, sub-sorted by timestamp. + MemoriesSpan::Day => { + memories_with_dates.sort_by(|a, b| match (a.0.created, b.0.created) { + (Some(at), Some(bt)) => at.cmp(&bt), + (Some(_), None) => std::cmp::Ordering::Less, + (None, Some(_)) => std::cmp::Ordering::Greater, + (None, None) => std::cmp::Ordering::Equal, }); } } - // Sort by day of the month and time (using the created timestamp) let items: Vec = memories_with_dates.into_iter().map(|(m, _)| m).collect(); @@ -653,13 +420,7 @@ pub async fn list_memories( KeyValue::new("span", format!("{:?}", span_mode)), KeyValue::new("years_back", years_back.to_string()), KeyValue::new("result_count", items.len().to_string()), - KeyValue::new( - "client_timezone", - format!( - "{:?}", - client_timezone.unwrap_or_else(|| FixedOffset::east_opt(0).unwrap()) - ), - ), + KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()), KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), ], ); @@ -668,50 +429,10 @@ pub async fn list_memories( HttpResponse::Ok().json(MemoriesResponse { items }) } -fn is_memories_match( - file_path: &str, - file_date: NaiveDate, - today: NaiveDate, - span: MemoriesSpan, - years_back: u32, -) -> bool { - if file_date > today { - return false; - } - let years_diff = (today.year() - file_date.year()).unsigned_abs(); - if years_diff > years_back { - warn!( - "File ({}) date is too far in the past: {:?} vs {:?}", - file_path, file_date, today - ); - return false; - } - - match span { - MemoriesSpan::Day => same_month_day_any_year(file_date, today), - MemoriesSpan::Week => same_week_any_year(file_date, today), - MemoriesSpan::Month => same_month_any_year(file_date, today), - } -} - -fn same_month_day_any_year(a: NaiveDate, b: NaiveDate) -> bool { - a.month() == b.month() && a.day() == b.day() -} - -// Match same ISO week number and same weekday (ignoring year) -fn same_week_any_year(a: NaiveDate, b: NaiveDate) -> bool { - a.iso_week().week().eq(&b.iso_week().week()) -} - -// Match same month (ignoring day and year) -fn same_month_any_year(a: NaiveDate, b: NaiveDate) -> bool { - a.month() == b.month() -} - #[cfg(test)] mod tests { use super::*; - use chrono::Timelike; + use chrono::{Datelike, Timelike}; use std::fs::{self, File}; use tempfile::tempdir; @@ -869,99 +590,11 @@ mod tests { ); } - #[test] - fn test_memory_date_priority_filename() { - let temp_dir = tempdir().unwrap(); - let temp_file = temp_dir.path().join("Screenshot_2014-06-01-20-44-50.png"); - File::create(&temp_file).unwrap(); - - // Test that filename takes priority (even with EXIF data available) - let exif_date = DateTime::::from_timestamp(1609459200, 0) // 2021-01-01 - .unwrap() - .timestamp(); - - let (date, created, _) = get_memory_date_with_priority( - &temp_file, - Some(exif_date), - &Some(*Local::now().fixed_offset().offset()), - ) - .unwrap(); - - // Check that date is from filename (2014), NOT EXIF (2021) - assert_eq!(date.year(), 2014); - assert_eq!(date.month(), 6); - assert_eq!(date.day(), 1); - - // Check that created timestamp matches the date from filename - assert!(created.is_some()); - let ts = created.unwrap(); - // The timestamp should be for 2014-06-01 20:44:50 in the LOCAL timezone - let dt_from_ts = Local.timestamp_opt(ts, 0).unwrap(); - assert_eq!(dt_from_ts.year(), 2014); - assert_eq!(dt_from_ts.month(), 6); - assert_eq!(dt_from_ts.day(), 1); - assert_eq!(dt_from_ts.hour(), 20); - assert_eq!(dt_from_ts.minute(), 44); - assert_eq!(dt_from_ts.second(), 50); - } - - #[test] - fn test_memory_date_priority_metadata_fallback() { - let temp_dir = tempdir().unwrap(); - let temp_file = temp_dir.path().join("regular_image.jpg"); - File::create(&temp_file).unwrap(); - - // Test metadata fallback when no filename date or EXIF - let (date, created, modified) = - get_memory_date_with_priority(&temp_file, None, &None).unwrap(); - - // Both date and timestamps should be from metadata (recent) - let today = Local::now().date_naive(); - assert_eq!(date.year(), today.year()); - assert_eq!(date.month(), today.month()); - - // Both timestamps should be valid - assert!(created.is_some()); - assert!(modified.is_some()); - - // Check that timestamps are recent - let dt_created = DateTime::::from_timestamp(created.unwrap(), 0).unwrap(); - assert_eq!(dt_created.year(), today.year()); - - let dt_modified = DateTime::::from_timestamp(modified.unwrap(), 0).unwrap(); - assert_eq!(dt_modified.year(), today.year()); - } - - #[test] - fn test_memory_date_priority_exif_over_metadata() { - let temp_dir = tempdir().unwrap(); - let temp_file = temp_dir.path().join("regular_image.jpg"); - File::create(&temp_file).unwrap(); - - // Test that EXIF takes priority over metadata (but not filename) - // EXIF date: June 15, 2020 12:00:00 UTC (safe from timezone edge cases) - let exif_date = DateTime::::from_timestamp(1592222400, 0) // 2020-06-15 12:00:00 UTC - .unwrap() - .timestamp(); - - let (date, created, modified) = - get_memory_date_with_priority(&temp_file, Some(exif_date), &None).unwrap(); - - // Date should be from EXIF (2020), not metadata (today) - assert_eq!(date.year(), 2020); - assert_eq!(date.month(), 6); - assert_eq!(date.day(), 15); - - // Created timestamp should also be from EXIF - assert!(created.is_some()); - assert_eq!(created.unwrap(), exif_date); - - // Modified should still be from metadata - assert!(modified.is_some()); - let today = Local::now().date_naive(); - let dt_modified = DateTime::::from_timestamp(modified.unwrap(), 0).unwrap(); - assert_eq!(dt_modified.year(), today.year()); - } + // The obsolete `test_memory_date_priority_*` tests covered the old + // request-time waterfall in `get_memory_date_with_priority`. Their + // replacement lives in `crate::date_resolver::tests` (resolver + // waterfall) and the SQL surface is exercised by integration tests + // that hit `get_memories_in_window` directly. #[test] fn test_path_excluder_absolute_under_base() {