From feaae9b6d38639fb8c7359c8a2c55caabf2acd5b Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Thu, 7 May 2026 12:02:07 -0400 Subject: [PATCH] memories: reject implausible filename-derived timestamps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Filenames like `000227580005.jpg` (film-scan ID) and `IMG_21323906751390.jpeg` were matched by the 10-16 digit timestamp regex and resolved to 1970 / 2037, then written into `image_exif.date_taken` with `source = 'filename'`. EXIF-less photos showed up under those bogus dates everywhere date_taken is read. Two new guards in `extract_date_from_filename`: - leading zero → reject (real epoch values don't have one at any sane resolution). - resolved year outside [1995, now+1y] → reject. Both let the date_resolver waterfall fall through to fs_time, which is a much better proxy for content age than a fake epoch date. Regression tests cover the two reported filenames. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/memories.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/memories.rs b/src/memories.rs index 695d486..909be99 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -212,6 +212,14 @@ pub fn extract_date_from_filename(filename: &str) -> Option Option Option Option Option Option) -> Option> { + use chrono::Datelike; + let year = dt.year(); + // 1995 predates digital photography for most users; allowing one year + // past `now` covers clock-skew on freshly-taken shots without letting + // 2037 timestamps through. + let max_year = Utc::now().year() + 1; + if (1995..=max_year).contains(&year) { + Some(dt) + } else { + None + } +} + /// Convert a `date_taken` Unix-seconds value to a `NaiveDate` in the /// client's local time. Falls back to server-local when the client didn't /// send a tz hint. 
@@ -590,6 +623,23 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_extract_date_from_filename_leading_zero_scan_id_should_not_match() {
+        // Sequential film-scan IDs like 000227580005.jpg parsed as a 12-digit
+        // ms timestamp resolve to 1970-01-03; the leading zero rules out a
+        // real epoch value at any sane resolution. Resolver should fall
+        // through to fs_time instead of pinning the photo to 1970.
+        assert!(extract_date_from_filename("000227580005.jpg").is_none());
+    }
+
+    #[test]
+    fn test_extract_date_from_filename_far_future_should_not_match() {
+        // IMG_21323906751390.jpeg → first 10 digits = 2132390675 → 2037. The
+        // plausibility gate rejects it so the resolver falls through to fs_time
+        // (the real ingest date). NOTE(review): gate admits now+1y; revisit before 2036.
+        assert!(extract_date_from_filename("IMG_21323906751390.jpeg").is_none());
+    }
+
     // The obsolete `test_memory_date_priority_*` tests covered the old
     // request-time waterfall in `get_memory_date_with_priority`. Their
     // replacement lives in `crate::date_resolver::tests` (resolver
-- 
2.49.1