Merge pull request 'memories: reject implausible filename-derived timestamps' (#80) from feature/filename-date-plausibility into master

Reviewed-on: #80
This commit was merged in pull request #80.
This commit is contained in:
2026-05-07 16:02:50 +00:00

View File

@@ -212,6 +212,14 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
return None;
}
// A leading zero rules out a real unix timestamp at any sane
// resolution: epoch seconds from 2001-09-09 onward, and epoch
// milliseconds for any realistic capture date, are both 10+ digits
// with no leading zero. Filenames like `000227580005.jpg` are
// sequential scan IDs, not timestamps.
if timestamp_str.starts_with('0') {
return None;
}
// Try milliseconds first (13 digits exactly)
if len == 13
&& let Some(date_time) = timestamp_str
@@ -219,6 +227,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
.ok()
.and_then(DateTime::from_timestamp_millis)
.map(|naive_dt| naive_dt.fixed_offset())
.and_then(plausible_filename_date)
{
return Some(date_time);
}
@@ -231,6 +240,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
.ok()
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
.map(|naive_dt| naive_dt.fixed_offset())
.and_then(plausible_filename_date)
{
return Some(date_time);
}
@@ -242,6 +252,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
.ok()
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
.map(|naive_dt| naive_dt.fixed_offset())
.and_then(plausible_filename_date)
{
return Some(date_time);
}
@@ -253,6 +264,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
.ok()
.and_then(DateTime::from_timestamp_millis)
.map(|naive_dt| naive_dt.fixed_offset())
.and_then(plausible_filename_date)
{
return Some(date_time);
}
@@ -261,6 +273,27 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
None
}
/// Plausibility filter applied to timestamps decoded from a filename.
///
/// Returns `Some(dt)` unchanged when the year of `dt` lies in
/// `1995..=(current UTC year + 1)`, and `None` otherwise. Digit runs that
/// decode outside that window are almost always sequential scan IDs
/// (`000227580005.jpg` → 1970) or arbitrary numeric suffixes
/// (`IMG_21323906751390.jpeg` → 2037) that the regex caught by accident.
/// Returning `None` lets the date_resolver waterfall fall through to
/// `fs_time`, a much better proxy for content age than a fake epoch date.
fn plausible_filename_date(dt: DateTime<FixedOffset>) -> Option<DateTime<FixedOffset>> {
    use chrono::Datelike;

    // Lower bound: 1995 predates digital photography for most users.
    const EARLIEST_YEAR: i32 = 1995;
    // Upper bound: one year past `now` absorbs clock skew on freshly-taken
    // shots while still keeping 2037-style values out.
    let latest_year = Utc::now().year() + 1;

    Some(dt).filter(|candidate| (EARLIEST_YEAR..=latest_year).contains(&candidate.year()))
}
/// Convert a `date_taken` Unix-seconds value to a `NaiveDate` in the
/// client's local time. Falls back to server-local when the client didn't
/// send a tz hint.
@@ -590,6 +623,23 @@ mod tests {
);
}
#[test]
fn test_extract_date_from_filename_leading_zero_scan_id_should_not_match() {
    // `000227580005.jpg` is a sequential film-scan ID. Read as an epoch
    // value it would land in early January 1970, but a leading zero cannot
    // occur in a real epoch timestamp at any sane resolution. Extraction
    // must yield nothing so the resolver can fall through to fs_time
    // rather than pinning the photo to 1970.
    let scan_id_name = "000227580005.jpg";
    let resolved = extract_date_from_filename(scan_id_name);
    assert!(resolved.is_none());
}
#[test]
fn test_extract_date_from_filename_far_future_should_not_match() {
    // The first 10 digits of `IMG_21323906751390.jpeg` are 2132390675,
    // which decodes as epoch seconds to a date in 2037. The plausibility
    // gate must reject it so the resolver falls through to fs_time (which
    // carries the real ingest date).
    let far_future_name = "IMG_21323906751390.jpeg";
    let resolved = extract_date_from_filename(far_future_name);
    assert!(resolved.is_none());
}
// The obsolete `test_memory_date_priority_*` tests covered the old
// request-time waterfall in `get_memory_date_with_priority`. Their
// replacement lives in `crate::date_resolver::tests` (resolver