Merge pull request 'memories: reject implausible filename-derived timestamps' (#80) from feature/filename-date-plausibility into master
Reviewed-on: #80
This commit was merged in pull request #80.
This commit is contained in:
@@ -212,6 +212,14 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
||||
return None;
|
||||
}
|
||||
|
||||
// A leading zero rules out a real unix timestamp at any sane
|
||||
// resolution (seconds since 2001-09-09, ms since 1970-01-01 are
|
||||
// both 10+ digits with no leading zero). Filenames like
|
||||
// `000227580005.jpg` are sequential scan IDs, not timestamps.
|
||||
if timestamp_str.starts_with('0') {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Try milliseconds first (13 digits exactly)
|
||||
if len == 13
|
||||
&& let Some(date_time) = timestamp_str
|
||||
@@ -219,6 +227,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
||||
.ok()
|
||||
.and_then(DateTime::from_timestamp_millis)
|
||||
.map(|naive_dt| naive_dt.fixed_offset())
|
||||
.and_then(plausible_filename_date)
|
||||
{
|
||||
return Some(date_time);
|
||||
}
|
||||
@@ -231,6 +240,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
||||
.ok()
|
||||
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
|
||||
.map(|naive_dt| naive_dt.fixed_offset())
|
||||
.and_then(plausible_filename_date)
|
||||
{
|
||||
return Some(date_time);
|
||||
}
|
||||
@@ -242,6 +252,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
||||
.ok()
|
||||
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
|
||||
.map(|naive_dt| naive_dt.fixed_offset())
|
||||
.and_then(plausible_filename_date)
|
||||
{
|
||||
return Some(date_time);
|
||||
}
|
||||
@@ -253,6 +264,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
||||
.ok()
|
||||
.and_then(DateTime::from_timestamp_millis)
|
||||
.map(|naive_dt| naive_dt.fixed_offset())
|
||||
.and_then(plausible_filename_date)
|
||||
{
|
||||
return Some(date_time);
|
||||
}
|
||||
@@ -261,6 +273,27 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
||||
None
|
||||
}
|
||||
|
||||
/// Sanity gate for filename-derived timestamps. Real photo capture dates
|
||||
/// live in a narrow window; values outside it are almost always sequential
|
||||
/// scan IDs (`000227580005.jpg` → 1970) or arbitrary numeric suffixes
|
||||
/// (`IMG_21323906751390.jpeg` → 2037) that the regex caught by accident.
|
||||
/// Rejecting them lets the date_resolver waterfall fall through to
|
||||
/// `fs_time`, which is a much better proxy for content age than a fake
|
||||
/// epoch date.
|
||||
fn plausible_filename_date(dt: DateTime<FixedOffset>) -> Option<DateTime<FixedOffset>> {
|
||||
use chrono::Datelike;
|
||||
let year = dt.year();
|
||||
// 1995 predates digital photography for most users; allowing one year
|
||||
// past `now` covers clock-skew on freshly-taken shots without letting
|
||||
// 2037 timestamps through.
|
||||
let max_year = Utc::now().year() + 1;
|
||||
if (1995..=max_year).contains(&year) {
|
||||
Some(dt)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a `date_taken` Unix-seconds value to a `NaiveDate` in the
|
||||
/// client's local time. Falls back to server-local when the client didn't
|
||||
/// send a tz hint.
|
||||
@@ -590,6 +623,23 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_extract_date_from_filename_leading_zero_scan_id_should_not_match() {
    // `000227580005.jpg` is a sequential film-scan ID. Read as a 12-digit
    // millisecond timestamp it would land on 1970-01-03, but a leading zero
    // rules out a real epoch value at any sane resolution. The extractor
    // must yield nothing so the resolver can fall through to fs_time
    // instead of pinning the photo to 1970.
    let resolved = extract_date_from_filename("000227580005.jpg");

    assert!(resolved.is_none());
}
|
||||
|
||||
#[test]
fn test_extract_date_from_filename_far_future_should_not_match() {
    // The first 10 digits of `IMG_21323906751390.jpeg` are 2132390675,
    // which as Unix seconds is a date in 2037. The plausibility gate must
    // reject it so the resolver falls through to fs_time (which carries
    // the real ingest date).
    let resolved = extract_date_from_filename("IMG_21323906751390.jpeg");

    assert!(resolved.is_none());
}
|
||||
|
||||
// The obsolete `test_memory_date_priority_*` tests covered the old
|
||||
// request-time waterfall in `get_memory_date_with_priority`. Their
|
||||
// replacement lives in `crate::date_resolver::tests` (resolver
|
||||
|
||||
Reference in New Issue
Block a user