Merge pull request 'memories: reject implausible filename-derived timestamps' (#80) from feature/filename-date-plausibility into master
Reviewed-on: #80
This commit was merged in pull request #80.
This commit is contained in:
@@ -212,6 +212,14 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A leading zero rules out a real unix timestamp at any sane
|
||||||
|
// resolution (seconds since 2001-09-09, ms since 1970-01-01 are
|
||||||
|
// both 10+ digits with no leading zero). Filenames like
|
||||||
|
// `000227580005.jpg` are sequential scan IDs, not timestamps.
|
||||||
|
if timestamp_str.starts_with('0') {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
// Try milliseconds first (13 digits exactly)
|
// Try milliseconds first (13 digits exactly)
|
||||||
if len == 13
|
if len == 13
|
||||||
&& let Some(date_time) = timestamp_str
|
&& let Some(date_time) = timestamp_str
|
||||||
@@ -219,6 +227,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
|||||||
.ok()
|
.ok()
|
||||||
.and_then(DateTime::from_timestamp_millis)
|
.and_then(DateTime::from_timestamp_millis)
|
||||||
.map(|naive_dt| naive_dt.fixed_offset())
|
.map(|naive_dt| naive_dt.fixed_offset())
|
||||||
|
.and_then(plausible_filename_date)
|
||||||
{
|
{
|
||||||
return Some(date_time);
|
return Some(date_time);
|
||||||
}
|
}
|
||||||
@@ -231,6 +240,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
|||||||
.ok()
|
.ok()
|
||||||
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
|
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
|
||||||
.map(|naive_dt| naive_dt.fixed_offset())
|
.map(|naive_dt| naive_dt.fixed_offset())
|
||||||
|
.and_then(plausible_filename_date)
|
||||||
{
|
{
|
||||||
return Some(date_time);
|
return Some(date_time);
|
||||||
}
|
}
|
||||||
@@ -242,6 +252,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
|||||||
.ok()
|
.ok()
|
||||||
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
|
.and_then(|timestamp_secs| DateTime::from_timestamp(timestamp_secs, 0))
|
||||||
.map(|naive_dt| naive_dt.fixed_offset())
|
.map(|naive_dt| naive_dt.fixed_offset())
|
||||||
|
.and_then(plausible_filename_date)
|
||||||
{
|
{
|
||||||
return Some(date_time);
|
return Some(date_time);
|
||||||
}
|
}
|
||||||
@@ -253,6 +264,7 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
|||||||
.ok()
|
.ok()
|
||||||
.and_then(DateTime::from_timestamp_millis)
|
.and_then(DateTime::from_timestamp_millis)
|
||||||
.map(|naive_dt| naive_dt.fixed_offset())
|
.map(|naive_dt| naive_dt.fixed_offset())
|
||||||
|
.and_then(plausible_filename_date)
|
||||||
{
|
{
|
||||||
return Some(date_time);
|
return Some(date_time);
|
||||||
}
|
}
|
||||||
@@ -261,6 +273,27 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sanity gate for filename-derived timestamps. Real photo capture dates
|
||||||
|
/// live in a narrow window; values outside it are almost always sequential
|
||||||
|
/// scan IDs (`000227580005.jpg` → 1970) or arbitrary numeric suffixes
|
||||||
|
/// (`IMG_21323906751390.jpeg` → 2037) that the regex caught by accident.
|
||||||
|
/// Rejecting them lets the date_resolver waterfall fall through to
|
||||||
|
/// `fs_time`, which is a much better proxy for content age than a fake
|
||||||
|
/// epoch date.
|
||||||
|
fn plausible_filename_date(dt: DateTime<FixedOffset>) -> Option<DateTime<FixedOffset>> {
|
||||||
|
use chrono::Datelike;
|
||||||
|
let year = dt.year();
|
||||||
|
// 1995 predates digital photography for most users; allowing one year
|
||||||
|
// past `now` covers clock-skew on freshly-taken shots without letting
|
||||||
|
// 2037 timestamps through.
|
||||||
|
let max_year = Utc::now().year() + 1;
|
||||||
|
if (1995..=max_year).contains(&year) {
|
||||||
|
Some(dt)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Convert a `date_taken` Unix-seconds value to a `NaiveDate` in the
|
/// Convert a `date_taken` Unix-seconds value to a `NaiveDate` in the
|
||||||
/// client's local time. Falls back to server-local when the client didn't
|
/// client's local time. Falls back to server-local when the client didn't
|
||||||
/// send a tz hint.
|
/// send a tz hint.
|
||||||
@@ -590,6 +623,23 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_extract_date_from_filename_leading_zero_scan_id_should_not_match() {
    // `000227580005.jpg` is a sequential film-scan ID. Read as a 12-digit
    // millisecond timestamp it would land on 1970-01-03, but a leading
    // zero cannot occur in a real epoch value at any sane resolution.
    // The extractor must therefore yield nothing, so the resolver falls
    // through to fs_time rather than pinning the photo to 1970.
    let scan_id_name = "000227580005.jpg";
    let extracted = extract_date_from_filename(scan_id_name);
    assert!(extracted.is_none());
}
|
||||||
|
|
||||||
|
#[test]
fn test_extract_date_from_filename_far_future_should_not_match() {
    // The first 10 digits of `IMG_21323906751390.jpeg` are 2132390675,
    // which as Unix seconds falls in 2037. The plausibility gate must
    // reject it so the resolver falls through to fs_time (which carries
    // the real ingest date).
    let far_future_name = "IMG_21323906751390.jpeg";
    let extracted = extract_date_from_filename(far_future_name);
    assert!(extracted.is_none());
}
|
||||||
|
|
||||||
// The obsolete `test_memory_date_priority_*` tests covered the old
|
// The obsolete `test_memory_date_priority_*` tests covered the old
|
||||||
// request-time waterfall in `get_memory_date_with_priority`. Their
|
// request-time waterfall in `get_memory_date_with_priority`. Their
|
||||||
// replacement lives in `crate::date_resolver::tests` (resolver
|
// replacement lives in `crate::date_resolver::tests` (resolver
|
||||||
|
|||||||
Reference in New Issue
Block a user