memories: restore early-era Snapchat unix-epoch filenames
The recent blanket "snapchat-" prefix denylist (43f8f83) rejected ALL
Snapchat-prefixed filenames from timestamp parsing, which fixed the
sequential-ID false positives but also broke real unix-second
filenames from Snapchat's early era. `Snapchat-1383929602.jpg`
(2013-11-08 16:53:22 UTC) now falls through to fs_time — and on files
with broken filesystem metadata, fs_time pins to 1970.
Replace the blanket prefix denial with a tighter discriminator:
- exactly 10 captured digits AND timestamp >= 2011-09-23 (Snapchat
launch) → real unix epoch, accept
- anything else under this prefix (any other length, or 10 digits dated before launch) → sequential ID, reject
This keeps the existing rejections intact:
Snapchat-1021849065.mp4 → 10 digits, 2002 < launch → reject
Snapchat-1751031586660373917.jpg → 19 digits (captured as 16), not 10 → reject
And restores the regression case:
Snapchat-1383929602.jpg → 10 digits, 2013 ≥ launch → accept
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -206,14 +206,20 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
||||
let timestamp_str = captures.get(1)?.as_str();
|
||||
let len = timestamp_str.len();
|
||||
|
||||
// Known apps whose filenames carry sequential IDs that happen to
|
||||
// overlap real epoch values (e.g. `Snapchat-1021849065.mp4` parses
|
||||
// as 2002-05-19, but the file was actually saved in 2021). The
|
||||
// digits alone are indistinguishable from a unix timestamp, so we
|
||||
// dispatch on the source-app prefix instead.
|
||||
const NON_TIMESTAMP_PREFIXES: &[&str] = &["snapchat-"];
|
||||
// Snapchat used real unix-second filenames in its early era
|
||||
// (e.g. `Snapchat-1383929602.jpg` = 2013-11-08), then switched to
|
||||
// monotonic sequential IDs whose digits overlap plausible epoch
|
||||
// ranges (`Snapchat-1021849065.mp4` parses as 2002, actually
|
||||
// saved 2021; `Snapchat-1751031586660373917.jpg` is 19 digits,
|
||||
// truncates to 2002, actually 2016). Discriminate by:
|
||||
// - exactly 10 captured digits AND on/after 2011-09-23 (launch) → real epoch
|
||||
// - anything else under this prefix → sequential ID, fall through
|
||||
// The Snapchat-launch floor catches the 10-digit-2002 case; the
|
||||
// length=10 gate catches the longer sequential IDs (which
|
||||
// get truncated to 16 by the regex above).
|
||||
let lower = filename.to_ascii_lowercase();
|
||||
if NON_TIMESTAMP_PREFIXES.iter().any(|p| lower.starts_with(p)) {
|
||||
let is_snapchat = lower.starts_with("snapchat-");
|
||||
if is_snapchat && len != 10 {
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -265,6 +271,13 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
|
||||
.map(|naive_dt| naive_dt.fixed_offset())
|
||||
.and_then(plausible_filename_date)
|
||||
{
|
||||
// Snapchat launched 2011-09-23. A 10-digit Snapchat filename
|
||||
// dated before that is a sequential ID (e.g.
|
||||
// `Snapchat-1021849065.mp4` parses to 2002), not a real epoch.
|
||||
const SNAPCHAT_LAUNCH_TS: i64 = 1_316_736_000;
|
||||
if is_snapchat && date_time.timestamp() < SNAPCHAT_LAUNCH_TS {
|
||||
return None;
|
||||
}
|
||||
return Some(date_time);
|
||||
}
|
||||
|
||||
@@ -652,21 +665,31 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_date_from_filename_snapchat_should_not_match() {
|
||||
// Snapchat-prefixed filenames carry sequential app-assigned IDs
|
||||
// that happen to fall inside plausible epoch ranges. The two
|
||||
// reported cases:
|
||||
// Snapchat-1021849065.mp4 → 10 digits → 2002-05-19
|
||||
// Snapchat-1751031586660373917.jpg → 19 digits → 2002-09-09
|
||||
// Real save dates are 2021 and 2016 respectively per
|
||||
// FileModifyDate; the prefix denylist forces a fall-through to
|
||||
// fs_time.
|
||||
fn test_extract_date_from_filename_snapchat_sequential_ids_rejected() {
|
||||
// Modern Snapchat-prefixed filenames carry sequential app-assigned
|
||||
// IDs whose digits happen to fall inside plausible epoch ranges
|
||||
// when truncated. Reported cases (real save dates per FileModifyDate):
|
||||
// Snapchat-1021849065.mp4 → 10 digits → 2002-05-19 (saved 2021)
|
||||
// Snapchat-1751031586660373917.jpg → 19 digits → 2002-09-09 (saved 2016)
|
||||
// We discriminate by length + Snapchat-launch floor: only exactly
|
||||
// 10 digits AND post-2011-09-23 (Snapchat launch) is treated as
|
||||
// a real unix epoch. Anything else falls through to fs_time.
|
||||
assert!(extract_date_from_filename("Snapchat-1021849065.mp4").is_none());
|
||||
assert!(extract_date_from_filename("Snapchat-1751031586660373917.jpg").is_none());
|
||||
// Case-insensitive match — lowercase variant should also reject.
|
||||
assert!(extract_date_from_filename("snapchat-1021849065.mp4").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_date_from_filename_snapchat_early_era_unix_epoch() {
|
||||
// Early Snapchat (2013-2014ish) wrote real unix-second filenames.
|
||||
// Snapchat-1383929602.jpg → 1383929602 = 2013-11-08 16:53:22 UTC.
|
||||
// The blanket-prefix denial introduced for sequential IDs broke
|
||||
// these — restore via a length=10 + post-launch sanity gate.
|
||||
let date_time = extract_date_from_filename("Snapchat-1383929602.jpg").unwrap();
|
||||
assert_eq!(date_time.timestamp(), 1383929602);
|
||||
}
|
||||
|
||||
// The obsolete `test_memory_date_priority_*` tests covered the old
|
||||
// request-time waterfall in `get_memory_date_with_priority`. Their
|
||||
// replacement lives in `crate::date_resolver::tests` (resolver
|
||||
|
||||
Reference in New Issue
Block a user