Merge pull request 'feature/canonical-date-taken' (#76) from feature/canonical-date-taken into master

Reviewed-on: #76
This commit was merged in pull request #76.
This commit is contained in:
2026-05-06 21:15:57 +00:00
13 changed files with 1263 additions and 473 deletions

View File

@@ -364,6 +364,53 @@ Runs in background thread with two-tier strategy:
- Batch queries EXIF DB to detect new files - Batch queries EXIF DB to detect new files
- Configurable via `WATCH_QUICK_INTERVAL_SECONDS` and `WATCH_FULL_INTERVAL_SECONDS` - Configurable via `WATCH_QUICK_INTERVAL_SECONDS` and `WATCH_FULL_INTERVAL_SECONDS`
**Canonical date_taken pipeline (`src/date_resolver.rs`).** Every row's
`image_exif.date_taken` is populated at ingest by a four-step waterfall;
which step won is recorded in `image_exif.date_taken_source` so the
per-tick drain can re-resolve weak entries when better tools become
available, and so the UI/debug surface can answer "why did this photo
land on this date?". Order:
1. **`exif`** — kamadak-exif `DateTime` / `DateTimeOriginal`. Fast,
in-process, image-only.
2. **`exiftool`** — shell-out fallback for tags kamadak can't reach:
QuickTime/MP4 (`MediaCreateDate`, `TrackCreateDate`, `CreateDate`),
Apple's `ContentCreateDate`, MakerNote sub-IFDs. Required for
videos to land a real date. Single-file at ingest; the per-tick
drain feeds the whole batch through one `exiftool -@ -` subprocess.
Degrades silently when `exiftool` isn't on PATH (resolver caches the
"available" check via `OnceLock`).
3. **`filename`** — `extract_date_from_filename` in `memories.rs`
matches screenshot, chat-export, and timestamp-named patterns.
4. **`fs_time`** — `earliest_fs_time(metadata)` (earlier of created /
modified). Last resort.
Notable behavior change vs. the pre-2026-05 request-time logic:
**EXIF beats filename when both are present.** A photo named
`Screenshot_2014-06-01.png` whose EXIF `DateTime` is 2021 now appears
under 2021, not 2014 — on the theory that EXIF is more reliable than
import-named filenames. The reverse case (no EXIF, filename has a
date) is unchanged.
The `backfill_missing_date_taken` drain (`src/main.rs`) runs every
watcher tick alongside `backfill_unhashed_backlog`. It loads up to
`DATE_BACKFILL_MAX_PER_TICK` rows (default 500) where
`date_taken IS NULL OR date_taken_source = 'fs_time'` (backed by the
`idx_image_exif_date_backfill` partial index), runs the waterfall
batch via `resolve_dates_batch`, and writes results via the
`backfill_date_taken` DAO method (touches only `date_taken` +
`date_taken_source` so EXIF / hash / perceptual columns are
preserved). `filename`-sourced rows are intentionally not re-resolved
— the regex is authoritative when it matches, and re-running exiftool
won't change the answer.
`/memories` is a single SQL query against this column
(`get_memories_in_window` in `src/database/mod.rs`), using
`strftime('%m-%d' | '%W' | '%m', date_taken, 'unixepoch', tz)` for
calendar matching with the client's timezone offset. The pre-rewrite
version stat'd every row and walked the entire library tree — at
~14k photos this took 10–15 s; the rewrite is single-digit ms.
**EXIF Extraction:** **EXIF Extraction:**
- Uses `kamadak-exif` crate - Uses `kamadak-exif` crate
- Supports: JPEG, TIFF, RAW (NEF, CR2, CR3), HEIF/HEIC, PNG, WebP - Supports: JPEG, TIFF, RAW (NEF, CR2, CR3), HEIF/HEIC, PNG, WebP
@@ -534,6 +581,7 @@ Optional:
```bash ```bash
WATCH_QUICK_INTERVAL_SECONDS=60 # Quick scan interval WATCH_QUICK_INTERVAL_SECONDS=60 # Quick scan interval
WATCH_FULL_INTERVAL_SECONDS=3600 # Full scan interval WATCH_FULL_INTERVAL_SECONDS=3600 # Full scan interval
DATE_BACKFILL_MAX_PER_TICK=500 # Cap on canonical-date drain per watcher tick
OTLP_OTLS_ENDPOINT=http://... # OpenTelemetry collector (release builds) OTLP_OTLS_ENDPOINT=http://... # OpenTelemetry collector (release builds)
# AI Insights Configuration # AI Insights Configuration

View File

@@ -0,0 +1,2 @@
-- Revert the canonical-date source tracking added by this migration's
-- `up.sql`. Drop the partial index first — it references the column.
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
-- Requires SQLite 3.35+ (ALTER TABLE ... DROP COLUMN support).
ALTER TABLE image_exif DROP COLUMN date_taken_source;

View File

@@ -0,0 +1,24 @@
-- Tracks where a row's `date_taken` was sourced so the canonical-date
-- waterfall (kamadak-exif → exiftool → filename → earliest_fs_time) is
-- visible to debugging and to the per-tick backfill drain that re-runs
-- weak sources once stronger ones become available (e.g. exiftool gets
-- installed on a deploy that didn't have it). See CLAUDE.md → Memories
-- canonical-date pipeline.
--
-- Values:
-- 'exif'     — kamadak-exif read DateTime/DateTimeOriginal directly
-- 'exiftool' — exiftool fallback caught a video / MakerNote / QuickTime tag
-- 'filename' — extract_date_from_filename matched a known pattern
-- 'fs_time'  — fell through to earliest_fs_time(metadata)
--
-- NULL when `date_taken` itself is NULL (no source resolved the date).
--
-- ADD COLUMN with no default is a metadata-only change in SQLite — the
-- table is not rewritten, so this is cheap even on large libraries.
ALTER TABLE image_exif ADD COLUMN date_taken_source TEXT;
-- Partial index for the per-tick backfill drain: targets rows that need
-- re-resolution (no date yet, or only the weakest source resolved it).
-- Filename-sourced rows are intentionally excluded — the regex is
-- authoritative when it matches and re-running exiftool wouldn't change
-- the answer.
CREATE INDEX idx_image_exif_date_backfill
ON image_exif (library_id, id)
WHERE date_taken IS NULL OR date_taken_source = 'fs_time';

View File

@@ -0,0 +1,9 @@
-- Reverting this migration is a no-op: the labels we wrote in `up.sql`
-- are correct under any state of the schema (every dated row was indeed
-- exif-sourced before the resolver landed), and there's no signal that
-- distinguishes "labelled by this migration" from "labelled by the
-- ingest path post-resolver". Clearing them would break the drain's
-- eligibility filter again.
--
-- The companion migration `2026-05-06-000000_add_date_taken_source` is
-- the one to revert if you need to remove the column entirely.

View File

@@ -0,0 +1,20 @@
-- Backfill `date_taken_source` for rows that pre-date the canonical-date
-- pipeline. Before the resolver landed, `image_exif.date_taken` could
-- only be populated via `exif::extract_exif_from_path` (kamadak-exif)
-- on the file-watcher, upload, or GPS-write paths. The resolver column
-- migration added `date_taken_source` defaulting to NULL, so every
-- historical row with a date is currently unlabelled — and the
-- per-tick drain skips them because its eligibility predicate is
-- `date_taken IS NULL OR date_taken_source = 'fs_time'`.
--
-- Label them `'exif'` once and let the drain take over from here. Safe
-- because every code path that wrote `date_taken` prior to the
-- resolver was a kamadak-exif read — there was no other source.
--
-- Idempotent: re-running this migration on a DB that has already been
-- backfilled is a no-op (the WHERE clause matches nothing the second
-- time around).
--
-- After this runs, the only rows still matching the drain's partial
-- index predicate are the genuinely unresolved ones.
UPDATE image_exif
SET date_taken_source = 'exif'
WHERE date_taken IS NOT NULL
AND date_taken_source IS NULL;

View File

@@ -9,6 +9,21 @@ use crate::database::models::{
}; };
use crate::otel::trace_db_call; use crate::otel::trace_db_call;
/// Decoded shape for `get_memories_in_window`'s raw `sql_query`. Diesel's
/// query DSL doesn't expose strftime, so the memories filter is hand-
/// written SQL — but the returned columns are simple enough that a small
/// `QueryableByName` struct suffices, kept private to this module.
#[derive(diesel::QueryableByName)]
#[allow(dead_code)] // fields read via Diesel's QueryableByName derive
struct MemoriesWindowRow {
    /// Library-relative path of the row.
    #[diesel(sql_type = diesel::sql_types::Text)]
    rel_path: String,
    /// Canonical capture time, unix seconds. Non-nullable here because
    /// the SQL filters on `date_taken IS NOT NULL`.
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    date_taken: i64,
    /// Last-modified timestamp, unix seconds.
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    last_modified: i64,
}
/// Wire shape for a single member of a duplicate group, returned by /// Wire shape for a single member of a duplicate group, returned by
/// `list_duplicates_*` and `lookup_duplicate_row`. Carries everything /// `list_duplicates_*` and `lookup_duplicate_row`. Carries everything
/// the Apollo modal needs to render a member tile and its meta line — /// the Apollo modal needs to render a member tile and its meta line —
@@ -396,6 +411,63 @@ pub trait ExifDao: Sync + Send {
size_bytes: i64, size_bytes: i64,
) -> Result<(), DbError>; ) -> Result<(), DbError>;
/// Return image_exif rows that need their `date_taken` re-resolved by
/// the canonical-date waterfall (see `crate::date_resolver`):
/// either no source ever ran (`date_taken IS NULL`), or only the
/// weakest fallback resolved it (`date_taken_source = 'fs_time'`).
/// Returns `(library_id, rel_path)` pairs scoped to `library_id`,
/// capped at `limit`, oldest `id` first. Backed by the partial index
/// `idx_image_exif_date_backfill`.
fn get_rows_needing_date_backfill(
    &mut self,
    context: &opentelemetry::Context,
    library_id: i32,
    limit: i64,
) -> Result<Vec<(i32, String)>, DbError>;
/// Persist a resolver result for an existing row. Touches `date_taken`
/// and `date_taken_source` only — leaves all other columns alone so
/// the drain doesn't accidentally clobber EXIF/hash/perceptual data
/// the watcher / GPS-write path may have already written.
/// Matching zero rows (unknown `library_id` / `rel_path`) is a silent
/// no-op, not an error.
fn backfill_date_taken(
    &mut self,
    context: &opentelemetry::Context,
    library_id: i32,
    rel_path: &str,
    date_taken: i64,
    source: &str,
) -> Result<(), DbError>;
/// Single-query backend for `/memories`. Returns
/// `(rel_path, date_taken, last_modified)` for rows in `library_id`
/// whose `date_taken` falls within `[now - years_back y, now]` and
/// whose calendar position matches the request's span:
/// - `"day"`   — same month + day-of-month (any year)
/// - `"week"`  — same week-of-year (SQLite `%W`, Monday-anchored —
///               close to but not exactly ISO 8601 week numbering;
///               boundary cases at year-start/end can shift by ±1
///               vs the prior request-time `iso_week()` filter)
/// - `"month"` — same month (any year)
///
/// `tz_offset_minutes` is applied to both sides of the strftime
/// comparison so the calendar match is in the user's local time.
/// Backed by the `(library_id, date_taken)` index.
///
/// This is the single-SQL replacement for the EXIF-loop +
/// WalkDir-fallback that powered `/memories` previously; it's
/// correct only because the canonical-date waterfall at ingest
/// (`crate::date_resolver`) populates `date_taken` for every row
/// it can resolve.
fn get_memories_in_window(
    &mut self,
    context: &opentelemetry::Context,
    library_id: i32,
    span_token: &str,
    years_back: i32,
    tz_offset_minutes: i32,
) -> Result<Vec<(String, i64, i64)>, DbError>;
/// Return image rows that have a `content_hash` but no `phash_64`, /// Return image rows that have a `content_hash` but no `phash_64`,
/// oldest first. Used by the `backfill_perceptual_hash` binary. /// oldest first. Used by the `backfill_perceptual_hash` binary.
/// Filters by image extension at the DB layer to avoid ever asking /// Filters by image extension at the DB layer to avoid ever asking
@@ -730,6 +802,7 @@ impl ExifDao for SqliteExifDao {
shutter_speed.eq(&exif_data.shutter_speed), shutter_speed.eq(&exif_data.shutter_speed),
iso.eq(&exif_data.iso), iso.eq(&exif_data.iso),
date_taken.eq(&exif_data.date_taken), date_taken.eq(&exif_data.date_taken),
date_taken_source.eq(&exif_data.date_taken_source),
last_modified.eq(&exif_data.last_modified), last_modified.eq(&exif_data.last_modified),
)) ))
.execute(connection.deref_mut()) .execute(connection.deref_mut())
@@ -1055,6 +1128,117 @@ impl ExifDao for SqliteExifDao {
.map_err(|_| DbError::new(DbErrorKind::UpdateError)) .map_err(|_| DbError::new(DbErrorKind::UpdateError))
} }
fn get_rows_needing_date_backfill(
&mut self,
context: &opentelemetry::Context,
library_id_val: i32,
limit: i64,
) -> Result<Vec<(i32, String)>, DbError> {
trace_db_call(
context,
"query",
"get_rows_needing_date_backfill",
|_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
// The partial index is on `(library_id, id) WHERE date_taken
// IS NULL OR date_taken_source = 'fs_time'`, so the planner
// hits it directly when both predicates are present.
image_exif
.filter(library_id.eq(library_id_val))
.filter(date_taken.is_null().or(date_taken_source.eq("fs_time")))
.select((library_id, rel_path))
.order(id.asc())
.limit(limit)
.load::<(i32, String)>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error"))
},
)
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn backfill_date_taken(
&mut self,
context: &opentelemetry::Context,
library_id_val: i32,
rel_path_val: &str,
date_taken_val: i64,
source: &str,
) -> Result<(), DbError> {
trace_db_call(context, "update", "backfill_date_taken", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
diesel::update(
image_exif
.filter(library_id.eq(library_id_val))
.filter(rel_path.eq(rel_path_val)),
)
.set((date_taken.eq(date_taken_val), date_taken_source.eq(source)))
.execute(connection.deref_mut())
.map(|_| ())
.map_err(|_| anyhow::anyhow!("Update error"))
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
fn get_memories_in_window(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
span_token: &str,
years_back: i32,
tz_offset_minutes: i32,
) -> Result<Vec<(String, i64, i64)>, DbError> {
trace_db_call(context, "query", "get_memories_in_window", |_span| {
// strftime pattern is span-dependent; the rest of the WHERE
// clause is shared. Only `%m-%d`, `%W`, `%m` are accepted —
// anything else is a programmer error.
let pattern = match span_token {
"day" => "%m-%d",
"week" => "%W",
"month" => "%m",
_ => return Err(anyhow::anyhow!("invalid span token: {}", span_token)),
};
// SQLite's date modifiers want a string like `'-480 minutes'`
// (signed) or `'-15 years'`. Use the `+` flag so positive
// offsets render as `+480 minutes`.
let tz_modifier = format!("{:+} minutes", tz_offset_minutes);
let years_modifier = format!("-{} years", years_back);
let sql = format!(
"SELECT rel_path, date_taken, last_modified \
FROM image_exif \
WHERE library_id = ?1 \
AND date_taken IS NOT NULL \
AND date_taken <= unixepoch('now') \
AND date_taken >= unixepoch('now', ?2) \
AND strftime('{p}', date_taken, 'unixepoch', ?3) \
= strftime('{p}', 'now', ?3)",
p = pattern,
);
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
diesel::sql_query(sql)
.bind::<diesel::sql_types::Integer, _>(library_id)
.bind::<diesel::sql_types::Text, _>(years_modifier)
.bind::<diesel::sql_types::Text, _>(tz_modifier)
.load::<MemoriesWindowRow>(connection.deref_mut())
.map(|rows| {
rows.into_iter()
.map(|r| (r.rel_path, r.date_taken, r.last_modified))
.collect()
})
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_by_content_hash( fn find_by_content_hash(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
@@ -1819,6 +2003,7 @@ mod exif_dao_tests {
size_bytes: None, size_bytes: None,
phash_64: None, phash_64: None,
dhash_64: None, dhash_64: None,
date_taken_source: None,
}, },
) )
.expect("insert exif row"); .expect("insert exif row");
@@ -1931,4 +2116,205 @@ mod exif_dao_tests {
// Unknown library: zero, no error. // Unknown library: zero, no error.
assert_eq!(dao.count_for_library(&ctx(), 999).unwrap(), 0); assert_eq!(dao.count_for_library(&ctx(), 999).unwrap(), 0);
} }
/// Seed one `image_exif` row with an explicit `date_taken` /
/// `date_taken_source` pair — shared fixture for the canonical-date
/// drain tests below.
fn insert_row_with_source(
    dao: &mut SqliteExifDao,
    library: i32,
    path: &str,
    date: Option<i64>,
    source: Option<&str>,
) {
    let row = InsertImageExif {
        library_id: library,
        file_path: path.to_string(),
        date_taken: date,
        date_taken_source: source.map(str::to_string),
        created_time: 0,
        last_modified: 0,
        camera_make: None,
        camera_model: None,
        lens_model: None,
        width: None,
        height: None,
        orientation: None,
        gps_latitude: None,
        gps_longitude: None,
        gps_altitude: None,
        focal_length: None,
        aperture: None,
        shutter_speed: None,
        iso: None,
        content_hash: None,
        size_bytes: None,
        phash_64: None,
        dhash_64: None,
    };
    dao.store_exif(&ctx(), row).expect("insert exif row");
}
#[test]
fn get_rows_needing_date_backfill_returns_null_and_fs_time() {
    let mut dao = setup_two_libraries();
    // One row per source kind: NULL and fs_time are drain-eligible;
    // filename and exif are not.
    insert_row_with_source(&mut dao, 1, "main/null.jpg", None, None);
    insert_row_with_source(&mut dao, 1, "main/fs.jpg", Some(123), Some("fs_time"));
    insert_row_with_source(&mut dao, 1, "main/name.jpg", Some(456), Some("filename"));
    insert_row_with_source(&mut dao, 1, "main/real.jpg", Some(789), Some("exif"));
    // Eligible row in a different library — must never leak across.
    insert_row_with_source(&mut dao, 2, "archive/null.jpg", None, None);
    let mut paths: Vec<String> = dao
        .get_rows_needing_date_backfill(&ctx(), 1, 100)
        .unwrap()
        .into_iter()
        .map(|(_, p)| p)
        .collect();
    paths.sort();
    assert_eq!(
        paths,
        vec!["main/fs.jpg".to_string(), "main/null.jpg".to_string()],
        "only NULL and fs_time rows in library 1 are eligible"
    );
}
#[test]
fn backfill_date_taken_writes_date_and_source_only() {
    let mut dao = setup_two_libraries();
    insert_row_with_source(&mut dao, 1, "main/x.jpg", None, None);
    // Seed an unrelated column via its own DAO path so we can prove the
    // date backfill leaves it alone.
    dao.backfill_content_hash(&ctx(), 1, "main/x.jpg", "deadbeef", 1024)
        .unwrap();
    dao.backfill_date_taken(&ctx(), 1, "main/x.jpg", 1700000000, "exiftool")
        .unwrap();
    let fetched = dao.get_exif(&ctx(), "main/x.jpg").unwrap().unwrap();
    // The two target columns were updated…
    assert_eq!(fetched.date_taken, Some(1700000000));
    assert_eq!(fetched.date_taken_source, Some("exiftool".to_string()));
    // …and the unrelated columns survived untouched.
    assert_eq!(fetched.content_hash, Some("deadbeef".to_string()));
    assert_eq!(fetched.size_bytes, Some(1024));
}
#[test]
fn get_memories_in_window_day_matches_only_same_md_in_year_window() {
    let mut dao = setup_two_libraries();
    // Insert rows exactly N *calendar* years before now via
    // `checked_sub_months(12 * N)`, which preserves the calendar month
    // by construction (a Feb-29 anchor clamps to Feb 28 — still the
    // same month). The previous 365-days-per-year approximation drifted
    // by ~one day per intervening leap year, which could push a row
    // into the adjacent month on month-boundary days (e.g. now =
    // 2024-05-31 minus 365 days lands on 2023-06-01) and made this
    // test flaky a few days per year.
    let now = chrono::Utc::now();
    let years_ago = |n: u32| {
        now.checked_sub_months(chrono::Months::new(12 * n))
            .expect("date arithmetic in range")
            .timestamp()
    };
    insert_row_with_source(&mut dao, 1, "y1.jpg", Some(years_ago(1)), Some("exif"));
    insert_row_with_source(&mut dao, 1, "y5.jpg", Some(years_ago(5)), Some("exif"));
    insert_row_with_source(&mut dao, 1, "y10.jpg", Some(years_ago(10)), Some("exif"));
    // Outside the 20-year window:
    insert_row_with_source(&mut dao, 1, "y21.jpg", Some(years_ago(21)), Some("exif"));
    // Future row: must be excluded by the `<= now` clause.
    insert_row_with_source(
        &mut dao,
        1,
        "future.jpg",
        Some(now.timestamp() + 86_400),
        Some("exif"),
    );
    // No date — never returned regardless of source.
    insert_row_with_source(&mut dao, 1, "nodate.jpg", None, None);
    // Month span: y1/y5/y10 share today's calendar month by
    // construction; y21 is outside years_back, future is > now, and
    // nodate has no date at all.
    let rows = dao
        .get_memories_in_window(&ctx(), 1, "month", 20, 0)
        .unwrap();
    let paths: std::collections::HashSet<String> =
        rows.into_iter().map(|(p, _, _)| p).collect();
    assert!(
        paths.contains("y1.jpg") && paths.contains("y5.jpg") && paths.contains("y10.jpg"),
        "month span should include all in-window rows: {:?}",
        paths
    );
    assert!(
        !paths.contains("y21.jpg"),
        "21-year-old row should fall outside the years_back window"
    );
    assert!(!paths.contains("future.jpg"), "future row must be excluded");
    assert!(
        !paths.contains("nodate.jpg"),
        "row without date must never appear"
    );
}
#[test]
fn get_memories_in_window_scopes_by_library_id() {
    let mut dao = setup_two_libraries();
    // Exactly one calendar year back keeps the row inside today's month
    // regardless of leap years; the old `365 * 86_400` constant could
    // slip into the adjacent month on month-boundary days (e.g. now =
    // 2024-05-31 minus 365 days lands on 2023-06-01), making the test
    // flaky.
    let year_ago = chrono::Utc::now()
        .checked_sub_months(chrono::Months::new(12))
        .expect("date arithmetic in range")
        .timestamp();
    insert_row_with_source(&mut dao, 1, "main/x.jpg", Some(year_ago), Some("exif"));
    insert_row_with_source(&mut dao, 2, "archive/x.jpg", Some(year_ago), Some("exif"));
    let lib1 = dao
        .get_memories_in_window(&ctx(), 1, "month", 20, 0)
        .unwrap();
    let lib2 = dao
        .get_memories_in_window(&ctx(), 2, "month", 20, 0)
        .unwrap();
    // Each library sees only its own row.
    assert_eq!(lib1.len(), 1);
    assert_eq!(lib1[0].0, "main/x.jpg");
    assert_eq!(lib2.len(), 1);
    assert_eq!(lib2[0].0, "archive/x.jpg");
}
#[test]
fn get_memories_in_window_rejects_unknown_span_token() {
    let mut dao = setup_two_libraries();
    // Only "day" / "week" / "month" are valid span tokens.
    assert!(dao
        .get_memories_in_window(&ctx(), 1, "decade", 20, 0)
        .is_err());
}
} }

View File

@@ -63,6 +63,12 @@ pub struct InsertImageExif {
pub phash_64: Option<i64>, pub phash_64: Option<i64>,
/// 64-bit dHash (gradient). NULL for videos and decode failures. /// 64-bit dHash (gradient). NULL for videos and decode failures.
pub dhash_64: Option<i64>, pub dhash_64: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`:
/// `"exif"` | `"exiftool"` | `"filename"` | `"fs_time"`. NULL when
/// `date_taken` is NULL (no source resolved it). The per-tick backfill
/// drain re-resolves rows whose source is `"fs_time"` once exiftool
/// has had a chance to run.
pub date_taken_source: Option<String>,
} }
// Field order matches the post-migration column order in `image_exif`. // Field order matches the post-migration column order in `image_exif`.
@@ -98,6 +104,8 @@ pub struct ImageExif {
pub duplicate_of_hash: Option<String>, pub duplicate_of_hash: Option<String>,
/// Unix seconds at which the resolve was committed. /// Unix seconds at which the resolve was committed.
pub duplicate_decided_at: Option<i64>, pub duplicate_decided_at: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`.
pub date_taken_source: Option<String>,
} }
#[derive(Insertable)] #[derive(Insertable)]

View File

@@ -125,6 +125,7 @@ diesel::table! {
dhash_64 -> Nullable<BigInt>, dhash_64 -> Nullable<BigInt>,
duplicate_of_hash -> Nullable<Text>, duplicate_of_hash -> Nullable<Text>,
duplicate_decided_at -> Nullable<BigInt>, duplicate_decided_at -> Nullable<BigInt>,
date_taken_source -> Nullable<Text>,
} }
} }

495
src/date_resolver.rs Normal file
View File

@@ -0,0 +1,495 @@
//! Canonical `date_taken` resolution for ingest and the per-tick backfill
//! drain.
//!
//! The waterfall (in order; first hit wins):
//!
//! 1. **kamadak-exif** — fast in-process EXIF read. Already done by
//! `exif::extract_exif_from_path` for image-bearing formats; callers
//! pass that result in via `prior_exif_date` so we don't re-parse.
//! 2. **exiftool** — shell-out fallback that reaches places kamadak-exif
//! can't: QuickTime/MP4 (`MediaCreateDate`, `TrackCreateDate`,
//! `CreateDate`), Apple's `ContentCreateDate`, MakerNote sub-IFDs.
//! Required for videos to land a real date; degrades silently when
//! `exiftool` isn't on PATH.
//! 3. **filename regex** — `memories::extract_date_from_filename` covers
//! common screenshot / chat-export / timestamp-named patterns.
//! 4. **earliest filesystem time** — `utils::earliest_fs_time` picks the
//! earlier of created / modified, which on copied-from-backup files is
//! a better proxy for content age than either alone.
//!
//! `DateSource` records which step won so the per-tick drain can re-resolve
//! weak sources (`fs_time`) once exiftool becomes available, and so the
//! UI/debug surface can answer "why does this photo show up under this
//! date." Note that the previous `/memories` request-time logic preferred
//! filename even when EXIF was present; this resolver inverts that — EXIF
//! is authoritative when it exists, on the theory that an EXIF
//! `DateTimeOriginal` is more reliable than a filename pattern that may
//! reflect import time rather than capture time.
use std::collections::HashMap;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::OnceLock;
use chrono::{DateTime, Utc};
use log::{debug, trace, warn};
use serde::Deserialize;
use crate::utils::earliest_fs_time;
/// Which step of the canonical-date waterfall produced a timestamp.
/// Persisted (via [`DateSource::as_str`]) into
/// `image_exif.date_taken_source`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DateSource {
    /// In-process kamadak-exif read of DateTime / DateTimeOriginal.
    Exif,
    /// exiftool shell-out found a video / MakerNote / QuickTime tag.
    Exiftool,
    /// A known pattern matched in `extract_date_from_filename`.
    Filename,
    /// Last resort: `earliest_fs_time(metadata)`.
    FsTime,
}

impl DateSource {
    /// Stable lowercase label stored in the DB; the backfill drain's
    /// SQL predicate matches against these exact strings.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Exif => "exif",
            Self::Exiftool => "exiftool",
            Self::Filename => "filename",
            Self::FsTime => "fs_time",
        }
    }
}
/// Outcome of the waterfall for one file.
#[derive(Copy, Clone, Debug)]
pub struct ResolvedDate {
    /// Resolved capture/creation time, unix seconds.
    pub timestamp: i64,
    /// Which waterfall step produced `timestamp`.
    pub source: DateSource,
}
/// Resolve the canonical date for one file. `prior_exif_date` is an
/// already-extracted kamadak-exif timestamp, if the caller has one, so
/// the file isn't re-parsed. Returns `None` only when every waterfall
/// step misses — rare for on-disk files, since the fs-time fallback
/// almost always produces something.
pub fn resolve_date_taken(path: &Path, prior_exif_date: Option<i64>) -> Option<ResolvedDate> {
    // Step 1: the caller's kamadak-exif date wins outright.
    if let Some(timestamp) = prior_exif_date {
        return Some(ResolvedDate {
            timestamp,
            source: DateSource::Exif,
        });
    }
    // Step 2: exiftool shell-out (silently skipped when not installed).
    if let Some(timestamp) = exiftool_date_single(path) {
        return Some(ResolvedDate {
            timestamp,
            source: DateSource::Exiftool,
        });
    }
    // Step 3: date pattern embedded in the filename.
    let from_name = path
        .file_name()
        .and_then(|f| f.to_str())
        .and_then(crate::memories::extract_date_from_filename);
    if let Some(dt) = from_name {
        return Some(ResolvedDate {
            timestamp: dt.timestamp(),
            source: DateSource::Filename,
        });
    }
    // Step 4: earliest of created / modified filesystem times.
    let meta = std::fs::metadata(path).ok()?;
    let fs_time = earliest_fs_time(&meta)?;
    let dt: DateTime<Utc> = fs_time.into();
    Some(ResolvedDate {
        timestamp: dt.timestamp(),
        source: DateSource::FsTime,
    })
}
/// Batch form of the waterfall. Caller-known kamadak dates come in via
/// `prior_exif_dates` (keyed by path); every miss goes through a single
/// exiftool subprocess, then the per-file filename / fs-time fallbacks.
///
/// The per-tick backfill drain is the primary caller — it loads ~500
/// rows at a time and drains the lot with one subprocess.
pub fn resolve_dates_batch(
    paths: &[PathBuf],
    prior_exif_dates: &HashMap<PathBuf, i64>,
) -> HashMap<PathBuf, ResolvedDate> {
    let mut resolved: HashMap<PathBuf, ResolvedDate> = HashMap::new();
    // Step 1: seed with the caller-known kamadak dates; collect misses.
    let mut pending: Vec<&Path> = Vec::with_capacity(paths.len());
    for path in paths {
        match prior_exif_dates.get(path) {
            Some(&timestamp) => {
                resolved.insert(
                    path.clone(),
                    ResolvedDate {
                        timestamp,
                        source: DateSource::Exif,
                    },
                );
            }
            None => pending.push(path.as_path()),
        }
    }
    // Step 2: one exiftool subprocess covers every miss at once.
    if !pending.is_empty() {
        let exiftool_hits = exiftool_dates_batch(&pending);
        for path in &pending {
            if let Some(&timestamp) = exiftool_hits.get(*path) {
                resolved.insert(
                    path.to_path_buf(),
                    ResolvedDate {
                        timestamp,
                        source: DateSource::Exiftool,
                    },
                );
            }
        }
    }
    // Steps 3–4: per-file fallbacks for anything still unresolved.
    for path in paths {
        if resolved.contains_key(path) {
            continue;
        }
        let from_name = path
            .file_name()
            .and_then(|f| f.to_str())
            .and_then(crate::memories::extract_date_from_filename);
        if let Some(dt) = from_name {
            resolved.insert(
                path.clone(),
                ResolvedDate {
                    timestamp: dt.timestamp(),
                    source: DateSource::Filename,
                },
            );
            continue;
        }
        if let Ok(meta) = std::fs::metadata(path) {
            if let Some(fs_time) = earliest_fs_time(&meta) {
                let dt: DateTime<Utc> = fs_time.into();
                resolved.insert(
                    path.clone(),
                    ResolvedDate {
                        timestamp: dt.timestamp(),
                        source: DateSource::FsTime,
                    },
                );
            }
        }
    }
    resolved
}
/// Tag priority for exiftool extraction. First non-zero value wins.
///
/// Photos: `DateTimeOriginal` (original capture) and `SubSecDateTimeOriginal`
/// are most authoritative. `CreateDate` is a common alias and a sane fallback.
///
/// Videos: `MediaCreateDate` / `TrackCreateDate` are the QuickTime/MP4
/// timestamps. `ContentCreateDate` is Apple's iOS-set tag; it often
/// reflects local capture time on iPhone exports better than the others.
///
/// Notably absent: `FileModifyDate` / `FileAccessDate` — those are
/// filesystem-derived and the resolver covers them via the `fs_time`
/// fallback. Letting exiftool pull them here would mask "no real EXIF
/// date" with a `source = exiftool` row that's no better than fs_time.
///
/// NOTE: keep this order in sync with the tag walk in
/// `parse_exiftool_json` — both encode the same priority.
const EXIFTOOL_DATE_TAGS: &[&str] = &[
    "DateTimeOriginal",
    "SubSecDateTimeOriginal",
    "CreateDate",
    "MediaCreateDate",
    "TrackCreateDate",
    "ContentCreateDate",
];
/// Probe for exiftool once per process and cache the answer, so the
/// per-tick backfill doesn't fork a doomed subprocess every iteration
/// on deploys that don't have it installed.
fn exiftool_available() -> bool {
    static AVAIL: OnceLock<bool> = OnceLock::new();
    *AVAIL.get_or_init(|| {
        let probe = Command::new("exiftool")
            .arg("-ver")
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status();
        match probe {
            Ok(status) if status.success() => true,
            // Spawn failure and non-zero exit both mean "unusable".
            _ => {
                warn!("exiftool not on PATH; date_taken waterfall skips that step");
                false
            }
        }
    })
}
/// One-file exiftool invocation for the upload + GPS-write paths, which
/// handle a single file at a time. Batches go through
/// `exiftool_dates_batch` instead so subprocess startup isn't paid per
/// row.
fn exiftool_date_single(path: &Path) -> Option<i64> {
    if !exiftool_available() {
        return None;
    }
    let mut cmd = Command::new("exiftool");
    cmd.args(["-j", "-q", "-d", "%s", "-fast2"]);
    cmd.args(EXIFTOOL_DATE_TAGS.iter().map(|tag| format!("-{}", tag)));
    cmd.arg(path);
    let output = cmd.output().ok()?;
    if !output.status.success() {
        trace!("exiftool exited non-zero for {:?}", path);
        return None;
    }
    // Single input file, so the first parsed entry is the only one.
    let first = parse_exiftool_json(&output.stdout).into_iter().next();
    first.map(|(_, ts)| ts)
}
/// Drain a batch via a single exiftool subprocess. Paths are fed on stdin
/// via `-@ -`, so the argv stays short regardless of batch size — safe for
/// libraries with very long path components.
fn exiftool_dates_batch(paths: &[&Path]) -> HashMap<PathBuf, i64> {
    let mut out = HashMap::new();
    if paths.is_empty() || !exiftool_available() {
        return out;
    }
    let mut cmd = Command::new("exiftool");
    // -j = JSON output, -q = quiet, -d %s = render dates as unix-seconds
    // strings, -fast2 = skip slow whole-file scans (see exiftool docs).
    cmd.arg("-j").arg("-q").arg("-d").arg("%s").arg("-fast2");
    for tag in EXIFTOOL_DATE_TAGS {
        cmd.arg(format!("-{}", tag));
    }
    // `-@ -` tells exiftool to read its file list (argfile) from stdin.
    cmd.arg("-@").arg("-");
    cmd.stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::null());
    let mut child = match cmd.spawn() {
        Ok(c) => c,
        Err(e) => {
            warn!("exiftool batch spawn failed: {}", e);
            return out;
        }
    };
    // The stdin handle is moved into this scope and dropped at its end,
    // which closes the pipe — exiftool then sees EOF and can finish, so
    // the `wait_with_output` below doesn't deadlock.
    if let Some(mut stdin) = child.stdin.take() {
        for p in paths {
            // exiftool's argfile reader treats each line as one path; OS
            // path bytes don't always survive a String round-trip, but
            // every path we get here originated from rel_path / root_path
            // strings already, so to-string-lossy is a non-event.
            if let Err(e) = writeln!(stdin, "{}", p.display()) {
                warn!("exiftool batch stdin write failed: {}", e);
                break;
            }
        }
    }
    let output = match child.wait_with_output() {
        Ok(o) => o,
        Err(e) => {
            warn!("exiftool batch wait failed: {}", e);
            return out;
        }
    };
    // Non-zero exit (e.g. some paths missing) still often carries valid
    // JSON for the files that did parse — keep going.
    if !output.status.success() {
        debug!(
            "exiftool batch exit status {:?}; partial output may still parse",
            output.status.code()
        );
    }
    for (source, ts) in parse_exiftool_json(&output.stdout) {
        out.insert(PathBuf::from(source), ts);
    }
    out
}
/// One row per input file. exiftool emits any tag we asked for that was
/// present, plus the `SourceFile` it was reading. Tags are JSON values
/// because `-d %s` returns the timestamp as a *string* of digits, not a
/// number, when the date parses; absent tags are simply missing keys.
///
/// The tag fields mirror `EXIFTOOL_DATE_TAGS`; `parse_exiftool_json`
/// walks them in that same priority order.
#[derive(Debug, Deserialize)]
struct ExiftoolEntry {
    /// Path exactly as exiftool read it from the argfile.
    #[serde(rename = "SourceFile")]
    source_file: String,
    #[serde(rename = "DateTimeOriginal")]
    date_time_original: Option<serde_json::Value>,
    #[serde(rename = "SubSecDateTimeOriginal")]
    sub_sec_date_time_original: Option<serde_json::Value>,
    #[serde(rename = "CreateDate")]
    create_date: Option<serde_json::Value>,
    #[serde(rename = "MediaCreateDate")]
    media_create_date: Option<serde_json::Value>,
    #[serde(rename = "TrackCreateDate")]
    track_create_date: Option<serde_json::Value>,
    #[serde(rename = "ContentCreateDate")]
    content_create_date: Option<serde_json::Value>,
}
/// Decode exiftool's `-j` output into `(SourceFile, unix_seconds)` pairs,
/// keeping only entries where at least one date tag yielded a positive
/// timestamp. Returns an empty vec on undecodable input.
fn parse_exiftool_json(stdout: &[u8]) -> Vec<(String, i64)> {
    let entries: Vec<ExiftoolEntry> = match serde_json::from_slice(stdout) {
        Ok(entries) => entries,
        Err(e) => {
            // Empty stdout is the normal shape of a total exiftool
            // failure; the caller already logged the non-zero exit, so
            // only warn when there were bytes we failed to decode.
            if !stdout.is_empty() {
                warn!("exiftool JSON parse failed: {}", e);
            }
            return Vec::new();
        }
    };
    let mut resolved = Vec::with_capacity(entries.len());
    for entry in entries {
        // Tag priority, strongest first. exiftool sometimes reports the
        // literal "0000:00:00 00:00:00" for missing-but-allocated date
        // slots, which `-d %s` renders as the unix epoch (0) — treating
        // anything <= 0 as absent lets us fall through to the next tag.
        let candidates = [
            entry.date_time_original.as_ref(),
            entry.sub_sec_date_time_original.as_ref(),
            entry.create_date.as_ref(),
            entry.media_create_date.as_ref(),
            entry.track_create_date.as_ref(),
            entry.content_create_date.as_ref(),
        ];
        let best = candidates
            .into_iter()
            .flatten()
            .find_map(|tag| coerce_to_unix_seconds(tag).filter(|&ts| ts > 0));
        if let Some(ts) = best {
            resolved.push((entry.source_file, ts));
        }
    }
    resolved
}
/// `-d %s` should hand us a numeric string, but exiftool's JSON encoder
/// will emit a number when the tag was defined as numeric in its lib —
/// accept both shapes.
fn coerce_to_unix_seconds(v: &serde_json::Value) -> Option<i64> {
match v {
serde_json::Value::String(s) => s.trim().parse::<i64>().ok(),
serde_json::Value::Number(n) => n.as_i64(),
_ => None,
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_exiftool_json_picks_first_priority_tag() {
        // DateTimeOriginal outranks CreateDate in the waterfall.
        let json = br#"[{
            "SourceFile": "/lib/IMG.jpg",
            "DateTimeOriginal": "1500000000",
            "CreateDate": "1400000000"
        }]"#;
        let parsed = parse_exiftool_json(json);
        assert_eq!(parsed, vec![("/lib/IMG.jpg".to_string(), 1500000000)]);
    }

    #[test]
    fn parse_exiftool_json_falls_through_zeros() {
        // exiftool emits "0000:00:00 00:00:00" → unix epoch 0 with -d %s.
        // The resolver should skip those and pick the next tag.
        let json = br#"[{
            "SourceFile": "/lib/clip.mov",
            "DateTimeOriginal": "0",
            "MediaCreateDate": "1500000000"
        }]"#;
        let parsed = parse_exiftool_json(json);
        assert_eq!(parsed, vec![("/lib/clip.mov".to_string(), 1500000000)]);
    }

    #[test]
    fn parse_exiftool_json_accepts_numeric_values() {
        // exiftool may emit a bare JSON number for numeric-typed tags.
        let json = br#"[{
            "SourceFile": "/lib/a.jpg",
            "CreateDate": 1234567890
        }]"#;
        let parsed = parse_exiftool_json(json);
        assert_eq!(parsed, vec![("/lib/a.jpg".to_string(), 1234567890)]);
    }

    #[test]
    fn parse_exiftool_json_emits_nothing_when_no_tag_present() {
        let json = br#"[{"SourceFile": "/lib/no_dates.bin"}]"#;
        let parsed = parse_exiftool_json(json);
        assert!(parsed.is_empty());
    }

    #[test]
    fn parse_exiftool_json_handles_multiple_entries() {
        let json = br#"[
            {"SourceFile": "/lib/a.jpg", "DateTimeOriginal": "100"},
            {"SourceFile": "/lib/b.jpg", "CreateDate": "200"}
        ]"#;
        let parsed = parse_exiftool_json(json);
        assert_eq!(
            parsed,
            vec![
                ("/lib/a.jpg".to_string(), 100),
                ("/lib/b.jpg".to_string(), 200)
            ]
        );
    }

    // Renamed from `date_source_as_str_round_trip`: the old test never
    // round-tripped anything — it only checked non-emptiness. `as_str`
    // feeds the `date_taken_source` DB column, so the real invariant is
    // that every variant maps to a non-empty AND unique tag (a collision
    // would make two sources indistinguishable to the backfill drain).
    #[test]
    fn date_source_as_str_values_are_nonempty_and_distinct() {
        use std::collections::HashSet;
        let sources = [
            DateSource::Exif,
            DateSource::Exiftool,
            DateSource::Filename,
            DateSource::FsTime,
        ];
        let tags: HashSet<&str> = sources.iter().map(|s| s.as_str()).collect();
        assert_eq!(tags.len(), sources.len(), "duplicate date-source tags");
        assert!(tags.iter().all(|t| !t.is_empty()));
    }

    #[test]
    fn resolve_uses_prior_exif_when_present() {
        // Path doesn't need to exist when prior_exif_date short-circuits.
        let resolved =
            resolve_date_taken(Path::new("/nonexistent/file.jpg"), Some(1700000000)).unwrap();
        assert_eq!(resolved.timestamp, 1700000000);
        assert_eq!(resolved.source, DateSource::Exif);
    }

    #[test]
    fn resolve_filename_when_no_exif_and_file_missing() {
        // No prior EXIF, no exiftool match (file missing), but the filename
        // pattern still matches so the resolver lands on Filename.
        let resolved = resolve_date_taken(
            Path::new("/nonexistent/Screenshot_2014-06-01-20-44-50.png"),
            None,
        )
        .unwrap();
        assert_eq!(resolved.source, DateSource::Filename);
    }

    #[test]
    fn resolve_fs_time_when_only_metadata_available() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("plain.jpg");
        std::fs::File::create(&path).unwrap();
        let resolved = resolve_date_taken(&path, None).unwrap();
        // exiftool may or may not be installed in the test env; either
        // way the file has no EXIF and no filename date, so we should
        // fall to fs_time.
        assert_eq!(resolved.source, DateSource::FsTime);
    }
}

View File

@@ -1508,6 +1508,7 @@ mod tests {
dhash_64: data.dhash_64, dhash_64: data.dhash_64,
duplicate_of_hash: None, duplicate_of_hash: None,
duplicate_decided_at: None, duplicate_decided_at: None,
date_taken_source: data.date_taken_source.clone(),
}) })
} }
@@ -1551,6 +1552,7 @@ mod tests {
dhash_64: data.dhash_64, dhash_64: data.dhash_64,
duplicate_of_hash: None, duplicate_of_hash: None,
duplicate_decided_at: None, duplicate_decided_at: None,
date_taken_source: data.date_taken_source.clone(),
}) })
} }
@@ -1644,6 +1646,37 @@ mod tests {
Ok(()) Ok(())
} }
fn get_rows_needing_date_backfill(
&mut self,
_context: &opentelemetry::Context,
_library_id: i32,
_limit: i64,
) -> Result<Vec<(i32, String)>, DbError> {
Ok(Vec::new())
}
fn backfill_date_taken(
&mut self,
_context: &opentelemetry::Context,
_library_id: i32,
_rel_path: &str,
_date_taken: i64,
_source: &str,
) -> Result<(), DbError> {
Ok(())
}
fn get_memories_in_window(
&mut self,
_context: &opentelemetry::Context,
_library_id: i32,
_span_token: &str,
_years_back: i32,
_tz_offset_minutes: i32,
) -> Result<Vec<(String, i64, i64)>, DbError> {
Ok(Vec::new())
}
fn find_by_content_hash( fn find_by_content_hash(
&mut self, &mut self,
_context: &opentelemetry::Context, _context: &opentelemetry::Context,

View File

@@ -10,6 +10,7 @@ pub mod cleanup;
pub mod content_hash; pub mod content_hash;
pub mod data; pub mod data;
pub mod database; pub mod database;
pub mod date_resolver;
pub mod duplicates; pub mod duplicates;
pub mod error; pub mod error;
pub mod exif; pub mod exif;

View File

@@ -64,6 +64,7 @@ mod auth;
mod content_hash; mod content_hash;
mod data; mod data;
mod database; mod database;
mod date_resolver;
mod duplicates; mod duplicates;
mod error; mod error;
mod exif; mod exif;
@@ -503,6 +504,11 @@ async fn set_image_gps(
}; };
let now = Utc::now().timestamp(); let now = Utc::now().timestamp();
let normalized_path = body.path.replace('\\', "/"); let normalized_path = body.path.replace('\\', "/");
// Re-run the canonical-date waterfall on every GPS write — exiftool
// writing GPS doesn't change the capture date, but if the row was
// previously sourced from `fs_time` the re-read may have given us a
// real EXIF date this time, and we want to upgrade the source.
let resolved_date = date_resolver::resolve_date_taken(&full_path, extracted.date_taken);
let insert_exif = InsertImageExif { let insert_exif = InsertImageExif {
library_id: resolved_library.id, library_id: resolved_library.id,
file_path: normalized_path.clone(), file_path: normalized_path.clone(),
@@ -519,7 +525,7 @@ async fn set_image_gps(
aperture: extracted.aperture.map(|v| v as f32), aperture: extracted.aperture.map(|v| v as f32),
shutter_speed: extracted.shutter_speed, shutter_speed: extracted.shutter_speed,
iso: extracted.iso, iso: extracted.iso,
date_taken: extracted.date_taken, date_taken: resolved_date.map(|r| r.timestamp),
// Created_time is preserved by update_exif (it doesn't touch the // Created_time is preserved by update_exif (it doesn't touch the
// column); pass any int — it's ignored in the UPDATE statement. // column); pass any int — it's ignored in the UPDATE statement.
created_time: now, created_time: now,
@@ -537,6 +543,7 @@ async fn set_image_gps(
// with a usable signal; failure just leaves prior values in place. // with a usable signal; failure just leaves prior values in place.
phash_64: perceptual_hash::compute(&full_path).map(|h| h.phash_64), phash_64: perceptual_hash::compute(&full_path).map(|h| h.phash_64),
dhash_64: perceptual_hash::compute(&full_path).map(|h| h.dhash_64), dhash_64: perceptual_hash::compute(&full_path).map(|h| h.dhash_64),
date_taken_source: resolved_date.map(|r| r.source.as_str().to_string()),
}; };
let updated = { let updated = {
@@ -749,6 +756,10 @@ async fn upload_image(
} }
}; };
let perceptual = perceptual_hash::compute(&uploaded_path); let perceptual = perceptual_hash::compute(&uploaded_path);
let resolved_date = date_resolver::resolve_date_taken(
&uploaded_path,
exif_data.date_taken,
);
let insert_exif = InsertImageExif { let insert_exif = InsertImageExif {
library_id: target_library.id, library_id: target_library.id,
file_path: relative_path.clone(), file_path: relative_path.clone(),
@@ -765,13 +776,15 @@ async fn upload_image(
aperture: exif_data.aperture.map(|v| v as f32), aperture: exif_data.aperture.map(|v| v as f32),
shutter_speed: exif_data.shutter_speed, shutter_speed: exif_data.shutter_speed,
iso: exif_data.iso, iso: exif_data.iso,
date_taken: exif_data.date_taken, date_taken: resolved_date.map(|r| r.timestamp),
created_time: timestamp, created_time: timestamp,
last_modified: timestamp, last_modified: timestamp,
content_hash, content_hash,
size_bytes, size_bytes,
phash_64: perceptual.map(|h| h.phash_64), phash_64: perceptual.map(|h| h.phash_64),
dhash_64: perceptual.map(|h| h.dhash_64), dhash_64: perceptual.map(|h| h.dhash_64),
date_taken_source: resolved_date
.map(|r| r.source.as_str().to_string()),
}; };
if let Ok(mut dao) = exif_dao.lock() { if let Ok(mut dao) = exif_dao.lock() {
@@ -2112,6 +2125,15 @@ fn watch_files(
); );
} }
// Date-taken backfill: drain rows whose canonical date is
// either unresolved or only fs_time-sourced. Independent
// of face detection — runs even on deploys that don't
// configure Apollo, since `/memories` depends on it.
{
let context = opentelemetry::Context::new();
backfill_missing_date_taken(&context, lib, &exif_dao);
}
if is_full_scan { if is_full_scan {
info!( info!(
"Running full scan for library '{}' (scan #{})", "Running full scan for library '{}' (scan #{})",
@@ -2377,6 +2399,16 @@ fn process_new_files(
None None
}; };
// Canonical date_taken via the waterfall — kamadak-exif (already
// computed above) → exiftool fallback for videos / MakerNote /
// QuickTime → filename regex → earliest_fs_time. Source is
// recorded so the per-tick backfill drain can re-run weak
// resolutions later.
let resolved_date = date_resolver::resolve_date_taken(
&file_path,
exif_fields.as_ref().and_then(|e| e.date_taken),
);
let insert_exif = InsertImageExif { let insert_exif = InsertImageExif {
library_id: library.id, library_id: library.id,
file_path: relative_path.clone(), file_path: relative_path.clone(),
@@ -2403,13 +2435,14 @@ fn process_new_files(
.and_then(|e| e.aperture.map(|v| v as f32)), .and_then(|e| e.aperture.map(|v| v as f32)),
shutter_speed: exif_fields.as_ref().and_then(|e| e.shutter_speed.clone()), shutter_speed: exif_fields.as_ref().and_then(|e| e.shutter_speed.clone()),
iso: exif_fields.as_ref().and_then(|e| e.iso), iso: exif_fields.as_ref().and_then(|e| e.iso),
date_taken: exif_fields.as_ref().and_then(|e| e.date_taken), date_taken: resolved_date.map(|r| r.timestamp),
created_time: timestamp, created_time: timestamp,
last_modified: timestamp, last_modified: timestamp,
content_hash, content_hash,
size_bytes, size_bytes,
phash_64: perceptual.map(|h| h.phash_64), phash_64: perceptual.map(|h| h.phash_64),
dhash_64: perceptual.map(|h| h.dhash_64), dhash_64: perceptual.map(|h| h.dhash_64),
date_taken_source: resolved_date.map(|r| r.source.as_str().to_string()),
}; };
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
@@ -2682,6 +2715,103 @@ fn backfill_unhashed_backlog(
backfilled backfilled
} }
/// Drain image_exif rows whose `date_taken` was never resolved or was
/// resolved by the weakest fallback (`fs_time`). Runs the canonical-date
/// waterfall — exiftool batch (one subprocess for the whole tick's
/// rows) → filename regex → earliest_fs_time — and persists each
/// resolution with its source tag. Capped per tick by
/// `DATE_BACKFILL_MAX_PER_TICK` (default 500) so a 14k-row library
/// drains over a few quick-scan ticks without blocking the watcher.
///
/// kamadak-exif is intentionally skipped here: the row already has a
/// NULL date_taken because the ingest path's kamadak-exif call returned
/// nothing, and re-running it would just produce the same answer.
/// exiftool is the meaningful new attempt — it handles videos and
/// MakerNote-hosted dates kamadak can't reach.
fn backfill_missing_date_taken(
context: &opentelemetry::Context,
library: &libraries::Library,
exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
) -> usize {
let cap: i64 = dotenv::var("DATE_BACKFILL_MAX_PER_TICK")
.ok()
.and_then(|s| s.parse().ok())
.filter(|n: &i64| *n > 0)
.unwrap_or(500);
let rows: Vec<(i32, String)> = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
dao.get_rows_needing_date_backfill(context, library.id, cap + 1)
.unwrap_or_default()
};
if rows.is_empty() {
return 0;
}
let more_than_cap = rows.len() as i64 > cap;
let base_path = std::path::Path::new(&library.root_path);
// Build absolute paths and drop rows whose files no longer exist —
// the missing-file scan in library_maintenance retires deleted rows
// separately. Without this filter, NULL-date rows for missing files
// would loop through the drain forever (no source can resolve them).
let mut existing: Vec<(String, PathBuf)> = Vec::with_capacity(rows.len() as usize);
for (_, rel_path) in rows.iter().take(cap as usize) {
let abs = base_path.join(rel_path);
if abs.exists() {
existing.push((rel_path.clone(), abs));
}
}
if existing.is_empty() {
return 0;
}
// One exiftool subprocess for the whole batch; the resolver falls
// through to filename / fs_time per file when exiftool can't supply
// a date (or isn't installed at all).
let paths: Vec<PathBuf> = existing.iter().map(|(_, p)| p.clone()).collect();
let resolved = date_resolver::resolve_dates_batch(&paths, &HashMap::new());
let mut backfilled = 0usize;
let mut unresolved = 0usize;
let mut by_source: HashMap<&'static str, usize> = HashMap::new();
{
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
for (rel_path, abs) in &existing {
let Some(rd) = resolved.get(abs).copied() else {
unresolved += 1;
continue;
};
match dao.backfill_date_taken(
context,
library.id,
rel_path,
rd.timestamp,
rd.source.as_str(),
) {
Ok(()) => {
backfilled += 1;
*by_source.entry(rd.source.as_str()).or_insert(0) += 1;
}
Err(e) => {
warn!(
"date_backfill: update failed for lib {} {}: {:?}",
library.id, rel_path, e
);
}
}
}
}
if backfilled > 0 || unresolved > 0 || more_than_cap {
info!(
"date_backfill: library '{}': resolved {} ({:?}), {} unresolved, cap={}, more_remain={}",
library.name, backfilled, by_source, unresolved, cap, more_than_cap
);
}
backfilled
}
/// Per-tick face-detection drain. Pulls a capped batch of hashed-but- /// Per-tick face-detection drain. Pulls a capped batch of hashed-but-
/// unscanned image_exif rows directly via the FaceDao anti-join and /// unscanned image_exif rows directly via the FaceDao anti-join and
/// hands them to the existing detection pass. Runs on every tick (not /// hands them to the existing detection pass. Runs on every tick (not

View File

@@ -1,25 +1,19 @@
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{HttpRequest, HttpResponse, Responder, get, web}; use actix_web::{HttpRequest, HttpResponse, Responder, get, web};
use chrono::LocalResult::{Ambiguous, Single}; use chrono::LocalResult::{Ambiguous, Single};
use chrono::{DateTime, Datelike, FixedOffset, Local, LocalResult, NaiveDate, TimeZone, Utc}; use chrono::{DateTime, FixedOffset, Local, LocalResult, NaiveDate, TimeZone};
use log::{debug, trace, warn}; use log::{debug, trace, warn};
use opentelemetry::KeyValue; use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer}; use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
use rayon::prelude::*;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::path::Path; use std::path::Path;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Mutex; use std::sync::Mutex;
use walkdir::WalkDir;
use crate::data::Claims; use crate::data::Claims;
use crate::database::ExifDao; use crate::database::ExifDao;
use crate::files::is_image_or_video;
use crate::libraries::Library;
use crate::otel::{extract_context_from_request, global_tracer}; use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState; use crate::state::AppState;
use crate::utils::earliest_fs_time;
// Helper that encapsulates path-exclusion semantics // Helper that encapsulates path-exclusion semantics
#[derive(Debug)] #[derive(Debug)]
@@ -139,22 +133,6 @@ pub struct MemoriesResponse {
pub items: Vec<MemoryItem>, pub items: Vec<MemoryItem>,
} }
/// Convert Unix timestamp to NaiveDate in client timezone
fn timestamp_to_naive_date(
timestamp: i64,
client_timezone: &Option<FixedOffset>,
) -> Option<NaiveDate> {
let dt_utc = DateTime::<Utc>::from_timestamp(timestamp, 0)?;
let date = if let Some(tz) = client_timezone {
dt_utc.with_timezone(tz).date_naive()
} else {
dt_utc.with_timezone(&Local).date_naive()
};
Some(date)
}
pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset>> { pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset>> {
let build_date_from_ymd_capture = let build_date_from_ymd_capture =
|captures: &regex::Captures| -> Option<DateTime<FixedOffset>> { |captures: &regex::Captures| -> Option<DateTime<FixedOffset>> {
@@ -283,232 +261,21 @@ pub fn extract_date_from_filename(filename: &str) -> Option<DateTime<FixedOffset
None None
} }
/// Get the canonical date for a memory with priority: filename → EXIF → metadata /// Convert a `date_taken` Unix-seconds value to a `NaiveDate` in the
/// Returns (NaiveDate for matching, timestamp for display, modified timestamp) /// client's local time. Falls back to server-local when the client didn't
fn get_memory_date_with_priority( /// send a tz hint.
path: &Path, fn date_in_client_tz(timestamp: i64, client_timezone: Option<FixedOffset>) -> Option<NaiveDate> {
exif_date_taken: Option<i64>, let dt = DateTime::from_timestamp(timestamp, 0)?;
client_timezone: &Option<FixedOffset>, Some(match client_timezone {
) -> Option<(NaiveDate, Option<i64>, Option<i64>)> { Some(tz) => dt.with_timezone(&tz).date_naive(),
// Read file metadata once None => dt.with_timezone(&Local).date_naive(),
let meta = std::fs::metadata(path).ok()?; })
// Priority 1: Try to extract date from filename
if let Some(filename_date) = path
.file_name()
.and_then(|f| f.to_str())
.and_then(extract_date_from_filename)
{
// Convert to client timezone if specified
let date_in_timezone = if let Some(tz) = client_timezone {
filename_date.with_timezone(tz)
} else {
filename_date.with_timezone(&Local).fixed_offset()
};
let timestamp = if let Some(tz) = client_timezone {
filename_date.with_timezone(tz).timestamp()
} else {
filename_date.timestamp()
};
let modified = meta.modified().ok().map(|t| {
let utc: DateTime<Utc> = t.into();
if let Some(tz) = client_timezone {
utc.with_timezone(tz).timestamp()
} else {
utc.timestamp()
}
});
debug!(
"Memory date from filename {:?} > {:?} = {:?}",
path.file_name(),
filename_date,
date_in_timezone
);
return Some((date_in_timezone.date_naive(), Some(timestamp), modified));
}
// Priority 2: Use EXIF date_taken if available
if let Some(exif_timestamp) = exif_date_taken {
let date = timestamp_to_naive_date(exif_timestamp, client_timezone)?;
let modified = meta.modified().ok().map(|t| {
let utc: DateTime<Utc> = t.into();
if let Some(tz) = client_timezone {
utc.with_timezone(tz).timestamp()
} else {
utc.timestamp()
}
});
debug!("Memory date from EXIF {:?} = {:?}", path.file_name(), date);
return Some((date, Some(exif_timestamp), modified));
}
// Priority 3: Fall back to metadata (earlier of created/modified — see utils::earliest_fs_time)
let system_time = earliest_fs_time(&meta)?;
let dt_utc: DateTime<Utc> = system_time.into();
let date_in_timezone = if let Some(tz) = client_timezone {
dt_utc.with_timezone(tz).date_naive()
} else {
dt_utc.with_timezone(&Local).date_naive()
};
let created_timestamp = if let Some(tz) = client_timezone {
dt_utc.with_timezone(tz).timestamp()
} else {
dt_utc.timestamp()
};
let modified = meta.modified().ok().map(|t| {
let utc: DateTime<Utc> = t.into();
if let Some(tz) = client_timezone {
utc.with_timezone(tz).timestamp()
} else {
utc.timestamp()
}
});
trace!("Fallback metadata create date = {:?}", date_in_timezone);
Some((date_in_timezone, Some(created_timestamp), modified))
} }
/// Collect memories from EXIF database /// Default lookback for `/memories`. The original 15-year cap pre-dated
fn collect_exif_memories( /// most of the imported libraries; bumped to 20 so users with deeper
exif_dao: &Data<Mutex<Box<dyn ExifDao>>>, /// archives see those photos surface on the matching anniversary too.
context: &opentelemetry::Context, pub const DEFAULT_YEARS_BACK: i32 = 20;
base_path: &str,
library_id: i32,
now: NaiveDate,
span_mode: MemoriesSpan,
years_back: u32,
client_timezone: &Option<FixedOffset>,
path_excluder: &PathExcluder,
) -> Vec<(MemoryItem, NaiveDate)> {
// Query database for all files with date_taken
let exif_records = match exif_dao.lock() {
Ok(mut dao) => match dao.get_all_with_date_taken(context, Some(library_id)) {
Ok(records) => records,
Err(e) => {
warn!("Failed to query EXIF database: {:?}", e);
return Vec::new(); // Graceful fallback
}
},
Err(e) => {
warn!("Failed to lock EXIF DAO: {:?}", e);
return Vec::new();
}
};
// Parallel processing with Rayon
exif_records
.par_iter()
.filter_map(|(file_path, date_taken_ts)| {
// Build full path
let full_path = Path::new(base_path).join(file_path);
// Check exclusions
if path_excluder.is_excluded(&full_path) {
return None;
}
// Verify file exists
if !full_path.exists() || !full_path.is_file() {
warn!("EXIF record exists but file not found: {:?}", full_path);
return None;
}
// Get date with priority: filename → EXIF → metadata
// This ensures sorting and display use the same date source
let (file_date, created, modified) =
get_memory_date_with_priority(&full_path, Some(*date_taken_ts), client_timezone)?;
// Check if matches memory criteria
if !is_memories_match(file_path, file_date, now, span_mode, years_back) {
return None;
}
Some((
MemoryItem {
path: file_path.clone(),
created,
modified,
library_id,
},
file_date,
))
})
.collect()
}
/// Collect memories from file system scan (for files not in EXIF DB)
fn collect_filesystem_memories(
base_path: &str,
library_id: i32,
path_excluder: &PathExcluder,
skip_paths: &HashSet<PathBuf>,
now: NaiveDate,
span_mode: MemoriesSpan,
years_back: u32,
client_timezone: &Option<FixedOffset>,
) -> Vec<(MemoryItem, NaiveDate)> {
let base = Path::new(base_path);
let entries: Vec<_> = WalkDir::new(base)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| {
let path = e.path();
// Skip if already processed by EXIF query
if skip_paths.contains(path) {
return false;
}
// Check exclusions
if path_excluder.is_excluded(path) {
return false;
}
// Only process image/video files
e.file_type().is_file() && is_image_or_video(path)
})
.collect();
entries
.par_iter()
.filter_map(|entry| {
// Use unified date priority function (no EXIF for filesystem scan)
let (file_date, created, modified) =
get_memory_date_with_priority(entry.path(), None, client_timezone)?;
if is_memories_match(
entry.path().to_str().unwrap_or("Unknown"),
file_date,
now,
span_mode,
years_back,
) {
let path_relative = entry.path().strip_prefix(base).ok()?.to_str()?.to_string();
Some((
MemoryItem {
path: path_relative,
created,
modified,
library_id,
},
file_date,
))
} else {
None
}
})
.collect()
}
#[get("/memories")] #[get("/memories")]
pub async fn list_memories( pub async fn list_memories(
@@ -525,32 +292,28 @@ pub async fn list_memories(
opentelemetry::Context::new().with_remote_span_context(span.span_context().clone()); opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
let span_mode = q.span.unwrap_or(MemoriesSpan::Day); let span_mode = q.span.unwrap_or(MemoriesSpan::Day);
let years_back: u32 = 15; let span_token = match span_mode {
MemoriesSpan::Day => "day",
// Create timezone from client offset, default to local timezone if not provided MemoriesSpan::Week => "week",
let client_timezone = match q.timezone_offset_minutes { MemoriesSpan::Month => "month",
Some(offset_mins) => {
let offset_secs = offset_mins * 60;
Some(
FixedOffset::east_opt(offset_secs)
.unwrap_or_else(|| FixedOffset::east_opt(0).unwrap()),
)
}
None => None,
}; };
let years_back: i32 = DEFAULT_YEARS_BACK;
let now = if let Some(tz) = client_timezone { // The SQL filter expects a signed offset in minutes from UTC; default
debug!("Client timezone: {:?}", tz); // 0 (UTC) when the client didn't send a hint. We also keep a chrono
Utc::now().with_timezone(&tz).date_naive() // `FixedOffset` for sorting/secondary-key date math in Rust below —
} else { // anchoring both sides on the same value keeps "what SQL matched" and
Local::now().date_naive() // "what we sort by" consistent.
}; let tz_offset_minutes = q.timezone_offset_minutes.unwrap_or(0);
let client_timezone = q
.timezone_offset_minutes
.and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60));
debug!("Now: {:?}", now); debug!(
"list_memories: span={:?} tz_offset_min={} years_back={}",
span_mode, tz_offset_minutes, years_back
);
// Resolve the optional library filter. Unknown values are a 400; None
// means "all libraries" — currently equivalent to the primary library
// while only one is configured.
let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) { let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) {
Ok(lib) => lib, Ok(lib) => lib,
Err(msg) => { Err(msg) => {
@@ -558,13 +321,13 @@ pub async fn list_memories(
return HttpResponse::BadRequest().body(msg); return HttpResponse::BadRequest().body(msg);
} }
}; };
// When `library` is `Some`, scope to that one library; otherwise union let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
// across every configured library and let the results interleave.
let libraries_to_scan: Vec<&Library> = match library {
Some(lib) => vec![lib], Some(lib) => vec![lib],
None => app_state.libraries.iter().collect(), None => app_state.libraries.iter().collect(),
}; };
// (item, date) tuples — `date` is the canonical NaiveDate of the
// memory in the client's tz, used as the primary sort key.
let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = Vec::new(); let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = Vec::new();
for lib in &libraries_to_scan { for lib in &libraries_to_scan {
@@ -572,78 +335,82 @@ pub async fn list_memories(
let effective = lib.effective_excluded_dirs(&app_state.excluded_dirs); let effective = lib.effective_excluded_dirs(&app_state.excluded_dirs);
let path_excluder = PathExcluder::new(base, &effective); let path_excluder = PathExcluder::new(base, &effective);
let exif_memories = collect_exif_memories( let rows = match exif_dao.lock() {
&exif_dao, Ok(mut dao) => match dao.get_memories_in_window(
&span_context, &span_context,
&lib.root_path, lib.id,
lib.id, span_token,
now, years_back,
span_mode, tz_offset_minutes,
years_back, ) {
&client_timezone, Ok(rows) => rows,
&path_excluder, Err(e) => {
); warn!(
"Failed to query memories for library '{}': {:?}",
lib.name, e
);
continue;
}
},
Err(e) => {
warn!("Failed to lock EXIF DAO: {:?}", e);
continue;
}
};
let exif_paths: HashSet<PathBuf> = exif_memories for (rel_path, date_taken_ts, last_modified_ts) in rows {
.iter() // Apply per-library exclusions in Rust — they're a small
.map(|(item, _)| PathBuf::from(&lib.root_path).join(&item.path)) // set and pushing them into the SQL WHERE adds bind-param
.collect(); // gymnastics with no measurable win at this scale.
let full_path = base.join(&rel_path);
if path_excluder.is_excluded(&full_path) {
trace!("Memory excluded by PathExcluder: {:?}", full_path);
continue;
}
let fs_memories = collect_filesystem_memories( let Some(file_date) = date_in_client_tz(date_taken_ts, client_timezone) else {
&lib.root_path, continue;
lib.id, };
&path_excluder,
&exif_paths,
now,
span_mode,
years_back,
&client_timezone,
);
memories_with_dates.extend(exif_memories); memories_with_dates.push((
memories_with_dates.extend(fs_memories); MemoryItem {
path: rel_path,
created: Some(date_taken_ts),
modified: Some(last_modified_ts),
library_id: lib.id,
},
file_date,
));
}
} }
// Sort once over the merged result set. The SQL filter handles the
// matching; sort order is purely UI concern.
match span_mode { match span_mode {
// Sort by absolute time for a more 'overview' // Month: chronological — gives an "overview" feel.
MemoriesSpan::Month => memories_with_dates.sort_by(|a, b| a.1.cmp(&b.1)), MemoriesSpan::Month => memories_with_dates.sort_by(|a, b| a.1.cmp(&b.1)),
// For week span, sort by full date + timestamp (chronological) // Week: full date then timestamp (oldest → newest).
MemoriesSpan::Week => { MemoriesSpan::Week => {
memories_with_dates.sort_by(|a, b| { memories_with_dates.sort_by(|a, b| {
// First, sort by full date (year, month, day) a.1.cmp(&b.1)
let date_cmp = a.1.cmp(&b.1); .then_with(|| match (a.0.created, b.0.created) {
if date_cmp != std::cmp::Ordering::Equal { (Some(at), Some(bt)) => at.cmp(&bt),
return date_cmp;
}
// Then sort by full created timestamp (oldest to newest)
match (a.0.created, b.0.created) {
(Some(a_time), Some(b_time)) => a_time.cmp(&b_time),
(Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => std::cmp::Ordering::Equal,
}
});
}
// For day span, sort by day of month then by time
MemoriesSpan::Day => {
memories_with_dates.sort_by(|a, b| {
let day_comparison = a.1.day().cmp(&b.1.day());
if day_comparison == std::cmp::Ordering::Equal {
match (a.0.created, b.0.created) {
(Some(a_time), Some(b_time)) => a_time.cmp(&b_time),
(Some(_), None) => std::cmp::Ordering::Less, (Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater, (None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => std::cmp::Ordering::Equal, (None, None) => std::cmp::Ordering::Equal,
} })
} else { });
day_comparison }
} // Day: same calendar day across years, sub-sorted by timestamp.
MemoriesSpan::Day => {
memories_with_dates.sort_by(|a, b| match (a.0.created, b.0.created) {
(Some(at), Some(bt)) => at.cmp(&bt),
(Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => std::cmp::Ordering::Equal,
}); });
} }
} }
// Sort by day of the month and time (using the created timestamp)
let items: Vec<MemoryItem> = memories_with_dates.into_iter().map(|(m, _)| m).collect(); let items: Vec<MemoryItem> = memories_with_dates.into_iter().map(|(m, _)| m).collect();
@@ -653,13 +420,7 @@ pub async fn list_memories(
KeyValue::new("span", format!("{:?}", span_mode)), KeyValue::new("span", format!("{:?}", span_mode)),
KeyValue::new("years_back", years_back.to_string()), KeyValue::new("years_back", years_back.to_string()),
KeyValue::new("result_count", items.len().to_string()), KeyValue::new("result_count", items.len().to_string()),
KeyValue::new( KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
"client_timezone",
format!(
"{:?}",
client_timezone.unwrap_or_else(|| FixedOffset::east_opt(0).unwrap())
),
),
KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)), KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
], ],
); );
@@ -668,50 +429,10 @@ pub async fn list_memories(
HttpResponse::Ok().json(MemoriesResponse { items }) HttpResponse::Ok().json(MemoriesResponse { items })
} }
/// Decide whether `file_date` belongs in the memories feed for `today`.
///
/// A candidate is rejected when it lies in the future or more than
/// `years_back` years behind `today`; otherwise it matches according to
/// the selected `span` (same calendar day, same ISO week number, or same
/// month — the year is ignored in every span mode).
fn is_memories_match(
    file_path: &str,
    file_date: NaiveDate,
    today: NaiveDate,
    span: MemoriesSpan,
    years_back: u32,
) -> bool {
    // Future-dated files never match.
    if file_date > today {
        return false;
    }
    // Reject files outside the configured look-back horizon, noisily —
    // an out-of-range date usually points at a bogus timestamp source.
    if (today.year() - file_date.year()).unsigned_abs() > years_back {
        warn!(
            "File ({}) date is too far in the past: {:?} vs {:?}",
            file_path, file_date, today
        );
        return false;
    }
    match span {
        MemoriesSpan::Day => same_month_day_any_year(file_date, today),
        MemoriesSpan::Week => same_week_any_year(file_date, today),
        MemoriesSpan::Month => same_month_any_year(file_date, today),
    }
}
/// True when two dates share the same calendar month and day; the year
/// is ignored (e.g. 2014-06-01 matches 2021-06-01).
fn same_month_day_any_year(a: NaiveDate, b: NaiveDate) -> bool {
    (a.month(), a.day()) == (b.month(), b.day())
}
/// True when two dates fall in the same ISO week *number*, year ignored.
///
/// Note: only the week number is compared — the weekday is NOT checked,
/// so any day of ISO week N in one year matches any day of ISO week N in
/// another. (The previous comment incorrectly claimed a weekday match.)
fn same_week_any_year(a: NaiveDate, b: NaiveDate) -> bool {
    a.iso_week().week() == b.iso_week().week()
}
/// True when two dates share the same calendar month; day and year are
/// ignored. (`month0` is the zero-based month index — equality of the
/// zero-based indices is equivalent to equality of the 1-based months.)
fn same_month_any_year(a: NaiveDate, b: NaiveDate) -> bool {
    a.month0() == b.month0()
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use chrono::Timelike; use chrono::{Datelike, Timelike};
use std::fs::{self, File}; use std::fs::{self, File};
use tempfile::tempdir; use tempfile::tempdir;
@@ -869,99 +590,11 @@ mod tests {
); );
} }
// Verifies the request-time waterfall: a date embedded in the filename
// wins over EXIF data, and the created timestamp is built from the
// filename's wall-clock time in the caller-supplied (local) offset.
// NOTE(review): the canonical date_taken pipeline (src/date_resolver.rs)
// now ranks EXIF *above* filename — this test pins the OLD behavior of
// `get_memory_date_with_priority`; confirm which surface it still covers.
#[test]
fn test_memory_date_priority_filename() {
    let temp_dir = tempdir().unwrap();
    // Filename embeds a full timestamp: 2014-06-01 20:44:50.
    let temp_file = temp_dir.path().join("Screenshot_2014-06-01-20-44-50.png");
    File::create(&temp_file).unwrap();
    // Test that filename takes priority (even with EXIF data available)
    let exif_date = DateTime::<Utc>::from_timestamp(1609459200, 0) // 2021-01-01
        .unwrap()
        .timestamp();
    let (date, created, _) = get_memory_date_with_priority(
        &temp_file,
        Some(exif_date),
        &Some(*Local::now().fixed_offset().offset()),
    )
    .unwrap();
    // Check that date is from filename (2014), NOT EXIF (2021)
    assert_eq!(date.year(), 2014);
    assert_eq!(date.month(), 6);
    assert_eq!(date.day(), 1);
    // Check that created timestamp matches the date from filename
    assert!(created.is_some());
    let ts = created.unwrap();
    // The timestamp should be for 2014-06-01 20:44:50 in the LOCAL timezone
    let dt_from_ts = Local.timestamp_opt(ts, 0).unwrap();
    assert_eq!(dt_from_ts.year(), 2014);
    assert_eq!(dt_from_ts.month(), 6);
    assert_eq!(dt_from_ts.day(), 1);
    assert_eq!(dt_from_ts.hour(), 20);
    assert_eq!(dt_from_ts.minute(), 44);
    assert_eq!(dt_from_ts.second(), 50);
}
// Verifies the last-resort branch: with no date in the filename and no
// EXIF value supplied, both the date and the created/modified timestamps
// come from the freshly created file's filesystem metadata (i.e. "now").
#[test]
fn test_memory_date_priority_metadata_fallback() {
    let temp_dir = tempdir().unwrap();
    // Dateless name, so neither the filename parser nor EXIF can win.
    let temp_file = temp_dir.path().join("regular_image.jpg");
    File::create(&temp_file).unwrap();
    // Test metadata fallback when no filename date or EXIF
    let (date, created, modified) =
        get_memory_date_with_priority(&temp_file, None, &None).unwrap();
    // Both date and timestamps should be from metadata (recent)
    let today = Local::now().date_naive();
    assert_eq!(date.year(), today.year());
    assert_eq!(date.month(), today.month());
    // Both timestamps should be valid
    assert!(created.is_some());
    assert!(modified.is_some());
    // Check that timestamps are recent (same year as today — kept loose
    // to avoid a flaky midnight/new-year boundary).
    let dt_created = DateTime::<Utc>::from_timestamp(created.unwrap(), 0).unwrap();
    assert_eq!(dt_created.year(), today.year());
    let dt_modified = DateTime::<Utc>::from_timestamp(modified.unwrap(), 0).unwrap();
    assert_eq!(dt_modified.year(), today.year());
}
// Verifies the middle rung of the old waterfall: with a dateless
// filename, a supplied EXIF timestamp beats filesystem metadata for both
// the resolved date and `created`, while `modified` still reflects the
// file's actual (recent) filesystem mtime.
#[test]
fn test_memory_date_priority_exif_over_metadata() {
    let temp_dir = tempdir().unwrap();
    // Dateless name: only EXIF vs. filesystem metadata compete.
    let temp_file = temp_dir.path().join("regular_image.jpg");
    File::create(&temp_file).unwrap();
    // Test that EXIF takes priority over metadata (but not filename)
    // EXIF date: June 15, 2020 12:00:00 UTC (safe from timezone edge cases)
    let exif_date = DateTime::<Utc>::from_timestamp(1592222400, 0) // 2020-06-15 12:00:00 UTC
        .unwrap()
        .timestamp();
    let (date, created, modified) =
        get_memory_date_with_priority(&temp_file, Some(exif_date), &None).unwrap();
    // Date should be from EXIF (2020), not metadata (today)
    assert_eq!(date.year(), 2020);
    assert_eq!(date.month(), 6);
    assert_eq!(date.day(), 15);
    // Created timestamp should also be from EXIF
    assert!(created.is_some());
    assert_eq!(created.unwrap(), exif_date);
    // Modified should still be from metadata
    assert!(modified.is_some());
    let today = Local::now().date_naive();
    let dt_modified = DateTime::<Utc>::from_timestamp(modified.unwrap(), 0).unwrap();
    assert_eq!(dt_modified.year(), today.year());
}
#[test] #[test]
fn test_path_excluder_absolute_under_base() { fn test_path_excluder_absolute_under_base() {