diff --git a/migrations/2026-05-06-000000_add_date_taken_source/down.sql b/migrations/2026-05-06-000000_add_date_taken_source/down.sql
new file mode 100644
index 0000000..212eddf
--- /dev/null
+++ b/migrations/2026-05-06-000000_add_date_taken_source/down.sql
@@ -0,0 +1,2 @@
+DROP INDEX IF EXISTS idx_image_exif_date_backfill;
+ALTER TABLE image_exif DROP COLUMN date_taken_source;
diff --git a/migrations/2026-05-06-000000_add_date_taken_source/up.sql b/migrations/2026-05-06-000000_add_date_taken_source/up.sql
new file mode 100644
index 0000000..2be2590
--- /dev/null
+++ b/migrations/2026-05-06-000000_add_date_taken_source/up.sql
@@ -0,0 +1,24 @@
+-- Tracks where a row's `date_taken` was sourced so the canonical-date
+-- waterfall (kamadak-exif → exiftool → filename → earliest_fs_time) is
+-- visible to debugging and to the per-tick backfill drain that re-runs
+-- weak sources once stronger ones become available (e.g. exiftool gets
+-- installed on a deploy that didn't have it). See CLAUDE.md → Memories
+-- canonical-date pipeline.
+--
+-- Values:
+--   'exif'     — kamadak-exif read DateTime/DateTimeOriginal directly
+--   'exiftool' — exiftool fallback caught a video / MakerNote / QuickTime tag
+--   'filename' — extract_date_from_filename matched a known pattern
+--   'fs_time'  — fell through to earliest_fs_time(metadata)
+--
+-- NULL when `date_taken` itself is NULL (no source resolved the date).
+ALTER TABLE image_exif ADD COLUMN date_taken_source TEXT;
+
+-- Partial index for the per-tick backfill drain: targets rows that need
+-- re-resolution (no date yet, or only the weakest source resolved it).
+-- Filename-sourced rows are intentionally excluded — the regex is
+-- authoritative when it matches and re-running exiftool wouldn't change
+-- the answer. NOTE(review): exiftool precedes filename in the waterfall above — confirm.
+CREATE INDEX idx_image_exif_date_backfill
+    ON image_exif (library_id, id)
+    WHERE date_taken IS NULL OR date_taken_source = 'fs_time';
diff --git a/src/database/mod.rs b/src/database/mod.rs
index 509315d..9ee2a16 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -730,6 +730,7 @@ impl ExifDao for SqliteExifDao {
                     shutter_speed.eq(&exif_data.shutter_speed),
                     iso.eq(&exif_data.iso),
                     date_taken.eq(&exif_data.date_taken),
+                    date_taken_source.eq(&exif_data.date_taken_source),
                     last_modified.eq(&exif_data.last_modified),
                 ))
                 .execute(connection.deref_mut())
@@ -1819,6 +1820,7 @@ mod exif_dao_tests {
                 size_bytes: None,
                 phash_64: None,
                 dhash_64: None,
+                date_taken_source: None,
             },
         )
         .expect("insert exif row");
diff --git a/src/database/models.rs b/src/database/models.rs
index 9d1a3b8..1e3139d 100644
--- a/src/database/models.rs
+++ b/src/database/models.rs
@@ -63,6 +63,12 @@ pub struct InsertImageExif {
     pub phash_64: Option,
     /// 64-bit dHash (gradient). NULL for videos and decode failures.
     pub dhash_64: Option,
+    /// Which step of the canonical-date waterfall populated `date_taken`:
+    /// `"exif"` | `"exiftool"` | `"filename"` | `"fs_time"`. NULL when
+    /// `date_taken` is NULL (no source resolved it). The per-tick backfill
+    /// drain re-resolves rows whose source is `"fs_time"` once exiftool
+    /// has had a chance to run.
+    pub date_taken_source: Option<String>,
 }
 
 // Field order matches the post-migration column order in `image_exif`.
@@ -98,6 +104,8 @@ pub struct ImageExif {
     pub duplicate_of_hash: Option,
     /// Unix seconds at which the resolve was committed.
     pub duplicate_decided_at: Option,
+    /// Which step of the canonical-date waterfall populated `date_taken`.
+    pub date_taken_source: Option<String>,
 }
 
 #[derive(Insertable)]
diff --git a/src/database/schema.rs b/src/database/schema.rs
index bbd0a8d..9a9958a 100644
--- a/src/database/schema.rs
+++ b/src/database/schema.rs
@@ -125,6 +125,7 @@ diesel::table! {
         dhash_64 -> Nullable,
         duplicate_of_hash -> Nullable,
         duplicate_decided_at -> Nullable,
+        date_taken_source -> Nullable<Text>,
     }
 }
 
diff --git a/src/files.rs b/src/files.rs
index 9f01624..10d8be4 100644
--- a/src/files.rs
+++ b/src/files.rs
@@ -1508,6 +1508,7 @@ mod tests {
                 dhash_64: data.dhash_64,
                 duplicate_of_hash: None,
                 duplicate_decided_at: None,
+                date_taken_source: data.date_taken_source.clone(),
             })
         }
 
@@ -1551,6 +1552,7 @@ mod tests {
                 dhash_64: data.dhash_64,
                 duplicate_of_hash: None,
                 duplicate_decided_at: None,
+                date_taken_source: data.date_taken_source.clone(),
             })
         }
 
diff --git a/src/main.rs b/src/main.rs
index 2d598ca..3c0a9a6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -537,6 +537,8 @@ async fn set_image_gps(
         // with a usable signal; failure just leaves prior values in place.
         phash_64: perceptual_hash::compute(&full_path).map(|h| h.phash_64),
         dhash_64: perceptual_hash::compute(&full_path).map(|h| h.dhash_64),
+        // Replaced in a follow-up commit with the canonical-date resolver's output.
+        date_taken_source: None,
     };
 
     let updated = {
@@ -772,6 +774,8 @@ async fn upload_image(
                     size_bytes,
                     phash_64: perceptual.map(|h| h.phash_64),
                     dhash_64: perceptual.map(|h| h.dhash_64),
+                    // Replaced in a follow-up commit with the canonical-date resolver's output.
+                    date_taken_source: None,
                 };
 
                 if let Ok(mut dao) = exif_dao.lock() {
@@ -2410,6 +2414,8 @@ fn process_new_files(
             size_bytes,
             phash_64: perceptual.map(|h| h.phash_64),
             dhash_64: perceptual.map(|h| h.dhash_64),
+            // Replaced in a follow-up commit with the canonical-date resolver's output.
+            date_taken_source: None,
         };
 
         let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");