image_exif: add date_taken_source column

New nullable TEXT column tracks which step of the canonical-date
waterfall (kamadak-exif → exiftool → filename → fs_time) populated
`date_taken`. This lets a later per-tick drain re-resolve rows that have
no date or only a weak source (`fs_time`) once stronger sources become
available, and gives the UI/debug surface a way to answer "why does this
photo show up under this date?".

Sets the new field at every `InsertImageExif` construction site with a
`None` placeholder (the resolver wiring lands in a follow-up commit), and
extends the `update_exif` SET tuple so the column survives the GPS-write
re-read path. A partial index, `idx_image_exif_date_backfill`, is created
for the upcoming drain query.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-06 15:57:49 -04:00
parent 5de9a322ac
commit 84326501a9
7 changed files with 45 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
-- Reverts the `date_taken_source` migration.
--
-- Order matters: the partial index's WHERE clause references
-- `date_taken_source`, and SQLite refuses to DROP COLUMN while a partial
-- index still uses that column, so the index has to go first.
-- NOTE(review): ALTER TABLE ... DROP COLUMN needs SQLite 3.35.0 or newer —
-- confirm the bundled/deployed SQLite version satisfies that.
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
ALTER TABLE image_exif DROP COLUMN date_taken_source;

View File

@@ -0,0 +1,24 @@
-- Tracks where a row's `date_taken` was sourced so the canonical-date
-- waterfall (kamadak-exif → exiftool → filename → earliest_fs_time) is
-- visible to debugging and to the per-tick backfill drain that re-runs
-- weak sources once stronger ones become available (e.g. exiftool gets
-- installed on a deploy that didn't have it). See CLAUDE.md → Memories
-- canonical-date pipeline.
--
-- Values (listed in waterfall order):
--   'exif'     — kamadak-exif read DateTime/DateTimeOriginal directly
--   'exiftool' — exiftool fallback caught a video / MakerNote / QuickTime tag
--   'filename' — extract_date_from_filename matched a known pattern
--   'fs_time'  — fell through to earliest_fs_time(metadata)
--
-- NULL when `date_taken` itself is NULL (no source resolved the date).
-- NOTE(review): the column is added without a DEFAULT, so every row that
-- exists before this migration also starts out NULL, including rows whose
-- `date_taken` is already populated. Those rows do not match the partial
-- index predicate below — confirm the follow-up resolver/backfill accounts
-- for them.
ALTER TABLE image_exif ADD COLUMN date_taken_source TEXT;
-- Partial index for the per-tick backfill drain: targets rows that need
-- re-resolution (no date yet, or only the weakest source resolved it).
-- Filename-sourced rows are intentionally excluded — the regex is
-- authoritative when it matches and re-running exiftool wouldn't change
-- the answer. Rows sourced from 'exif' or 'exiftool' are excluded by the
-- same predicate.
-- NOTE(review): SQLite only considers a partial index when the query's own
-- WHERE clause implies this predicate — the drain query should repeat it.
CREATE INDEX idx_image_exif_date_backfill
ON image_exif (library_id, id)
WHERE date_taken IS NULL OR date_taken_source = 'fs_time';

View File

@@ -730,6 +730,7 @@ impl ExifDao for SqliteExifDao {
shutter_speed.eq(&exif_data.shutter_speed), shutter_speed.eq(&exif_data.shutter_speed),
iso.eq(&exif_data.iso), iso.eq(&exif_data.iso),
date_taken.eq(&exif_data.date_taken), date_taken.eq(&exif_data.date_taken),
date_taken_source.eq(&exif_data.date_taken_source),
last_modified.eq(&exif_data.last_modified), last_modified.eq(&exif_data.last_modified),
)) ))
.execute(connection.deref_mut()) .execute(connection.deref_mut())
@@ -1819,6 +1820,7 @@ mod exif_dao_tests {
size_bytes: None, size_bytes: None,
phash_64: None, phash_64: None,
dhash_64: None, dhash_64: None,
date_taken_source: None,
}, },
) )
.expect("insert exif row"); .expect("insert exif row");

View File

@@ -63,6 +63,12 @@ pub struct InsertImageExif {
pub phash_64: Option<i64>, pub phash_64: Option<i64>,
/// 64-bit dHash (gradient). NULL for videos and decode failures. /// 64-bit dHash (gradient). NULL for videos and decode failures.
pub dhash_64: Option<i64>, pub dhash_64: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`:
/// `"exif"` | `"exiftool"` | `"filename"` | `"fs_time"`. NULL when
/// `date_taken` is NULL (no source resolved it). The per-tick backfill
/// drain re-resolves rows whose source is `"fs_time"` once exiftool
/// has had a chance to run.
pub date_taken_source: Option<String>,
} }
// Field order matches the post-migration column order in `image_exif`. // Field order matches the post-migration column order in `image_exif`.
@@ -98,6 +104,8 @@ pub struct ImageExif {
pub duplicate_of_hash: Option<String>, pub duplicate_of_hash: Option<String>,
/// Unix seconds at which the resolve was committed. /// Unix seconds at which the resolve was committed.
pub duplicate_decided_at: Option<i64>, pub duplicate_decided_at: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`.
pub date_taken_source: Option<String>,
} }
#[derive(Insertable)] #[derive(Insertable)]

View File

@@ -125,6 +125,7 @@ diesel::table! {
dhash_64 -> Nullable<BigInt>, dhash_64 -> Nullable<BigInt>,
duplicate_of_hash -> Nullable<Text>, duplicate_of_hash -> Nullable<Text>,
duplicate_decided_at -> Nullable<BigInt>, duplicate_decided_at -> Nullable<BigInt>,
date_taken_source -> Nullable<Text>,
} }
} }

View File

@@ -1508,6 +1508,7 @@ mod tests {
dhash_64: data.dhash_64, dhash_64: data.dhash_64,
duplicate_of_hash: None, duplicate_of_hash: None,
duplicate_decided_at: None, duplicate_decided_at: None,
date_taken_source: data.date_taken_source.clone(),
}) })
} }
@@ -1551,6 +1552,7 @@ mod tests {
dhash_64: data.dhash_64, dhash_64: data.dhash_64,
duplicate_of_hash: None, duplicate_of_hash: None,
duplicate_decided_at: None, duplicate_decided_at: None,
date_taken_source: data.date_taken_source.clone(),
}) })
} }

View File

@@ -537,6 +537,8 @@ async fn set_image_gps(
// with a usable signal; failure just leaves prior values in place. // with a usable signal; failure just leaves prior values in place.
phash_64: perceptual_hash::compute(&full_path).map(|h| h.phash_64), phash_64: perceptual_hash::compute(&full_path).map(|h| h.phash_64),
dhash_64: perceptual_hash::compute(&full_path).map(|h| h.dhash_64), dhash_64: perceptual_hash::compute(&full_path).map(|h| h.dhash_64),
// Replaced in a follow-up commit with the canonical-date resolver's output.
date_taken_source: None,
}; };
let updated = { let updated = {
@@ -772,6 +774,8 @@ async fn upload_image(
size_bytes, size_bytes,
phash_64: perceptual.map(|h| h.phash_64), phash_64: perceptual.map(|h| h.phash_64),
dhash_64: perceptual.map(|h| h.dhash_64), dhash_64: perceptual.map(|h| h.dhash_64),
// Replaced in a follow-up commit with the canonical-date resolver's output.
date_taken_source: None,
}; };
if let Ok(mut dao) = exif_dao.lock() { if let Ok(mut dao) = exif_dao.lock() {
@@ -2410,6 +2414,8 @@ fn process_new_files(
size_bytes, size_bytes,
phash_64: perceptual.map(|h| h.phash_64), phash_64: perceptual.map(|h| h.phash_64),
dhash_64: perceptual.map(|h| h.dhash_64), dhash_64: perceptual.map(|h| h.dhash_64),
// Replaced in a follow-up commit with the canonical-date resolver's output.
date_taken_source: None,
}; };
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");