image_exif: add date_taken_source column
New nullable TEXT column tracks which step of the canonical-date waterfall (kamadak-exif → exiftool → filename → fs_time) populated `date_taken`. Lets a later per-tick drain re-resolve weak sources (`fs_time`) once stronger ones become available, and gives the UI/debug surface a way to answer "why does this photo show up under this date?". Adds the column at all `InsertImageExif` construction sites with `None` placeholders (the resolver wiring lands in a follow-up commit), and extends the `update_exif` SET tuple so the column survives the GPS-write re-read path. Partial index `idx_image_exif_date_backfill` is created for the upcoming drain query. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
-- Must run before the DROP COLUMN below: the partial index's WHERE clause
-- references `date_taken_source`, and SQLite refuses to drop a column that
-- is still referenced by an index.
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
|
||||
-- NOTE(review): ALTER TABLE ... DROP COLUMN requires SQLite >= 3.35 —
-- confirm the deploy target ships at least that version.
ALTER TABLE image_exif DROP COLUMN date_taken_source;
|
||||
24
migrations/2026-05-06-000000_add_date_taken_source/up.sql
Normal file
24
migrations/2026-05-06-000000_add_date_taken_source/up.sql
Normal file
@@ -0,0 +1,24 @@
|
||||
-- Tracks where a row's `date_taken` was sourced so the canonical-date
|
||||
-- waterfall (kamadak-exif → exiftool → filename → earliest_fs_time) is
|
||||
-- visible to debugging and to the per-tick backfill drain that re-runs
|
||||
-- weak sources once stronger ones become available (e.g. exiftool gets
|
||||
-- installed on a deploy that didn't have it). See CLAUDE.md → Memories
|
||||
-- canonical-date pipeline.
|
||||
--
|
||||
-- Values:
|
||||
-- 'exif' — kamadak-exif read DateTime/DateTimeOriginal directly
|
||||
-- 'exiftool' — exiftool fallback caught a video / MakerNote / QuickTime tag
|
||||
-- 'filename' — extract_date_from_filename matched a known pattern
|
||||
-- 'fs_time' — fell through to earliest_fs_time(metadata)
|
||||
--
|
||||
-- NULL when `date_taken` itself is NULL (no source resolved the date).
|
||||
-- Nullable on purpose: existing rows (and inserts made before the resolver
-- wiring lands) carry NULL until the backfill drain populates them.
ALTER TABLE image_exif ADD COLUMN date_taken_source TEXT;
|
||||
|
||||
-- Partial index for the per-tick backfill drain: targets rows that need
|
||||
-- re-resolution (no date yet, or only the weakest source resolved it).
|
||||
-- Filename-sourced rows are intentionally excluded — the regex is
|
||||
-- authoritative when it matches and re-running exiftool wouldn't change
|
||||
-- the answer.
|
||||
-- IF NOT EXISTS keeps the migration idempotent on a re-run, matching the
-- down migration's DROP INDEX IF EXISTS. (library_id, id) lets the drain
-- page through candidates per library in stable id order.
CREATE INDEX IF NOT EXISTS idx_image_exif_date_backfill
    ON image_exif (library_id, id)
    WHERE date_taken IS NULL OR date_taken_source = 'fs_time';
|
||||
@@ -730,6 +730,7 @@ impl ExifDao for SqliteExifDao {
|
||||
shutter_speed.eq(&exif_data.shutter_speed),
|
||||
iso.eq(&exif_data.iso),
|
||||
date_taken.eq(&exif_data.date_taken),
|
||||
date_taken_source.eq(&exif_data.date_taken_source),
|
||||
last_modified.eq(&exif_data.last_modified),
|
||||
))
|
||||
.execute(connection.deref_mut())
|
||||
@@ -1819,6 +1820,7 @@ mod exif_dao_tests {
|
||||
size_bytes: None,
|
||||
phash_64: None,
|
||||
dhash_64: None,
|
||||
date_taken_source: None,
|
||||
},
|
||||
)
|
||||
.expect("insert exif row");
|
||||
|
||||
@@ -63,6 +63,12 @@ pub struct InsertImageExif {
|
||||
pub phash_64: Option<i64>,
|
||||
/// 64-bit dHash (gradient). NULL for videos and decode failures.
|
||||
pub dhash_64: Option<i64>,
|
||||
/// Which step of the canonical-date waterfall populated `date_taken`:
|
||||
/// `"exif"` | `"exiftool"` | `"filename"` | `"fs_time"`. NULL when
|
||||
/// `date_taken` is NULL (no source resolved it). The per-tick backfill
|
||||
/// drain re-resolves rows whose source is `"fs_time"` once exiftool
|
||||
/// has had a chance to run.
|
||||
pub date_taken_source: Option<String>,
|
||||
}
|
||||
|
||||
// Field order matches the post-migration column order in `image_exif`.
|
||||
@@ -98,6 +104,8 @@ pub struct ImageExif {
|
||||
pub duplicate_of_hash: Option<String>,
|
||||
/// Unix seconds at which the resolve was committed.
|
||||
pub duplicate_decided_at: Option<i64>,
|
||||
/// Which step of the canonical-date waterfall populated `date_taken`.
|
||||
pub date_taken_source: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Insertable)]
|
||||
|
||||
@@ -125,6 +125,7 @@ diesel::table! {
|
||||
dhash_64 -> Nullable<BigInt>,
|
||||
duplicate_of_hash -> Nullable<Text>,
|
||||
duplicate_decided_at -> Nullable<BigInt>,
|
||||
date_taken_source -> Nullable<Text>,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1508,6 +1508,7 @@ mod tests {
|
||||
dhash_64: data.dhash_64,
|
||||
duplicate_of_hash: None,
|
||||
duplicate_decided_at: None,
|
||||
date_taken_source: data.date_taken_source.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1551,6 +1552,7 @@ mod tests {
|
||||
dhash_64: data.dhash_64,
|
||||
duplicate_of_hash: None,
|
||||
duplicate_decided_at: None,
|
||||
date_taken_source: data.date_taken_source.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -537,6 +537,8 @@ async fn set_image_gps(
|
||||
// with a usable signal; failure just leaves prior values in place.
|
||||
phash_64: perceptual_hash::compute(&full_path).map(|h| h.phash_64),
|
||||
dhash_64: perceptual_hash::compute(&full_path).map(|h| h.dhash_64),
|
||||
// Replaced in a follow-up commit with the canonical-date resolver's output.
|
||||
date_taken_source: None,
|
||||
};
|
||||
|
||||
let updated = {
|
||||
@@ -772,6 +774,8 @@ async fn upload_image(
|
||||
size_bytes,
|
||||
phash_64: perceptual.map(|h| h.phash_64),
|
||||
dhash_64: perceptual.map(|h| h.dhash_64),
|
||||
// Replaced in a follow-up commit with the canonical-date resolver's output.
|
||||
date_taken_source: None,
|
||||
};
|
||||
|
||||
if let Ok(mut dao) = exif_dao.lock() {
|
||||
@@ -2410,6 +2414,8 @@ fn process_new_files(
|
||||
size_bytes,
|
||||
phash_64: perceptual.map(|h| h.phash_64),
|
||||
dhash_64: perceptual.map(|h| h.dhash_64),
|
||||
// Replaced in a follow-up commit with the canonical-date resolver's output.
|
||||
date_taken_source: None,
|
||||
};
|
||||
|
||||
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||
|
||||
Reference in New Issue
Block a user