From 9f1b3f6d9a5c2e823c743a84752c4eb9871d01df Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Wed, 6 May 2026 17:05:00 -0400 Subject: [PATCH] date_taken_source: backfill 'exif' on legacy rows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-resolver rows already had a populated `date_taken` from the old kamadak-exif-only ingest path. The column-add migration left their `date_taken_source` as NULL, and the drain's eligibility predicate (`date_taken IS NULL OR date_taken_source = 'fs_time'`) skips them — so they remain unlabelled forever and never benefit from the resolver's exiftool fallback even if they're videos that should upgrade. Label them all `'exif'` in a one-shot UPDATE. Safe because every write path that populated `date_taken` before the resolver landed was a kamadak-exif read. Idempotent (the WHERE matches nothing on a second run). Down.sql is a no-op — the labels stay correct under any schema state, and the column-add migration is the right place to revert if needed. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../down.sql | 9 +++++++++ .../up.sql | 20 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 migrations/2026-05-06-000100_backfill_date_taken_source_legacy/down.sql create mode 100644 migrations/2026-05-06-000100_backfill_date_taken_source_legacy/up.sql diff --git a/migrations/2026-05-06-000100_backfill_date_taken_source_legacy/down.sql b/migrations/2026-05-06-000100_backfill_date_taken_source_legacy/down.sql new file mode 100644 index 0000000..2d2c82a --- /dev/null +++ b/migrations/2026-05-06-000100_backfill_date_taken_source_legacy/down.sql @@ -0,0 +1,9 @@ +-- Reverting this migration is a no-op: the labels we wrote in `up.sql` +-- are correct under any state of the schema (every dated row was indeed +-- exif-sourced before the resolver landed), and there's no signal that +-- distinguishes "labelled by this migration" from "labelled by the +-- ingest path post-resolver". 
Clearing them would break the drain's +-- eligibility filter again. +-- +-- The companion migration `2026-05-06-000000_add_date_taken_source` is +-- the one to revert if you need to remove the column entirely. diff --git a/migrations/2026-05-06-000100_backfill_date_taken_source_legacy/up.sql b/migrations/2026-05-06-000100_backfill_date_taken_source_legacy/up.sql new file mode 100644 index 0000000..cccf343 --- /dev/null +++ b/migrations/2026-05-06-000100_backfill_date_taken_source_legacy/up.sql @@ -0,0 +1,20 @@ +-- Backfill `date_taken_source` for rows that pre-date the canonical-date +-- pipeline. Before the resolver landed, `image_exif.date_taken` could +-- only be populated via `exif::extract_exif_from_path` (kamadak-exif) +-- on the file-watcher, upload, or GPS-write paths. The resolver column +-- migration added `date_taken_source` defaulting to NULL, so every +-- historical row with a date is currently unlabelled — and the +-- per-tick drain skips them because its eligibility predicate is +-- `date_taken IS NULL OR date_taken_source = 'fs_time'`. +-- +-- Label them `'exif'` once and let the drain take over from here. Safe +-- because every code path that wrote `date_taken` prior to the +-- resolver was a kamadak-exif read — there was no other source. +-- NOTE(review): rows labelled here still fail the predicate quoted above +-- (non-NULL date, source `'exif'`) — confirm the drain should skip them. +-- +-- Idempotent: on a second run the WHERE clause matches no rows (no-op). +UPDATE image_exif +SET date_taken_source = 'exif' +WHERE date_taken IS NOT NULL + AND date_taken_source IS NULL;