From 2d56047497f3898fd645ae175c20e6e05d377e8b Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Tue, 12 May 2026 14:37:36 -0400 Subject: [PATCH] Drop fs_time from date-backfill eligibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drain queried `date_taken IS NULL OR date_taken_source = 'fs_time'` ORDER BY id ASC LIMIT 500 every watcher tick. The resolver is deterministic on file bytes + filename + fs metadata, so any row that landed on fs_time once landed there again on every retry — the drain spun on the same lowest-id rows in perpetuity, never advancing to rows 501+ while still logging more_remain=true. Side effect: 500 auto-commit UPDATEs per tick sustained the SQLite write lock long enough that other writers on separate DAO connections hit the 5s busy_timeout. Manifested as intermittent 500s on PATCH /image/faces/{id} that succeeded on retry. Narrow the partial index and query predicate to `date_taken IS NULL`. If exiftool installs or a new filename regex lands, an operator can re-resolve fs_time rows out-of-band rather than re-introducing the steady-state churn. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 23 +++++++----- .../down.sql | 5 +++ .../up.sql | 18 ++++++++++ src/database/mod.rs | 36 +++++++++++-------- 4 files changed, 59 insertions(+), 23 deletions(-) create mode 100644 migrations/2026-05-12-000000_date_backfill_null_only/down.sql create mode 100644 migrations/2026-05-12-000000_date_backfill_null_only/up.sql diff --git a/CLAUDE.md b/CLAUDE.md index 3924fb5..7e605cc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -398,14 +398,21 @@ date) is unchanged. The `backfill_missing_date_taken` drain (`src/backfill.rs`) runs every watcher tick alongside `backfill_unhashed_backlog` (also `src/backfill.rs`). It loads up to `DATE_BACKFILL_MAX_PER_TICK` rows (default 500) where -`date_taken IS NULL OR date_taken_source = 'fs_time'` (backed by the -`idx_image_exif_date_backfill` partial index), runs the waterfall -batch via `resolve_dates_batch`, and writes results via the -`backfill_date_taken` DAO method (touches only `date_taken` + -`date_taken_source` so EXIF / hash / perceptual columns are -preserved). `filename`-sourced rows are intentionally not re-resolved -— the regex is authoritative when it matches, and re-running exiftool -won't change the answer. +`date_taken IS NULL` (backed by the `idx_image_exif_date_backfill` +partial index), runs the waterfall batch via `resolve_dates_batch`, +and writes results via the `backfill_date_taken` DAO method (touches +only `date_taken` + `date_taken_source` so EXIF / hash / perceptual +columns are preserved). Resolved rows — including the ones the +waterfall could only resolve via `fs_time` — are not re-eligible: +the resolver is deterministic on file bytes + filename + fs metadata, +so re-running on the same inputs lands on the same source every time. +An earlier version included `date_taken_source = 'fs_time'` in the +eligibility predicate, but with `ORDER BY id ASC LIMIT 500` it spun on +the same lowest-id rows in perpetuity and held the SQLite write lock +long enough to starve face-PATCH writers (5s busy_timeout → 500). If +a stronger tool comes online (exiftool install, new filename regex), +re-resolve out-of-band rather than re-introducing the steady-state +eligibility. `/memories` is a single SQL query against this column (`get_memories_in_window` in `src/database/mod.rs`), using diff --git a/migrations/2026-05-12-000000_date_backfill_null_only/down.sql b/migrations/2026-05-12-000000_date_backfill_null_only/down.sql new file mode 100644 index 0000000..5845090 --- /dev/null +++ b/migrations/2026-05-12-000000_date_backfill_null_only/down.sql @@ -0,0 +1,5 @@ +DROP INDEX IF EXISTS idx_image_exif_date_backfill; + +CREATE INDEX idx_image_exif_date_backfill + ON image_exif (library_id, id) + WHERE date_taken IS NULL OR date_taken_source = 'fs_time'; diff --git a/migrations/2026-05-12-000000_date_backfill_null_only/up.sql b/migrations/2026-05-12-000000_date_backfill_null_only/up.sql new file mode 100644 index 0000000..d182374 --- /dev/null +++ b/migrations/2026-05-12-000000_date_backfill_null_only/up.sql @@ -0,0 +1,18 @@ +-- Narrow the date-backfill partial index to NULL-only rows. +-- +-- The original index (2026-05-06-000000_add_date_taken_source) also matched +-- `date_taken_source = 'fs_time'` so the drain could "re-resolve weak +-- entries when better tools become available." In practice the resolver +-- is deterministic on file bytes + filename + fs metadata: a row that +-- landed on fs_time once will land on fs_time again on every subsequent +-- tick. With `ORDER BY id ASC LIMIT 500`, the drain spun on the same +-- lowest-id fs_time rows in perpetuity, never advancing, while hammering +-- the SQLite write lock once per row and starving other writers (face +-- PATCHes were hitting busy_timeout and returning 500). Drop fs_time +-- from the eligibility set; if exiftool / a new filename pattern ever +-- comes online, a one-shot operator command can re-resolve. +DROP INDEX IF EXISTS idx_image_exif_date_backfill; + +CREATE INDEX idx_image_exif_date_backfill + ON image_exif (library_id, id) + WHERE date_taken IS NULL; diff --git a/src/database/mod.rs b/src/database/mod.rs index 32b6ab5..cf20ee9 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -414,14 +414,21 @@ pub trait ExifDao: Sync + Send { size_bytes: i64, ) -> Result<(), DbError>; - /// Return image_exif rows that need their `date_taken` re-resolved by - /// the canonical-date waterfall (see `crate::date_resolver`): - /// either no source ever ran (`date_taken IS NULL`), or only the - /// weakest fallback resolved it (`date_taken_source = 'fs_time'`). - /// Returns `(library_id, rel_path)`. The caller filters to its own - /// library on the way through; rows from other libraries fall to the - /// next library's tick. Backed by the partial index + /// Return image_exif rows that need their `date_taken` resolved by the + /// canonical-date waterfall (see `crate::date_resolver`): `date_taken + /// IS NULL`. Returns `(library_id, rel_path)`. The caller filters to + /// its own library on the way through; rows from other libraries fall + /// to the next library's tick. Backed by the partial index /// `idx_image_exif_date_backfill`. + /// + /// `fs_time`-sourced rows are intentionally excluded even though they + /// represent the weakest resolution: the resolver is deterministic on + /// file bytes + filename + fs metadata, so a row that landed on + /// fs_time once will land there again on every retry. Including them + /// in the drain caused it to spin on the same lowest-id rows forever + /// and starve other SQLite writers (face PATCHes hitting busy_timeout). + /// If a stronger tool comes online (exiftool install, new filename + /// regex), an operator can issue a one-shot re-resolve out-of-band. fn get_rows_needing_date_backfill( &mut self, context: &opentelemetry::Context, @@ -1240,11 +1247,10 @@ impl ExifDao for SqliteExifDao { let mut connection = self.connection.lock().expect("Unable to get ExifDao"); // The partial index is on `(library_id, id) WHERE date_taken - // IS NULL OR date_taken_source = 'fs_time'`, so the planner - // hits it directly when both predicates are present. + // IS NULL`, so the planner hits it directly. image_exif .filter(library_id.eq(library_id_val)) - .filter(date_taken.is_null().or(date_taken_source.eq("fs_time"))) + .filter(date_taken.is_null()) .select((library_id, rel_path)) .order(id.asc()) .limit(limit) @@ -2395,10 +2401,11 @@ mod exif_dao_tests { } #[test] - fn get_rows_needing_date_backfill_returns_null_and_fs_time() { + fn get_rows_needing_date_backfill_returns_null_only() { let mut dao = setup_two_libraries(); - // Each row exercises a different source: null, fs_time (eligible), - // filename and exif (skipped). + // Each row exercises a different source. Only NULL is eligible — + // fs_time was removed from the drain because re-resolving it is + // deterministic-no-op work that starves other writers. insert_row_with_source(&mut dao, 1, "main/null.jpg", None, None); insert_row_with_source(&mut dao, 1, "main/fs.jpg", Some(123), Some("fs_time")); insert_row_with_source(&mut dao, 1, "main/name.jpg", Some(456), Some("filename")); @@ -2408,9 +2415,8 @@ mod exif_dao_tests { let rows = dao.get_rows_needing_date_backfill(&ctx(), 1, 100).unwrap(); let paths: Vec = rows.into_iter().map(|(_, p)| p).collect(); - assert_eq!(paths.len(), 2, "expected null + fs_time eligible only"); + assert_eq!(paths.len(), 1, "expected only NULL-date rows"); assert!(paths.contains(&"main/null.jpg".to_string())); - assert!(paths.contains(&"main/fs.jpg".to_string())); } #[test]