Drop fs_time from date-backfill eligibility

The drain queried `date_taken IS NULL OR date_taken_source = 'fs_time'`
ORDER BY id ASC LIMIT 500 every watcher tick. The resolver is
deterministic on file bytes + filename + fs metadata, so any row that
landed on fs_time once landed there again on every retry — the drain
spun on the same lowest-id rows in perpetuity, never advancing to
rows 501+ while still logging more_remain=true.

Side effect: 500 auto-commit UPDATEs per tick sustained the SQLite
write lock long enough that other writers on separate DAO connections
hit the 5s busy_timeout. Manifested as intermittent 500s on
PATCH /image/faces/{id} that succeeded on retry.

Narrow the partial index and query predicate to `date_taken IS NULL`.
If exiftool is later installed or a new filename regex lands, an
operator can re-resolve fs_time rows out-of-band rather than
re-introducing the steady-state churn.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-12 14:37:36 -04:00
parent 3427c2916c
commit 2d56047497
4 changed files with 59 additions and 23 deletions

View File

@@ -398,14 +398,21 @@ date) is unchanged.
The `backfill_missing_date_taken` drain (`src/backfill.rs`) runs every The `backfill_missing_date_taken` drain (`src/backfill.rs`) runs every
watcher tick alongside `backfill_unhashed_backlog` (also `src/backfill.rs`). It loads up to watcher tick alongside `backfill_unhashed_backlog` (also `src/backfill.rs`). It loads up to
`DATE_BACKFILL_MAX_PER_TICK` rows (default 500) where `DATE_BACKFILL_MAX_PER_TICK` rows (default 500) where
`date_taken IS NULL OR date_taken_source = 'fs_time'` (backed by the `date_taken IS NULL` (backed by the `idx_image_exif_date_backfill`
`idx_image_exif_date_backfill` partial index), runs the waterfall partial index), runs the waterfall batch via `resolve_dates_batch`,
batch via `resolve_dates_batch`, and writes results via the and writes results via the `backfill_date_taken` DAO method (touches
`backfill_date_taken` DAO method (touches only `date_taken` + only `date_taken` + `date_taken_source` so EXIF / hash / perceptual
`date_taken_source` so EXIF / hash / perceptual columns are columns are preserved). Resolved rows — including the ones the
preserved). `filename`-sourced rows are intentionally not re-resolved waterfall could only resolve via `fs_time` — are not re-eligible:
the regex is authoritative when it matches, and re-running exiftool the resolver is deterministic on file bytes + filename + fs metadata,
won't change the answer. so re-running on the same inputs lands on the same source every time.
An earlier version included `date_taken_source = 'fs_time'` in the
eligibility predicate, but with `ORDER BY id ASC LIMIT 500` it spun on
the same lowest-id rows in perpetuity and held the SQLite write lock
long enough to starve face-PATCH writers (5s busy_timeout → 500). If
a stronger tool comes online (exiftool install, new filename regex),
re-resolve out-of-band rather than re-introducing the steady-state
churn.
`/memories` is a single SQL query against this column `/memories` is a single SQL query against this column
(`get_memories_in_window` in `src/database/mod.rs`), using (`get_memories_in_window` in `src/database/mod.rs`), using

View File

@@ -0,0 +1,5 @@
-- Revert step: restore the original broad partial index that also matched
-- `date_taken_source = 'fs_time'` rows (the pre-narrowing eligibility set).
-- NOTE(review): presumed to be the migration's down.sql from the
-- `@@ -0,0 +1,5 @@` new-file hunk — confirm against the migration directory.
-- Rolling back re-widens drain eligibility and re-introduces the fs_time
-- re-resolution churn this commit removes.
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
CREATE INDEX idx_image_exif_date_backfill
ON image_exif (library_id, id)
WHERE date_taken IS NULL OR date_taken_source = 'fs_time';

View File

@@ -0,0 +1,18 @@
-- Narrow the date-backfill partial index to NULL-only rows.
--
-- The original index (2026-05-06-000000_add_date_taken_source) also matched
-- `date_taken_source = 'fs_time'` so the drain could "re-resolve weak
-- entries when better tools become available." In practice the resolver
-- is deterministic on file bytes + filename + fs metadata: a row that
-- landed on fs_time once will land on fs_time again on every subsequent
-- tick. With `ORDER BY id ASC LIMIT 500`, the drain spun on the same
-- lowest-id fs_time rows in perpetuity, never advancing, while hammering
-- the SQLite write lock once per row and starving other writers (face
-- PATCHes were hitting busy_timeout and returning 500). Drop fs_time
-- from the eligibility set; if exiftool / a new filename pattern ever
-- comes online, a one-shot operator command can re-resolve.
-- Rebuild with the NULL-only predicate: once a row has any
-- date_taken_source (including fs_time), it is never re-eligible for the
-- drain. DROP uses IF EXISTS so the migration also succeeds on databases
-- where the original index was never created.
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
CREATE INDEX idx_image_exif_date_backfill
ON image_exif (library_id, id)
WHERE date_taken IS NULL;

View File

@@ -414,14 +414,21 @@ pub trait ExifDao: Sync + Send {
size_bytes: i64, size_bytes: i64,
) -> Result<(), DbError>; ) -> Result<(), DbError>;
/// Return image_exif rows that need their `date_taken` re-resolved by /// Return image_exif rows that need their `date_taken` resolved by the
/// the canonical-date waterfall (see `crate::date_resolver`): /// canonical-date waterfall (see `crate::date_resolver`): `date_taken
/// either no source ever ran (`date_taken IS NULL`), or only the /// IS NULL`. Returns `(library_id, rel_path)`. The caller filters to
/// weakest fallback resolved it (`date_taken_source = 'fs_time'`). /// its own library on the way through; rows from other libraries fall
/// Returns `(library_id, rel_path)`. The caller filters to its own /// to the next library's tick. Backed by the partial index
/// library on the way through; rows from other libraries fall to the
/// next library's tick. Backed by the partial index
/// `idx_image_exif_date_backfill`. /// `idx_image_exif_date_backfill`.
///
/// `fs_time`-sourced rows are intentionally excluded even though they
/// represent the weakest resolution: the resolver is deterministic on
/// file bytes + filename + fs metadata, so a row that landed on
/// fs_time once will land there again on every retry. Including them
/// in the drain caused it to spin on the same lowest-id rows forever
/// and starve other SQLite writers (face PATCHes hitting busy_timeout).
/// If a stronger tool comes online (exiftool install, new filename
/// regex), an operator can issue a one-shot re-resolve out-of-band.
fn get_rows_needing_date_backfill( fn get_rows_needing_date_backfill(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
@@ -1240,11 +1247,10 @@ impl ExifDao for SqliteExifDao {
let mut connection = self.connection.lock().expect("Unable to get ExifDao"); let mut connection = self.connection.lock().expect("Unable to get ExifDao");
// The partial index is on `(library_id, id) WHERE date_taken // The partial index is on `(library_id, id) WHERE date_taken
// IS NULL OR date_taken_source = 'fs_time'`, so the planner // IS NULL`, so the planner hits it directly.
// hits it directly when both predicates are present.
image_exif image_exif
.filter(library_id.eq(library_id_val)) .filter(library_id.eq(library_id_val))
.filter(date_taken.is_null().or(date_taken_source.eq("fs_time"))) .filter(date_taken.is_null())
.select((library_id, rel_path)) .select((library_id, rel_path))
.order(id.asc()) .order(id.asc())
.limit(limit) .limit(limit)
@@ -2395,10 +2401,11 @@ mod exif_dao_tests {
} }
#[test] #[test]
fn get_rows_needing_date_backfill_returns_null_and_fs_time() { fn get_rows_needing_date_backfill_returns_null_only() {
let mut dao = setup_two_libraries(); let mut dao = setup_two_libraries();
// Each row exercises a different source: null, fs_time (eligible), // Each row exercises a different source. Only NULL is eligible
// filename and exif (skipped). // fs_time was removed from the drain because re-resolving it is
// deterministic-no-op work that starves other writers.
insert_row_with_source(&mut dao, 1, "main/null.jpg", None, None); insert_row_with_source(&mut dao, 1, "main/null.jpg", None, None);
insert_row_with_source(&mut dao, 1, "main/fs.jpg", Some(123), Some("fs_time")); insert_row_with_source(&mut dao, 1, "main/fs.jpg", Some(123), Some("fs_time"));
insert_row_with_source(&mut dao, 1, "main/name.jpg", Some(456), Some("filename")); insert_row_with_source(&mut dao, 1, "main/name.jpg", Some(456), Some("filename"));
@@ -2408,9 +2415,8 @@ mod exif_dao_tests {
let rows = dao.get_rows_needing_date_backfill(&ctx(), 1, 100).unwrap(); let rows = dao.get_rows_needing_date_backfill(&ctx(), 1, 100).unwrap();
let paths: Vec<String> = rows.into_iter().map(|(_, p)| p).collect(); let paths: Vec<String> = rows.into_iter().map(|(_, p)| p).collect();
assert_eq!(paths.len(), 2, "expected null + fs_time eligible only"); assert_eq!(paths.len(), 1, "expected only NULL-date rows");
assert!(paths.contains(&"main/null.jpg".to_string())); assert!(paths.contains(&"main/null.jpg".to_string()));
assert!(paths.contains(&"main/fs.jpg".to_string()));
} }
#[test] #[test]