Merge pull request 'feature/date-backfill-null-only' (#93) from feature/date-backfill-null-only into master
Reviewed-on: #93
This commit was merged in pull request #93.
This commit is contained in:
23
CLAUDE.md
23
CLAUDE.md
@@ -398,14 +398,21 @@ date) is unchanged.
|
||||
The `backfill_missing_date_taken` drain (`src/backfill.rs`) runs every
|
||||
watcher tick alongside `backfill_unhashed_backlog` (also `src/backfill.rs`). It loads up to
|
||||
`DATE_BACKFILL_MAX_PER_TICK` rows (default 500) where
|
||||
`date_taken IS NULL OR date_taken_source = 'fs_time'` (backed by the
|
||||
`idx_image_exif_date_backfill` partial index), runs the waterfall
|
||||
batch via `resolve_dates_batch`, and writes results via the
|
||||
`backfill_date_taken` DAO method (touches only `date_taken` +
|
||||
`date_taken_source` so EXIF / hash / perceptual columns are
|
||||
preserved). `filename`-sourced rows are intentionally not re-resolved
|
||||
— the regex is authoritative when it matches, and re-running exiftool
|
||||
won't change the answer.
|
||||
`date_taken IS NULL` (backed by the `idx_image_exif_date_backfill`
|
||||
partial index), runs the waterfall batch via `resolve_dates_batch`,
|
||||
and writes results via the `backfill_date_taken` DAO method (touches
|
||||
only `date_taken` + `date_taken_source` so EXIF / hash / perceptual
|
||||
columns are preserved). Resolved rows — including the ones the
|
||||
waterfall could only resolve via `fs_time` — are not re-eligible:
|
||||
the resolver is deterministic on file bytes + filename + fs metadata,
|
||||
so re-running on the same inputs lands on the same source every time.
|
||||
An earlier version included `date_taken_source = 'fs_time'` in the
|
||||
eligibility predicate, but with `ORDER BY id ASC LIMIT 500` it spun on
|
||||
the same lowest-id rows in perpetuity and held the SQLite write lock
|
||||
long enough to starve face-PATCH writers (their 5s busy_timeout expired, surfacing as HTTP 500s). If
|
||||
a stronger tool comes online (exiftool install, new filename regex),
|
||||
re-resolve out-of-band rather than re-introducing the steady-state
|
||||
eligibility.
|
||||
|
||||
`/memories` is a single SQL query against this column
|
||||
(`get_memories_in_window` in `src/database/mod.rs`), using
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
|
||||
|
||||
CREATE INDEX idx_image_exif_date_backfill
|
||||
ON image_exif (library_id, id)
|
||||
WHERE date_taken IS NULL OR date_taken_source = 'fs_time';
|
||||
18
migrations/2026-05-12-000000_date_backfill_null_only/up.sql
Normal file
18
migrations/2026-05-12-000000_date_backfill_null_only/up.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
-- Narrow the date-backfill partial index to NULL-only rows.
|
||||
--
|
||||
-- The original index (2026-05-06-000000_add_date_taken_source) also matched
|
||||
-- `date_taken_source = 'fs_time'` so the drain could "re-resolve weak
|
||||
-- entries when better tools become available." In practice the resolver
|
||||
-- is deterministic on file bytes + filename + fs metadata: a row that
|
||||
-- landed on fs_time once will land on fs_time again on every subsequent
|
||||
-- tick. With `ORDER BY id ASC LIMIT 500`, the drain spun on the same
|
||||
-- lowest-id fs_time rows in perpetuity, never advancing, while hammering
|
||||
-- the SQLite write lock once per row and starving other writers (face
|
||||
-- PATCHes were hitting busy_timeout and returning HTTP 500). Drop fs_time
|
||||
-- from the eligibility set; if exiftool / a new filename pattern ever
|
||||
-- comes online, a one-shot operator command can re-resolve.
|
||||
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
|
||||
|
||||
CREATE INDEX idx_image_exif_date_backfill
|
||||
ON image_exif (library_id, id)
|
||||
WHERE date_taken IS NULL;
|
||||
@@ -414,14 +414,21 @@ pub trait ExifDao: Sync + Send {
|
||||
size_bytes: i64,
|
||||
) -> Result<(), DbError>;
|
||||
|
||||
/// Return image_exif rows that need their `date_taken` re-resolved by
|
||||
/// the canonical-date waterfall (see `crate::date_resolver`):
|
||||
/// either no source ever ran (`date_taken IS NULL`), or only the
|
||||
/// weakest fallback resolved it (`date_taken_source = 'fs_time'`).
|
||||
/// Returns `(library_id, rel_path)`. The caller filters to its own
|
||||
/// library on the way through; rows from other libraries fall to the
|
||||
/// next library's tick. Backed by the partial index
|
||||
/// Return image_exif rows that need their `date_taken` resolved by the
|
||||
/// canonical-date waterfall (see `crate::date_resolver`): `date_taken
|
||||
/// IS NULL`. Returns `(library_id, rel_path)`. The caller filters to
|
||||
/// its own library on the way through; rows from other libraries fall
|
||||
/// to the next library's tick. Backed by the partial index
|
||||
/// `idx_image_exif_date_backfill`.
|
||||
///
|
||||
/// `fs_time`-sourced rows are intentionally excluded even though they
|
||||
/// represent the weakest resolution: the resolver is deterministic on
|
||||
/// file bytes + filename + fs metadata, so a row that landed on
|
||||
/// fs_time once will land there again on every retry. Including them
|
||||
/// in the drain caused it to spin on the same lowest-id rows forever
|
||||
/// and starve other SQLite writers (face PATCHes hitting busy_timeout).
|
||||
/// If a stronger tool comes online (exiftool install, new filename
|
||||
/// regex), an operator can issue a one-shot re-resolve out-of-band.
|
||||
fn get_rows_needing_date_backfill(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
@@ -1240,11 +1247,10 @@ impl ExifDao for SqliteExifDao {
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
// The partial index is on `(library_id, id) WHERE date_taken
|
||||
// IS NULL OR date_taken_source = 'fs_time'`, so the planner
|
||||
// hits it directly when both predicates are present.
|
||||
// IS NULL`, so the planner hits it directly.
|
||||
image_exif
|
||||
.filter(library_id.eq(library_id_val))
|
||||
.filter(date_taken.is_null().or(date_taken_source.eq("fs_time")))
|
||||
.filter(date_taken.is_null())
|
||||
.select((library_id, rel_path))
|
||||
.order(id.asc())
|
||||
.limit(limit)
|
||||
@@ -2395,10 +2401,11 @@ mod exif_dao_tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_rows_needing_date_backfill_returns_null_and_fs_time() {
|
||||
fn get_rows_needing_date_backfill_returns_null_only() {
|
||||
let mut dao = setup_two_libraries();
|
||||
// Each row exercises a different source: null, fs_time (eligible),
|
||||
// filename and exif (skipped).
|
||||
// Each row exercises a different source. Only NULL is eligible —
|
||||
// fs_time was removed from the drain because re-resolving it is
|
||||
// deterministic-no-op work that starves other writers.
|
||||
insert_row_with_source(&mut dao, 1, "main/null.jpg", None, None);
|
||||
insert_row_with_source(&mut dao, 1, "main/fs.jpg", Some(123), Some("fs_time"));
|
||||
insert_row_with_source(&mut dao, 1, "main/name.jpg", Some(456), Some("filename"));
|
||||
@@ -2408,9 +2415,8 @@ mod exif_dao_tests {
|
||||
|
||||
let rows = dao.get_rows_needing_date_backfill(&ctx(), 1, 100).unwrap();
|
||||
let paths: Vec<String> = rows.into_iter().map(|(_, p)| p).collect();
|
||||
assert_eq!(paths.len(), 2, "expected null + fs_time eligible only");
|
||||
assert_eq!(paths.len(), 1, "expected only NULL-date rows");
|
||||
assert!(paths.contains(&"main/null.jpg".to_string()));
|
||||
assert!(paths.contains(&"main/fs.jpg".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
26
src/faces.rs
26
src/faces.rs
@@ -2018,12 +2018,19 @@ async fn update_face_handler<D: FaceDao>(
|
||||
match dao.get_face(&span_context, id) {
|
||||
Ok(Some(r)) => r,
|
||||
Ok(None) => return HttpResponse::NotFound().finish(),
|
||||
Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
|
||||
Err(e) => {
|
||||
warn!("PATCH /image/faces/{}: 500 — get_face failed: {:#}", id, e);
|
||||
return HttpResponse::InternalServerError().body(e.to_string());
|
||||
}
|
||||
}
|
||||
};
|
||||
let library = match app_state.library_by_id(current.library_id) {
|
||||
Some(l) => l.clone(),
|
||||
None => {
|
||||
warn!(
|
||||
"PATCH /image/faces/{}: 500 — face row references unknown library_id {}",
|
||||
id, current.library_id
|
||||
);
|
||||
return HttpResponse::InternalServerError().body(format!(
|
||||
"face row references unknown library_id {}",
|
||||
current.library_id
|
||||
@@ -2106,7 +2113,14 @@ async fn update_face_handler<D: FaceDao>(
|
||||
let mut dao = face_dao.lock().expect("face dao lock");
|
||||
let row = match dao.update_face(&span_context, id, person_patch, bbox_patch, new_embedding) {
|
||||
Ok(r) => r,
|
||||
Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
|
||||
Err(e) => {
|
||||
// The full anyhow chain (`{:#}`) shows the diesel cause behind
|
||||
// the short context string we surface in the response body —
|
||||
// SQLITE_BUSY here usually means another DAO's writer held the
|
||||
// lock past `busy_timeout` (5s), which is invisible in `{}`.
|
||||
warn!("PATCH /image/faces/{}: 500 — update_face failed: {:#}", id, e);
|
||||
return HttpResponse::InternalServerError().body(e.to_string());
|
||||
}
|
||||
};
|
||||
// Hydrate person_name so the response shape matches GET /image/faces
|
||||
// — the carousel overlay does an optimistic replace on this row, and
|
||||
@@ -2114,7 +2128,13 @@ async fn update_face_handler<D: FaceDao>(
|
||||
// VFD label off the bbox even though the assignment didn't change.
|
||||
match hydrate_face_with_person(&mut *dao, &span_context, row) {
|
||||
Ok(joined) => HttpResponse::Ok().json(joined),
|
||||
Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"PATCH /image/faces/{}: 500 — hydrate_face_with_person failed: {:#}",
|
||||
id, e
|
||||
);
|
||||
HttpResponse::InternalServerError().body(e.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user