faces: count distinct content_hash in stats total_photos

face_detections is keyed on content_hash (one row per unique file
content, shared across libraries / duplicate paths), but total_photos
was COUNT(*) over image_exif rows. A file present at multiple rel_paths
or across libraries inflated the denominator without inflating the
numerator, leaving a permanent gap (e.g. 1101/1103 with nothing
actually pending detection).

Switch total_photos to COUNT(DISTINCT content_hash) so numerator and
denominator live in the same domain. Exclude rows with NULL
content_hash from the count — they're held in the hash-backfill
backlog, not the detection backlog, and counting them pins the bar
below 100% for the duration of the backfill pass.

CLAUDE.md: document the stats domain rule next to the rest of the
face-detection notes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-04-30 22:41:20 +00:00
parent d0833177c7
commit 323097c650
2 changed files with 19 additions and 4 deletions

View File

@@ -1045,20 +1045,33 @@ impl FaceDao for SqliteFaceDao {
// SCANNED can actually reach 100%: videos sit in `image_exif` but
// never get a `face_detections` row, so counting them here
// permanently caps the percentage below 100%.
//
// Count DISTINCT content_hash (not rows) so the numerator
// (`scanned`, also distinct-content_hash) and denominator live
// in the same domain. Without this, a file present at multiple
// rel_paths or across libraries inflates total_photos by one
// per duplicate row while face_detections — keyed on
// content_hash — counts the bytes once, leaving a permanent
// gap (e.g. 1101/1103 with nothing actually pending). Rows
// with NULL content_hash are excluded; they're held in the
// hash-backfill backlog and counting them would pin the bar
// below 100% for the duration of that backfill.
let total_photos: i64 = {
let ext_predicate = image_path_predicate("rel_path");
let row: CountRow = if let Some(lib) = library_id {
let sql = format!(
"SELECT COUNT(*) AS count FROM image_exif \
WHERE library_id = ? AND {ext_predicate}"
"SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
WHERE library_id = ? AND content_hash IS NOT NULL AND {ext_predicate}"
);
diesel::sql_query(sql)
.bind::<diesel::sql_types::Integer, _>(lib)
.get_result(conn.deref_mut())
.with_context(|| "stats: total_photos")?
} else {
let sql =
format!("SELECT COUNT(*) AS count FROM image_exif WHERE {ext_predicate}");
let sql = format!(
"SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
WHERE content_hash IS NOT NULL AND {ext_predicate}"
);
diesel::sql_query(sql)
.get_result(conn.deref_mut())
.with_context(|| "stats: total_photos")?