faces: count distinct content_hash in stats total_photos #65
@@ -233,6 +233,8 @@ ImageApi owns the face data; Apollo (sibling repo) hosts the insightface inferen
|
|||||||
|
|
||||||
**Rerun preserves manual rows** (`POST /image/faces/{id}/rerun`): only `source='auto'` rows are deleted before re-running detection. `already_scanned` returns true on ANY row, so a photo whose only faces are manually drawn never auto-redetects.
|
**Rerun preserves manual rows** (`POST /image/faces/{id}/rerun`): only `source='auto'` rows are deleted before re-running detection. `already_scanned` returns true on ANY row, so a photo whose only faces are manually drawn never auto-redetects.
|
||||||
|
|
||||||
|
**Stats domain — content_hash, not file rows** (`FaceDao::stats` in `src/faces.rs`): `total_photos` counts `DISTINCT content_hash` over `image_exif` (filtered to image extensions and `content_hash IS NOT NULL`); `scanned` / `with_faces` / `no_faces` / `failed` likewise count `DISTINCT content_hash`, but over `face_detections`. Numerator and denominator must live in the same domain — `face_detections` is keyed on content_hash, so the same JPEG present at two rel_paths or in two libraries is scanned only once. Counting `image_exif` rows in the denominator inflated the total by one per duplicate file and produced a permanent gap (e.g. 1101/1103 with nothing actually pending). Hash-less rows are excluded from `total_photos` while they sit in the `backfill_unhashed_backlog` queue; if they were counted, the bar would stay pinned below 100% for the duration of that backfill even though those rows aren't pending detection yet — they're pending hashing.
|
||||||
|
|
||||||
Module map:
|
Module map:
|
||||||
- `src/faces.rs` — `FaceDao` trait + `SqliteFaceDao` impl, route handlers for `/faces/*`, `/image/faces/*`, `/persons/*`. Mirror of `tags.rs` layout.
|
- `src/faces.rs` — `FaceDao` trait + `SqliteFaceDao` impl, route handlers for `/faces/*`, `/image/faces/*`, `/persons/*`. Mirror of `tags.rs` layout.
|
||||||
- `src/face_watch.rs` — Tokio orchestration for the file-watch detect pass; `filter_excluded` (PathExcluder + image-extension filter), `read_image_bytes_for_detect` (RAW preview fallback).
|
- `src/face_watch.rs` — Tokio orchestration for the file-watch detect pass; `filter_excluded` (PathExcluder + image-extension filter), `read_image_bytes_for_detect` (RAW preview fallback).
|
||||||
|
|||||||
21
src/faces.rs
21
src/faces.rs
@@ -1045,20 +1045,33 @@ impl FaceDao for SqliteFaceDao {
|
|||||||
// SCANNED can actually reach 100%: videos sit in `image_exif` but
|
// SCANNED can actually reach 100%: videos sit in `image_exif` but
|
||||||
// never get a `face_detections` row, so counting them here
|
// never get a `face_detections` row, so counting them here
|
||||||
// permanently caps the percentage below 100%.
|
// permanently caps the percentage below 100%.
|
||||||
|
//
|
||||||
|
// Count DISTINCT content_hash (not rows) so the numerator
|
||||||
|
// (`scanned`, also distinct-content_hash) and denominator live
|
||||||
|
// in the same domain. Without this, a file present at multiple
|
||||||
|
// rel_paths or across libraries inflates total_photos by one
|
||||||
|
// per duplicate row while face_detections — keyed on
|
||||||
|
// content_hash — counts the bytes once, leaving a permanent
|
||||||
|
// gap (e.g. 1101/1103 with nothing actually pending). Rows
|
||||||
|
// with NULL content_hash are excluded; they're held in the
|
||||||
|
// hash-backfill backlog and counting them would pin the bar
|
||||||
|
// below 100% for the duration of that backfill.
|
||||||
let total_photos: i64 = {
|
let total_photos: i64 = {
|
||||||
let ext_predicate = image_path_predicate("rel_path");
|
let ext_predicate = image_path_predicate("rel_path");
|
||||||
let row: CountRow = if let Some(lib) = library_id {
|
let row: CountRow = if let Some(lib) = library_id {
|
||||||
let sql = format!(
|
let sql = format!(
|
||||||
"SELECT COUNT(*) AS count FROM image_exif \
|
"SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
|
||||||
WHERE library_id = ? AND {ext_predicate}"
|
WHERE library_id = ? AND content_hash IS NOT NULL AND {ext_predicate}"
|
||||||
);
|
);
|
||||||
diesel::sql_query(sql)
|
diesel::sql_query(sql)
|
||||||
.bind::<diesel::sql_types::Integer, _>(lib)
|
.bind::<diesel::sql_types::Integer, _>(lib)
|
||||||
.get_result(conn.deref_mut())
|
.get_result(conn.deref_mut())
|
||||||
.with_context(|| "stats: total_photos")?
|
.with_context(|| "stats: total_photos")?
|
||||||
} else {
|
} else {
|
||||||
let sql =
|
let sql = format!(
|
||||||
format!("SELECT COUNT(*) AS count FROM image_exif WHERE {ext_predicate}");
|
"SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
|
||||||
|
WHERE content_hash IS NOT NULL AND {ext_predicate}"
|
||||||
|
);
|
||||||
diesel::sql_query(sql)
|
diesel::sql_query(sql)
|
||||||
.get_result(conn.deref_mut())
|
.get_result(conn.deref_mut())
|
||||||
.with_context(|| "stats: total_photos")?
|
.with_context(|| "stats: total_photos")?
|
||||||
|
|||||||
Reference in New Issue
Block a user