feature/face-stats-exclude-videos #64

Merged
cameron merged 2 commits from feature/face-stats-exclude-videos into master 2026-04-30 21:17:20 +00:00
3 changed files with 145 additions and 43 deletions

View File

@@ -383,7 +383,10 @@ mod tests {
// body cap and rejected normal-size photos before they reached
// the backend.
assert!(is_transient(&classify_error_response(408, "")));
assert!(is_transient(&classify_error_response(413, "<html>nginx</html>")));
assert!(is_transient(&classify_error_response(
413,
"<html>nginx</html>"
)));
assert!(is_transient(&classify_error_response(429, "{}")));
}

View File

@@ -20,9 +20,10 @@
use crate::Claims;
use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError};
use crate::exif;
use crate::database::schema::{face_detections, image_exif, persons};
use crate::error::IntoHttpError;
use crate::exif;
use crate::file_types;
use crate::libraries::{self, Library};
use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
use crate::state::AppState;
@@ -99,9 +100,30 @@ pub struct FaceDetectionRow {
pub created_at: i64,
}
/// SQL fragment restricting an `image_exif.rel_path` (or `face_detections.rel_path`)
/// column to image extensions. Videos register in `image_exif` with a
/// populated `content_hash` but can never produce a `face_detections` row
/// — applying this filter at query time keeps videos out of the per-tick
/// backlog drain (which would otherwise loop forever — `filter_excluded`
/// drops them client-side without writing a marker) and out of the SCANNED
/// stat denominator (so 100% is reachable).
fn image_path_predicate(col: &str) -> String {
let clauses: Vec<String> = file_types::IMAGE_EXTENSIONS
.iter()
.map(|ext| format!("lower({col}) LIKE '%.{ext}'"))
.collect();
format!("({})", clauses.join(" OR "))
}
/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's
/// `sql_query` requires a `QueryableByName` row type with explicit
/// column SQL types; using a tuple isn't supported.
#[derive(diesel::QueryableByName, Debug)]
struct CountRow {
#[diesel(sql_type = diesel::sql_types::BigInt)]
count: i64,
}
#[derive(diesel::QueryableByName, Debug)]
struct UnscannedRow {
#[diesel(sql_type = diesel::sql_types::Text)]
@@ -601,19 +623,25 @@ impl FaceDao for SqliteFaceDao {
// fire multiple detect calls for the same hash if it lives
// under several rel_paths in the same library. The
// anti-join (NOT EXISTS) drains hashes that have no row in
// face_detections at all.
let rows: Vec<(String, String)> = diesel::sql_query(
// face_detections at all. The image-extension predicate
// keeps videos out of the candidate set; without it they'd
// be filtered client-side and re-pulled every tick forever
// because no marker row is written for excluded paths.
let ext_predicate = image_path_predicate("rel_path");
let sql = format!(
"SELECT rel_path, content_hash \
FROM image_exif e \
WHERE library_id = ? \
AND content_hash IS NOT NULL \
AND {ext_predicate} \
AND NOT EXISTS ( \
SELECT 1 FROM face_detections f \
WHERE f.content_hash = e.content_hash \
) \
GROUP BY content_hash \
LIMIT ?",
)
LIMIT ?"
);
let rows: Vec<(String, String)> = diesel::sql_query(sql)
.bind::<diesel::sql_types::Integer, _>(library_id)
.bind::<diesel::sql_types::BigInt, _>(limit)
.load::<UnscannedRow>(conn.deref_mut())
@@ -1013,14 +1041,29 @@ impl FaceDao for SqliteFaceDao {
.first(conn.deref_mut())
.with_context(|| "stats: failed")?
};
// Image-extension filter mirrors `list_unscanned_candidates` so
// SCANNED can actually reach 100%: videos sit in `image_exif` but
// never get a `face_detections` row, so counting them here
// permanently caps the percentage below 100%.
let total_photos: i64 = {
let mut q = image_exif::table.into_boxed();
if let Some(lib) = library_id {
q = q.filter(image_exif::library_id.eq(lib));
}
q.select(diesel::dsl::count_star())
.first(conn.deref_mut())
let ext_predicate = image_path_predicate("rel_path");
let row: CountRow = if let Some(lib) = library_id {
let sql = format!(
"SELECT COUNT(*) AS count FROM image_exif \
WHERE library_id = ? AND {ext_predicate}"
);
diesel::sql_query(sql)
.bind::<diesel::sql_types::Integer, _>(lib)
.get_result(conn.deref_mut())
.with_context(|| "stats: total_photos")?
} else {
let sql =
format!("SELECT COUNT(*) AS count FROM image_exif WHERE {ext_predicate}");
diesel::sql_query(sql)
.get_result(conn.deref_mut())
.with_context(|| "stats: total_photos")?
};
row.count
};
let persons_count: i64 = persons::table
.select(diesel::dsl::count_star())
@@ -2284,8 +2327,7 @@ async fn update_face_handler<D: FaceDao>(
"PATCH /image/faces/{}: crop failed for {:?}: {:?}",
id, abs_path, e
);
return HttpResponse::BadRequest()
.body(format!("cannot crop new bbox: {}", e));
return HttpResponse::BadRequest().body(format!("cannot crop new bbox: {}", e));
}
};
let meta = DetectMeta {
@@ -2335,8 +2377,7 @@ async fn update_face_handler<D: FaceDao>(
return HttpResponse::ServiceUnavailable().body(format!("{}", e));
}
Err(FaceDetectError::Disabled) => {
return HttpResponse::ServiceUnavailable()
.body("face client disabled mid-flight");
return HttpResponse::ServiceUnavailable().body("face client disabled mid-flight");
}
}
}
@@ -3145,6 +3186,39 @@ mod tests {
assert_eq!(stats.with_faces, 0);
}
#[test]
fn stats_total_photos_excludes_videos() {
// SCANNED counts content_hashes in face_detections; total_photos
// must apply the same image-extension filter as the watcher
// backlog query so the percentage can reach 100%. Without this,
// videos sit in image_exif but never produce a face_detections
// row (Apollo decodes images only) and the bar caps below 100%.
let mut dao = fresh_dao();
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
diesel::sql_query(
"INSERT INTO image_exif \
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
(1, 'a.jpg', 'h-a', 0, 0), \
(1, 'b.JPEG', 'h-b', 0, 0), \
(1, 'movie.mp4', 'h-mp4', 0, 0), \
(1, 'clip.MOV', 'h-mov', 0, 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed image_exif");
let stats = dao.stats(&ctx(), Some(1)).expect("stats");
assert_eq!(
stats.total_photos, 2,
"videos should not count toward total"
);
}
#[test]
fn merge_persons_repoints_faces() {
let mut dao = fresh_dao();
@@ -3325,8 +3399,7 @@ mod tests {
)
.unwrap();
let row = seed_library_and_face(&mut dao, Some(p.id));
let joined =
hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned");
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned");
assert_eq!(joined.person_id, Some(p.id));
assert_eq!(joined.person_name.as_deref(), Some("Alice"));
// Bbox + confidence + source must round-trip — these are what
@@ -3345,8 +3418,7 @@ mod tests {
// previously-assigned row's serialization.
let mut dao = fresh_dao();
let row = seed_library_and_face(&mut dao, None);
let joined =
hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned");
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned");
assert!(joined.person_id.is_none());
assert!(joined.person_name.is_none());
}
@@ -3367,7 +3439,12 @@ mod tests {
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
// Seed image_exif: mix of hashed/unhashed/scanned/cross-library.
// Seed image_exif: mix of hashed/unhashed/scanned/cross-library,
// plus a video and a mixed-case image extension. Videos register
// in image_exif but can never produce a face_detections row, so
// the SQL must filter them out — otherwise the per-tick backlog
// drain re-pulls them every tick (no marker is ever written, so
// they loop forever) and the SCANNED stat is permanently capped.
diesel::sql_query(
"INSERT INTO image_exif \
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
@@ -3375,6 +3452,9 @@ mod tests {
(1, 'b.jpg', 'h-b', 0, 0), \
(1, 'c.jpg', NULL, 0, 0), \
(1, 'd.jpg', 'h-d', 0, 0), \
(1, 'movie.mp4', 'h-mp4', 0, 0), \
(1, 'clip.MOV', 'h-mov', 0, 0), \
(1, 'photo.JPG', 'h-jpg-upper', 0, 0), \
(2, 'e.jpg', 'h-e', 0, 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
@@ -3388,16 +3468,26 @@ mod tests {
.list_unscanned_candidates(&ctx(), 1, 10)
.expect("list unscanned");
let hashes: std::collections::HashSet<_> =
cands.iter().map(|(_, h)| h.clone()).collect();
let hashes: std::collections::HashSet<_> = cands.iter().map(|(_, h)| h.clone()).collect();
// Should contain a and d (hashed, unscanned, library 1).
// Should contain a, d, and the upper-case .JPG (image-extension
// match is case-insensitive).
assert!(hashes.contains("h-a"), "missing h-a: {:?}", hashes);
assert!(hashes.contains("h-d"), "missing h-d: {:?}", hashes);
// Should NOT contain b (scanned), c (no hash), e (other library).
assert!(
hashes.contains("h-jpg-upper"),
"missing h-jpg-upper: {:?}",
hashes
);
// Should NOT contain b (scanned), c (no hash), e (other library),
// or videos (mp4/mov are not image extensions).
assert!(!hashes.contains("h-b"), "expected h-b filtered (scanned)");
assert!(!hashes.contains("h-e"), "expected h-e filtered (other library)");
assert_eq!(cands.len(), 2, "unexpected candidates: {:?}", cands);
assert!(
!hashes.contains("h-e"),
"expected h-e filtered (other library)"
);
assert!(!hashes.contains("h-mp4"), "expected h-mp4 filtered (video)");
assert!(!hashes.contains("h-mov"), "expected h-mov filtered (video)");
assert_eq!(cands.len(), 3, "unexpected candidates: {:?}", cands);
}
}

View File

@@ -1253,20 +1253,29 @@ mod tests {
// Seed: two paths tagged, one path untagged.
dao.tagged_photos.borrow_mut().insert(
"a.jpg".into(),
vec![Tag { id: 1, name: "alpha".into(), created_time: 0 }],
vec![Tag {
id: 1,
name: "alpha".into(),
created_time: 0,
}],
);
dao.tagged_photos.borrow_mut().insert(
"b.jpg".into(),
vec![
Tag { id: 2, name: "beta".into(), created_time: 0 },
Tag { id: 3, name: "gamma".into(), created_time: 0 },
Tag {
id: 2,
name: "beta".into(),
created_time: 0,
},
Tag {
id: 3,
name: "gamma".into(),
created_time: 0,
},
],
);
let grouped = dao
.get_tags_grouped_by_paths(
&ctx,
&["a.jpg".into(), "b.jpg".into(), "c.jpg".into()],
)
.get_tags_grouped_by_paths(&ctx, &["a.jpg".into(), "b.jpg".into(), "c.jpg".into()])
.unwrap();
assert_eq!(grouped.get("a.jpg").map(|v| v.len()), Some(1));
assert_eq!(grouped.get("b.jpg").map(|v| v.len()), Some(2));