faces: exclude videos from backlog drain and SCANNED denominator
list_unscanned_candidates pulled every hashed image_exif row, including videos. filter_excluded then dropped them client-side without writing a marker, so the same set re-appeared every watcher tick — emitting the "backlog drain — running detection on N candidate(s)" log forever and producing no progress. face_stats.total_photos counted the same video rows in the denominator, so the SCANNED percentage was structurally capped below 100%. Add an image-extension SQL predicate (case-insensitive, sourced from file_types::IMAGE_EXTENSIONS) and apply it to both queries. Videos never enter the candidate set, total_photos counts only what can actually be scanned, and 100% becomes reachable. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
144
src/faces.rs
144
src/faces.rs
@@ -20,9 +20,10 @@
|
||||
|
||||
use crate::Claims;
|
||||
use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError};
|
||||
use crate::exif;
|
||||
use crate::database::schema::{face_detections, image_exif, persons};
|
||||
use crate::error::IntoHttpError;
|
||||
use crate::exif;
|
||||
use crate::file_types;
|
||||
use crate::libraries::{self, Library};
|
||||
use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
|
||||
use crate::state::AppState;
|
||||
@@ -99,9 +100,30 @@ pub struct FaceDetectionRow {
|
||||
pub created_at: i64,
|
||||
}
|
||||
|
||||
/// SQL fragment restricting an `image_exif.rel_path` (or `face_detections.rel_path`)
|
||||
/// column to image extensions. Videos register in `image_exif` with a
|
||||
/// populated `content_hash` but can never produce a `face_detections` row
|
||||
/// — applying this filter at query time keeps videos out of the per-tick
|
||||
/// backlog drain (which would otherwise loop forever — `filter_excluded`
|
||||
/// drops them client-side without writing a marker) and out of the SCANNED
|
||||
/// stat denominator (so 100% is reachable).
|
||||
fn image_path_predicate(col: &str) -> String {
|
||||
let clauses: Vec<String> = file_types::IMAGE_EXTENSIONS
|
||||
.iter()
|
||||
.map(|ext| format!("lower({col}) LIKE '%.{ext}'"))
|
||||
.collect();
|
||||
format!("({})", clauses.join(" OR "))
|
||||
}
|
||||
|
||||
/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's
|
||||
/// `sql_query` requires a `QueryableByName` row type with explicit
|
||||
/// column SQL types; using a tuple isn't supported.
|
||||
#[derive(diesel::QueryableByName, Debug)]
|
||||
struct CountRow {
|
||||
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||
count: i64,
|
||||
}
|
||||
|
||||
#[derive(diesel::QueryableByName, Debug)]
|
||||
struct UnscannedRow {
|
||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||
@@ -601,19 +623,25 @@ impl FaceDao for SqliteFaceDao {
|
||||
// fire multiple detect calls for the same hash if it lives
|
||||
// under several rel_paths in the same library. The
|
||||
// anti-join (NOT EXISTS) drains hashes that have no row in
|
||||
// face_detections at all.
|
||||
let rows: Vec<(String, String)> = diesel::sql_query(
|
||||
// face_detections at all. The image-extension predicate
|
||||
// keeps videos out of the candidate set; without it they'd
|
||||
// be filtered client-side and re-pulled every tick forever
|
||||
// because no marker row is written for excluded paths.
|
||||
let ext_predicate = image_path_predicate("rel_path");
|
||||
let sql = format!(
|
||||
"SELECT rel_path, content_hash \
|
||||
FROM image_exif e \
|
||||
WHERE library_id = ? \
|
||||
AND content_hash IS NOT NULL \
|
||||
AND {ext_predicate} \
|
||||
AND NOT EXISTS ( \
|
||||
SELECT 1 FROM face_detections f \
|
||||
WHERE f.content_hash = e.content_hash \
|
||||
) \
|
||||
GROUP BY content_hash \
|
||||
LIMIT ?",
|
||||
)
|
||||
LIMIT ?"
|
||||
);
|
||||
let rows: Vec<(String, String)> = diesel::sql_query(sql)
|
||||
.bind::<diesel::sql_types::Integer, _>(library_id)
|
||||
.bind::<diesel::sql_types::BigInt, _>(limit)
|
||||
.load::<UnscannedRow>(conn.deref_mut())
|
||||
@@ -1013,14 +1041,29 @@ impl FaceDao for SqliteFaceDao {
|
||||
.first(conn.deref_mut())
|
||||
.with_context(|| "stats: failed")?
|
||||
};
|
||||
// Image-extension filter mirrors `list_unscanned_candidates` so
|
||||
// SCANNED can actually reach 100%: videos sit in `image_exif` but
|
||||
// never get a `face_detections` row, so counting them here
|
||||
// permanently caps the percentage below 100%.
|
||||
let total_photos: i64 = {
|
||||
let mut q = image_exif::table.into_boxed();
|
||||
if let Some(lib) = library_id {
|
||||
q = q.filter(image_exif::library_id.eq(lib));
|
||||
}
|
||||
q.select(diesel::dsl::count_star())
|
||||
.first(conn.deref_mut())
|
||||
let ext_predicate = image_path_predicate("rel_path");
|
||||
let row: CountRow = if let Some(lib) = library_id {
|
||||
let sql = format!(
|
||||
"SELECT COUNT(*) AS count FROM image_exif \
|
||||
WHERE library_id = ? AND {ext_predicate}"
|
||||
);
|
||||
diesel::sql_query(sql)
|
||||
.bind::<diesel::sql_types::Integer, _>(lib)
|
||||
.get_result(conn.deref_mut())
|
||||
.with_context(|| "stats: total_photos")?
|
||||
} else {
|
||||
let sql =
|
||||
format!("SELECT COUNT(*) AS count FROM image_exif WHERE {ext_predicate}");
|
||||
diesel::sql_query(sql)
|
||||
.get_result(conn.deref_mut())
|
||||
.with_context(|| "stats: total_photos")?
|
||||
};
|
||||
row.count
|
||||
};
|
||||
let persons_count: i64 = persons::table
|
||||
.select(diesel::dsl::count_star())
|
||||
@@ -2284,8 +2327,7 @@ async fn update_face_handler<D: FaceDao>(
|
||||
"PATCH /image/faces/{}: crop failed for {:?}: {:?}",
|
||||
id, abs_path, e
|
||||
);
|
||||
return HttpResponse::BadRequest()
|
||||
.body(format!("cannot crop new bbox: {}", e));
|
||||
return HttpResponse::BadRequest().body(format!("cannot crop new bbox: {}", e));
|
||||
}
|
||||
};
|
||||
let meta = DetectMeta {
|
||||
@@ -2335,8 +2377,7 @@ async fn update_face_handler<D: FaceDao>(
|
||||
return HttpResponse::ServiceUnavailable().body(format!("{}", e));
|
||||
}
|
||||
Err(FaceDetectError::Disabled) => {
|
||||
return HttpResponse::ServiceUnavailable()
|
||||
.body("face client disabled mid-flight");
|
||||
return HttpResponse::ServiceUnavailable().body("face client disabled mid-flight");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3145,6 +3186,39 @@ mod tests {
|
||||
assert_eq!(stats.with_faces, 0);
|
||||
}
|
||||
|
||||
#[test]
fn stats_total_photos_excludes_videos() {
    // total_photos is the SCANNED denominator, so it must apply the same
    // image-extension filter as the watcher backlog query: videos land in
    // image_exif but never yield a face_detections row (Apollo decodes
    // images only), and counting them would permanently cap the
    // percentage below 100%.
    let mut dao = fresh_dao();

    let seed_library =
        "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
         VALUES (1, 'main', '/tmp', 0)";
    // Two images (one with an upper-case extension) plus two videos: only
    // the images may count toward the total.
    let seed_exif = "INSERT INTO image_exif \
         (library_id, rel_path, content_hash, created_time, last_modified) VALUES \
         (1, 'a.jpg', 'h-a', 0, 0), \
         (1, 'b.JPEG', 'h-b', 0, 0), \
         (1, 'movie.mp4', 'h-mp4', 0, 0), \
         (1, 'clip.MOV', 'h-mov', 0, 0)";

    for (sql, label) in [(seed_library, "seed libraries"), (seed_exif, "seed image_exif")] {
        diesel::sql_query(sql)
            .execute(dao.connection.lock().unwrap().deref_mut())
            .expect(label);
    }

    let stats = dao.stats(&ctx(), Some(1)).expect("stats");
    assert_eq!(stats.total_photos, 2, "videos should not count toward total");
}
|
||||
|
||||
#[test]
|
||||
fn merge_persons_repoints_faces() {
|
||||
let mut dao = fresh_dao();
|
||||
@@ -3325,8 +3399,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
let row = seed_library_and_face(&mut dao, Some(p.id));
|
||||
let joined =
|
||||
hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned");
|
||||
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned");
|
||||
assert_eq!(joined.person_id, Some(p.id));
|
||||
assert_eq!(joined.person_name.as_deref(), Some("Alice"));
|
||||
// Bbox + confidence + source must round-trip — these are what
|
||||
@@ -3345,8 +3418,7 @@ mod tests {
|
||||
// previously-assigned row's serialization.
|
||||
let mut dao = fresh_dao();
|
||||
let row = seed_library_and_face(&mut dao, None);
|
||||
let joined =
|
||||
hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned");
|
||||
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned");
|
||||
assert!(joined.person_id.is_none());
|
||||
assert!(joined.person_name.is_none());
|
||||
}
|
||||
@@ -3367,7 +3439,12 @@ mod tests {
|
||||
.execute(dao.connection.lock().unwrap().deref_mut())
|
||||
.expect("seed libraries");
|
||||
|
||||
// Seed image_exif: mix of hashed/unhashed/scanned/cross-library.
|
||||
// Seed image_exif: mix of hashed/unhashed/scanned/cross-library,
|
||||
// plus a video and a mixed-case image extension. Videos register
|
||||
// in image_exif but can never produce a face_detections row, so
|
||||
// the SQL must filter them out — otherwise the per-tick backlog
|
||||
// drain re-pulls them every tick (no marker is ever written, so
|
||||
// they loop forever) and the SCANNED stat is permanently capped.
|
||||
diesel::sql_query(
|
||||
"INSERT INTO image_exif \
|
||||
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
|
||||
@@ -3375,6 +3452,9 @@ mod tests {
|
||||
(1, 'b.jpg', 'h-b', 0, 0), \
|
||||
(1, 'c.jpg', NULL, 0, 0), \
|
||||
(1, 'd.jpg', 'h-d', 0, 0), \
|
||||
(1, 'movie.mp4', 'h-mp4', 0, 0), \
|
||||
(1, 'clip.MOV', 'h-mov', 0, 0), \
|
||||
(1, 'photo.JPG', 'h-jpg-upper', 0, 0), \
|
||||
(2, 'e.jpg', 'h-e', 0, 0)",
|
||||
)
|
||||
.execute(dao.connection.lock().unwrap().deref_mut())
|
||||
@@ -3388,16 +3468,26 @@ mod tests {
|
||||
.list_unscanned_candidates(&ctx(), 1, 10)
|
||||
.expect("list unscanned");
|
||||
|
||||
let hashes: std::collections::HashSet<_> =
|
||||
cands.iter().map(|(_, h)| h.clone()).collect();
|
||||
let hashes: std::collections::HashSet<_> = cands.iter().map(|(_, h)| h.clone()).collect();
|
||||
|
||||
// Should contain a and d (hashed, unscanned, library 1).
|
||||
// Should contain a, d, and the upper-case .JPG (image-extension
|
||||
// match is case-insensitive).
|
||||
assert!(hashes.contains("h-a"), "missing h-a: {:?}", hashes);
|
||||
assert!(hashes.contains("h-d"), "missing h-d: {:?}", hashes);
|
||||
// Should NOT contain b (scanned), c (no hash), e (other library).
|
||||
assert!(
|
||||
hashes.contains("h-jpg-upper"),
|
||||
"missing h-jpg-upper: {:?}",
|
||||
hashes
|
||||
);
|
||||
// Should NOT contain b (scanned), c (no hash), e (other library),
|
||||
// or videos (mp4/mov are not image extensions).
|
||||
assert!(!hashes.contains("h-b"), "expected h-b filtered (scanned)");
|
||||
assert!(!hashes.contains("h-e"), "expected h-e filtered (other library)");
|
||||
assert_eq!(cands.len(), 2, "unexpected candidates: {:?}", cands);
|
||||
assert!(
|
||||
!hashes.contains("h-e"),
|
||||
"expected h-e filtered (other library)"
|
||||
);
|
||||
assert!(!hashes.contains("h-mp4"), "expected h-mp4 filtered (video)");
|
||||
assert!(!hashes.contains("h-mov"), "expected h-mov filtered (video)");
|
||||
assert_eq!(cands.len(), 3, "unexpected candidates: {:?}", cands);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user