feature/face-stats-exclude-videos #64
@@ -383,7 +383,10 @@ mod tests {
|
|||||||
// body cap and rejected normal-size photos before they reached
|
// body cap and rejected normal-size photos before they reached
|
||||||
// the backend.
|
// the backend.
|
||||||
assert!(is_transient(&classify_error_response(408, "")));
|
assert!(is_transient(&classify_error_response(408, "")));
|
||||||
assert!(is_transient(&classify_error_response(413, "<html>nginx</html>")));
|
assert!(is_transient(&classify_error_response(
|
||||||
|
413,
|
||||||
|
"<html>nginx</html>"
|
||||||
|
)));
|
||||||
assert!(is_transient(&classify_error_response(429, "{}")));
|
assert!(is_transient(&classify_error_response(429, "{}")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
160
src/faces.rs
160
src/faces.rs
@@ -20,9 +20,10 @@
|
|||||||
|
|
||||||
use crate::Claims;
|
use crate::Claims;
|
||||||
use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError};
|
use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError};
|
||||||
use crate::exif;
|
|
||||||
use crate::database::schema::{face_detections, image_exif, persons};
|
use crate::database::schema::{face_detections, image_exif, persons};
|
||||||
use crate::error::IntoHttpError;
|
use crate::error::IntoHttpError;
|
||||||
|
use crate::exif;
|
||||||
|
use crate::file_types;
|
||||||
use crate::libraries::{self, Library};
|
use crate::libraries::{self, Library};
|
||||||
use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
|
use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
|
||||||
use crate::state::AppState;
|
use crate::state::AppState;
|
||||||
@@ -99,9 +100,30 @@ pub struct FaceDetectionRow {
|
|||||||
pub created_at: i64,
|
pub created_at: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// SQL fragment restricting an `image_exif.rel_path` (or `face_detections.rel_path`)
|
||||||
|
/// column to image extensions. Videos register in `image_exif` with a
|
||||||
|
/// populated `content_hash` but can never produce a `face_detections` row
|
||||||
|
/// — applying this filter at query time keeps videos out of the per-tick
|
||||||
|
/// backlog drain (which would otherwise loop forever — `filter_excluded`
|
||||||
|
/// drops them client-side without writing a marker) and out of the SCANNED
|
||||||
|
/// stat denominator (so 100% is reachable).
|
||||||
|
fn image_path_predicate(col: &str) -> String {
|
||||||
|
let clauses: Vec<String> = file_types::IMAGE_EXTENSIONS
|
||||||
|
.iter()
|
||||||
|
.map(|ext| format!("lower({col}) LIKE '%.{ext}'"))
|
||||||
|
.collect();
|
||||||
|
format!("({})", clauses.join(" OR "))
|
||||||
|
}
|
||||||
|
|
||||||
/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's
|
/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's
|
||||||
/// `sql_query` requires a `QueryableByName` row type with explicit
|
/// `sql_query` requires a `QueryableByName` row type with explicit
|
||||||
/// column SQL types; using a tuple isn't supported.
|
/// column SQL types; using a tuple isn't supported.
|
||||||
|
#[derive(diesel::QueryableByName, Debug)]
|
||||||
|
struct CountRow {
|
||||||
|
#[diesel(sql_type = diesel::sql_types::BigInt)]
|
||||||
|
count: i64,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(diesel::QueryableByName, Debug)]
|
#[derive(diesel::QueryableByName, Debug)]
|
||||||
struct UnscannedRow {
|
struct UnscannedRow {
|
||||||
#[diesel(sql_type = diesel::sql_types::Text)]
|
#[diesel(sql_type = diesel::sql_types::Text)]
|
||||||
@@ -601,26 +623,32 @@ impl FaceDao for SqliteFaceDao {
|
|||||||
// fire multiple detect calls for the same hash if it lives
|
// fire multiple detect calls for the same hash if it lives
|
||||||
// under several rel_paths in the same library. The
|
// under several rel_paths in the same library. The
|
||||||
// anti-join (NOT EXISTS) drains hashes that have no row in
|
// anti-join (NOT EXISTS) drains hashes that have no row in
|
||||||
// face_detections at all.
|
// face_detections at all. The image-extension predicate
|
||||||
let rows: Vec<(String, String)> = diesel::sql_query(
|
// keeps videos out of the candidate set; without it they'd
|
||||||
|
// be filtered client-side and re-pulled every tick forever
|
||||||
|
// because no marker row is written for excluded paths.
|
||||||
|
let ext_predicate = image_path_predicate("rel_path");
|
||||||
|
let sql = format!(
|
||||||
"SELECT rel_path, content_hash \
|
"SELECT rel_path, content_hash \
|
||||||
FROM image_exif e \
|
FROM image_exif e \
|
||||||
WHERE library_id = ? \
|
WHERE library_id = ? \
|
||||||
AND content_hash IS NOT NULL \
|
AND content_hash IS NOT NULL \
|
||||||
|
AND {ext_predicate} \
|
||||||
AND NOT EXISTS ( \
|
AND NOT EXISTS ( \
|
||||||
SELECT 1 FROM face_detections f \
|
SELECT 1 FROM face_detections f \
|
||||||
WHERE f.content_hash = e.content_hash \
|
WHERE f.content_hash = e.content_hash \
|
||||||
) \
|
) \
|
||||||
GROUP BY content_hash \
|
GROUP BY content_hash \
|
||||||
LIMIT ?",
|
LIMIT ?"
|
||||||
)
|
);
|
||||||
.bind::<diesel::sql_types::Integer, _>(library_id)
|
let rows: Vec<(String, String)> = diesel::sql_query(sql)
|
||||||
.bind::<diesel::sql_types::BigInt, _>(limit)
|
.bind::<diesel::sql_types::Integer, _>(library_id)
|
||||||
.load::<UnscannedRow>(conn.deref_mut())
|
.bind::<diesel::sql_types::BigInt, _>(limit)
|
||||||
.with_context(|| "list_unscanned_candidates")?
|
.load::<UnscannedRow>(conn.deref_mut())
|
||||||
.into_iter()
|
.with_context(|| "list_unscanned_candidates")?
|
||||||
.map(|r| (r.rel_path, r.content_hash))
|
.into_iter()
|
||||||
.collect();
|
.map(|r| (r.rel_path, r.content_hash))
|
||||||
|
.collect();
|
||||||
Ok(rows)
|
Ok(rows)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -1013,14 +1041,29 @@ impl FaceDao for SqliteFaceDao {
|
|||||||
.first(conn.deref_mut())
|
.first(conn.deref_mut())
|
||||||
.with_context(|| "stats: failed")?
|
.with_context(|| "stats: failed")?
|
||||||
};
|
};
|
||||||
|
// Image-extension filter mirrors `list_unscanned_candidates` so
|
||||||
|
// SCANNED can actually reach 100%: videos sit in `image_exif` but
|
||||||
|
// never get a `face_detections` row, so counting them here
|
||||||
|
// permanently caps the percentage below 100%.
|
||||||
let total_photos: i64 = {
|
let total_photos: i64 = {
|
||||||
let mut q = image_exif::table.into_boxed();
|
let ext_predicate = image_path_predicate("rel_path");
|
||||||
if let Some(lib) = library_id {
|
let row: CountRow = if let Some(lib) = library_id {
|
||||||
q = q.filter(image_exif::library_id.eq(lib));
|
let sql = format!(
|
||||||
}
|
"SELECT COUNT(*) AS count FROM image_exif \
|
||||||
q.select(diesel::dsl::count_star())
|
WHERE library_id = ? AND {ext_predicate}"
|
||||||
.first(conn.deref_mut())
|
);
|
||||||
.with_context(|| "stats: total_photos")?
|
diesel::sql_query(sql)
|
||||||
|
.bind::<diesel::sql_types::Integer, _>(lib)
|
||||||
|
.get_result(conn.deref_mut())
|
||||||
|
.with_context(|| "stats: total_photos")?
|
||||||
|
} else {
|
||||||
|
let sql =
|
||||||
|
format!("SELECT COUNT(*) AS count FROM image_exif WHERE {ext_predicate}");
|
||||||
|
diesel::sql_query(sql)
|
||||||
|
.get_result(conn.deref_mut())
|
||||||
|
.with_context(|| "stats: total_photos")?
|
||||||
|
};
|
||||||
|
row.count
|
||||||
};
|
};
|
||||||
let persons_count: i64 = persons::table
|
let persons_count: i64 = persons::table
|
||||||
.select(diesel::dsl::count_star())
|
.select(diesel::dsl::count_star())
|
||||||
@@ -2284,8 +2327,7 @@ async fn update_face_handler<D: FaceDao>(
|
|||||||
"PATCH /image/faces/{}: crop failed for {:?}: {:?}",
|
"PATCH /image/faces/{}: crop failed for {:?}: {:?}",
|
||||||
id, abs_path, e
|
id, abs_path, e
|
||||||
);
|
);
|
||||||
return HttpResponse::BadRequest()
|
return HttpResponse::BadRequest().body(format!("cannot crop new bbox: {}", e));
|
||||||
.body(format!("cannot crop new bbox: {}", e));
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let meta = DetectMeta {
|
let meta = DetectMeta {
|
||||||
@@ -2335,8 +2377,7 @@ async fn update_face_handler<D: FaceDao>(
|
|||||||
return HttpResponse::ServiceUnavailable().body(format!("{}", e));
|
return HttpResponse::ServiceUnavailable().body(format!("{}", e));
|
||||||
}
|
}
|
||||||
Err(FaceDetectError::Disabled) => {
|
Err(FaceDetectError::Disabled) => {
|
||||||
return HttpResponse::ServiceUnavailable()
|
return HttpResponse::ServiceUnavailable().body("face client disabled mid-flight");
|
||||||
.body("face client disabled mid-flight");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3145,6 +3186,39 @@ mod tests {
|
|||||||
assert_eq!(stats.with_faces, 0);
|
assert_eq!(stats.with_faces, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stats_total_photos_excludes_videos() {
|
||||||
|
// SCANNED counts content_hashes in face_detections; total_photos
|
||||||
|
// must apply the same image-extension filter as the watcher
|
||||||
|
// backlog query so the percentage can reach 100%. Without this,
|
||||||
|
// videos sit in image_exif but never produce a face_detections
|
||||||
|
// row (Apollo decodes images only) and the bar caps below 100%.
|
||||||
|
let mut dao = fresh_dao();
|
||||||
|
diesel::sql_query(
|
||||||
|
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
|
||||||
|
VALUES (1, 'main', '/tmp', 0)",
|
||||||
|
)
|
||||||
|
.execute(dao.connection.lock().unwrap().deref_mut())
|
||||||
|
.expect("seed libraries");
|
||||||
|
|
||||||
|
diesel::sql_query(
|
||||||
|
"INSERT INTO image_exif \
|
||||||
|
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
|
||||||
|
(1, 'a.jpg', 'h-a', 0, 0), \
|
||||||
|
(1, 'b.JPEG', 'h-b', 0, 0), \
|
||||||
|
(1, 'movie.mp4', 'h-mp4', 0, 0), \
|
||||||
|
(1, 'clip.MOV', 'h-mov', 0, 0)",
|
||||||
|
)
|
||||||
|
.execute(dao.connection.lock().unwrap().deref_mut())
|
||||||
|
.expect("seed image_exif");
|
||||||
|
|
||||||
|
let stats = dao.stats(&ctx(), Some(1)).expect("stats");
|
||||||
|
assert_eq!(
|
||||||
|
stats.total_photos, 2,
|
||||||
|
"videos should not count toward total"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn merge_persons_repoints_faces() {
|
fn merge_persons_repoints_faces() {
|
||||||
let mut dao = fresh_dao();
|
let mut dao = fresh_dao();
|
||||||
@@ -3325,8 +3399,7 @@ mod tests {
|
|||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let row = seed_library_and_face(&mut dao, Some(p.id));
|
let row = seed_library_and_face(&mut dao, Some(p.id));
|
||||||
let joined =
|
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned");
|
||||||
hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned");
|
|
||||||
assert_eq!(joined.person_id, Some(p.id));
|
assert_eq!(joined.person_id, Some(p.id));
|
||||||
assert_eq!(joined.person_name.as_deref(), Some("Alice"));
|
assert_eq!(joined.person_name.as_deref(), Some("Alice"));
|
||||||
// Bbox + confidence + source must round-trip — these are what
|
// Bbox + confidence + source must round-trip — these are what
|
||||||
@@ -3345,8 +3418,7 @@ mod tests {
|
|||||||
// previously-assigned row's serialization.
|
// previously-assigned row's serialization.
|
||||||
let mut dao = fresh_dao();
|
let mut dao = fresh_dao();
|
||||||
let row = seed_library_and_face(&mut dao, None);
|
let row = seed_library_and_face(&mut dao, None);
|
||||||
let joined =
|
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned");
|
||||||
hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned");
|
|
||||||
assert!(joined.person_id.is_none());
|
assert!(joined.person_id.is_none());
|
||||||
assert!(joined.person_name.is_none());
|
assert!(joined.person_name.is_none());
|
||||||
}
|
}
|
||||||
@@ -3367,7 +3439,12 @@ mod tests {
|
|||||||
.execute(dao.connection.lock().unwrap().deref_mut())
|
.execute(dao.connection.lock().unwrap().deref_mut())
|
||||||
.expect("seed libraries");
|
.expect("seed libraries");
|
||||||
|
|
||||||
// Seed image_exif: mix of hashed/unhashed/scanned/cross-library.
|
// Seed image_exif: mix of hashed/unhashed/scanned/cross-library,
|
||||||
|
// plus a video and a mixed-case image extension. Videos register
|
||||||
|
// in image_exif but can never produce a face_detections row, so
|
||||||
|
// the SQL must filter them out — otherwise the per-tick backlog
|
||||||
|
// drain re-pulls them every tick (no marker is ever written, so
|
||||||
|
// they loop forever) and the SCANNED stat is permanently capped.
|
||||||
diesel::sql_query(
|
diesel::sql_query(
|
||||||
"INSERT INTO image_exif \
|
"INSERT INTO image_exif \
|
||||||
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
|
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
|
||||||
@@ -3375,6 +3452,9 @@ mod tests {
|
|||||||
(1, 'b.jpg', 'h-b', 0, 0), \
|
(1, 'b.jpg', 'h-b', 0, 0), \
|
||||||
(1, 'c.jpg', NULL, 0, 0), \
|
(1, 'c.jpg', NULL, 0, 0), \
|
||||||
(1, 'd.jpg', 'h-d', 0, 0), \
|
(1, 'd.jpg', 'h-d', 0, 0), \
|
||||||
|
(1, 'movie.mp4', 'h-mp4', 0, 0), \
|
||||||
|
(1, 'clip.MOV', 'h-mov', 0, 0), \
|
||||||
|
(1, 'photo.JPG', 'h-jpg-upper', 0, 0), \
|
||||||
(2, 'e.jpg', 'h-e', 0, 0)",
|
(2, 'e.jpg', 'h-e', 0, 0)",
|
||||||
)
|
)
|
||||||
.execute(dao.connection.lock().unwrap().deref_mut())
|
.execute(dao.connection.lock().unwrap().deref_mut())
|
||||||
@@ -3388,16 +3468,26 @@ mod tests {
|
|||||||
.list_unscanned_candidates(&ctx(), 1, 10)
|
.list_unscanned_candidates(&ctx(), 1, 10)
|
||||||
.expect("list unscanned");
|
.expect("list unscanned");
|
||||||
|
|
||||||
let hashes: std::collections::HashSet<_> =
|
let hashes: std::collections::HashSet<_> = cands.iter().map(|(_, h)| h.clone()).collect();
|
||||||
cands.iter().map(|(_, h)| h.clone()).collect();
|
|
||||||
|
|
||||||
// Should contain a and d (hashed, unscanned, library 1).
|
// Should contain a, d, and the upper-case .JPG (image-extension
|
||||||
|
// match is case-insensitive).
|
||||||
assert!(hashes.contains("h-a"), "missing h-a: {:?}", hashes);
|
assert!(hashes.contains("h-a"), "missing h-a: {:?}", hashes);
|
||||||
assert!(hashes.contains("h-d"), "missing h-d: {:?}", hashes);
|
assert!(hashes.contains("h-d"), "missing h-d: {:?}", hashes);
|
||||||
// Should NOT contain b (scanned), c (no hash), e (other library).
|
assert!(
|
||||||
|
hashes.contains("h-jpg-upper"),
|
||||||
|
"missing h-jpg-upper: {:?}",
|
||||||
|
hashes
|
||||||
|
);
|
||||||
|
// Should NOT contain b (scanned), c (no hash), e (other library),
|
||||||
|
// or videos (mp4/mov are not image extensions).
|
||||||
assert!(!hashes.contains("h-b"), "expected h-b filtered (scanned)");
|
assert!(!hashes.contains("h-b"), "expected h-b filtered (scanned)");
|
||||||
assert!(!hashes.contains("h-e"), "expected h-e filtered (other library)");
|
assert!(
|
||||||
assert_eq!(cands.len(), 2, "unexpected candidates: {:?}", cands);
|
!hashes.contains("h-e"),
|
||||||
|
"expected h-e filtered (other library)"
|
||||||
|
);
|
||||||
|
assert!(!hashes.contains("h-mp4"), "expected h-mp4 filtered (video)");
|
||||||
|
assert!(!hashes.contains("h-mov"), "expected h-mov filtered (video)");
|
||||||
|
assert_eq!(cands.len(), 3, "unexpected candidates: {:?}", cands);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
23
src/tags.rs
23
src/tags.rs
@@ -1253,20 +1253,29 @@ mod tests {
|
|||||||
// Seed: two paths tagged, one path untagged.
|
// Seed: two paths tagged, one path untagged.
|
||||||
dao.tagged_photos.borrow_mut().insert(
|
dao.tagged_photos.borrow_mut().insert(
|
||||||
"a.jpg".into(),
|
"a.jpg".into(),
|
||||||
vec![Tag { id: 1, name: "alpha".into(), created_time: 0 }],
|
vec![Tag {
|
||||||
|
id: 1,
|
||||||
|
name: "alpha".into(),
|
||||||
|
created_time: 0,
|
||||||
|
}],
|
||||||
);
|
);
|
||||||
dao.tagged_photos.borrow_mut().insert(
|
dao.tagged_photos.borrow_mut().insert(
|
||||||
"b.jpg".into(),
|
"b.jpg".into(),
|
||||||
vec![
|
vec![
|
||||||
Tag { id: 2, name: "beta".into(), created_time: 0 },
|
Tag {
|
||||||
Tag { id: 3, name: "gamma".into(), created_time: 0 },
|
id: 2,
|
||||||
|
name: "beta".into(),
|
||||||
|
created_time: 0,
|
||||||
|
},
|
||||||
|
Tag {
|
||||||
|
id: 3,
|
||||||
|
name: "gamma".into(),
|
||||||
|
created_time: 0,
|
||||||
|
},
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
let grouped = dao
|
let grouped = dao
|
||||||
.get_tags_grouped_by_paths(
|
.get_tags_grouped_by_paths(&ctx, &["a.jpg".into(), "b.jpg".into(), "c.jpg".into()])
|
||||||
&ctx,
|
|
||||||
&["a.jpg".into(), "b.jpg".into(), "c.jpg".into()],
|
|
||||||
)
|
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(grouped.get("a.jpg").map(|v| v.len()), Some(1));
|
assert_eq!(grouped.get("a.jpg").map(|v| v.len()), Some(1));
|
||||||
assert_eq!(grouped.get("b.jpg").map(|v| v.len()), Some(2));
|
assert_eq!(grouped.get("b.jpg").map(|v| v.len()), Some(2));
|
||||||
|
|||||||
Reference in New Issue
Block a user