Face Recognition / People Integration #61
src/main.rs (+100, -0)
@@ -2126,6 +2126,17 @@ fn process_new_files(
 // disabled (no Apollo integration configured) — Phase 3 wires this
 // up; the watcher remains usable on legacy deploys.
 if face_client.is_enabled() {
+    // Opportunistic content_hash backfill: photos indexed before
+    // content-hashing landed (or where the hash compute failed
+    // silently on insert) end up in image_exif with NULL
+    // content_hash. build_face_candidates keys on content_hash, so
+    // those files would never become candidates without backfill.
+    // Idempotent — subsequent scans see the populated hashes and
+    // no-op. The dedicated `backfill_hashes` binary is still the
+    // right tool for very large legacy libraries; this branch
+    // ensures small/medium deploys self-heal without operator
+    // action.
+    backfill_missing_content_hashes(&context, &files, library, &exif_dao);
     let candidates = build_face_candidates(&context, &files, &exif_dao, &face_dao);
     debug!(
         "face_watch: scan tick — {} image file(s) walked, {} candidate(s) (library '{}', modified_since={})",
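The comment block above leans on one assumption worth spelling out: build_face_candidates keys candidates on content_hash, so an image_exif row whose content_hash is NULL never becomes a candidate no matter how often the watcher walks the file. Below is a minimal sketch of that keying step, not the code in this PR; ExifRecord is reduced to the two fields the new backfill function actually touches.

// Sketch only: shows why NULL content_hash rows stay invisible to face
// detection until the backfill fills them in.
struct ExifRecord {
    file_path: String,            // rel_path within the library
    content_hash: Option<String>, // NULL in the DB maps to None here
}

fn candidate_keys(records: &[ExifRecord]) -> Vec<(String, String)> {
    records
        .iter()
        // Rows with no hash are dropped here; this is the gap the
        // opportunistic backfill in the hunk above closes.
        .filter_map(|r| {
            r.content_hash
                .as_ref()
                .map(|h| (r.file_path.clone(), h.clone()))
        })
        .collect()
}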
@@ -2270,6 +2281,95 @@ fn process_new_files(
 }
 }
+
+/// Compute and persist content_hash for image_exif rows where it's NULL.
+///
+/// Bounded per call by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 500) so
+/// a watcher tick on a large legacy library doesn't block for hours
+/// blake3-ing every photo at once. Subsequent scans pick up the rest.
+/// For 50k+ libraries the dedicated `cargo run --bin backfill_hashes`
+/// is still faster (it doesn't fight a watcher loop for the DAO mutex).
+fn backfill_missing_content_hashes(
+    context: &opentelemetry::Context,
+    files: &[(PathBuf, String)],
+    library: &libraries::Library,
+    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
+) {
+    let image_paths: Vec<String> = files
+        .iter()
+        .filter(|(p, _)| !is_video_file(p))
+        .map(|(_, rel)| rel.clone())
+        .collect();
+    if image_paths.is_empty() {
+        return;
+    }
+
+    let exif_records = {
+        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+        dao.get_exif_batch(context, &image_paths)
+            .unwrap_or_default()
+    };
+    // Cheap lookup back from rel_path → absolute file_path so
+    // content_hash::compute can read the bytes.
+    let path_by_rel: HashMap<String, &PathBuf> =
+        files.iter().map(|(p, rel)| (rel.clone(), p)).collect();
+
+    let cap: usize = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .filter(|n: &usize| *n > 0)
+        .unwrap_or(500);
+
+    let mut backfilled = 0usize;
+    let mut errors = 0usize;
+    for record in &exif_records {
+        if backfilled + errors >= cap {
+            break;
+        }
+        if record.content_hash.is_some() {
+            continue;
+        }
+        let Some(file_path) = path_by_rel.get(&record.file_path) else {
+            // Walked file went missing between the directory scan and now;
+            // next tick will retry naturally.
+            continue;
+        };
+        match content_hash::compute(file_path) {
+            Ok(id) => {
+                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+                if let Err(e) = dao.backfill_content_hash(
+                    context,
+                    library.id,
+                    &record.file_path,
+                    &id.content_hash,
+                    id.size_bytes,
+                ) {
+                    warn!(
+                        "face_watch: backfill_content_hash failed for {}: {:?}",
+                        record.file_path, e
+                    );
+                    errors += 1;
+                } else {
+                    backfilled += 1;
+                }
+            }
+            Err(e) => {
+                debug!(
+                    "face_watch: hash compute failed for {} ({:?})",
+                    file_path.display(),
+                    e
+                );
+                errors += 1;
+            }
+        }
+    }
+    if backfilled > 0 || errors > 0 {
+        info!(
+            "face_watch: backfilled content_hash for {} file(s) in library '{}' ({} error(s); cap={})",
+            backfilled, library.name, errors, cap
+        );
+    }
+}
+
 /// Build the face-detection candidate list for a scan tick.
 ///
 /// We need `(rel_path, content_hash)` for every image file that has a
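The doc comment above calls the backfill "blake3-ing every photo", but content_hash::compute itself is outside this diff. Below is a minimal sketch of what such a helper could look like, assuming the blake3 crate and a return type carrying the two fields the call site reads (content_hash and size_bytes); the ContentId name is made up for the sketch.

use std::{fs, io, path::Path};

// Sketch only, not the implementation in this repository.
pub struct ContentId {
    pub content_hash: String, // hex digest of the file bytes
    pub size_bytes: i64,      // length of the file on disk
}

pub fn compute(path: &Path) -> io::Result<ContentId> {
    // Reading the whole photo into memory is acceptable here because the
    // watcher already caps work per tick via FACE_HASH_BACKFILL_MAX_PER_TICK.
    let bytes = fs::read(path)?;
    Ok(ContentId {
        content_hash: blake3::hash(&bytes).to_hex().to_string(),
        size_bytes: bytes.len() as i64,
    })
}

One note on the cap itself: zero or unparsable values of FACE_HASH_BACKFILL_MAX_PER_TICK fall back to the 500 default (the .filter(|n| *n > 0) guard), so a misconfigured deploy still backfills at the default rate rather than disabling the backfill outright.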