Split main.rs: extract backfill drains and thumbnails into modules

main.rs shrinks from 3542 to ~2930 lines by moving code into:

- src/backfill.rs (new): backfill_unhashed_backlog,
  backfill_missing_date_taken, backfill_missing_content_hashes,
  build_face_candidates, process_face_backlog. Now unit-tested for
  the first time — 5 tests covering cap behavior, library-id
  filtering, missing-on-disk skip, and the video/unhashed/scanned
  filters on face-candidate selection.

- src/thumbnails.rs (new): unsupported_thumbnail_sentinel,
  generate_image_thumbnail, create_thumbnails, update_media_counts,
  is_image, is_video, plus the IMAGE_GAUGE / VIDEO_GAUGE Prometheus
  metrics. Replaces the no-op stubs that used to live in lib.rs.
  4 new unit tests for the sentinel path math and the
  walker-counts-images-vs-videos smoke path.

Supporting:
- SqliteExifDao::from_shared (test-only) so an SqliteExifDao and
  SqliteFaceDao can share one in-memory connection — required to
  test build_face_candidates against the real join.
- files.rs / video/{mod,actors}.rs import from crate::thumbnails::*
  instead of the now-removed stubs in lib.rs.

cargo test --bin image-api: 325 passing (was 314).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-12 12:22:02 -04:00
parent 05ec5d0c70
commit bec9857426
8 changed files with 1028 additions and 648 deletions

721
src/backfill.rs Normal file
View File

@@ -0,0 +1,721 @@
//! Per-tick drains the watcher runs alongside ingest.
//!
//! These passes were previously inlined in `main.rs`; they exist because
//! a quick scan only walks recently-modified files, so any backlog of
//! rows missing a `content_hash` / `date_taken` / face detection
//! wouldn't otherwise drain except during the once-an-hour full scan.
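//! The drain functions are bounded per call by a `*_PER_TICK` env-var
//! cap; `build_face_candidates` reads no cap of its own and is bounded
//! only by the file list its caller passes in.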
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use log::{debug, info, warn};
use crate::content_hash;
use crate::database::ExifDao;
use crate::date_resolver;
use crate::face_watch;
use crate::faces;
use crate::file_types;
use crate::libraries;
use crate::tags;
/// Compute and persist `content_hash` for `image_exif` rows where it is NULL.
///
/// Rows are fetched straight from the database, independent of the
/// filesystem walk. Quick scans only walk recently-modified files, so a
/// backlog of pre-existing unhashed rows never enters
/// `process_new_files`'s candidate set — left alone, it would only drain
/// on full scans (default once an hour). Calling this every tick keeps
/// the face-detection backlog moving regardless.
///
/// Bounded per call by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000;
/// unset, unparsable or non-positive values fall back to the default)
/// so a watcher tick on a large legacy library doesn't block for hours
/// blake3-ing every photo at once. Subsequent ticks pick up the rest.
/// For 50k+ libraries the dedicated `cargo run --bin backfill_hashes`
/// is still faster (it doesn't fight a watcher loop for the DAO mutex).
///
/// The cap applies to rows *examined*: rows that belong to another
/// library and rows whose file is missing on disk are skipped but still
/// count against it.
///
/// Returns the number of rows successfully backfilled this pass. A
/// failed backlog query is logged at `warn` level and reported as `0`.
///
/// # Panics
///
/// Panics if the `ExifDao` mutex is poisoned.
pub fn backfill_unhashed_backlog(
    context: &opentelemetry::Context,
    library: &libraries::Library,
    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
) -> usize {
    let cap: i64 = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK")
        .ok()
        .and_then(|s| s.parse().ok())
        .filter(|n: &i64| *n > 0)
        .unwrap_or(2000);
    // `cap` is strictly positive, so this conversion can only fail on a
    // target whose usize is narrower than i64; saturate instead of
    // silently truncating.
    let cap_rows = usize::try_from(cap).unwrap_or(usize::MAX);

    // Fetch up to cap+1 rows so we can tell "more remain" without a
    // separate count query. Across libraries — there's no per-library
    // filter on get_rows_missing_hash today — but we only ever update
    // rows whose library_id matches the caller's library, so other
    // libraries' rows just get skipped here and picked up on the next
    // library's tick. Negligible cost given the cap.
    // `saturating_add` keeps an absurdly large configured cap from
    // overflowing the +1.
    let fetched = {
        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
        dao.get_rows_missing_hash(context, cap.saturating_add(1))
    };
    let rows: Vec<(i32, String)> = match fetched {
        Ok(rows) => rows,
        Err(e) => {
            // Surface the failure instead of treating it as an empty
            // backlog; the next tick retries.
            warn!(
                "face_watch: get_rows_missing_hash failed for library '{}': {:?}",
                library.name, e
            );
            return 0;
        }
    };
    if rows.is_empty() {
        return 0;
    }

    let more_than_cap = rows.len() > cap_rows;
    let base_path = std::path::Path::new(&library.root_path);
    let mut backfilled = 0usize;
    let mut errors = 0usize;
    let mut skipped_other_lib = 0usize;

    for (lib_id, rel_path) in rows.iter().take(cap_rows) {
        if *lib_id != library.id {
            skipped_other_lib += 1;
            continue;
        }
        let abs = base_path.join(rel_path);
        if !abs.exists() {
            // File walked away — the watcher's reconciliation pass will
            // remove the orphan exif row eventually.
            continue;
        }
        match content_hash::compute(&abs) {
            Ok(id) => {
                // Lock per row so the hashing above never runs while
                // the DAO mutex is held.
                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
                if let Err(e) = dao.backfill_content_hash(
                    context,
                    library.id,
                    rel_path,
                    &id.content_hash,
                    id.size_bytes,
                ) {
                    warn!(
                        "face_watch: backfill_content_hash failed for {}: {:?}",
                        rel_path, e
                    );
                    errors += 1;
                } else {
                    backfilled += 1;
                }
            }
            Err(e) => {
                debug!(
                    "face_watch: hash compute failed for {} ({:?})",
                    abs.display(),
                    e
                );
                errors += 1;
            }
        }
    }

    if backfilled > 0 || errors > 0 || more_than_cap {
        info!(
            "face_watch: backfill pass for library '{}': hashed {} ({} error(s), {} skipped to other libraries; {} cap, more_remain={})",
            library.name, backfilled, errors, skipped_other_lib, cap, more_than_cap
        );
    }
    backfilled
}
/// Drain `image_exif` rows whose `date_taken` was never resolved or was
/// resolved by the weakest fallback (`fs_time`).
///
/// Runs the canonical-date waterfall — exiftool batch (one subprocess
/// for the whole tick's rows) → filename regex → earliest_fs_time — and
/// persists each resolution with its source tag. Capped per tick by
/// `DATE_BACKFILL_MAX_PER_TICK` (default 500; unset, unparsable or
/// non-positive values fall back to the default) so a 14k-row library
/// drains over a few quick-scan ticks without blocking the watcher.
///
/// kamadak-exif is intentionally skipped here: the row already has a
/// NULL date_taken because the ingest path's kamadak-exif call returned
/// nothing, and re-running it would just produce the same answer.
/// exiftool is the meaningful new attempt — it handles videos and
/// MakerNote-hosted dates kamadak can't reach.
///
/// Returns the number of rows whose date was persisted this pass. A
/// failed backlog query is logged at `warn` level and reported as `0`.
///
/// # Panics
///
/// Panics if the `ExifDao` mutex is poisoned.
pub fn backfill_missing_date_taken(
    context: &opentelemetry::Context,
    library: &libraries::Library,
    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
) -> usize {
    let cap: i64 = dotenv::var("DATE_BACKFILL_MAX_PER_TICK")
        .ok()
        .and_then(|s| s.parse().ok())
        .filter(|n: &i64| *n > 0)
        .unwrap_or(500);
    // `cap` is strictly positive, so this conversion can only fail on a
    // target whose usize is narrower than i64; saturate instead of
    // silently truncating.
    let cap_rows = usize::try_from(cap).unwrap_or(usize::MAX);

    // Ask for one row beyond the cap so "more remain" is known without a
    // separate count query; `saturating_add` guards an extreme cap.
    let fetched = {
        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
        dao.get_rows_needing_date_backfill(context, library.id, cap.saturating_add(1))
    };
    let rows: Vec<(i32, String)> = match fetched {
        Ok(rows) => rows,
        Err(e) => {
            // Surface the failure instead of treating it as an empty
            // backlog; the next tick retries.
            warn!(
                "date_backfill: get_rows_needing_date_backfill failed for library '{}': {:?}",
                library.name, e
            );
            return 0;
        }
    };
    if rows.is_empty() {
        return 0;
    }

    let more_than_cap = rows.len() > cap_rows;
    let base_path = std::path::Path::new(&library.root_path);

    // Build absolute paths and drop rows whose files no longer exist —
    // the missing-file scan in library_maintenance retires deleted rows
    // separately. Without this filter, NULL-date rows for missing files
    // would loop through the drain forever (no source can resolve them).
    let mut existing: Vec<(String, PathBuf)> = Vec::with_capacity(rows.len().min(cap_rows));
    for (_, rel_path) in rows.iter().take(cap_rows) {
        let abs = base_path.join(rel_path);
        if abs.exists() {
            existing.push((rel_path.clone(), abs));
        }
    }
    if existing.is_empty() {
        return 0;
    }

    // One exiftool subprocess for the whole batch; the resolver falls
    // through to filename / fs_time per file when exiftool can't supply
    // a date (or isn't installed at all).
    let paths: Vec<PathBuf> = existing.iter().map(|(_, p)| p.clone()).collect();
    let resolved = date_resolver::resolve_dates_batch(&paths, &HashMap::new());

    let mut backfilled = 0usize;
    let mut unresolved = 0usize;
    let mut by_source: HashMap<&'static str, usize> = HashMap::new();
    {
        // A single lock acquisition covers every write of this batch.
        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
        for (rel_path, abs) in &existing {
            let Some(rd) = resolved.get(abs).copied() else {
                unresolved += 1;
                continue;
            };
            match dao.backfill_date_taken(
                context,
                library.id,
                rel_path,
                rd.timestamp,
                rd.source.as_str(),
            ) {
                Ok(()) => {
                    backfilled += 1;
                    *by_source.entry(rd.source.as_str()).or_insert(0) += 1;
                }
                Err(e) => {
                    warn!(
                        "date_backfill: update failed for lib {} {}: {:?}",
                        library.id, rel_path, e
                    );
                }
            }
        }
    }

    if backfilled > 0 || unresolved > 0 || more_than_cap {
        info!(
            "date_backfill: library '{}': resolved {} ({:?}), {} unresolved, cap={}, more_remain={}",
            library.name, backfilled, by_source, unresolved, cap, more_than_cap
        );
    }
    backfilled
}
/// Face-detection drain that runs on every watcher tick.
///
/// Asks the `FaceDao` for up to `FACE_BACKLOG_MAX_PER_TICK` (default 64;
/// unset, unparsable or non-positive values fall back to the default)
/// `(rel_path, content_hash)` pairs via `list_unscanned_candidates` and
/// forwards them to `face_watch::run_face_detection_pass`. Because it
/// runs on every tick rather than only on full scans, the backlog
/// advances at quick-scan cadence.
///
/// A failing candidate query is logged at `warn` level and ends the
/// pass; an empty candidate list ends it silently.
///
/// # Panics
///
/// Panics if the `FaceDao` mutex is poisoned.
pub fn process_face_backlog(
    context: &opentelemetry::Context,
    library: &libraries::Library,
    face_client: &crate::ai::face_client::FaceClient,
    face_dao: &Arc<Mutex<Box<dyn faces::FaceDao>>>,
    tag_dao: &Arc<Mutex<Box<dyn tags::TagDao>>>,
    excluded_dirs: &[String],
) {
    let per_tick_limit: i64 = match dotenv::var("FACE_BACKLOG_MAX_PER_TICK")
        .ok()
        .and_then(|raw| raw.parse::<i64>().ok())
    {
        Some(limit) if limit > 0 => limit,
        _ => 64,
    };

    let unscanned: Vec<(String, String)> = {
        let mut guard = face_dao.lock().expect("face dao");
        let listing = guard.list_unscanned_candidates(context, library.id, per_tick_limit);
        if let Err(err) = &listing {
            warn!(
                "face_watch: list_unscanned_candidates failed for library '{}': {:?}",
                library.name, err
            );
            return;
        }
        listing.unwrap_or_default()
    };
    if unscanned.is_empty() {
        return;
    }

    info!(
        "face_watch: backlog drain — running detection on {} candidate(s) for library '{}' (cap={})",
        unscanned.len(),
        library.name,
        per_tick_limit
    );

    // Re-shape the DAO tuples into the struct the detection pass expects.
    let mut candidates: Vec<face_watch::FaceCandidate> = Vec::with_capacity(unscanned.len());
    for (rel_path, content_hash) in unscanned {
        candidates.push(face_watch::FaceCandidate {
            rel_path,
            content_hash,
        });
    }

    face_watch::run_face_detection_pass(
        library,
        excluded_dirs,
        face_client,
        Arc::clone(face_dao),
        Arc::clone(tag_dao),
        candidates,
    );
}
/// Compute content_hash for any image rows the walker just touched
/// whose stored EXIF row is still hash-less. Called from
/// `process_new_files` so freshly-ingested files don't have to wait for
/// the next standalone `backfill_unhashed_backlog` tick before face
/// detection can key on their bytes.
///
/// `files` holds `(absolute_path, rel_path)` pairs; entries that
/// `file_types::is_video_file` flags are ignored.
///
/// Cap (`FACE_HASH_BACKFILL_MAX_PER_TICK`, default 2000) is on
/// **successes only**. An earlier version counted errors too,
/// so a pocket of chronically-unhashable files at the front of the
/// table (vanished mid-scan, permission denied, etc.) burned the budget
/// every tick and the rest of the backlog never advanced.
///
/// A failed `get_exif_batch` lookup is logged at `warn` level and ends
/// the pass; nothing is hashed in that case.
///
/// # Panics
///
/// Panics if the `ExifDao` mutex is poisoned.
pub fn backfill_missing_content_hashes(
    context: &opentelemetry::Context,
    files: &[(PathBuf, String)],
    library: &libraries::Library,
    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
) {
    let image_paths: Vec<String> = files
        .iter()
        .filter(|(p, _)| !file_types::is_video_file(p))
        .map(|(_, rel)| rel.clone())
        .collect();
    if image_paths.is_empty() {
        return;
    }

    let fetched = {
        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
        dao.get_exif_batch(context, Some(library.id), &image_paths)
    };
    let exif_records = match fetched {
        Ok(records) => records,
        Err(e) => {
            // Don't let a DB failure masquerade as "nothing to hash".
            warn!(
                "face_watch: get_exif_batch failed for library '{}': {:?}",
                library.name, e
            );
            return;
        }
    };

    // Cheap lookup back from rel_path → absolute file_path so
    // content_hash::compute can read the bytes. Keys borrow from
    // `files`, so no per-file String is allocated.
    let path_by_rel: HashMap<&str, &PathBuf> =
        files.iter().map(|(p, rel)| (rel.as_str(), p)).collect();

    let cap: usize = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK")
        .ok()
        .and_then(|s| s.parse().ok())
        .filter(|n: &usize| *n > 0)
        .unwrap_or(2000);

    // Count the unhashed backlog up front so we can surface "still needs
    // backfill: N" in the log — without it, a face-scan that's stuck at
    // 44% looks stalled when really it's chipping through hashes.
    let unhashed_total = exif_records
        .iter()
        .filter(|r| r.content_hash.is_none())
        .count();

    let mut backfilled = 0usize;
    let mut errors = 0usize;
    for record in exif_records.iter().filter(|r| r.content_hash.is_none()) {
        if backfilled >= cap {
            break;
        }
        let Some(file_path) = path_by_rel.get(record.file_path.as_str()) else {
            // Walked file went missing between the directory scan and now;
            // next tick will retry naturally.
            continue;
        };
        match content_hash::compute(file_path) {
            Ok(id) => {
                // Lock per row so the hashing above never runs while
                // the DAO mutex is held.
                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
                if let Err(e) = dao.backfill_content_hash(
                    context,
                    library.id,
                    &record.file_path,
                    &id.content_hash,
                    id.size_bytes,
                ) {
                    warn!(
                        "face_watch: backfill_content_hash failed for {}: {:?}",
                        record.file_path, e
                    );
                    errors += 1;
                } else {
                    backfilled += 1;
                }
            }
            Err(e) => {
                debug!(
                    "face_watch: hash compute failed for {} ({:?})",
                    file_path.display(),
                    e
                );
                errors += 1;
            }
        }
    }

    // Always log when there's an unhashed backlog so an operator
    // looking at "scan stuck at 44%" can see backfill is running and
    // how much remains. Quiet only when there's nothing to do.
    if unhashed_total > 0 || backfilled > 0 || errors > 0 {
        let remaining = unhashed_total.saturating_sub(backfilled);
        info!(
            "face_watch: backfilled {}/{} content_hash for library '{}' ({} error(s); {} still need backfill; cap={})",
            backfilled, unhashed_total, library.name, errors, remaining, cap
        );
    }
}
/// Build the face-detection candidate list for a scan tick.
///
/// Returns a `FaceCandidate { rel_path, content_hash }` for every
/// non-video entry of `files` that has a content_hash recorded in
/// image_exif and for which `FaceDao::already_scanned` reports `false`.
/// Re-querying image_exif here picks up rows the EXIF write loop
/// just inserted alongside any pre-existing rows the watcher walked
/// over — covers both new uploads and the initial backlog scan.
///
/// Failures are logged at `warn` level and never propagated: a failed
/// `get_exif_batch` lookup yields an empty list, and a failed
/// `already_scanned` check drops just that one file from the result.
///
/// # Panics
///
/// Panics if the `ExifDao` or `FaceDao` mutex is poisoned.
pub fn build_face_candidates(
    context: &opentelemetry::Context,
    library: &libraries::Library,
    files: &[(PathBuf, String)],
    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
    face_dao: &Arc<Mutex<Box<dyn faces::FaceDao>>>,
) -> Vec<face_watch::FaceCandidate> {
    // Restrict to image files; videos aren't face-scanned in v1 (kamadak
    // doesn't even register them in image_exif).
    let image_paths: Vec<String> = files
        .iter()
        .filter(|(p, _)| !file_types::is_video_file(p))
        .map(|(_, rel)| rel.clone())
        .collect();
    if image_paths.is_empty() {
        return Vec::new();
    }

    let fetched = {
        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
        dao.get_exif_batch(context, Some(library.id), &image_paths)
    };
    let exif_records = match fetched {
        Ok(records) => records,
        Err(e) => {
            // Don't let a DB failure masquerade as "no candidates".
            warn!(
                "face_watch: get_exif_batch failed for library '{}': {:?}",
                library.name, e
            );
            return Vec::new();
        }
    };

    // rel_path → content_hash (only rows with a hash; without one we have
    // nothing to key face data against).
    let mut hash_by_path: HashMap<String, String> = HashMap::with_capacity(exif_records.len());
    for record in exif_records {
        if let Some(h) = record.content_hash {
            hash_by_path.insert(record.file_path, h);
        }
    }

    let mut candidates = Vec::new();
    // One lock acquisition covers every already_scanned probe below.
    let mut dao = face_dao.lock().expect("face dao");
    for rel_path in image_paths {
        let Some(hash) = hash_by_path.get(&rel_path) else {
            continue;
        };
        match dao.already_scanned(context, hash) {
            Ok(true) => continue,
            Ok(false) => candidates.push(face_watch::FaceCandidate {
                rel_path,
                content_hash: hash.clone(),
            }),
            Err(e) => {
                warn!("face_watch: already_scanned errored for {}: {:?}", hash, e);
            }
        }
    }
    candidates
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::{Arc, Mutex};

    use diesel::prelude::*;
    use tempfile::TempDir;

    use crate::database::models::{InsertImageExif, InsertLibrary};
    use crate::database::test::in_memory_db_connection;
    use crate::database::{ExifDao, SqliteExifDao, schema};
    use crate::faces::{FaceDao, SqliteFaceDao};
    use crate::libraries::Library;

    /// Fresh, empty OpenTelemetry context for DAO calls.
    fn ctx() -> opentelemetry::Context {
        opentelemetry::Context::new()
    }

    /// Sets an environment variable for the lifetime of the guard and
    /// removes it again on drop — including when the test unwinds from a
    /// failed assertion, so the override can't leak into other tests in
    /// the same binary.
    struct EnvVarGuard(&'static str);

    impl EnvVarGuard {
        fn set(key: &'static str, value: &str) -> Self {
            // SAFETY: NOTE(review) — std synchronizes its own environment
            // accessors, so this is sound as long as no other thread
            // reads the environment through non-std means (e.g. libc
            // `getenv`) while tests run. Confirm for the test binary.
            unsafe {
                std::env::set_var(key, value);
            }
            Self(key)
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: same reasoning as in `EnvVarGuard::set`.
            unsafe {
                std::env::remove_var(self.0);
            }
        }
    }

    /// Build a tempdir-backed library + DAOs sharing a single in-memory
    /// SQLite connection (so cross-table joins like
    /// `list_unscanned_candidates` see consistent state).
    fn setup() -> (
        TempDir,
        Library,
        Arc<Mutex<diesel::SqliteConnection>>,
        Arc<Mutex<Box<dyn ExifDao>>>,
        Arc<Mutex<Box<dyn FaceDao>>>,
    ) {
        let tmp = TempDir::new().expect("tempdir");
        let mut conn = in_memory_db_connection();

        // Migration seeds library id=1 with a placeholder root; rewrite it
        // to point at the tempdir so `<root>/<rel_path>` resolves to real
        // files this test creates.
        diesel::update(schema::libraries::table.filter(schema::libraries::id.eq(1)))
            .set(schema::libraries::root_path.eq(tmp.path().to_string_lossy().to_string()))
            .execute(&mut conn)
            .expect("rewrite library 1 root");

        // Add a second library so cross-library skip cases have somewhere
        // to put their rows.
        diesel::insert_into(schema::libraries::table)
            .values(InsertLibrary {
                name: "other",
                root_path: "/tmp/other-test-lib",
                created_at: 0,
                enabled: true,
                excluded_dirs: None,
            })
            .execute(&mut conn)
            .expect("seed second library");

        let library = Library {
            id: 1,
            name: "main".to_string(),
            root_path: tmp.path().to_string_lossy().to_string(),
            enabled: true,
            excluded_dirs: Vec::new(),
        };

        let shared = Arc::new(Mutex::new(conn));
        let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(
            SqliteExifDao::from_shared(Arc::clone(&shared)),
        )));
        let face_dao: Arc<Mutex<Box<dyn FaceDao>>> = Arc::new(Mutex::new(Box::new(
            SqliteFaceDao::from_connection(Arc::clone(&shared)),
        )));
        (tmp, library, shared, exif_dao, face_dao)
    }

    /// Store a minimal `image_exif` row: only library, path and
    /// (optional) content hash are set; every other column is
    /// `None` / 0.
    fn insert_exif(
        exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
        lib_id: i32,
        rel: &str,
        content_hash: Option<&str>,
    ) {
        let mut dao = exif_dao.lock().unwrap();
        dao.store_exif(
            &ctx(),
            InsertImageExif {
                library_id: lib_id,
                file_path: rel.to_string(),
                camera_make: None,
                camera_model: None,
                lens_model: None,
                width: None,
                height: None,
                orientation: None,
                gps_latitude: None,
                gps_longitude: None,
                gps_altitude: None,
                focal_length: None,
                aperture: None,
                shutter_speed: None,
                iso: None,
                date_taken: None,
                created_time: 0,
                last_modified: 0,
                content_hash: content_hash.map(|s| s.to_string()),
                size_bytes: None,
                phash_64: None,
                dhash_64: None,
                date_taken_source: None,
            },
        )
        .expect("insert");
    }

    /// Write `bytes` to `<root>/<rel>`, creating parent directories as
    /// needed.
    fn write_image(root: &std::path::Path, rel: &str, bytes: &[u8]) {
        let abs = root.join(rel);
        if let Some(parent) = abs.parent() {
            fs::create_dir_all(parent).expect("mkdir");
        }
        fs::write(abs, bytes).expect("write file");
    }

    #[test]
    fn backfill_unhashed_backlog_hashes_missing_rows_in_this_library() {
        let (tmp, library, _conn, exif_dao, _face_dao) = setup();
        write_image(tmp.path(), "a.jpg", b"alpha-bytes");
        write_image(tmp.path(), "b.jpg", b"bravo-bytes");
        insert_exif(&exif_dao, 1, "a.jpg", None);
        insert_exif(&exif_dao, 1, "b.jpg", None);

        let backfilled = backfill_unhashed_backlog(&ctx(), &library, &exif_dao);
        assert_eq!(backfilled, 2);

        let mut dao = exif_dao.lock().unwrap();
        let rows = dao
            .get_exif_batch(&ctx(), Some(1), &["a.jpg".to_string(), "b.jpg".to_string()])
            .unwrap();
        assert_eq!(rows.len(), 2);
        for r in rows {
            assert!(
                r.content_hash.is_some(),
                "row {} should have a hash",
                r.file_path
            );
        }
    }

    #[test]
    fn backfill_unhashed_backlog_skips_other_libraries_and_missing_files() {
        let (tmp, library, _conn, exif_dao, _face_dao) = setup();
        write_image(tmp.path(), "exists.jpg", b"hello");
        // Row for this library whose file is missing on disk:
        insert_exif(&exif_dao, 1, "ghost.jpg", None);
        insert_exif(&exif_dao, 1, "exists.jpg", None);
        // Row in the other library — must be skipped (different lib_id).
        insert_exif(&exif_dao, 2, "other.jpg", None);

        let backfilled = backfill_unhashed_backlog(&ctx(), &library, &exif_dao);
        assert_eq!(backfilled, 1, "only the existing in-library file hashes");

        let mut dao = exif_dao.lock().unwrap();
        let other = dao
            .get_exif_batch(&ctx(), Some(2), &["other.jpg".to_string()])
            .unwrap();
        assert_eq!(other.len(), 1);
        assert!(
            other[0].content_hash.is_none(),
            "other-library row must remain unhashed"
        );
        let ghost = dao
            .get_exif_batch(&ctx(), Some(1), &["ghost.jpg".to_string()])
            .unwrap();
        assert_eq!(ghost.len(), 1);
        assert!(
            ghost[0].content_hash.is_none(),
            "missing-on-disk row stays unhashed (reconciliation removes it later)"
        );
    }

    #[test]
    fn backfill_unhashed_backlog_respects_per_tick_cap() {
        // Env-var-driven cap; the function reads it on every call, so we
        // can set it just for this test. The guard removes the override
        // when it drops, even if an assertion below panics. Tests in the
        // same binary still share the process environment while the
        // guard is alive — we only care that the cap shape (success
        // count <= cap, more_remain logged) holds.
        let _cap_override = EnvVarGuard::set("FACE_HASH_BACKFILL_MAX_PER_TICK", "2");

        let (tmp, library, _conn, exif_dao, _face_dao) = setup();
        for i in 0..5 {
            let rel = format!("img_{}.jpg", i);
            write_image(tmp.path(), &rel, format!("bytes-{}", i).as_bytes());
            insert_exif(&exif_dao, 1, &rel, None);
        }

        let backfilled = backfill_unhashed_backlog(&ctx(), &library, &exif_dao);
        assert_eq!(backfilled, 2, "cap=2 must bound the per-tick successes");
    }

    #[test]
    fn backfill_missing_content_hashes_skips_videos_and_hashed_rows() {
        let (tmp, library, _conn, exif_dao, _face_dao) = setup();
        // Two image rows (one already hashed, one not), one video.
        write_image(tmp.path(), "fresh.jpg", b"fresh-pixels");
        write_image(tmp.path(), "already.jpg", b"already-pixels");
        write_image(tmp.path(), "clip.mp4", b"video-bytes");
        insert_exif(&exif_dao, 1, "fresh.jpg", None);
        insert_exif(&exif_dao, 1, "already.jpg", Some("pre-existing-hash"));
        insert_exif(&exif_dao, 1, "clip.mp4", None);

        let files: Vec<(PathBuf, String)> = vec![
            (tmp.path().join("fresh.jpg"), "fresh.jpg".to_string()),
            (tmp.path().join("already.jpg"), "already.jpg".to_string()),
            (tmp.path().join("clip.mp4"), "clip.mp4".to_string()),
        ];
        backfill_missing_content_hashes(&ctx(), &files, &library, &exif_dao);

        let mut dao = exif_dao.lock().unwrap();
        let rows = dao
            .get_exif_batch(
                &ctx(),
                Some(1),
                &[
                    "fresh.jpg".to_string(),
                    "already.jpg".to_string(),
                    "clip.mp4".to_string(),
                ],
            )
            .unwrap();
        let by_path: HashMap<String, Option<String>> = rows
            .into_iter()
            .map(|r| (r.file_path, r.content_hash))
            .collect();
        assert!(
            by_path["fresh.jpg"].is_some(),
            "fresh image must get a hash"
        );
        assert_eq!(
            by_path["already.jpg"].as_deref(),
            Some("pre-existing-hash"),
            "already-hashed image left untouched"
        );
        assert!(
            by_path["clip.mp4"].is_none(),
            "video skipped (not face-scanned, no hash needed via this path)"
        );
    }

    #[test]
    fn build_face_candidates_filters_videos_unhashed_and_already_scanned() {
        let (tmp, library, _conn, exif_dao, face_dao) = setup();
        // Seed image_exif with: hashed unscanned, hashed scanned, unhashed,
        // and a video. Files don't need to exist on disk — the function
        // doesn't read them, only the DB rows.
        insert_exif(&exif_dao, 1, "fresh.jpg", Some("hash-fresh"));
        insert_exif(&exif_dao, 1, "scanned.jpg", Some("hash-scanned"));
        insert_exif(&exif_dao, 1, "unhashed.jpg", None);
        insert_exif(&exif_dao, 1, "clip.mp4", Some("hash-video"));

        // Mark `scanned.jpg`'s hash as already detected.
        {
            let mut dao = face_dao.lock().unwrap();
            dao.mark_status(&ctx(), 1, "hash-scanned", "scanned.jpg", "no_faces", "test")
                .expect("mark scanned");
        }

        let files: Vec<(PathBuf, String)> = vec![
            (tmp.path().join("fresh.jpg"), "fresh.jpg".to_string()),
            (tmp.path().join("scanned.jpg"), "scanned.jpg".to_string()),
            (tmp.path().join("unhashed.jpg"), "unhashed.jpg".to_string()),
            (tmp.path().join("clip.mp4"), "clip.mp4".to_string()),
        ];
        let candidates = build_face_candidates(&ctx(), &library, &files, &exif_dao, &face_dao);
        assert_eq!(
            candidates.len(),
            1,
            "exactly fresh.jpg should be a candidate"
        );
        assert_eq!(candidates[0].rel_path, "fresh.jpg");
        assert_eq!(candidates[0].content_hash, "hash-fresh");
    }
}