faces: phase 3 — file-watch hook drives auto detection
Wire face detection into ImageApi's existing scan loop so new uploads
pick up faces automatically and the initial backlog grinds through on
full-scan ticks. No new job system; Phase 2's already_scanned check
makes the work implicitly idempotent (one face_detections row per
content_hash, including no_faces / failed marker rows).
face_watch.rs (new):
- run_face_detection_pass(library, excluded_dirs, face_client,
face_dao, candidates) — sync entry point. Builds a per-pass tokio
runtime and fans out detect calls bounded by FACE_DETECT_CONCURRENCY
(default 8). The watcher thread itself stays sync.
- filter_excluded — applies the same PathExcluder /memories uses, so
@eaDir / .thumbnails / EXCLUDED_DIRS-listed paths skip detection
before we burn a detect call (and Apollo's GPU memory) on junk.
- read_image_bytes_for_detect — RAW/HEIC route through
extract_embedded_jpeg_preview because opencv-python-headless can't
decode either; everything else gets a plain std::fs::read so EXIF
orientation reaches Apollo's exif_transpose intact.
- process_one — translates Apollo's response into the Phase 2 marker
contract: faces[] empty → no_faces; FaceDetectError::Permanent →
failed (don't retry); Transient → no marker (next scan retries);
success with N faces → N detected rows with the embeddings unpacked.
main.rs (process_new_files + watch_files):
- watch_files now also takes face_client + excluded_dirs; the watcher
thread builds a SqliteFaceDao the same way it builds ExifDao /
PreviewDao.
- After the EXIF write loop, build_face_candidates queries image_exif
for the just-walked image paths' content_hashes (covers new uploads
and pre-existing backlog), filters out anything already_scanned, and
hands the rest to face_watch::run_face_detection_pass.
- Bypassed wholesale when face_client.is_enabled() is false — keeps
the watcher usable on legacy deploys where Apollo isn't configured.
Tests: 5 face_watch unit tests cover the parts that don't need a real
Apollo:
- filter_excluded drops dir-component patterns (@eaDir) without
matching substring file names (eaDir-not-a-thing.jpg keeps).
- filter_excluded drops absolute-under-base subtrees (/private).
- empty EXCLUDED_DIRS short-circuits cleanly.
- read_image_bytes_for_detect passes JPEG bytes through verbatim
(orientation must reach Apollo unmodified).
- read_image_bytes_for_detect falls through to plain read when a
RAW-extension file has no embedded preview, so Apollo gets a chance
to 422 and we mark failed rather than infinitely-retrying.
cargo test --lib: 170 / 0; fmt and clippy clean for new code.
End-to-end (drop a photo → face_detections row appears) needs Apollo
running and is deferred to deploy-time verification.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
93
src/main.rs
93
src/main.rs
@@ -66,6 +66,7 @@ mod data;
|
||||
mod database;
|
||||
mod error;
|
||||
mod exif;
|
||||
mod face_watch;
|
||||
mod faces;
|
||||
mod file_types;
|
||||
mod files;
|
||||
@@ -1460,6 +1461,8 @@ fn main() -> std::io::Result<()> {
|
||||
app_state.libraries.clone(),
|
||||
playlist_mgr_for_watcher,
|
||||
preview_gen_for_watcher,
|
||||
app_state.face_client.clone(),
|
||||
app_state.excluded_dirs.clone(),
|
||||
);
|
||||
|
||||
// Start orphaned playlist cleanup job
|
||||
@@ -1787,6 +1790,8 @@ fn watch_files(
|
||||
libs: Vec<libraries::Library>,
|
||||
playlist_manager: Addr<VideoPlaylistManager>,
|
||||
preview_generator: Addr<video::actors::PreviewClipGenerator>,
|
||||
face_client: crate::ai::face_client::FaceClient,
|
||||
excluded_dirs: Vec<String>,
|
||||
) {
|
||||
std::thread::spawn(move || {
|
||||
// Get polling intervals from environment variables
|
||||
@@ -1819,6 +1824,9 @@ fn watch_files(
|
||||
let preview_dao = Arc::new(Mutex::new(
|
||||
Box::new(SqlitePreviewDao::new()) as Box<dyn PreviewDao>
|
||||
));
|
||||
let face_dao = Arc::new(Mutex::new(
|
||||
Box::new(faces::SqliteFaceDao::new()) as Box<dyn faces::FaceDao>
|
||||
));
|
||||
|
||||
let mut last_quick_scan = SystemTime::now();
|
||||
let mut last_full_scan = SystemTime::now();
|
||||
@@ -1844,6 +1852,9 @@ fn watch_files(
|
||||
lib,
|
||||
Arc::clone(&exif_dao),
|
||||
Arc::clone(&preview_dao),
|
||||
Arc::clone(&face_dao),
|
||||
face_client.clone(),
|
||||
&excluded_dirs,
|
||||
None,
|
||||
playlist_manager.clone(),
|
||||
preview_generator.clone(),
|
||||
@@ -1861,6 +1872,9 @@ fn watch_files(
|
||||
lib,
|
||||
Arc::clone(&exif_dao),
|
||||
Arc::clone(&preview_dao),
|
||||
Arc::clone(&face_dao),
|
||||
face_client.clone(),
|
||||
&excluded_dirs,
|
||||
Some(check_since),
|
||||
playlist_manager.clone(),
|
||||
preview_generator.clone(),
|
||||
@@ -1907,6 +1921,9 @@ fn process_new_files(
|
||||
library: &libraries::Library,
|
||||
exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
|
||||
preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
|
||||
face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>>,
|
||||
face_client: crate::ai::face_client::FaceClient,
|
||||
excluded_dirs: &[String],
|
||||
modified_since: Option<SystemTime>,
|
||||
playlist_manager: Addr<VideoPlaylistManager>,
|
||||
preview_generator: Addr<video::actors::PreviewClipGenerator>,
|
||||
@@ -2082,6 +2099,24 @@ fn process_new_files(
|
||||
}
|
||||
}
|
||||
|
||||
// ── Face detection pass ────────────────────────────────────────────
|
||||
// Run after EXIF writes so newly-registered files have their
|
||||
// content_hash populated. Skipped wholesale when face_client is
|
||||
// disabled (no Apollo integration configured) — Phase 3 wires this
|
||||
// up; the watcher remains usable on legacy deploys.
|
||||
if face_client.is_enabled() {
|
||||
let candidates = build_face_candidates(&context, &files, &exif_dao, &face_dao);
|
||||
if !candidates.is_empty() {
|
||||
face_watch::run_face_detection_pass(
|
||||
library,
|
||||
excluded_dirs,
|
||||
&face_client,
|
||||
Arc::clone(&face_dao),
|
||||
candidates,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for videos that need HLS playlists
|
||||
let video_path_base = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
|
||||
let mut videos_needing_playlists = Vec::new();
|
||||
@@ -2206,6 +2241,64 @@ fn process_new_files(
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the face-detection candidate list for a scan tick.
|
||||
///
|
||||
/// We need `(rel_path, content_hash)` for every image file that has a
|
||||
/// content_hash recorded in image_exif but no row in face_detections yet.
|
||||
/// Re-querying image_exif here picks up rows the EXIF write loop just
|
||||
/// inserted alongside any pre-existing rows the watcher walked over —
|
||||
/// covers both new uploads and the initial backlog scan.
|
||||
fn build_face_candidates(
|
||||
context: &opentelemetry::Context,
|
||||
files: &[(PathBuf, String)],
|
||||
exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
|
||||
face_dao: &Arc<Mutex<Box<dyn faces::FaceDao>>>,
|
||||
) -> Vec<face_watch::FaceCandidate> {
|
||||
// Restrict to image files; videos aren't face-scanned in v1 (kamadak
|
||||
// doesn't even register them in image_exif).
|
||||
let image_paths: Vec<String> = files
|
||||
.iter()
|
||||
.filter(|(p, _)| !is_video_file(p))
|
||||
.map(|(_, rel)| rel.clone())
|
||||
.collect();
|
||||
if image_paths.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let exif_records = {
|
||||
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||
dao.get_exif_batch(context, &image_paths)
|
||||
.unwrap_or_default()
|
||||
};
|
||||
// rel_path → content_hash (only rows with a hash; without one we have
|
||||
// nothing to key face data against).
|
||||
let mut hash_by_path: HashMap<String, String> = HashMap::with_capacity(exif_records.len());
|
||||
for record in exif_records {
|
||||
if let Some(h) = record.content_hash {
|
||||
hash_by_path.insert(record.file_path, h);
|
||||
}
|
||||
}
|
||||
|
||||
let mut candidates = Vec::new();
|
||||
let mut dao = face_dao.lock().expect("face dao");
|
||||
for rel_path in image_paths {
|
||||
let Some(hash) = hash_by_path.get(&rel_path) else {
|
||||
continue;
|
||||
};
|
||||
match dao.already_scanned(context, hash) {
|
||||
Ok(true) => continue,
|
||||
Ok(false) => candidates.push(face_watch::FaceCandidate {
|
||||
rel_path,
|
||||
content_hash: hash.clone(),
|
||||
}),
|
||||
Err(e) => {
|
||||
warn!("face_watch: already_scanned errored for {}: {:?}", hash, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
candidates
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
Reference in New Issue
Block a user