//! File enumeration for the indexer pass. //! //! Walks a library root and returns the `(absolute_path, forward_slash_rel_path)` //! pairs that belong in `image_exif`. Pruning `EXCLUDED_DIRS` happens here at //! WalkDir time via `filter_entry` so whole subtrees (Synology's `@eaDir`, //! `.thumbnails`, the operator's configured excludes) are never descended — //! vs walking the full tree and discarding leaves, which on a Synology mount //! with thousands of `@eaDir` subdirs is the difference between scanning N //! files and N×3. //! //! Previously inlined in `main.rs::process_new_files` without the exclusion //! filter — paths like `/@eaDir/.../SYNOFILE_THUMB_*.jpg` ended up in //! `image_exif` and looped through `face_watch::filter_excluded` every tick, //! since no `face_detections` row would ever be written for a path dropped //! at runtime. use std::path::{Path, PathBuf}; use std::time::SystemTime; use walkdir::WalkDir; use crate::file_types; use crate::memories::PathExcluder; /// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return /// `(absolute_path, forward_slash_rel_path)` for every image / video file /// that should be indexed. /// /// `modified_since` keeps only files modified at or after the instant — /// used by the watcher's quick-scan tick to skip the long tail. Files /// whose metadata can't be read are kept; the caller's batch EXIF lookup /// dedups against existing rows. pub fn enumerate_indexable_files( base_path: &Path, excluded_dirs: &[String], modified_since: Option, ) -> Vec<(PathBuf, String)> { let excluder = PathExcluder::new(base_path, excluded_dirs); WalkDir::new(base_path) .into_iter() // Prune whole subtrees so WalkDir doesn't descend into excluded // dirs at all. Always allow depth 0 (the root itself); under a // pathological config that excludes the base, downstream filters // would still drop everything anyway. .filter_entry(|entry| entry.depth() == 0 || !excluder.is_excluded(entry.path())) .filter_map(|entry| entry.ok()) .filter(|entry| entry.file_type().is_file()) .filter(|entry| match modified_since { Some(since) => entry .metadata() .ok() .and_then(|m| m.modified().ok()) .map(|m| m >= since) .unwrap_or(true), None => true, }) .filter(|entry| { file_types::direntry_is_image(entry) || file_types::direntry_is_video(entry) }) .filter_map(|entry| { let file_path = entry.path().to_path_buf(); // Forward-slash rel_path regardless of OS so DB comparisons // against the batch EXIF lookup line up. let rel = file_path .strip_prefix(base_path) .ok()? .to_str()? .replace('\\', "/"); Some((file_path, rel)) }) .collect() } #[cfg(test)] mod tests { use super::*; use std::fs; use std::time::Duration; /// Build a tempdir with `paths` (relative). Each touched file is empty; /// directory components are created automatically. fn make_tree(paths: &[&str]) -> tempfile::TempDir { let dir = tempfile::tempdir().expect("tempdir"); for p in paths { let abs = dir.path().join(p); if let Some(parent) = abs.parent() { fs::create_dir_all(parent).expect("mkdir -p"); } fs::File::create(&abs).expect("touch"); } dir } fn rel_paths(found: &[(PathBuf, String)]) -> Vec { let mut v: Vec = found.iter().map(|(_, r)| r.clone()).collect(); v.sort(); v } #[test] fn excludes_eadir_subtree() { // The bug: Synology's @eaDir gets walked into and its // SYNOFILE_THUMB_*.jpg leaves end up in image_exif. With // filter_entry pruning, the subtree is never descended. let dir = make_tree(&[ "vacation/IMG_0001.jpg", "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_S.jpg", "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_XL.jpg", "@eaDir/top_level_thumb.jpg", ]); let found = enumerate_indexable_files( dir.path(), &["@eaDir".to_string()], None, ); assert_eq!(rel_paths(&found), vec!["vacation/IMG_0001.jpg".to_string()]); } #[test] fn excludes_nested_pattern() { // .thumbnails as a component pattern (not an absolute dir). let dir = make_tree(&[ "a/b/photo.jpg", "a/.thumbnails/cached.jpg", "a/b/.thumbnails/nested.jpg", ]); let found = enumerate_indexable_files( dir.path(), &[".thumbnails".to_string()], None, ); assert_eq!(rel_paths(&found), vec!["a/b/photo.jpg".to_string()]); } #[test] fn excludes_absolute_under_base() { // Leading-'/' entries are interpreted as paths under the library // root (see PathExcluder::new). let dir = make_tree(&[ "private/secret.jpg", "public/keep.jpg", ]); let found = enumerate_indexable_files( dir.path(), &["/private".to_string()], None, ); assert_eq!(rel_paths(&found), vec!["public/keep.jpg".to_string()]); } #[test] fn filters_non_media() { let dir = make_tree(&[ "a.jpg", "b.mp4", "c.txt", "d", // no extension "e.jpg.bak", // wrong ext ]); let found = enumerate_indexable_files(dir.path(), &[], None); assert_eq!(rel_paths(&found), vec!["a.jpg".to_string(), "b.mp4".to_string()]); } #[test] fn modified_since_filters_old_files() { let dir = make_tree(&["old.jpg", "new.jpg"]); // Backdate "old.jpg" to a known instant. Use filetime via a portable // touch: set both atime and mtime to a fixed past time using // std::fs::File metadata — simpler to set the cutoff into the future // for "old" and the present for "new" semantically. // // Simplest reliable approach: capture mtime of new.jpg, sleep // briefly, recreate it, and use the original mtime as the cutoff. // That way "old.jpg" is older than the cutoff and "new.jpg" is at // or after. let new_path = dir.path().join("new.jpg"); // Force a measurable gap so filesystems with low-resolution mtime // don't collapse them into the same instant. std::thread::sleep(Duration::from_millis(20)); let cutoff = SystemTime::now(); std::thread::sleep(Duration::from_millis(20)); // Bump new.jpg's mtime by rewriting it. fs::write(&new_path, b"x").expect("rewrite"); let found = enumerate_indexable_files(dir.path(), &[], Some(cutoff)); assert_eq!(rel_paths(&found), vec!["new.jpg".to_string()]); } #[test] fn rel_path_is_forward_slash() { // Sanity on a nested path. On Unix this is already '/'; the // assertion guards a future Windows port from regressing. let dir = make_tree(&["a/b/c.jpg"]); let found = enumerate_indexable_files(dir.path(), &[], None); let (_abs, rel) = &found[0]; assert_eq!(rel, "a/b/c.jpg"); assert!(!rel.contains('\\')); } }