//! File enumeration for the indexer pass. //! //! Walks a library root and returns the `(absolute_path, forward_slash_rel_path)` //! pairs that belong in `image_exif`. Pruning `EXCLUDED_DIRS` happens here at //! WalkDir time via `filter_entry` so whole subtrees (Synology's `@eaDir`, //! `.thumbnails`, the operator's configured excludes) are never descended — //! vs walking the full tree and discarding leaves, which on a Synology mount //! with thousands of `@eaDir` subdirs is the difference between scanning N //! files and N×3. //! //! Previously inlined in `main.rs::process_new_files` without the exclusion //! filter — paths like `/@eaDir/.../SYNOFILE_THUMB_*.jpg` ended up in //! `image_exif` and looped through `face_watch::filter_excluded` every tick, //! since no `face_detections` row would ever be written for a path dropped //! at runtime. use std::path::{Path, PathBuf}; use std::time::SystemTime; use walkdir::{DirEntry, WalkDir}; use crate::file_types; use crate::memories::PathExcluder; /// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return every file /// entry (any extension). The shared primitive for any code that walks a /// library root — thumbnail generation, media counts, orphan-playlist /// reverse lookups, the indexer happy-path, etc. Higher-level helpers /// (e.g. `enumerate_indexable_files`) layer media-type / mtime filters /// on top. /// /// Pruning happens via `filter_entry` so excluded subtrees are never /// descended at all. On a Synology mount with thousands of `@eaDir` /// dirs, that's the difference between visiting N files and ~3N. pub fn walk_library_files(base_path: &Path, excluded_dirs: &[String]) -> Vec { let excluder = PathExcluder::new(base_path, excluded_dirs); WalkDir::new(base_path) .into_iter() // Always allow depth 0 (the root). Under a pathological config // that excludes the base itself, downstream filters drop everything // anyway — but yielding nothing here would also be silently wrong. .filter_entry(move |entry| entry.depth() == 0 || !excluder.is_excluded(entry.path())) .filter_map(|entry| entry.ok()) .filter(|entry| entry.file_type().is_file()) .collect() } /// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return /// `(absolute_path, forward_slash_rel_path)` for every image / video file /// that should be indexed. /// /// `modified_since` keeps only files modified at or after the instant — /// used by the watcher's quick-scan tick to skip the long tail. Files /// whose metadata can't be read are kept; the caller's batch EXIF lookup /// dedups against existing rows. pub fn enumerate_indexable_files( base_path: &Path, excluded_dirs: &[String], modified_since: Option, ) -> Vec<(PathBuf, String)> { walk_library_files(base_path, excluded_dirs) .into_iter() .filter(|entry| match modified_since { Some(since) => entry .metadata() .ok() .and_then(|m| m.modified().ok()) .map(|m| m >= since) .unwrap_or(true), None => true, }) .filter(|entry| { file_types::direntry_is_image(entry) || file_types::direntry_is_video(entry) }) .filter_map(|entry| { let file_path = entry.path().to_path_buf(); // Forward-slash rel_path regardless of OS so DB comparisons // against the batch EXIF lookup line up. let rel = file_path .strip_prefix(base_path) .ok()? .to_str()? .replace('\\', "/"); Some((file_path, rel)) }) .collect() } #[cfg(test)] mod tests { use super::*; use std::fs; use std::time::Duration; /// Build a tempdir with `paths` (relative). Each touched file is empty; /// directory components are created automatically. fn make_tree(paths: &[&str]) -> tempfile::TempDir { let dir = tempfile::tempdir().expect("tempdir"); for p in paths { let abs = dir.path().join(p); if let Some(parent) = abs.parent() { fs::create_dir_all(parent).expect("mkdir -p"); } fs::File::create(&abs).expect("touch"); } dir } fn rel_paths(found: &[(PathBuf, String)]) -> Vec { let mut v: Vec = found.iter().map(|(_, r)| r.clone()).collect(); v.sort(); v } #[test] fn excludes_eadir_subtree() { // The bug: Synology's @eaDir gets walked into and its // SYNOFILE_THUMB_*.jpg leaves end up in image_exif. With // filter_entry pruning, the subtree is never descended. let dir = make_tree(&[ "vacation/IMG_0001.jpg", "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_S.jpg", "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_XL.jpg", "@eaDir/top_level_thumb.jpg", ]); let found = enumerate_indexable_files( dir.path(), &["@eaDir".to_string()], None, ); assert_eq!(rel_paths(&found), vec!["vacation/IMG_0001.jpg".to_string()]); } #[test] fn excludes_nested_pattern() { // .thumbnails as a component pattern (not an absolute dir). let dir = make_tree(&[ "a/b/photo.jpg", "a/.thumbnails/cached.jpg", "a/b/.thumbnails/nested.jpg", ]); let found = enumerate_indexable_files( dir.path(), &[".thumbnails".to_string()], None, ); assert_eq!(rel_paths(&found), vec!["a/b/photo.jpg".to_string()]); } #[test] fn excludes_absolute_under_base() { // Leading-'/' entries are interpreted as paths under the library // root (see PathExcluder::new). let dir = make_tree(&[ "private/secret.jpg", "public/keep.jpg", ]); let found = enumerate_indexable_files( dir.path(), &["/private".to_string()], None, ); assert_eq!(rel_paths(&found), vec!["public/keep.jpg".to_string()]); } #[test] fn filters_non_media() { let dir = make_tree(&[ "a.jpg", "b.mp4", "c.txt", "d", // no extension "e.jpg.bak", // wrong ext ]); let found = enumerate_indexable_files(dir.path(), &[], None); assert_eq!(rel_paths(&found), vec!["a.jpg".to_string(), "b.mp4".to_string()]); } #[test] fn modified_since_filters_old_files() { let dir = make_tree(&["old.jpg", "new.jpg"]); // Backdate "old.jpg" to a known instant. Use filetime via a portable // touch: set both atime and mtime to a fixed past time using // std::fs::File metadata — simpler to set the cutoff into the future // for "old" and the present for "new" semantically. // // Simplest reliable approach: capture mtime of new.jpg, sleep // briefly, recreate it, and use the original mtime as the cutoff. // That way "old.jpg" is older than the cutoff and "new.jpg" is at // or after. let new_path = dir.path().join("new.jpg"); // Force a measurable gap so filesystems with low-resolution mtime // don't collapse them into the same instant. std::thread::sleep(Duration::from_millis(20)); let cutoff = SystemTime::now(); std::thread::sleep(Duration::from_millis(20)); // Bump new.jpg's mtime by rewriting it. fs::write(&new_path, b"x").expect("rewrite"); let found = enumerate_indexable_files(dir.path(), &[], Some(cutoff)); assert_eq!(rel_paths(&found), vec!["new.jpg".to_string()]); } #[test] fn walk_library_files_excludes_subtrees_and_returns_all_extensions() { // The lower-level primitive: any extension survives, but excluded // subtrees are pruned. Used by thumbnail gen and media-count // gauges, which need non-media files too (e.g., walks through // sidecar XMPs alongside the photos). let dir = make_tree(&[ "vacation/IMG_0001.jpg", "vacation/IMG_0001.xmp", "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_S.jpg", "notes.txt", ]); let mut got: Vec = walk_library_files(dir.path(), &["@eaDir".to_string()]) .into_iter() .map(|e| { e.path() .strip_prefix(dir.path()) .unwrap() .to_string_lossy() .replace('\\', "/") }) .collect(); got.sort(); assert_eq!( got, vec![ "notes.txt".to_string(), "vacation/IMG_0001.jpg".to_string(), "vacation/IMG_0001.xmp".to_string(), ] ); } #[test] fn rel_path_is_forward_slash() { // Sanity on a nested path. On Unix this is already '/'; the // assertion guards a future Windows port from regressing. let dir = make_tree(&["a/b/c.jpg"]); let found = enumerate_indexable_files(dir.path(), &[], None); let (_abs, rel) = &found[0]; assert_eq!(rel, "a/b/c.jpg"); assert!(!rel.contains('\\')); } }