indexer: apply EXCLUDED_DIRS to remaining WalkDir callers
Audit follow-up to 5bf4956. The same `@eaDir` pruning that protects
the indexer also needs to protect the other walks under library roots:
- `create_thumbnails` walks every file in every library to generate
thumbnails. Without EXCLUDED_DIRS, it would generate thumbnails of
Synology's `SYNOFILE_THUMB_*.jpg` thumbnails (thumbnails of thumbnails).
- `update_media_counts` walks for the prometheus IMAGE / VIDEO gauges.
Without EXCLUDED_DIRS, the gauges over-count by however many phantom
`@eaDir` images live alongside the real photos.
- `cleanup_orphaned_playlists` walks BASE_PATH searching for source
videos by filename. EXCLUDED_DIRS isn't a behavior change for typical
Synology mounts (no .mp4 in @eaDir), but it's a correctness win for
any operator-defined exclude that happens to contain video.
Refactor: add `walk_library_files(base, excluded_dirs) -> Vec<DirEntry>`
to file_scan.rs as the shared primitive. `enumerate_indexable_files`
now layers media-type + mtime filters on top of it. One new test
covers the lower-level helper (returns all extensions, prunes excluded
subtrees).
`generate_video_gifs` (currently `#[allow(dead_code)]`, not reachable
from main) gets the `update_media_counts` signature update and reads
EXCLUDED_DIRS from env so a future revival isn't broken — but its
WalkDir walk stays raw because the dual lib/bin compile makes the
file_scan module path non-trivial there. Tagged with a comment.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -17,11 +17,34 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::SystemTime;
|
||||
|
||||
use walkdir::WalkDir;
|
||||
use walkdir::{DirEntry, WalkDir};
|
||||
|
||||
use crate::file_types;
|
||||
use crate::memories::PathExcluder;
|
||||
|
||||
/// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return every file
|
||||
/// entry (any extension). The shared primitive for any code that walks a
|
||||
/// library root — thumbnail generation, media counts, orphan-playlist
|
||||
/// reverse lookups, the indexer happy-path, etc. Higher-level helpers
|
||||
/// (e.g. `enumerate_indexable_files`) layer media-type / mtime filters
|
||||
/// on top.
|
||||
///
|
||||
/// Pruning happens via `filter_entry` so excluded subtrees are never
|
||||
/// descended at all. On a Synology mount with thousands of `@eaDir`
|
||||
/// dirs, that's the difference between visiting N files and ~3N.
|
||||
pub fn walk_library_files(base_path: &Path, excluded_dirs: &[String]) -> Vec<DirEntry> {
|
||||
let excluder = PathExcluder::new(base_path, excluded_dirs);
|
||||
WalkDir::new(base_path)
|
||||
.into_iter()
|
||||
// Always allow depth 0 (the root). Under a pathological config
|
||||
// that excludes the base itself, downstream filters drop everything
|
||||
// anyway — but yielding nothing here would also be silently wrong.
|
||||
.filter_entry(move |entry| entry.depth() == 0 || !excluder.is_excluded(entry.path()))
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter(|entry| entry.file_type().is_file())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return
|
||||
/// `(absolute_path, forward_slash_rel_path)` for every image / video file
|
||||
/// that should be indexed.
|
||||
@@ -35,17 +58,8 @@ pub fn enumerate_indexable_files(
|
||||
excluded_dirs: &[String],
|
||||
modified_since: Option<SystemTime>,
|
||||
) -> Vec<(PathBuf, String)> {
|
||||
let excluder = PathExcluder::new(base_path, excluded_dirs);
|
||||
|
||||
WalkDir::new(base_path)
|
||||
walk_library_files(base_path, excluded_dirs)
|
||||
.into_iter()
|
||||
// Prune whole subtrees so WalkDir doesn't descend into excluded
|
||||
// dirs at all. Always allow depth 0 (the root itself); under a
|
||||
// pathological config that excludes the base, downstream filters
|
||||
// would still drop everything anyway.
|
||||
.filter_entry(|entry| entry.depth() == 0 || !excluder.is_excluded(entry.path()))
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter(|entry| entry.file_type().is_file())
|
||||
.filter(|entry| match modified_since {
|
||||
Some(since) => entry
|
||||
.metadata()
|
||||
@@ -187,6 +201,39 @@ mod tests {
|
||||
assert_eq!(rel_paths(&found), vec!["new.jpg".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
fn walk_library_files_excludes_subtrees_and_returns_all_extensions() {
    // Contract of the lower-level primitive: files of every extension
    // are returned (including non-media ones such as sidecar XMPs and
    // plain text), while anything under an excluded directory is pruned.
    let dir = make_tree(&[
        "vacation/IMG_0001.jpg",
        "vacation/IMG_0001.xmp",
        "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_S.jpg",
        "notes.txt",
    ]);
    let excluded = vec!["@eaDir".to_string()];

    // Convert each hit to a root-relative, forward-slash path so the
    // comparison below does not depend on the platform separator.
    let mut got: Vec<String> = Vec::new();
    for entry in walk_library_files(dir.path(), &excluded) {
        let relative = entry.path().strip_prefix(dir.path()).unwrap();
        got.push(relative.to_string_lossy().replace('\\', "/"));
    }
    // Walk order is not part of the contract; sort before comparing.
    got.sort();

    let expected: Vec<String> = [
        "notes.txt",
        "vacation/IMG_0001.jpg",
        "vacation/IMG_0001.xmp",
    ]
    .iter()
    .map(|name| name.to_string())
    .collect();
    assert_eq!(got, expected);
}
|
||||
|
||||
#[test]
|
||||
fn rel_path_is_forward_slash() {
|
||||
// Sanity on a nested path. On Unix this is already '/'; the
|
||||
|
||||
Reference in New Issue
Block a user