indexer: apply EXCLUDED_DIRS to remaining WalkDir callers

Audit follow-up to 5bf4956. The same `@eaDir` pruning that protects
the indexer also needs to protect the other walks under library roots:

- `create_thumbnails` walks every file in every library to generate
  thumbnails. Without EXCLUDED_DIRS, it would generate thumbnails of
  Synology's `SYNOFILE_THUMB_*.jpg` thumbnails (thumbnails of thumbnails).
- `update_media_counts` walks for the prometheus IMAGE / VIDEO gauges.
  Without EXCLUDED_DIRS, the gauges over-count by however many phantom
  `@eaDir` images live alongside the real photos.
- `cleanup_orphaned_playlists` walks BASE_PATH searching for source
  videos by filename. Applying EXCLUDED_DIRS changes nothing on typical
  Synology mounts (no .mp4 in @eaDir), but it's a correctness win for
  any operator-defined exclude that happens to contain video.

Refactor: add `walk_library_files(base_path, excluded_dirs) -> Vec<DirEntry>`
to file_scan.rs as the shared primitive. `enumerate_indexable_files`
now layers media-type + mtime filters on top of it. One new test
covers the lower-level helper (returns all extensions, prunes excluded
subtrees).

`generate_video_gifs` (currently `#[allow(dead_code)]`, not reachable
from main) gets the `update_media_counts` signature update and reads
EXCLUDED_DIRS from env so a future revival isn't broken — but its
WalkDir walk stays raw because the dual lib/bin compile makes the
file_scan module path non-trivial there. Tagged with a comment.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-04-30 20:17:51 +00:00
parent 5bf49568f1
commit f50655fb21
5 changed files with 120 additions and 65 deletions

View File

@@ -17,11 +17,34 @@
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use walkdir::WalkDir;
use walkdir::{DirEntry, WalkDir};
use crate::file_types;
use crate::memories::PathExcluder;
/// Collect every file entry beneath `base_path`, whatever its extension,
/// while skipping any subtree matched by `excluded_dirs` (the
/// `EXCLUDED_DIRS` setting).
///
/// This is the common building block for code that walks a library
/// root: thumbnail generation, media counts, orphan-playlist reverse
/// lookups, the indexer, and so on. Narrower helpers such as
/// `enumerate_indexable_files` apply their media-type / mtime filters to
/// the entries returned here.
///
/// Exclusion is applied through `filter_entry`, so the walker never
/// descends into a rejected directory in the first place. On a Synology
/// mount with thousands of `@eaDir` dirs, that's the difference between
/// visiting N files and ~3N.
///
/// Entries the walker reports as errors are skipped silently.
pub fn walk_library_files(base_path: &Path, excluded_dirs: &[String]) -> Vec<DirEntry> {
    let excluder = PathExcluder::new(base_path, excluded_dirs);

    // Depth 0 is `base_path` itself and is always admitted, so the
    // exclusion check only ever applies to entries below the root. Even
    // under a pathological config that excludes the base, the walk still
    // starts rather than being cut off at the root.
    let walker = WalkDir::new(base_path)
        .into_iter()
        .filter_entry(move |candidate| {
            candidate.depth() == 0 || !excluder.is_excluded(candidate.path())
        });

    let mut files = Vec::new();
    for item in walker {
        match item {
            Ok(entry) if entry.file_type().is_file() => files.push(entry),
            // Walk errors and non-file entries (directories etc.) are dropped.
            _ => {}
        }
    }
    files
}
/// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return
/// `(absolute_path, forward_slash_rel_path)` for every image / video file
/// that should be indexed.
@@ -35,17 +58,8 @@ pub fn enumerate_indexable_files(
excluded_dirs: &[String],
modified_since: Option<SystemTime>,
) -> Vec<(PathBuf, String)> {
let excluder = PathExcluder::new(base_path, excluded_dirs);
WalkDir::new(base_path)
walk_library_files(base_path, excluded_dirs)
.into_iter()
// Prune whole subtrees so WalkDir doesn't descend into excluded
// dirs at all. Always allow depth 0 (the root itself); under a
// pathological config that excludes the base, downstream filters
// would still drop everything anyway.
.filter_entry(|entry| entry.depth() == 0 || !excluder.is_excluded(entry.path()))
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.filter(|entry| match modified_since {
Some(since) => entry
.metadata()
@@ -109,11 +123,7 @@ mod tests {
"vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_XL.jpg",
"@eaDir/top_level_thumb.jpg",
]);
let found = enumerate_indexable_files(
dir.path(),
&["@eaDir".to_string()],
None,
);
let found = enumerate_indexable_files(dir.path(), &["@eaDir".to_string()], None);
assert_eq!(rel_paths(&found), vec!["vacation/IMG_0001.jpg".to_string()]);
}
@@ -125,11 +135,7 @@ mod tests {
"a/.thumbnails/cached.jpg",
"a/b/.thumbnails/nested.jpg",
]);
let found = enumerate_indexable_files(
dir.path(),
&[".thumbnails".to_string()],
None,
);
let found = enumerate_indexable_files(dir.path(), &[".thumbnails".to_string()], None);
assert_eq!(rel_paths(&found), vec!["a/b/photo.jpg".to_string()]);
}
@@ -137,15 +143,8 @@ mod tests {
fn excludes_absolute_under_base() {
// Leading-'/' entries are interpreted as paths under the library
// root (see PathExcluder::new).
let dir = make_tree(&[
"private/secret.jpg",
"public/keep.jpg",
]);
let found = enumerate_indexable_files(
dir.path(),
&["/private".to_string()],
None,
);
let dir = make_tree(&["private/secret.jpg", "public/keep.jpg"]);
let found = enumerate_indexable_files(dir.path(), &["/private".to_string()], None);
assert_eq!(rel_paths(&found), vec!["public/keep.jpg".to_string()]);
}
@@ -155,11 +154,14 @@ mod tests {
"a.jpg",
"b.mp4",
"c.txt",
"d", // no extension
"e.jpg.bak", // wrong ext
"d", // no extension
"e.jpg.bak", // wrong ext
]);
let found = enumerate_indexable_files(dir.path(), &[], None);
assert_eq!(rel_paths(&found), vec!["a.jpg".to_string(), "b.mp4".to_string()]);
assert_eq!(
rel_paths(&found),
vec!["a.jpg".to_string(), "b.mp4".to_string()]
);
}
#[test]
@@ -187,6 +189,39 @@ mod tests {
assert_eq!(rel_paths(&found), vec!["new.jpg".to_string()]);
}
#[test]
fn walk_library_files_excludes_subtrees_and_returns_all_extensions() {
    // Exercises the lower-level primitive directly: files of any
    // extension must come back (sidecar `.xmp`, plain `.txt`), while
    // everything under an excluded subtree must be pruned. Thumbnail
    // generation and the media-count gauges are built on this walk.
    let dir = make_tree(&[
        "vacation/IMG_0001.jpg",
        "vacation/IMG_0001.xmp",
        "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_S.jpg",
        "notes.txt",
    ]);
    let excluded = ["@eaDir".to_string()];

    // Reduce each entry to a root-relative, forward-slash path so the
    // comparison does not depend on the temp dir location or platform
    // separator.
    let mut got: Vec<String> = Vec::new();
    for entry in walk_library_files(dir.path(), &excluded) {
        let rel = entry.path().strip_prefix(dir.path()).unwrap();
        got.push(rel.to_string_lossy().replace('\\', "/"));
    }
    // The test does not rely on walk order, so sort before comparing.
    got.sort();

    let expected: Vec<String> = [
        "notes.txt",
        "vacation/IMG_0001.jpg",
        "vacation/IMG_0001.xmp",
    ]
    .iter()
    .map(|p| p.to_string())
    .collect();
    assert_eq!(got, expected);
}
#[test]
fn rel_path_is_forward_slash() {
// Sanity on a nested path. On Unix this is already '/'; the