indexer: apply EXCLUDED_DIRS to remaining WalkDir callers

Audit follow-up to 5bf4956. The same `@eaDir` pruning that protects
the indexer also needs to protect the other walks under library roots:

- `create_thumbnails` walks every file in every library to generate
  thumbnails. Without EXCLUDED_DIRS, it would generate thumbnails of
  Synology's `SYNOFILE_THUMB_*.jpg` thumbnails (thumbnails of thumbnails).
- `update_media_counts` walks for the Prometheus IMAGE / VIDEO gauges.
  Without EXCLUDED_DIRS, the gauges over-count by however many phantom
  `@eaDir` images live alongside the real photos.
- `cleanup_orphaned_playlists` walks BASE_PATH searching for source
  videos by filename. EXCLUDED_DIRS isn't a behavior change for typical
  Synology mounts (no .mp4 in @eaDir), but it's a correctness win for
  any operator-defined exclude that happens to contain video.

Refactor: add `walk_library_files(base, excluded_dirs) -> Vec<DirEntry>`
to file_scan.rs as the shared primitive. `enumerate_indexable_files`
now layers media-type + mtime filters on top of it. One new test
covers the lower-level helper (returns all extensions, prunes excluded
subtrees).

`generate_video_gifs` (currently `#[allow(dead_code)]`, not reachable
from main) gets the `update_media_counts` signature update and reads
EXCLUDED_DIRS from env so a future revival isn't broken — but its
WalkDir walk stays raw because the dual lib/bin compile makes the
file_scan module path non-trivial there. Tagged with a comment.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-04-30 20:17:51 +00:00
parent 5bf49568f1
commit f50655fb21
5 changed files with 120 additions and 65 deletions

View File

@@ -1292,7 +1292,7 @@ fn generate_image_thumbnail(src: &Path, thumb_path: &Path) -> std::io::Result<()
Ok(())
}
fn create_thumbnails(libs: &[libraries::Library]) {
fn create_thumbnails(libs: &[libraries::Library], excluded_dirs: &[String]) {
let tracer = global_tracer();
let span = tracer.start("creating thumbnails");
@@ -1306,12 +1306,10 @@ fn create_thumbnails(libs: &[libraries::Library]) {
);
let images = PathBuf::from(&lib.root_path);
WalkDir::new(&images)
.into_iter()
.collect::<Vec<Result<_, _>>>()
// Prune EXCLUDED_DIRS so we don't generate thumbnails-of-thumbnails
// for Synology @eaDir trees. file_scan handles filter_entry pruning.
image_api::file_scan::walk_library_files(&images, excluded_dirs)
.into_par_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.for_each(|entry| {
let src = entry.path();
let Ok(relative_path) = src.strip_prefix(&images) else {
@@ -1367,17 +1365,17 @@ fn create_thumbnails(libs: &[libraries::Library]) {
debug!("Finished making thumbnails");
for lib in libs {
update_media_counts(Path::new(&lib.root_path));
update_media_counts(Path::new(&lib.root_path), excluded_dirs);
}
}
fn update_media_counts(media_dir: &Path) {
fn update_media_counts(media_dir: &Path, excluded_dirs: &[String]) {
let mut image_count = 0;
let mut video_count = 0;
for ref entry in WalkDir::new(media_dir).into_iter().filter_map(|e| e.ok()) {
if is_image(entry) {
for entry in image_api::file_scan::walk_library_files(media_dir, excluded_dirs) {
if is_image(&entry) {
image_count += 1;
} else if is_video(entry) {
} else if is_video(&entry) {
video_count += 1;
}
}
@@ -1426,8 +1424,9 @@ fn main() -> std::io::Result<()> {
// so missed files are filled in over successive scans.
{
let libs = app_data.libraries.clone();
let excluded = app_data.excluded_dirs.clone();
std::thread::spawn(move || {
create_thumbnails(&libs);
create_thumbnails(&libs, &excluded);
});
}
// generate_video_gifs().await;
@@ -1466,7 +1465,7 @@ fn main() -> std::io::Result<()> {
);
// Start orphaned playlist cleanup job
cleanup_orphaned_playlists();
cleanup_orphaned_playlists(app_state.excluded_dirs.clone());
// Spawn background job to generate daily conversation summaries
{
@@ -1658,8 +1657,8 @@ fn run_migrations(
}
/// Clean up orphaned HLS playlists and segments whose source videos no longer exist
fn cleanup_orphaned_playlists() {
std::thread::spawn(|| {
fn cleanup_orphaned_playlists(excluded_dirs: Vec<String>) {
std::thread::spawn(move || {
let video_path = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
let base_path = dotenv::var("BASE_PATH").expect("BASE_PATH must be set");
@@ -1704,13 +1703,14 @@ fn cleanup_orphaned_playlists() {
if let Some(filename) = playlist_path.file_stem() {
let video_filename = filename.to_string_lossy();
// Search for this video file in BASE_PATH
// Search for this video file in BASE_PATH, respecting
// EXCLUDED_DIRS so we don't false-resurrect playlists for
// videos that only exist inside an excluded subtree.
let mut video_exists = false;
for entry in WalkDir::new(&base_path)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
{
for entry in image_api::file_scan::walk_library_files(
Path::new(&base_path),
&excluded_dirs,
) {
if let Some(entry_stem) = entry.path().file_stem()
&& entry_stem == filename
&& is_video_file(entry.path())
@@ -1922,7 +1922,7 @@ fn watch_files(
}
// Update media counts per library (metric aggregates across all)
update_media_counts(Path::new(&lib.root_path));
update_media_counts(Path::new(&lib.root_path), &excluded_dirs);
}
if is_full_scan {
@@ -2229,7 +2229,7 @@ fn process_new_files(
// Generate thumbnails for all files that need them
if new_files_found {
info!("Processing thumbnails for new files...");
create_thumbnails(std::slice::from_ref(library));
create_thumbnails(std::slice::from_ref(library), excluded_dirs);
}
// Reconciliation: on a full scan, prune image_exif rows whose rel_path no
@@ -2309,7 +2309,8 @@ fn backfill_unhashed_backlog(
// library's tick. Negligible cost given the cap.
let rows: Vec<(i32, String)> = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
dao.get_rows_missing_hash(context, cap + 1).unwrap_or_default()
dao.get_rows_missing_hash(context, cap + 1)
.unwrap_or_default()
};
if rows.is_empty() {
return 0;
@@ -2335,9 +2336,13 @@ fn backfill_unhashed_backlog(
match content_hash::compute(&abs) {
Ok(id) => {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
if let Err(e) =
dao.backfill_content_hash(context, library.id, rel_path, &id.content_hash, id.size_bytes)
{
if let Err(e) = dao.backfill_content_hash(
context,
library.id,
rel_path,
&id.content_hash,
id.size_bytes,
) {
warn!(
"face_watch: backfill_content_hash failed for {}: {:?}",
rel_path, e
@@ -2348,7 +2353,11 @@ fn backfill_unhashed_backlog(
}
}
Err(e) => {
debug!("face_watch: hash compute failed for {} ({:?})", abs.display(), e);
debug!(
"face_watch: hash compute failed for {} ({:?})",
abs.display(),
e
);
errors += 1;
}
}