Compare commits

...

1 Commits

Author SHA1 Message Date
Cameron Cordes
a48744c7ad indexer: apply EXCLUDED_DIRS to remaining WalkDir callers
Audit follow-up to 5bf4956. The same `@eaDir` pruning that protects
the indexer also needs to protect the other walks under library roots:

- `create_thumbnails` walks every file in every library to generate
  thumbnails. Without EXCLUDED_DIRS, it would generate thumbnails of
  Synology's `SYNOFILE_THUMB_*.jpg` thumbnails (thumbnails of thumbnails).
- `update_media_counts` walks for the prometheus IMAGE / VIDEO gauges.
  Without EXCLUDED_DIRS, the gauges over-count by however many phantom
  `@eaDir` images live alongside the real photos.
- `cleanup_orphaned_playlists` walks BASE_PATH searching for source
  videos by filename. Honoring EXCLUDED_DIRS there isn't a behavior
  change for typical Synology mounts (no .mp4 in @eaDir), but it's a
  correctness win for any operator-defined exclude that happens to
  contain video.

Refactor: add `walk_library_files(base, excluded_dirs) -> Vec<DirEntry>`
to file_scan.rs as the shared primitive. `enumerate_indexable_files`
now layers media-type + mtime filters on top of it. One new test
covers the lower-level helper (returns all extensions, prunes excluded
subtrees).

`generate_video_gifs` (currently `#[allow(dead_code)]`, not reachable
from main) has its `update_media_counts` call adapted to the new
signature and reads EXCLUDED_DIRS from env so a future revival isn't
broken — but its WalkDir walk stays raw because the dual lib/bin
compile makes the file_scan module path non-trivial there. Tagged
with a comment.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 20:17:51 +00:00
5 changed files with 96 additions and 38 deletions

View File

@@ -17,11 +17,34 @@
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::time::SystemTime; use std::time::SystemTime;
use walkdir::WalkDir; use walkdir::{DirEntry, WalkDir};
use crate::file_types; use crate::file_types;
use crate::memories::PathExcluder; use crate::memories::PathExcluder;
/// Collect every regular file beneath `base_path`, skipping any subtree
/// matched by `excluded_dirs`.
///
/// No extension, media-type, or mtime filtering is applied here; callers
/// that need it (e.g. `enumerate_indexable_files`) filter the returned
/// entries themselves. This is the common building block for code that
/// walks a library root: thumbnail generation, media counts,
/// orphan-playlist lookups, and the indexer.
///
/// Exclusion is applied through `filter_entry`, so an excluded directory
/// is pruned as a whole rather than walked and discarded file by file.
/// On a Synology mount littered with `@eaDir` directories that keeps the
/// walk close to the number of real files instead of a multiple of it.
///
/// Entries the walker fails to read are skipped without reporting an error.
pub fn walk_library_files(base_path: &Path, excluded_dirs: &[String]) -> Vec<DirEntry> {
    let excluder = PathExcluder::new(base_path, excluded_dirs);

    // The root entry (depth 0) is never tested against the excluder, so
    // the walk always starts even when the configuration happens to match
    // the base path itself.
    let keep_entry = move |candidate: &DirEntry| {
        candidate.depth() == 0 || !excluder.is_excluded(candidate.path())
    };

    let mut files = Vec::new();
    for visited in WalkDir::new(base_path).into_iter().filter_entry(keep_entry) {
        // Unreadable entries are dropped; only regular files are kept.
        if let Ok(found) = visited {
            if found.file_type().is_file() {
                files.push(found);
            }
        }
    }
    files
}
/// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return /// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return
/// `(absolute_path, forward_slash_rel_path)` for every image / video file /// `(absolute_path, forward_slash_rel_path)` for every image / video file
/// that should be indexed. /// that should be indexed.
@@ -35,17 +58,8 @@ pub fn enumerate_indexable_files(
excluded_dirs: &[String], excluded_dirs: &[String],
modified_since: Option<SystemTime>, modified_since: Option<SystemTime>,
) -> Vec<(PathBuf, String)> { ) -> Vec<(PathBuf, String)> {
let excluder = PathExcluder::new(base_path, excluded_dirs); walk_library_files(base_path, excluded_dirs)
WalkDir::new(base_path)
.into_iter() .into_iter()
// Prune whole subtrees so WalkDir doesn't descend into excluded
// dirs at all. Always allow depth 0 (the root itself); under a
// pathological config that excludes the base, downstream filters
// would still drop everything anyway.
.filter_entry(|entry| entry.depth() == 0 || !excluder.is_excluded(entry.path()))
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.filter(|entry| match modified_since { .filter(|entry| match modified_since {
Some(since) => entry Some(since) => entry
.metadata() .metadata()
@@ -187,6 +201,39 @@ mod tests {
assert_eq!(rel_paths(&found), vec!["new.jpg".to_string()]); assert_eq!(rel_paths(&found), vec!["new.jpg".to_string()]);
} }
#[test]
fn walk_library_files_excludes_subtrees_and_returns_all_extensions() {
    // Exercises the low-level walker directly: it must return files of
    // every extension (media, sidecar, plain text) while pruning anything
    // under an excluded directory such as Synology's `@eaDir`.
    let fixture = make_tree(&[
        "vacation/IMG_0001.jpg",
        "vacation/IMG_0001.xmp",
        "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_S.jpg",
        "notes.txt",
    ]);
    let root = fixture.path();
    let excluded = vec!["@eaDir".to_string()];

    // Normalise each hit to a forward-slash path relative to the fixture
    // root so the comparison is platform independent.
    let mut seen: Vec<String> = Vec::new();
    for file in walk_library_files(root, &excluded) {
        let relative = file
            .path()
            .strip_prefix(root)
            .unwrap()
            .to_string_lossy()
            .replace('\\', "/");
        seen.push(relative);
    }
    // Walk order is not asserted; sort before comparing.
    seen.sort();

    let expected: Vec<String> = [
        "notes.txt",
        "vacation/IMG_0001.jpg",
        "vacation/IMG_0001.xmp",
    ]
    .iter()
    .map(|name| name.to_string())
    .collect();
    assert_eq!(seen, expected);
}
#[test] #[test]
fn rel_path_is_forward_slash() { fn rel_path_is_forward_slash() {
// Sanity on a nested path. On Unix this is already '/'; the // Sanity on a nested path. On Unix this is already '/'; the

View File

@@ -1397,7 +1397,7 @@ impl Handler<RefreshThumbnailsMessage> for StreamActor {
// The stub in lib.rs is a no-op; the real generation is driven by // The stub in lib.rs is a no-op; the real generation is driven by
// the file watcher tick in main.rs, which has access to the // the file watcher tick in main.rs, which has access to the
// configured libraries. // configured libraries.
create_thumbnails(&[]) create_thumbnails(&[], &[])
} }
} }

View File

@@ -40,11 +40,11 @@ pub use state::AppState;
use std::path::Path; use std::path::Path;
use walkdir::DirEntry; use walkdir::DirEntry;
pub fn create_thumbnails(_libs: &[libraries::Library]) { pub fn create_thumbnails(_libs: &[libraries::Library], _excluded_dirs: &[String]) {
// Stub - implemented in main.rs // Stub - implemented in main.rs
} }
pub fn update_media_counts(_media_dir: &Path) { pub fn update_media_counts(_media_dir: &Path, _excluded_dirs: &[String]) {
// Stub - implemented in main.rs // Stub - implemented in main.rs
} }

View File

@@ -1292,7 +1292,7 @@ fn generate_image_thumbnail(src: &Path, thumb_path: &Path) -> std::io::Result<()
Ok(()) Ok(())
} }
fn create_thumbnails(libs: &[libraries::Library]) { fn create_thumbnails(libs: &[libraries::Library], excluded_dirs: &[String]) {
let tracer = global_tracer(); let tracer = global_tracer();
let span = tracer.start("creating thumbnails"); let span = tracer.start("creating thumbnails");
@@ -1306,12 +1306,10 @@ fn create_thumbnails(libs: &[libraries::Library]) {
); );
let images = PathBuf::from(&lib.root_path); let images = PathBuf::from(&lib.root_path);
WalkDir::new(&images) // Prune EXCLUDED_DIRS so we don't generate thumbnails-of-thumbnails
.into_iter() // for Synology @eaDir trees. file_scan handles filter_entry pruning.
.collect::<Vec<Result<_, _>>>() image_api::file_scan::walk_library_files(&images, excluded_dirs)
.into_par_iter() .into_par_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.for_each(|entry| { .for_each(|entry| {
let src = entry.path(); let src = entry.path();
let Ok(relative_path) = src.strip_prefix(&images) else { let Ok(relative_path) = src.strip_prefix(&images) else {
@@ -1367,17 +1365,17 @@ fn create_thumbnails(libs: &[libraries::Library]) {
debug!("Finished making thumbnails"); debug!("Finished making thumbnails");
for lib in libs { for lib in libs {
update_media_counts(Path::new(&lib.root_path)); update_media_counts(Path::new(&lib.root_path), excluded_dirs);
} }
} }
fn update_media_counts(media_dir: &Path) { fn update_media_counts(media_dir: &Path, excluded_dirs: &[String]) {
let mut image_count = 0; let mut image_count = 0;
let mut video_count = 0; let mut video_count = 0;
for ref entry in WalkDir::new(media_dir).into_iter().filter_map(|e| e.ok()) { for entry in image_api::file_scan::walk_library_files(media_dir, excluded_dirs) {
if is_image(entry) { if is_image(&entry) {
image_count += 1; image_count += 1;
} else if is_video(entry) { } else if is_video(&entry) {
video_count += 1; video_count += 1;
} }
} }
@@ -1426,8 +1424,9 @@ fn main() -> std::io::Result<()> {
// so missed files are filled in over successive scans. // so missed files are filled in over successive scans.
{ {
let libs = app_data.libraries.clone(); let libs = app_data.libraries.clone();
let excluded = app_data.excluded_dirs.clone();
std::thread::spawn(move || { std::thread::spawn(move || {
create_thumbnails(&libs); create_thumbnails(&libs, &excluded);
}); });
} }
// generate_video_gifs().await; // generate_video_gifs().await;
@@ -1466,7 +1465,7 @@ fn main() -> std::io::Result<()> {
); );
// Start orphaned playlist cleanup job // Start orphaned playlist cleanup job
cleanup_orphaned_playlists(); cleanup_orphaned_playlists(app_state.excluded_dirs.clone());
// Spawn background job to generate daily conversation summaries // Spawn background job to generate daily conversation summaries
{ {
@@ -1658,8 +1657,8 @@ fn run_migrations(
} }
/// Clean up orphaned HLS playlists and segments whose source videos no longer exist /// Clean up orphaned HLS playlists and segments whose source videos no longer exist
fn cleanup_orphaned_playlists() { fn cleanup_orphaned_playlists(excluded_dirs: Vec<String>) {
std::thread::spawn(|| { std::thread::spawn(move || {
let video_path = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set"); let video_path = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
let base_path = dotenv::var("BASE_PATH").expect("BASE_PATH must be set"); let base_path = dotenv::var("BASE_PATH").expect("BASE_PATH must be set");
@@ -1704,13 +1703,14 @@ fn cleanup_orphaned_playlists() {
if let Some(filename) = playlist_path.file_stem() { if let Some(filename) = playlist_path.file_stem() {
let video_filename = filename.to_string_lossy(); let video_filename = filename.to_string_lossy();
// Search for this video file in BASE_PATH // Search for this video file in BASE_PATH, respecting
// EXCLUDED_DIRS so we don't false-resurrect playlists for
// videos that only exist inside an excluded subtree.
let mut video_exists = false; let mut video_exists = false;
for entry in WalkDir::new(&base_path) for entry in image_api::file_scan::walk_library_files(
.into_iter() Path::new(&base_path),
.filter_map(|e| e.ok()) &excluded_dirs,
.filter(|e| e.file_type().is_file()) ) {
{
if let Some(entry_stem) = entry.path().file_stem() if let Some(entry_stem) = entry.path().file_stem()
&& entry_stem == filename && entry_stem == filename
&& is_video_file(entry.path()) && is_video_file(entry.path())
@@ -1922,7 +1922,7 @@ fn watch_files(
} }
// Update media counts per library (metric aggregates across all) // Update media counts per library (metric aggregates across all)
update_media_counts(Path::new(&lib.root_path)); update_media_counts(Path::new(&lib.root_path), &excluded_dirs);
} }
if is_full_scan { if is_full_scan {
@@ -2229,7 +2229,7 @@ fn process_new_files(
// Generate thumbnails for all files that need them // Generate thumbnails for all files that need them
if new_files_found { if new_files_found {
info!("Processing thumbnails for new files..."); info!("Processing thumbnails for new files...");
create_thumbnails(std::slice::from_ref(library)); create_thumbnails(std::slice::from_ref(library), excluded_dirs);
} }
// Reconciliation: on a full scan, prune image_exif rows whose rel_path no // Reconciliation: on a full scan, prune image_exif rows whose rel_path no

View File

@@ -24,6 +24,17 @@ pub async fn generate_video_gifs() {
fs::create_dir_all(gif_base_path).expect("There was an issue creating directory"); fs::create_dir_all(gif_base_path).expect("There was an issue creating directory");
let files = PathBuf::from(dotenv::var("BASE_PATH").unwrap()); let files = PathBuf::from(dotenv::var("BASE_PATH").unwrap());
// EXCLUDED_DIRS read here for the update_media_counts call below.
// The WalkDir walk itself is left raw — this function is currently
// dead code (`#[allow(dead_code)]`) and not reachable from main.
// If revived, swap to file_scan::walk_library_files (dual lib/bin
// module path makes that non-trivial here).
let excluded_dirs: Vec<String> = std::env::var("EXCLUDED_DIRS")
.unwrap_or_default()
.split(',')
.filter(|s| !s.trim().is_empty())
.map(|s| s.trim().to_string())
.collect();
let ffmpeg = Ffmpeg; let ffmpeg = Ffmpeg;
for file in WalkDir::new(&files) for file in WalkDir::new(&files)
@@ -62,6 +73,6 @@ pub async fn generate_video_gifs() {
info!("Finished making video gifs in {:?}", start.elapsed()); info!("Finished making video gifs in {:?}", start.elapsed());
update_media_counts(&files); update_media_counts(&files, &excluded_dirs);
}); });
} }