indexer: prune EXCLUDED_DIRS at WalkDir time, extract enumerate_indexable_files #63
@@ -17,11 +17,34 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::SystemTime;
|
||||
|
||||
use walkdir::WalkDir;
|
||||
use walkdir::{DirEntry, WalkDir};
|
||||
|
||||
use crate::file_types;
|
||||
use crate::memories::PathExcluder;
|
||||
|
||||
/// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return every file
|
||||
/// entry (any extension). The shared primitive for any code that walks a
|
||||
/// library root — thumbnail generation, media counts, orphan-playlist
|
||||
/// reverse lookups, the indexer happy-path, etc. Higher-level helpers
|
||||
/// (e.g. `enumerate_indexable_files`) layer media-type / mtime filters
|
||||
/// on top.
|
||||
///
|
||||
/// Pruning happens via `filter_entry` so excluded subtrees are never
|
||||
/// descended at all. On a Synology mount with thousands of `@eaDir`
|
||||
/// dirs, that's the difference between visiting N files and ~3N.
|
||||
pub fn walk_library_files(base_path: &Path, excluded_dirs: &[String]) -> Vec<DirEntry> {
|
||||
let excluder = PathExcluder::new(base_path, excluded_dirs);
|
||||
WalkDir::new(base_path)
|
||||
.into_iter()
|
||||
// Always allow depth 0 (the root). Under a pathological config
|
||||
// that excludes the base itself, downstream filters drop everything
|
||||
// anyway — but yielding nothing here would also be silently wrong.
|
||||
.filter_entry(move |entry| entry.depth() == 0 || !excluder.is_excluded(entry.path()))
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter(|entry| entry.file_type().is_file())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return
|
||||
/// `(absolute_path, forward_slash_rel_path)` for every image / video file
|
||||
/// that should be indexed.
|
||||
@@ -35,17 +58,8 @@ pub fn enumerate_indexable_files(
|
||||
excluded_dirs: &[String],
|
||||
modified_since: Option<SystemTime>,
|
||||
) -> Vec<(PathBuf, String)> {
|
||||
let excluder = PathExcluder::new(base_path, excluded_dirs);
|
||||
|
||||
WalkDir::new(base_path)
|
||||
walk_library_files(base_path, excluded_dirs)
|
||||
.into_iter()
|
||||
// Prune whole subtrees so WalkDir doesn't descend into excluded
|
||||
// dirs at all. Always allow depth 0 (the root itself); under a
|
||||
// pathological config that excludes the base, downstream filters
|
||||
// would still drop everything anyway.
|
||||
.filter_entry(|entry| entry.depth() == 0 || !excluder.is_excluded(entry.path()))
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter(|entry| entry.file_type().is_file())
|
||||
.filter(|entry| match modified_since {
|
||||
Some(since) => entry
|
||||
.metadata()
|
||||
@@ -109,11 +123,7 @@ mod tests {
|
||||
"vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_XL.jpg",
|
||||
"@eaDir/top_level_thumb.jpg",
|
||||
]);
|
||||
let found = enumerate_indexable_files(
|
||||
dir.path(),
|
||||
&["@eaDir".to_string()],
|
||||
None,
|
||||
);
|
||||
let found = enumerate_indexable_files(dir.path(), &["@eaDir".to_string()], None);
|
||||
assert_eq!(rel_paths(&found), vec!["vacation/IMG_0001.jpg".to_string()]);
|
||||
}
|
||||
|
||||
@@ -125,11 +135,7 @@ mod tests {
|
||||
"a/.thumbnails/cached.jpg",
|
||||
"a/b/.thumbnails/nested.jpg",
|
||||
]);
|
||||
let found = enumerate_indexable_files(
|
||||
dir.path(),
|
||||
&[".thumbnails".to_string()],
|
||||
None,
|
||||
);
|
||||
let found = enumerate_indexable_files(dir.path(), &[".thumbnails".to_string()], None);
|
||||
assert_eq!(rel_paths(&found), vec!["a/b/photo.jpg".to_string()]);
|
||||
}
|
||||
|
||||
@@ -137,15 +143,8 @@ mod tests {
|
||||
fn excludes_absolute_under_base() {
|
||||
// Leading-'/' entries are interpreted as paths under the library
|
||||
// root (see PathExcluder::new).
|
||||
let dir = make_tree(&[
|
||||
"private/secret.jpg",
|
||||
"public/keep.jpg",
|
||||
]);
|
||||
let found = enumerate_indexable_files(
|
||||
dir.path(),
|
||||
&["/private".to_string()],
|
||||
None,
|
||||
);
|
||||
let dir = make_tree(&["private/secret.jpg", "public/keep.jpg"]);
|
||||
let found = enumerate_indexable_files(dir.path(), &["/private".to_string()], None);
|
||||
assert_eq!(rel_paths(&found), vec!["public/keep.jpg".to_string()]);
|
||||
}
|
||||
|
||||
@@ -159,7 +158,10 @@ mod tests {
|
||||
"e.jpg.bak", // wrong ext
|
||||
]);
|
||||
let found = enumerate_indexable_files(dir.path(), &[], None);
|
||||
assert_eq!(rel_paths(&found), vec!["a.jpg".to_string(), "b.mp4".to_string()]);
|
||||
assert_eq!(
|
||||
rel_paths(&found),
|
||||
vec!["a.jpg".to_string(), "b.mp4".to_string()]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -187,6 +189,39 @@ mod tests {
|
||||
assert_eq!(rel_paths(&found), vec!["new.jpg".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
fn walk_library_files_excludes_subtrees_and_returns_all_extensions() {
    // `walk_library_files` is the lower-level primitive: it applies no
    // extension filter, so sidecar (.xmp) and plain-text files survive,
    // while everything under an excluded subtree is pruned.
    let dir = make_tree(&[
        "vacation/IMG_0001.jpg",
        "vacation/IMG_0001.xmp",
        "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_S.jpg",
        "notes.txt",
    ]);
    let root = dir.path();

    // Normalise each hit to a forward-slash path relative to the root.
    let mut got: Vec<String> = Vec::new();
    for entry in walk_library_files(root, &["@eaDir".to_string()]) {
        let rel = entry
            .path()
            .strip_prefix(root)
            .unwrap()
            .to_string_lossy()
            .replace('\\', "/");
        got.push(rel);
    }
    // Walk order is not asserted; compare in sorted order.
    got.sort();

    let expected: Vec<String> = [
        "notes.txt",
        "vacation/IMG_0001.jpg",
        "vacation/IMG_0001.xmp",
    ]
    .iter()
    .map(|s| s.to_string())
    .collect();
    assert_eq!(got, expected);
}
|
||||
|
||||
#[test]
|
||||
fn rel_path_is_forward_slash() {
|
||||
// Sanity on a nested path. On Unix this is already '/'; the
|
||||
|
||||
@@ -1397,7 +1397,7 @@ impl Handler<RefreshThumbnailsMessage> for StreamActor {
|
||||
// The stub in lib.rs is a no-op; the real generation is driven by
|
||||
// the file watcher tick in main.rs, which has access to the
|
||||
// configured libraries.
|
||||
create_thumbnails(&[])
|
||||
create_thumbnails(&[], &[])
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -40,11 +40,11 @@ pub use state::AppState;
|
||||
use std::path::Path;
|
||||
use walkdir::DirEntry;
|
||||
|
||||
pub fn create_thumbnails(_libs: &[libraries::Library]) {
|
||||
pub fn create_thumbnails(_libs: &[libraries::Library], _excluded_dirs: &[String]) {
|
||||
// Stub - implemented in main.rs
|
||||
}
|
||||
|
||||
pub fn update_media_counts(_media_dir: &Path) {
|
||||
pub fn update_media_counts(_media_dir: &Path, _excluded_dirs: &[String]) {
|
||||
// Stub - implemented in main.rs
|
||||
}
|
||||
|
||||
|
||||
65
src/main.rs
65
src/main.rs
@@ -1292,7 +1292,7 @@ fn generate_image_thumbnail(src: &Path, thumb_path: &Path) -> std::io::Result<()
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_thumbnails(libs: &[libraries::Library]) {
|
||||
fn create_thumbnails(libs: &[libraries::Library], excluded_dirs: &[String]) {
|
||||
let tracer = global_tracer();
|
||||
let span = tracer.start("creating thumbnails");
|
||||
|
||||
@@ -1306,12 +1306,10 @@ fn create_thumbnails(libs: &[libraries::Library]) {
|
||||
);
|
||||
let images = PathBuf::from(&lib.root_path);
|
||||
|
||||
WalkDir::new(&images)
|
||||
.into_iter()
|
||||
.collect::<Vec<Result<_, _>>>()
|
||||
// Prune EXCLUDED_DIRS so we don't generate thumbnails-of-thumbnails
|
||||
// for Synology @eaDir trees. file_scan handles filter_entry pruning.
|
||||
image_api::file_scan::walk_library_files(&images, excluded_dirs)
|
||||
.into_par_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter(|entry| entry.file_type().is_file())
|
||||
.for_each(|entry| {
|
||||
let src = entry.path();
|
||||
let Ok(relative_path) = src.strip_prefix(&images) else {
|
||||
@@ -1367,17 +1365,17 @@ fn create_thumbnails(libs: &[libraries::Library]) {
|
||||
debug!("Finished making thumbnails");
|
||||
|
||||
for lib in libs {
|
||||
update_media_counts(Path::new(&lib.root_path));
|
||||
update_media_counts(Path::new(&lib.root_path), excluded_dirs);
|
||||
}
|
||||
}
|
||||
|
||||
fn update_media_counts(media_dir: &Path) {
|
||||
fn update_media_counts(media_dir: &Path, excluded_dirs: &[String]) {
|
||||
let mut image_count = 0;
|
||||
let mut video_count = 0;
|
||||
for ref entry in WalkDir::new(media_dir).into_iter().filter_map(|e| e.ok()) {
|
||||
if is_image(entry) {
|
||||
for entry in image_api::file_scan::walk_library_files(media_dir, excluded_dirs) {
|
||||
if is_image(&entry) {
|
||||
image_count += 1;
|
||||
} else if is_video(entry) {
|
||||
} else if is_video(&entry) {
|
||||
video_count += 1;
|
||||
}
|
||||
}
|
||||
@@ -1426,8 +1424,9 @@ fn main() -> std::io::Result<()> {
|
||||
// so missed files are filled in over successive scans.
|
||||
{
|
||||
let libs = app_data.libraries.clone();
|
||||
let excluded = app_data.excluded_dirs.clone();
|
||||
std::thread::spawn(move || {
|
||||
create_thumbnails(&libs);
|
||||
create_thumbnails(&libs, &excluded);
|
||||
});
|
||||
}
|
||||
// generate_video_gifs().await;
|
||||
@@ -1466,7 +1465,7 @@ fn main() -> std::io::Result<()> {
|
||||
);
|
||||
|
||||
// Start orphaned playlist cleanup job
|
||||
cleanup_orphaned_playlists();
|
||||
cleanup_orphaned_playlists(app_state.excluded_dirs.clone());
|
||||
|
||||
// Spawn background job to generate daily conversation summaries
|
||||
{
|
||||
@@ -1658,8 +1657,8 @@ fn run_migrations(
|
||||
}
|
||||
|
||||
/// Clean up orphaned HLS playlists and segments whose source videos no longer exist
|
||||
fn cleanup_orphaned_playlists() {
|
||||
std::thread::spawn(|| {
|
||||
fn cleanup_orphaned_playlists(excluded_dirs: Vec<String>) {
|
||||
std::thread::spawn(move || {
|
||||
let video_path = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
|
||||
let base_path = dotenv::var("BASE_PATH").expect("BASE_PATH must be set");
|
||||
|
||||
@@ -1704,13 +1703,14 @@ fn cleanup_orphaned_playlists() {
|
||||
if let Some(filename) = playlist_path.file_stem() {
|
||||
let video_filename = filename.to_string_lossy();
|
||||
|
||||
// Search for this video file in BASE_PATH
|
||||
// Search for this video file in BASE_PATH, respecting
|
||||
// EXCLUDED_DIRS so we don't false-resurrect playlists for
|
||||
// videos that only exist inside an excluded subtree.
|
||||
let mut video_exists = false;
|
||||
for entry in WalkDir::new(&base_path)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| e.file_type().is_file())
|
||||
{
|
||||
for entry in image_api::file_scan::walk_library_files(
|
||||
Path::new(&base_path),
|
||||
&excluded_dirs,
|
||||
) {
|
||||
if let Some(entry_stem) = entry.path().file_stem()
|
||||
&& entry_stem == filename
|
||||
&& is_video_file(entry.path())
|
||||
@@ -1922,7 +1922,7 @@ fn watch_files(
|
||||
}
|
||||
|
||||
// Update media counts per library (metric aggregates across all)
|
||||
update_media_counts(Path::new(&lib.root_path));
|
||||
update_media_counts(Path::new(&lib.root_path), &excluded_dirs);
|
||||
}
|
||||
|
||||
if is_full_scan {
|
||||
@@ -2229,7 +2229,7 @@ fn process_new_files(
|
||||
// Generate thumbnails for all files that need them
|
||||
if new_files_found {
|
||||
info!("Processing thumbnails for new files...");
|
||||
create_thumbnails(std::slice::from_ref(library));
|
||||
create_thumbnails(std::slice::from_ref(library), excluded_dirs);
|
||||
}
|
||||
|
||||
// Reconciliation: on a full scan, prune image_exif rows whose rel_path no
|
||||
@@ -2309,7 +2309,8 @@ fn backfill_unhashed_backlog(
|
||||
// library's tick. Negligible cost given the cap.
|
||||
let rows: Vec<(i32, String)> = {
|
||||
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||
dao.get_rows_missing_hash(context, cap + 1).unwrap_or_default()
|
||||
dao.get_rows_missing_hash(context, cap + 1)
|
||||
.unwrap_or_default()
|
||||
};
|
||||
if rows.is_empty() {
|
||||
return 0;
|
||||
@@ -2335,9 +2336,13 @@ fn backfill_unhashed_backlog(
|
||||
match content_hash::compute(&abs) {
|
||||
Ok(id) => {
|
||||
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||
if let Err(e) =
|
||||
dao.backfill_content_hash(context, library.id, rel_path, &id.content_hash, id.size_bytes)
|
||||
{
|
||||
if let Err(e) = dao.backfill_content_hash(
|
||||
context,
|
||||
library.id,
|
||||
rel_path,
|
||||
&id.content_hash,
|
||||
id.size_bytes,
|
||||
) {
|
||||
warn!(
|
||||
"face_watch: backfill_content_hash failed for {}: {:?}",
|
||||
rel_path, e
|
||||
@@ -2348,7 +2353,11 @@ fn backfill_unhashed_backlog(
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("face_watch: hash compute failed for {} ({:?})", abs.display(), e);
|
||||
debug!(
|
||||
"face_watch: hash compute failed for {} ({:?})",
|
||||
abs.display(),
|
||||
e
|
||||
);
|
||||
errors += 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,6 +24,17 @@ pub async fn generate_video_gifs() {
|
||||
fs::create_dir_all(gif_base_path).expect("There was an issue creating directory");
|
||||
|
||||
let files = PathBuf::from(dotenv::var("BASE_PATH").unwrap());
|
||||
// EXCLUDED_DIRS read here for the update_media_counts call below.
|
||||
// The WalkDir walk itself is left raw — this function is currently
|
||||
// dead code (`#[allow(dead_code)]`) and not reachable from main.
|
||||
// If revived, swap to file_scan::walk_library_files (dual lib/bin
|
||||
// module path makes that non-trivial here).
|
||||
let excluded_dirs: Vec<String> = std::env::var("EXCLUDED_DIRS")
|
||||
.unwrap_or_default()
|
||||
.split(',')
|
||||
.filter(|s| !s.trim().is_empty())
|
||||
.map(|s| s.trim().to_string())
|
||||
.collect();
|
||||
|
||||
let ffmpeg = Ffmpeg;
|
||||
for file in WalkDir::new(&files)
|
||||
@@ -62,6 +73,6 @@ pub async fn generate_video_gifs() {
|
||||
|
||||
info!("Finished making video gifs in {:?}", start.elapsed());
|
||||
|
||||
update_media_counts(&files);
|
||||
update_media_counts(&files, &excluded_dirs);
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user