diff --git a/CLAUDE.md b/CLAUDE.md index ae0642c..de0ad44 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -210,6 +210,22 @@ Typical workflows: stage a new mount with `enabled=0` then flip to `1`; quiet a flaky NAS during maintenance without disturbing the rest of the system. +**Per-library excludes (`libraries.excluded_dirs`).** A +comma-separated column, same shape as the global `EXCLUDED_DIRS` env +var, that's applied **in union** with the env-var globals when a +walker scans this library. Use case: mount a parent directory as a +new library while a sibling library covers a child subtree, and +exclude that child subtree from the parent so the two libraries +don't double-walk and double-write `image_exif`. Hash-keyed derived +data (faces, tags, insights) is unaffected either way — those +follow the bytes — but `image_exif` row count, walker CPU, and +thumbnail disk usage all drop to 1× instead of 2× for the overlap. +Affects: file-watch ingest (`process_new_files`), thumbnail +generation, media-count gauges, the orphaned-playlist cleanup walk, +and the `/memories` endpoint. The face-detection backlog drain +inherits via `face_watch::filter_excluded`. NULL = no extras (only +the global env var applies). + **Library availability and safety.** Libraries can be on network shares or removable media; the file watcher must not interpret a temporary unavailability as a mass-deletion event. Every tick begins with a diff --git a/migrations/2026-05-01-110000_libraries_excluded_dirs/down.sql b/migrations/2026-05-01-110000_libraries_excluded_dirs/down.sql new file mode 100644 index 0000000..7c0251e --- /dev/null +++ b/migrations/2026-05-01-110000_libraries_excluded_dirs/down.sql @@ -0,0 +1,2 @@ +-- Requires SQLite 3.35+ for ALTER TABLE DROP COLUMN. 
+ALTER TABLE libraries DROP COLUMN excluded_dirs; diff --git a/migrations/2026-05-01-110000_libraries_excluded_dirs/up.sql b/migrations/2026-05-01-110000_libraries_excluded_dirs/up.sql new file mode 100644 index 0000000..790a859 --- /dev/null +++ b/migrations/2026-05-01-110000_libraries_excluded_dirs/up.sql @@ -0,0 +1,14 @@ +-- Per-library excluded directories. +-- +-- The global EXCLUDED_DIRS env var is the right knob for excludes that +-- every library shares (Synology @eaDir, .thumbnails, etc.). It's a +-- poor fit for "exclude this subtree from THIS library only", whose +-- natural use case is mounting a parent directory while +-- another library already covers a child subtree underneath. +-- +-- This column is parsed comma-separated, same shape as the env var, +-- and the watcher / memories / thumbnail walks each apply +-- (env_globals ∪ library.excluded_dirs) when scanning the library. +-- NULL = no extra excludes; the global env var still applies. + +ALTER TABLE libraries ADD COLUMN excluded_dirs TEXT; diff --git a/src/database/mod.rs b/src/database/mod.rs index 3ea1cd2..43b4d34 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -1213,6 +1213,7 @@ mod exif_dao_tests { root_path: "/tmp/archive", created_at: 0, enabled: true, + excluded_dirs: None, }) .execute(&mut conn) .expect("seed second library"); diff --git a/src/database/models.rs b/src/database/models.rs index d88aa77..100757c 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -150,6 +150,14 @@ pub struct LibraryRow { /// Toggle via SQL today — there is intentionally no HTTP endpoint /// for library mutation (see CLAUDE.md "Multi-library data model"). pub enabled: bool, + /// Per-library excluded paths/patterns, stored comma-separated + /// (same shape as the global `EXCLUDED_DIRS` env var). NULL = no + /// extra excludes for this library; the global env var still + /// applies. 
The runtime `Library` struct parses this into a + /// `Vec` and the walker applies the union of (global, + /// library) excludes when scanning. Use case: mount a parent + /// directory while another library covers a child subtree. + pub excluded_dirs: Option, } #[derive(Insertable)] @@ -159,6 +167,7 @@ pub struct InsertLibrary<'a> { pub root_path: &'a str, pub created_at: i64, pub enabled: bool, + pub excluded_dirs: Option<&'a str>, } // --- Knowledge memory models --- diff --git a/src/database/schema.rs b/src/database/schema.rs index 4374218..189ba37 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -131,6 +131,7 @@ diesel::table! { root_path -> Text, created_at -> BigInt, enabled -> Bool, + excluded_dirs -> Nullable, } } diff --git a/src/libraries.rs b/src/libraries.rs index 5ec96a5..201cfd9 100644 --- a/src/libraries.rs +++ b/src/libraries.rs @@ -35,6 +35,12 @@ pub struct Library { /// will succeed if the file is on disk; nothing prevents that /// today and there's no obvious reason to). Toggle via SQL. pub enabled: bool, + /// Per-library excluded paths/patterns, parsed from the + /// comma-separated DB column. The walker applies these + /// **in union** with the global `EXCLUDED_DIRS` env var; either + /// list matching a path is enough to exclude. Empty = no + /// library-specific excludes (only the global env var applies). + pub excluded_dirs: Vec, } impl Library { @@ -56,6 +62,35 @@ impl Library { .ok() .map(|p| p.to_string_lossy().replace('\\', "/")) } + + /// Effective excluded directories for a walk of this library: + /// the union of the global env-var excludes (passed in by the + /// caller as `globals`) and this library's per-row excludes. + /// Order doesn't matter; `PathExcluder` accepts repeats. 
+ pub fn effective_excluded_dirs(&self, globals: &[String]) -> Vec { + if self.excluded_dirs.is_empty() { + return globals.to_vec(); + } + let mut combined: Vec = Vec::with_capacity(globals.len() + self.excluded_dirs.len()); + combined.extend_from_slice(globals); + combined.extend(self.excluded_dirs.iter().cloned()); + combined + } +} + +/// Parse a comma-separated excluded_dirs column into a Vec, dropping +/// empty entries (mirrors `AppState::parse_excluded_dirs` for the env +/// var). NULL → empty Vec. +pub fn parse_excluded_dirs_column(raw: Option<&str>) -> Vec { + match raw { + None => Vec::new(), + Some(s) => s + .split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(String::from) + .collect(), + } } impl From for Library { @@ -65,6 +100,7 @@ impl From for Library { name: row.name, root_path: row.root_path, enabled: row.enabled, + excluded_dirs: parse_excluded_dirs_column(row.excluded_dirs.as_deref()), } } } @@ -120,6 +156,7 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) { root_path: base_path, created_at: now, enabled: true, + excluded_dirs: None, }) .execute(conn); match result { @@ -353,6 +390,7 @@ mod tests { name: "main".into(), root_path: "/tmp/media".into(), enabled: true, + excluded_dirs: Vec::new(), }; let rel = lib.strip_root(Path::new("/tmp/media/2024/photo.jpg")); assert_eq!(rel.as_deref(), Some("2024/photo.jpg")); @@ -367,6 +405,7 @@ mod tests { name: "main".into(), root_path: "/tmp/media".into(), enabled: true, + excluded_dirs: Vec::new(), }; let abs = lib.resolve("2024/photo.jpg"); assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg")); @@ -385,12 +424,14 @@ mod tests { name: "main".into(), root_path: "/tmp/main".into(), enabled: true, + excluded_dirs: Vec::new(), }, Library { id: 7, name: "archive".into(), root_path: "/tmp/archive".into(), enabled: true, + excluded_dirs: Vec::new(), }, ] } @@ -444,12 +485,50 @@ mod tests { assert!(err.contains("unknown library name")); } + #[test] + fn 
parse_excluded_dirs_column_handles_null_and_whitespace() { + assert_eq!(parse_excluded_dirs_column(None), Vec::::new()); + assert_eq!(parse_excluded_dirs_column(Some("")), Vec::::new()); + assert_eq!( + parse_excluded_dirs_column(Some(" /a , /b/sub , @eaDir ,, ")), + vec!["/a".to_string(), "/b/sub".to_string(), "@eaDir".to_string()] + ); + } + + #[test] + fn effective_excluded_dirs_unions_global_and_per_library() { + let lib_no_extras = Library { + id: 1, + name: "main".into(), + root_path: "/x".into(), + enabled: true, + excluded_dirs: Vec::new(), + }; + let globals = vec!["@eaDir".to_string(), ".thumbnails".to_string()]; + // Empty per-library excludes → exactly the globals. + assert_eq!(lib_no_extras.effective_excluded_dirs(&globals), globals); + + let lib_with_extras = Library { + id: 2, + name: "archive".into(), + root_path: "/y".into(), + enabled: true, + excluded_dirs: vec!["/photos".to_string()], + }; + let combined = lib_with_extras.effective_excluded_dirs(&globals); + assert!(combined.contains(&"@eaDir".to_string())); + assert!(combined.contains(&".thumbnails".to_string())); + assert!(combined.contains(&"/photos".to_string())); + assert_eq!(combined.len(), 3); + } + fn probe_lib(id: i32, root: String) -> Library { Library { id, name: "main".into(), root_path: root, enabled: true, + excluded_dirs: Vec::new(), } } @@ -517,6 +596,7 @@ mod tests { name: "test".into(), root_path: tmp.path().to_string_lossy().into(), enabled: true, + excluded_dirs: Vec::new(), }; let map = new_health_map(&[lib.clone()]); diff --git a/src/library_maintenance.rs b/src/library_maintenance.rs index 458ec95..e15b8e9 100644 --- a/src/library_maintenance.rs +++ b/src/library_maintenance.rs @@ -745,12 +745,14 @@ mod tests { name: "a".into(), root_path: "/x".into(), enabled: true, + excluded_dirs: Vec::new(), }, Library { id: 2, name: "b".into(), root_path: "/y".into(), enabled: true, + excluded_dirs: Vec::new(), }, ]; let health = new_health_map(&libs); @@ -783,12 +785,14 @@ mod tests 
{ name: "a".into(), root_path: "/x".into(), enabled: true, + excluded_dirs: Vec::new(), }, Library { id: 2, name: "b".into(), root_path: "/y".into(), enabled: false, + excluded_dirs: Vec::new(), }, ]; let health = new_health_map(&libs); diff --git a/src/main.rs b/src/main.rs index 0d38214..268ddd1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1335,10 +1335,14 @@ fn create_thumbnails(libs: &[libraries::Library], excluded_dirs: &[String]) { lib.name, lib.root_path ); let images = PathBuf::from(&lib.root_path); + // Effective excludes = global env-var excludes ∪ library row's + // excluded_dirs. Lets a parent-library mount skip the subtree + // already covered by a child library. + let effective_excludes = lib.effective_excluded_dirs(excluded_dirs); // Prune EXCLUDED_DIRS so we don't generate thumbnails-of-thumbnails // for Synology @eaDir trees. file_scan handles filter_entry pruning. - image_api::file_scan::walk_library_files(&images, excluded_dirs) + image_api::file_scan::walk_library_files(&images, &effective_excludes) .into_par_iter() .for_each(|entry| { let src = entry.path(); @@ -1413,7 +1417,8 @@ fn create_thumbnails(libs: &[libraries::Library], excluded_dirs: &[String]) { debug!("Finished making thumbnails"); for lib in libs { - update_media_counts(Path::new(&lib.root_path), excluded_dirs); + let effective_excludes = lib.effective_excluded_dirs(excluded_dirs); + update_media_counts(Path::new(&lib.root_path), &effective_excludes); } } @@ -1801,9 +1806,10 @@ fn cleanup_orphaned_playlists( // playlist isn't orphaned. 
let mut video_exists = false; 'libs: for lib in &libs { + let effective = lib.effective_excluded_dirs(&excluded_dirs); for entry in image_api::file_scan::walk_library_files( Path::new(&lib.root_path), - &excluded_dirs, + &effective, ) { if let Some(entry_stem) = entry.path().file_stem() && entry_stem == filename @@ -2048,6 +2054,11 @@ fn watch_files( // — without these standalone passes, backfill + // detection only progressed during full scans // (default once an hour). + // Effective excludes for this library: global env-var + // ∪ row's excluded_dirs. Compute once per tick — used + // by every walker below for this library. + let effective_excludes = lib.effective_excluded_dirs(&excluded_dirs); + if face_client.is_enabled() { let context = opentelemetry::Context::new(); backfill_unhashed_backlog(&context, lib, &exif_dao); @@ -2057,7 +2068,7 @@ fn watch_files( &face_client, &face_dao, &watcher_tag_dao, - &excluded_dirs, + &effective_excludes, ); } @@ -2073,7 +2084,7 @@ fn watch_files( Arc::clone(&face_dao), Arc::clone(&watcher_tag_dao), face_client.clone(), - &excluded_dirs, + &effective_excludes, None, playlist_manager.clone(), preview_generator.clone(), @@ -2094,7 +2105,7 @@ fn watch_files( Arc::clone(&face_dao), Arc::clone(&watcher_tag_dao), face_client.clone(), - &excluded_dirs, + &effective_excludes, Some(check_since), playlist_manager.clone(), preview_generator.clone(), @@ -2102,7 +2113,7 @@ fn watch_files( } // Update media counts per library (metric aggregates across all) - update_media_counts(Path::new(&lib.root_path), &excluded_dirs); + update_media_counts(Path::new(&lib.root_path), &effective_excludes); // Missing-file detection: prune image_exif rows whose // source file is no longer on disk. 
Per-library, so we diff --git a/src/memories.rs b/src/memories.rs index 95de714..54ae188 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -569,7 +569,8 @@ pub async fn list_memories( for lib in &libraries_to_scan { let base = Path::new(&lib.root_path); - let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs); + let effective = lib.effective_excluded_dirs(&app_state.excluded_dirs); + let path_excluder = PathExcluder::new(base, &effective); let exif_memories = collect_exif_memories( &exif_dao, diff --git a/src/state.rs b/src/state.rs index 9dd1cca..abbcc56 100644 --- a/src/state.rs +++ b/src/state.rs @@ -356,6 +356,7 @@ impl AppState { name: "main".to_string(), root_path: base_path_str.clone(), enabled: true, + excluded_dirs: Vec::new(), }; let insight_generator = InsightGenerator::new( ollama.clone(), @@ -393,6 +394,7 @@ impl AppState { name: "main".to_string(), root_path: base_path_str.clone(), enabled: true, + excluded_dirs: Vec::new(), }]; AppState::new( Arc::new(StreamActor {}.start()),