From 3027a3ffda69c9aa37d3a4e654932bd68216afb4 Mon Sep 17 00:00:00 2001 From: Cameron Date: Sat, 18 Apr 2026 21:38:51 -0400 Subject: [PATCH] perf: DB-backed recursive /photos + watcher reconciliation Recursive listings now query image_exif instead of walking disk, taking union-mode /photos from ~17s to sub-second on a 10k-file library. The watcher's full scan prunes stale image_exif rows so the DB stays in parity with the filesystem when files are deleted externally. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/database/mod.rs | 75 +++++++++++++++ src/files.rs | 225 ++++++++++++++++++++++---------------------- src/main.rs | 47 ++++++++- 3 files changed, 235 insertions(+), 112 deletions(-) diff --git a/src/database/mod.rs b/src/database/mod.rs index e42db69..07406d6 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -385,6 +385,28 @@ pub trait ExifDao: Sync + Send { context: &opentelemetry::Context, hash: &str, ) -> Result, DbError>; + + /// List `(library_id, rel_path)` pairs for the given libraries, optionally + /// restricted to rows whose rel_path starts with `path_prefix`. When + /// `library_ids` is empty, rows from every library are returned. Used by + /// `/photos` recursive listing to skip the filesystem walk — the watcher + /// keeps image_exif in parity with disk via the reconciliation pass. + fn list_rel_paths_for_libraries( + &mut self, + context: &opentelemetry::Context, + library_ids: &[i32], + path_prefix: Option<&str>, + ) -> Result, DbError>; + + /// Delete a single image_exif row scoped to `(library_id, rel_path)`. + /// Distinct from `delete_exif`, which matches on rel_path alone and + /// would clobber same-named files across libraries. + fn delete_exif_by_library( + &mut self, + context: &opentelemetry::Context, + library_id: i32, + rel_path: &str, + ) -> Result<(), DbError>; } pub struct SqliteExifDao { @@ -933,6 +955,59 @@ impl ExifDao for SqliteExifDao { }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn list_rel_paths_for_libraries( + &mut self, + context: &opentelemetry::Context, + library_ids: &[i32], + path_prefix: Option<&str>, + ) -> Result, DbError> { + trace_db_call(context, "query", "list_rel_paths_for_libraries", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + let mut query = image_exif.select((library_id, rel_path)).into_boxed(); + + if !library_ids.is_empty() { + query = query.filter(library_id.eq_any(library_ids.to_vec())); + } + + if let Some(prefix) = path_prefix.map(str::trim).filter(|s| !s.is_empty()) { + // Trailing slash normalization so "2024" matches "2024/..." + // without also matching "2024-archive/...". + let prefix = prefix.trim_end_matches('/'); + let pattern = format!("{}/%", prefix.replace('%', "\\%").replace('_', "\\_")); + query = query.filter(rel_path.like(pattern).escape('\\')); + } + + query + .load::<(i32, String)>(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + + fn delete_exif_by_library( + &mut self, + context: &opentelemetry::Context, + library_id_val: i32, + rel_path_val: &str, + ) -> Result<(), DbError> { + trace_db_call(context, "delete", "delete_exif_by_library", |_span| { + use schema::image_exif::dsl::*; + + diesel::delete( + image_exif + .filter(library_id.eq(library_id_val)) + .filter(rel_path.eq(rel_path_val)), + ) + .execute(self.connection.lock().unwrap().deref_mut()) + .map(|_| ()) + .map_err(|_| anyhow::anyhow!("Delete error")) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } } #[cfg(test)] diff --git a/src/files.rs b/src/files.rs index fdeec9f..561414c 100644 --- a/src/files.rs +++ b/src/files.rs @@ -546,91 +546,89 @@ pub async fn list_photos( .unwrap_or_else(|e| e.error_response()); } - // Walk each candidate library's root for the requested sub-path. In - // scoped mode `libraries_to_scan` has one entry (the selected library); - // in union mode we walk every configured library and intermix results. - // For the primary library we preserve the original FileSystemAccess - // path so the test-mock path (MockFileSystem) continues to work. + // In scoped mode `libraries_to_scan` has one entry (the selected library); + // in union mode we enumerate every configured library and intermix results. + // + // Recursive mode pulls rel_paths from image_exif (kept in parity with disk + // by the watcher's full-scan reconciliation) instead of walking — a ~10k + // file library drops from multi-second to ~10ms for the listing itself. + // Non-recursive mode still walks because we need directory metadata for + // the `dirs` response and listing a single directory is cheap. let mut file_names: Vec = Vec::new(); let mut file_libraries: Vec = Vec::new(); let mut dirs_set: std::collections::HashSet = std::collections::HashSet::new(); let mut any_library_resolved = false; - for lib in &libraries_to_scan { - let files_result = if search_recursively { - is_valid_full_path( - &PathBuf::from(&lib.root_path), - &PathBuf::from(search_path), - false, - ) - .map(|path| { - debug!("Valid path for recursive search: {:?}", path); - list_files_recursive(&path).unwrap_or_default() - }) - .context("Invalid path") - } else if lib.id == app_state.primary_library().id { - file_system.get_files_for_path(search_path) + if search_recursively { + let start_db_list = std::time::Instant::now(); + let lib_ids: Vec = libraries_to_scan.iter().map(|l| l.id).collect(); + let trimmed = search_path.trim(); + let prefix = if trimmed.is_empty() || trimmed == "/" { + None } else { - is_valid_full_path( - &PathBuf::from(&lib.root_path), - &PathBuf::from(search_path), - false, - ) - .map(|path| { - debug!("Valid path for non-recursive search: {:?}", path); - list_files(&path).unwrap_or_default() - }) - .context("Invalid path") + Some(trimmed) }; - - let files = match files_result { - Ok(f) => { - any_library_resolved = true; - f - } - Err(e) => { - debug!( - "Skipping library '{}' for path '{}': {:?}", - lib.name, search_path, e - ); - continue; - } + let rows = { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + dao.list_rel_paths_for_libraries(&span_context, &lib_ids, prefix) + .unwrap_or_else(|e| { + warn!("list_rel_paths_for_libraries failed: {:?}", e); + Vec::new() + }) }; - info!( - "Found {:?} files in library '{}' path: {:?} (recursive: {})", - files.len(), - lib.name, - search_path, - search_recursively + "DB-backed recursive listing: {} files across {} libraries in {:?}", + rows.len(), + lib_ids.len(), + start_db_list.elapsed() ); + any_library_resolved = true; + for (lib_id, path) in rows { + file_libraries.push(lib_id); + file_names.push(path); + } + } else { + for lib in &libraries_to_scan { + let files_result = if lib.id == app_state.primary_library().id { + file_system.get_files_for_path(search_path) + } else { + is_valid_full_path( + &PathBuf::from(&lib.root_path), + &PathBuf::from(search_path), + false, + ) + .map(|path| { + debug!("Valid path for non-recursive search: {:?}", path); + list_files(&path).unwrap_or_default() + }) + .context("Invalid path") + }; - for path in &files { - match path.metadata() { - Ok(md) => { - let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { - panic!( - "Unable to strip library root {} from file path {}", - &lib.root_path, - path.display() - ) - }); - // Normalize separators to '/' so downstream lookups - // (tags, EXIF, insights) that store rel_paths with - // forward slashes still match on Windows. - let relative_str = relative.to_str().unwrap().replace('\\', "/"); - - if md.is_file() { - file_names.push(relative_str); - file_libraries.push(lib.id); - } else if md.is_dir() { - dirs_set.insert(relative_str); - } + let files = match files_result { + Ok(f) => { + any_library_resolved = true; + f } Err(e) => { - error!("Failed getting file metadata: {:?}", e); - // Include files without metadata if they have extensions - if path.extension().is_some() { + debug!( + "Skipping library '{}' for path '{}': {:?}", + lib.name, search_path, e + ); + continue; + } + }; + + info!( + "Found {:?} files in library '{}' path: {:?} (recursive: {})", + files.len(), + lib.name, + search_path, + search_recursively + ); + + for path in &files { + match path.metadata() { + Ok(md) => { let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { panic!( "Unable to strip library root {} from file path {}", @@ -638,8 +636,32 @@ pub async fn list_photos( path.display() ) }); - file_names.push(relative.to_str().unwrap().replace('\\', "/")); - file_libraries.push(lib.id); + // Normalize separators to '/' so downstream lookups + // (tags, EXIF, insights) that store rel_paths with + // forward slashes still match on Windows. + let relative_str = relative.to_str().unwrap().replace('\\', "/"); + + if md.is_file() { + file_names.push(relative_str); + file_libraries.push(lib.id); + } else if md.is_dir() { + dirs_set.insert(relative_str); + } + } + Err(e) => { + error!("Failed getting file metadata: {:?}", e); + // Include files without metadata if they have extensions + if path.extension().is_some() { + let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { + panic!( + "Unable to strip library root {} from file path {}", + &lib.root_path, + path.display() + ) + }); + file_names.push(relative.to_str().unwrap().replace('\\', "/")); + file_libraries.push(lib.id); + } } } } @@ -943,43 +965,6 @@ pub fn list_files(dir: &Path) -> io::Result> { Ok(files) } -pub fn list_files_recursive(dir: &Path) -> io::Result> { - let tracer = global_tracer(); - let mut span = tracer.start("list_files_recursive"); - let dir_name_string = dir.to_str().unwrap_or_default().to_string(); - span.set_attribute(KeyValue::new("dir", dir_name_string)); - info!("Recursively listing files in: {:?}", dir); - - let mut result = Vec::new(); - - fn visit_dirs(dir: &Path, files: &mut Vec) -> io::Result<()> { - if dir.is_dir() { - for entry in read_dir(dir)? { - let entry = entry?; - let path = entry.path(); - - if path.is_dir() { - visit_dirs(&path, files)?; - } else if is_image_or_video(&path) { - files.push(path); - } - } - } - Ok(()) - } - - visit_dirs(dir, &mut result)?; - - span.set_attribute(KeyValue::new("file_count", result.len().to_string())); - span.set_status(Status::Ok); - info!( - "Found {:?} files recursively in directory: {:?}", - result.len(), - dir - ); - Ok(result) -} - pub fn is_image_or_video(path: &Path) -> bool { file_types::is_media_file(path) } @@ -1567,6 +1552,24 @@ mod tests { ) -> Result, DbError> { Ok(vec![]) } + + fn list_rel_paths_for_libraries( + &mut self, + _context: &opentelemetry::Context, + _library_ids: &[i32], + _path_prefix: Option<&str>, + ) -> Result, DbError> { + Ok(vec![]) + } + + fn delete_exif_by_library( + &mut self, + _context: &opentelemetry::Context, + _library_id: i32, + _rel_path: &str, + ) -> Result<(), DbError> { + Ok(()) + } } mod api { diff --git a/src/main.rs b/src/main.rs index 19edb78..570cf58 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,10 @@ use prometheus::{self, IntGauge}; use std::error::Error; use std::sync::{Arc, Mutex}; use std::time::{Duration, SystemTime}; -use std::{collections::HashMap, io::prelude::*}; +use std::{ + collections::{HashMap, HashSet}, + io::prelude::*, +}; use std::{env, fs::File}; use std::{ io::ErrorKind, @@ -1916,6 +1919,48 @@ fn process_new_files( info!("Processing thumbnails for new files..."); create_thumbnails(std::slice::from_ref(library)); } + + // Reconciliation: on a full scan, prune image_exif rows whose rel_path no + // longer exists on disk for this library. Keeps the DB in parity so + // downstream DB-backed listings (e.g. recursive /photos) don't return + // phantom files. Skipped on quick scans — those only look at recently + // modified files and can't distinguish "missing" from "unchanged". + if modified_since.is_none() { + let disk_paths: HashSet = files.iter().map(|(_, rel)| rel.clone()).collect(); + let db_paths: Vec = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_rel_paths_for_library(&context, library.id) + .unwrap_or_else(|e| { + error!( + "Reconciliation: failed to load image_exif rel_paths for lib {}: {:?}", + library.id, e + ); + Vec::new() + }) + }; + + let stale: Vec = db_paths + .into_iter() + .filter(|p| !disk_paths.contains(p)) + .collect(); + + if !stale.is_empty() { + info!( + "Reconciliation: pruning {} stale image_exif rows for library '{}'", + stale.len(), + library.name + ); + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + for rel in &stale { + if let Err(e) = dao.delete_exif_by_library(&context, library.id, rel) { + warn!( + "Reconciliation: failed to delete {} (lib {}): {:?}", + rel, library.id, e + ); + } + } + } + } } #[cfg(test)]