From 33a89a214cc3c0cac42d90ac561548a23a5754a8 Mon Sep 17 00:00:00 2001
From: Cameron
Date: Sat, 18 Apr 2026 17:27:41 -0400
Subject: [PATCH] feat: union /photos and /memories across libraries

When `library` is omitted, both endpoints now walk every configured
library root, interleave the results, and tag each row with its source
library via the parallel `photo_libraries` / per-row `library_id`
arrays. Previously the handlers fell back to the primary library,
silently hiding the rest.

Threads a parallel `file_libraries: Vec<i32>` through the sort/paginate
helpers so library attribution survives sorting and pagination.
Directory names are de-duplicated across libraries.
`get_all_with_date_taken` grows an optional library filter so memories
can scope its EXIF query per-library during the union walk.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/database/mod.rs |  12 +-
 src/files.rs        | 820 +++++++++++++++++++++++---------------
 src/memories.rs     |  79 +++--
 3 files changed, 490 insertions(+), 421 deletions(-)

diff --git a/src/database/mod.rs b/src/database/mod.rs
index f29a212..fe0957c 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -266,6 +266,7 @@ pub trait ExifDao: Sync + Send {
     fn get_all_with_date_taken(
         &mut self,
         context: &opentelemetry::Context,
+        library_id: Option<i32>,
     ) -> Result<Vec<(String, i64)>, DbError>;
 
     /// Batch load EXIF data for multiple file paths (single query)
@@ -523,15 +524,24 @@ impl ExifDao for SqliteExifDao {
     fn get_all_with_date_taken(
         &mut self,
         context: &opentelemetry::Context,
+        lib_id: Option<i32>,
     ) -> Result<Vec<(String, i64)>, DbError> {
         trace_db_call(context, "query", "get_all_with_date_taken", |_span| {
             use schema::image_exif::dsl::*;
 
             let mut connection = self.connection.lock().expect("Unable to get ExifDao");
 
-            image_exif
+            let query = image_exif
                 .select((rel_path, date_taken))
                 .filter(date_taken.is_not_null())
+                .into_boxed();
+
+            let query = match lib_id {
+                Some(filter_id) => query.filter(library_id.eq(filter_id)),
+                None => query,
+            };
+
+            query
                 .load::<(String, Option<i64>)>(connection.deref_mut())
                 .map(|records| {
                     records
diff --git a/src/files.rs b/src/files.rs
index 6fb22f1..d70d2ce 100644
--- a/src/files.rs
+++ b/src/files.rs
@@ -41,52 +41,53 @@ pub struct FileWithMetadata {
     pub file_name: String,
     pub tag_count: i64,
     pub date_taken: Option<i64>, // Unix timestamp from EXIF or filename extraction
+    pub library_id: i32,
 }
 
 use serde::Deserialize;
 
 /// Apply sorting to files with EXIF data support for date-based sorting
 /// Handles both date sorting (with EXIF/filename fallback) and regular sorting
-/// Returns (sorted_file_paths, total_count)
+/// Returns (sorted_file_paths, sorted_library_ids, total_count)
 fn apply_sorting_with_exif(
     files: Vec<FileWithTagCount>,
+    file_libraries: Vec<i32>,
     sort_type: SortType,
     exif_dao: &mut Box<dyn ExifDao>,
     span_context: &opentelemetry::Context,
-    base_path: &Path,
+    libraries: &[crate::libraries::Library],
     limit: Option<i64>,
     offset: i64,
-) -> (Vec<String>, i64) {
+) -> (Vec<String>, Vec<i32>, i64) {
     let total_count = files.len() as i64;
 
     match sort_type {
         SortType::DateTakenAsc | SortType::DateTakenDesc => {
             info!("Date sorting requested, using in-memory sort with EXIF/filename fallback");
-            // Use in-memory sort so files without EXIF dates are included via
-            // filename extraction and filesystem metadata fallbacks.
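// The recurring technique in this patch for keeping the new parallel
// `file_libraries` vector aligned with its files through sorting and
// pagination: zip the two vectors into pairs, operate on the pairs, then
// unzip. A minimal, self-contained sketch of that pattern; the function name
// and parameters below are illustrative, not code from this patch:
fn sort_and_page(
    files: Vec<String>,
    libs: Vec<i32>,
    offset: usize,
    limit: usize,
) -> (Vec<String>, Vec<i32>) {
    // Pair each file with its library id so a single sort keeps both in lockstep.
    let mut paired: Vec<(String, i32)> = files.into_iter().zip(libs).collect();
    paired.sort_by(|l, r| l.0.cmp(&r.0));
    // Paginate the pairs, then split back into the two parallel vectors.
    paired.into_iter().skip(offset).take(limit).unzip()
}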
- let (sorted, _) = in_memory_date_sort( + let (sorted, sorted_libs, _) = in_memory_date_sort( files, + file_libraries, sort_type, exif_dao, span_context, - base_path, + libraries, limit, offset, ); - (sorted, total_count) + (sorted, sorted_libs, total_count) } _ => { - // Use regular sort for non-date sorting - let sorted = sort(files, sort_type); - let result = if let Some(limit_val) = limit { - sorted - .into_iter() - .skip(offset as usize) - .take(limit_val as usize) - .collect() + let (sorted, sorted_libs) = sort(files, file_libraries, sort_type); + let (result, result_libs) = if let Some(limit_val) = limit { + let skip = offset as usize; + let take = limit_val as usize; + ( + sorted.iter().skip(skip).take(take).cloned().collect(), + sorted_libs.iter().skip(skip).take(take).copied().collect(), + ) } else { - sorted + (sorted, sorted_libs) }; - (result, total_count) + (result, result_libs, total_count) } } } @@ -94,66 +95,88 @@ fn apply_sorting_with_exif( /// Fallback in-memory date sorting with EXIF/filename extraction fn in_memory_date_sort( files: Vec, + file_libraries: Vec, sort_type: SortType, exif_dao: &mut Box, span_context: &opentelemetry::Context, - base_path: &Path, + libraries: &[crate::libraries::Library], limit: Option, offset: i64, -) -> (Vec, i64) { +) -> (Vec, Vec, i64) { let total_count = files.len() as i64; let file_paths: Vec = files.iter().map(|f| f.file_name.clone()).collect(); - // Batch fetch EXIF data - let exif_map: std::collections::HashMap = exif_dao + // Batch fetch EXIF data (keyed by rel_path; in union mode a rel_path may + // correspond to rows in multiple libraries — pick the date from the one + // matching the requesting row's library_id when possible). + let exif_rows = exif_dao .get_exif_batch(span_context, &file_paths) - .unwrap_or_default() + .unwrap_or_default(); + let exif_map: std::collections::HashMap<(String, i32), i64> = exif_rows .into_iter() - .filter_map(|exif| exif.date_taken.map(|dt| (exif.file_path, dt))) + .filter_map(|exif| { + exif.date_taken + .map(|dt| ((exif.file_path, exif.library_id), dt)) + }) + .collect(); + + let lib_roots: std::collections::HashMap = libraries + .iter() + .map(|l| (l.id, l.root_path.as_str())) .collect(); // Convert to FileWithMetadata with date fallback logic let files_with_metadata: Vec = files .into_iter() - .map(|f| { - // Try EXIF date first + .zip(file_libraries.iter().copied()) + .map(|(f, lib_id)| { let date_taken = exif_map - .get(&f.file_name) + .get(&(f.file_name.clone(), lib_id)) .copied() + .or_else(|| extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())) .or_else(|| { - // Fallback to filename extraction - extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp()) - }) - .or_else(|| { - // Fallback to filesystem metadata creation date - let full_path = base_path.join(&f.file_name); - std::fs::metadata(full_path) - .and_then(|md| md.created().or(md.modified())) - .ok() - .map(|system_time| { - >>::into(system_time).timestamp() - }) + lib_roots.get(&lib_id).and_then(|root| { + let full_path = Path::new(root).join(&f.file_name); + std::fs::metadata(full_path) + .and_then(|md| md.created().or(md.modified())) + .ok() + .map(|system_time| { + >>::into(system_time).timestamp() + }) + }) }); FileWithMetadata { file_name: f.file_name, tag_count: f.tag_count, date_taken, + library_id: lib_id, } }) .collect(); - let sorted = sort_with_metadata(files_with_metadata, sort_type); - let result = if let Some(limit_val) = limit { - sorted - .into_iter() - .skip(offset as usize) - 
.take(limit_val as usize) - .collect() + let (sorted, sorted_libs) = sort_with_metadata(files_with_metadata, sort_type); + let (result, result_libs) = if let Some(limit_val) = limit { + let skip = offset as usize; + let take = limit_val as usize; + ( + sorted + .iter() + .skip(skip) + .take(take) + .cloned() + .collect::>(), + sorted_libs + .iter() + .skip(skip) + .take(take) + .copied() + .collect::>(), + ) } else { - sorted + (sorted, sorted_libs) }; - (result, total_count) + (result, result_libs, total_count) } pub async fn list_photos( @@ -237,9 +260,9 @@ pub async fn list_photos( KeyValue::new("library", req.library.clone().unwrap_or_default()), ]); - // Resolve the optional library filter. Unknown values return 400. - // For Phase 3 the filesystem walk still operates against a single - // library's root; Phase 4 introduces multi-root union scanning. + // Resolve the optional library filter. Unknown values return 400. A + // `None` result means "union across all libraries" and downstream + // walks iterate every configured library root. let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) { Ok(lib) => lib, @@ -248,7 +271,6 @@ pub async fn list_photos( return HttpResponse::BadRequest().body(msg); } }; - let scoped_library = library.unwrap_or_else(|| app_state.primary_library()); let span_context = opentelemetry::Context::current_with_span(span); @@ -332,12 +354,15 @@ pub async fn list_photos( None }; - // When a specific library is selected, we'll gate tag-based results - // (which key on rel_path only, library-agnostic) by "does this - // rel_path actually exist on disk in the selected library's root". - // We check per-file below rather than pre-enumerating image_exif, - // since image_exif may lag a just-added library. - let library_for_scope: Option<&crate::libraries::Library> = library; + // In scoped mode (`library` is Some) we gate tag-based results (which + // key on rel_path only) by "does this rel_path actually exist on disk + // in the selected library's root". In union mode we assign each + // returned file to the first library it resolves in, and drop files + // that exist in no configured library. + let libraries_to_scan: Vec<&crate::libraries::Library> = match library { + Some(lib) => vec![lib], + None => app_state.libraries.iter().collect(), + }; let search_recursively = req.recursive.unwrap_or(false); if let Some(tag_ids) = &req.tag_ids @@ -404,17 +429,23 @@ pub async fn list_photos( true } }) - .filter(|f| { - // Scope to the selected library by checking the file - // actually exists under its root. Falls back to the - // content-hash sibling set (looked up globally, since - // the tagged rel_path may have been registered under - // a different library than the one selected). - let Some(lib) = library_for_scope else { - return true; - }; - if PathBuf::from(&lib.root_path).join(&f.file_name).exists() { - return true; + .filter_map(|f| { + // Apply media type filter first (cheap check before disk I/O). + if let Some(ref media_type) = req.media_type { + let path = PathBuf::from(&f.file_name); + if !matches_media_type(&path, media_type) { + return None; + } + } + + // Resolve the file's library by checking each + // candidate library's root on disk. Falls back to + // content-hash siblings if the rel_path was + // registered under a different path but same content. 
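// The library-resolution order described above, reduced to a sketch. The
// helper name is illustrative (not part of the patch); it assumes the
// module's existing `PathBuf` import and a `Library` with `id` and
// `root_path` fields as used elsewhere in this diff:
fn resolve_owning_library(
    rel_path: &str,
    siblings: &[String],
    libraries: &[crate::libraries::Library],
) -> Option<i32> {
    // 1. Direct hit: the tagged rel_path exists under some library root.
    for lib in libraries {
        if PathBuf::from(&lib.root_path).join(rel_path).exists() {
            return Some(lib.id);
        }
    }
    // 2. Fallback: a content-hash sibling path exists under some root.
    for lib in libraries {
        if siblings
            .iter()
            .any(|p| PathBuf::from(&lib.root_path).join(p).exists())
        {
            return Some(lib.id);
        }
    }
    None // caller decides: drop the row, or attribute it to the primary library
}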
+ for lib in &libraries_to_scan { + if PathBuf::from(&lib.root_path).join(&f.file_name).exists() { + return Some((f, lib.id)); + } } let siblings = { let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); @@ -428,41 +459,50 @@ pub async fn list_photos( None => Vec::new(), } }; - siblings - .iter() - .any(|p| PathBuf::from(&lib.root_path).join(p).exists()) - }) - .filter(|f| { - // Apply media type filtering if specified - if let Some(ref media_type) = req.media_type { - let path = PathBuf::from(&f.file_name); - matches_media_type(&path, media_type) + for lib in &libraries_to_scan { + if siblings + .iter() + .any(|p| PathBuf::from(&lib.root_path).join(p).exists()) + { + return Some((f, lib.id)); + } + } + // Tags are library-agnostic. If we can't confirm which + // library currently holds the file on disk (e.g. the + // tagged rel_path is stale or the caller is testing + // without real files), keep the tagged row and + // attribute it to the primary library so the client + // still sees the tag hit. + if library.is_none() { + Some((f, app_state.primary_library().id)) } else { - true + None } }) - .collect::>() + .collect::>() }) - .map(|files| { + .map(|paired| { // Handle sorting - use helper function that supports EXIF date sorting and pagination let sort_type = req.sort.unwrap_or(NameAsc); let limit = req.limit; let offset = req.offset.unwrap_or(0); + let (files, file_libs): (Vec, Vec) = paired.into_iter().unzip(); let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao"); let result = apply_sorting_with_exif( files, + file_libs, sort_type, &mut exif_dao_guard, &span_context, - scoped_library.root_path.as_ref(), + &app_state.libraries, limit, offset, ); drop(exif_dao_guard); result }) - .inspect(|(files, total)| debug!("Found {:?} files (total: {})", files.len(), total)) - .map(|(tagged_files, total_count)| { + .inspect(|(files, _libs, total)| debug!("Found {:?} files (total: {})", files.len(), total)) + .map(|(tagged_files, photo_libraries, total_count)| { info!( "Found {:?} tagged files: {:?}", tagged_files.len(), @@ -493,7 +533,6 @@ pub async fn list_photos( .set_attribute(KeyValue::new("total_count", total_count.to_string())); span_context.span().set_status(Status::Ok); - let photo_libraries = vec![scoped_library.id; tagged_files.len()]; HttpResponse::Ok().json(PhotosResponse { photos: tagged_files, dirs: vec![], @@ -507,330 +546,346 @@ pub async fn list_photos( .unwrap_or_else(|e| e.error_response()); } - // Use recursive or non-recursive file listing based on flag. Both - // paths must walk the *scoped* library's root; the generic - // FileSystemAccess trait (file_system.get_files_for_path) is pinned - // to AppState's base_path at construction time and doesn't know - // which library the request targets. - let files_result = if search_recursively { - is_valid_full_path( - &PathBuf::from(&scoped_library.root_path), - &PathBuf::from(search_path), - false, - ) - .map(|path| { - debug!("Valid path for recursive search: {:?}", path); - list_files_recursive(&path).unwrap_or_default() - }) - .context("Invalid path") - } else if scoped_library.id == app_state.primary_library().id { - // Primary library: preserve the original FileSystemAccess path so - // the test-mock path (MockFileSystem) continues to work. 
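// Condensed view of the union walk used further down: per library, list the
// tree, strip the library root, and record the relative path plus a parallel
// library id, merging directory names across libraries. `walk_root` is a
// hypothetical stand-in for the real listing calls, and error handling plus
// the primary-library/MockFileSystem special case are omitted:
let mut file_names: Vec<String> = Vec::new();
let mut file_libraries: Vec<i32> = Vec::new();
let mut dirs_set: std::collections::HashSet<String> = std::collections::HashSet::new();
for lib in &libraries_to_scan {
    for path in walk_root(&lib.root_path, search_path) {
        let Ok(rel) = path.strip_prefix(&lib.root_path) else { continue };
        let rel = rel.to_string_lossy().replace('\\', "/"); // normalize for tag/EXIF lookups
        if path.is_dir() {
            dirs_set.insert(rel); // directory names merge across libraries
        } else {
            file_names.push(rel);
            file_libraries.push(lib.id); // parallel entry: which library owns this file
        }
    }
}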
- file_system.get_files_for_path(search_path) - } else { - is_valid_full_path( - &PathBuf::from(&scoped_library.root_path), - &PathBuf::from(search_path), - false, - ) - .map(|path| { - debug!("Valid path for non-recursive search: {:?}", path); - list_files(&path).unwrap_or_default() - }) - .context("Invalid path") - }; + // Walk each candidate library's root for the requested sub-path. In + // scoped mode `libraries_to_scan` has one entry (the selected library); + // in union mode we walk every configured library and intermix results. + // For the primary library we preserve the original FileSystemAccess + // path so the test-mock path (MockFileSystem) continues to work. + let mut file_names: Vec = Vec::new(); + let mut file_libraries: Vec = Vec::new(); + let mut dirs_set: std::collections::HashSet = std::collections::HashSet::new(); + let mut any_library_resolved = false; - match files_result { - Ok(files) => { - info!( - "Found {:?} files in path: {:?} (recursive: {})", - files.len(), - search_path, - search_recursively - ); + for lib in &libraries_to_scan { + let files_result = if search_recursively { + is_valid_full_path( + &PathBuf::from(&lib.root_path), + &PathBuf::from(search_path), + false, + ) + .map(|path| { + debug!("Valid path for recursive search: {:?}", path); + list_files_recursive(&path).unwrap_or_default() + }) + .context("Invalid path") + } else if lib.id == app_state.primary_library().id { + file_system.get_files_for_path(search_path) + } else { + is_valid_full_path( + &PathBuf::from(&lib.root_path), + &PathBuf::from(search_path), + false, + ) + .map(|path| { + debug!("Valid path for non-recursive search: {:?}", path); + list_files(&path).unwrap_or_default() + }) + .context("Invalid path") + }; - info!("Starting to filter {} files from filesystem", files.len()); - let start_filter = std::time::Instant::now(); + let files = match files_result { + Ok(f) => { + any_library_resolved = true; + f + } + Err(e) => { + debug!( + "Skipping library '{}' for path '{}': {:?}", + lib.name, search_path, e + ); + continue; + } + }; - // Separate files and directories in a single pass to avoid redundant metadata calls - let (file_names, dirs): (Vec, Vec) = - files - .iter() - .fold((Vec::new(), Vec::new()), |(mut files, mut dirs), path| { - match path.metadata() { - Ok(md) => { - let relative = path - .strip_prefix(&scoped_library.root_path) - .unwrap_or_else(|_| { - panic!( - "Unable to strip library root {} from file path {}", - &scoped_library.root_path, - path.display() - ) - }); - // Normalize separators to '/' so downstream - // lookups (tags, EXIF, insights) that store - // rel_paths with forward slashes still match - // on Windows. 
- let relative_str = relative.to_str().unwrap().replace('\\', "/"); + info!( + "Found {:?} files in library '{}' path: {:?} (recursive: {})", + files.len(), + lib.name, + search_path, + search_recursively + ); - if md.is_file() { - files.push(relative_str); - } else if md.is_dir() { - dirs.push(relative_str); - } - } - Err(e) => { - error!("Failed getting file metadata: {:?}", e); - // Include files without metadata if they have extensions - if path.extension().is_some() { - let relative = path - .strip_prefix(&scoped_library.root_path) - .unwrap_or_else(|_| { - panic!( - "Unable to strip library root {} from file path {}", - &scoped_library.root_path, - path.display() - ) - }); - files.push(relative.to_str().unwrap().replace('\\', "/")); - } - } - } - (files, dirs) + for path in &files { + match path.metadata() { + Ok(md) => { + let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { + panic!( + "Unable to strip library root {} from file path {}", + &lib.root_path, + path.display() + ) }); + // Normalize separators to '/' so downstream lookups + // (tags, EXIF, insights) that store rel_paths with + // forward slashes still match on Windows. + let relative_str = relative.to_str().unwrap().replace('\\', "/"); + if md.is_file() { + file_names.push(relative_str); + file_libraries.push(lib.id); + } else if md.is_dir() { + dirs_set.insert(relative_str); + } + } + Err(e) => { + error!("Failed getting file metadata: {:?}", e); + // Include files without metadata if they have extensions + if path.extension().is_some() { + let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| { + panic!( + "Unable to strip library root {} from file path {}", + &lib.root_path, + path.display() + ) + }); + file_names.push(relative.to_str().unwrap().replace('\\', "/")); + file_libraries.push(lib.id); + } + } + } + } + } + + if !any_library_resolved { + error!("Bad photos request: {}", req.path); + span_context + .span() + .set_status(Status::error("Invalid path")); + return HttpResponse::BadRequest().finish(); + } + + let dirs: Vec = dirs_set.into_iter().collect(); + + info!( + "Starting to filter {} files from filesystem", + file_names.len() + ); + let start_filter = std::time::Instant::now(); + + info!( + "File filtering took {:?}, now fetching tag counts for {} files", + start_filter.elapsed(), + file_names.len() + ); + let start_tags = std::time::Instant::now(); + + // Batch query for tag counts (tags are library-agnostic / keyed by rel_path). 
+ let tag_counts = { + let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); + tag_dao_guard + .get_tag_counts_batch(&span_context, &file_names) + .unwrap_or_default() + }; + info!("Batch tag count query took {:?}", start_tags.elapsed()); + + let start_tag_filter = std::time::Instant::now(); + let file_tags_map: std::collections::HashMap> = + if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { info!( - "File filtering took {:?}, now fetching tag counts for {} files", - start_filter.elapsed(), + "Tag filtering requested, fetching full tag lists for {} files", file_names.len() ); - let start_tags = std::time::Instant::now(); + let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); + file_names + .iter() + .filter_map(|file_name| { + tag_dao_guard + .get_tags_for_path(&span_context, file_name) + .ok() + .map(|tags| (file_name.clone(), tags)) + }) + .collect() + } else { + std::collections::HashMap::new() + }; + if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { + info!("Full tag list fetch took {:?}", start_tag_filter.elapsed()); + } - // Batch query for tag counts to avoid N+1 queries - let tag_counts = { - let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); - tag_dao_guard - .get_tag_counts_batch(&span_context, &file_names) + // Filter + pair with the parallel library_id while preserving ordering + // so the downstream sort can return both arrays in lockstep. + let photos_with_libs: Vec<(FileWithTagCount, i32)> = file_names + .into_iter() + .zip(file_libraries.into_iter()) + .filter_map(|(file_name, lib_id)| { + let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default(); + + if let Some(tag_ids_csv) = &req.tag_ids { + let tag_ids = tag_ids_csv + .split(',') + .filter_map(|t| t.parse().ok()) + .collect::>(); + + let excluded_tag_ids = req + .exclude_tag_ids + .clone() .unwrap_or_default() - }; - info!("Batch tag count query took {:?}", start_tags.elapsed()); + .split(',') + .filter_map(|t| t.parse().ok()) + .collect::>(); - // Also get full tag lists for files that need tag filtering - let start_tag_filter = std::time::Instant::now(); - let file_tags_map: std::collections::HashMap> = - if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { - info!( - "Tag filtering requested, fetching full tag lists for {} files", - file_names.len() - ); - let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); - file_names - .iter() - .filter_map(|file_name| { - tag_dao_guard - .get_tags_for_path(&span_context, file_name) - .ok() - .map(|tags| (file_name.clone(), tags)) - }) - .collect() - } else { - std::collections::HashMap::new() - }; - if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { - info!("Full tag list fetch took {:?}", start_tag_filter.elapsed()); + let filter_mode = req.tag_filter_mode.unwrap_or(FilterMode::Any); + let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id)); + + let keep = !excluded + && match filter_mode { + FilterMode::Any => file_tags.iter().any(|t| tag_ids.contains(&t.id)), + FilterMode::All => tag_ids + .iter() + .all(|id| file_tags.iter().any(|tag| &tag.id == id)), + }; + if !keep { + return None; + } } - let photos = file_names + if let Some(ref exif_files) = exif_matched_files + && !exif_files.contains(&file_name) + { + return None; + } + + if let Some(ref media_type) = req.media_type { + let path = PathBuf::from(&file_name); + if !matches_media_type(&path, media_type) { + return None; + } + } + + let tag_count = 
*tag_counts.get(&file_name).unwrap_or(&0); + Some(( + FileWithTagCount { + file_name, + tag_count, + }, + lib_id, + )) + }) + .collect(); + + info!( + "After all filters, {} files remain (filtering took {:?})", + photos_with_libs.len(), + start_filter.elapsed() + ); + + // Extract pagination parameters + let limit = req.limit; + let offset = req.offset.unwrap_or(0); + let start_sort = std::time::Instant::now(); + + let (photos, file_libs_sorted_input): (Vec, Vec) = + photos_with_libs.into_iter().unzip(); + + let (response_files, response_libraries, total_count) = if let Some(sort_type) = req.sort { + info!("Sorting {} files by {:?}", photos.len(), sort_type); + let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao"); + let result = apply_sorting_with_exif( + photos, + file_libs_sorted_input, + sort_type, + &mut exif_dao_guard, + &span_context, + &app_state.libraries, + limit, + offset, + ); + drop(exif_dao_guard); + result + } else { + // No sorting requested - apply pagination if requested + let total = photos.len() as i64; + let (paged_files, paged_libs): (Vec, Vec) = if let Some(limit_val) = limit { + photos .into_iter() - .map(|file_name| { - let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default(); - (file_name, file_tags) - }) - .filter(|(_, file_tags): &(String, Vec)| { - if let Some(tag_ids) = &req.tag_ids { - let tag_ids = tag_ids - .split(',') - .filter_map(|t| t.parse().ok()) - .collect::>(); + .zip(file_libs_sorted_input) + .skip(offset as usize) + .take(limit_val as usize) + .map(|(f, lib)| (f.file_name, lib)) + .unzip() + } else { + photos + .into_iter() + .zip(file_libs_sorted_input) + .map(|(f, lib)| (f.file_name, lib)) + .unzip() + }; + (paged_files, paged_libs, total) + }; + info!( + "Sorting took {:?}, returned {} files (total: {})", + start_sort.elapsed(), + response_files.len(), + total_count + ); - let excluded_tag_ids = &req - .exclude_tag_ids - .clone() - .unwrap_or_default() - .split(',') - .filter_map(|t| t.parse().ok()) - .collect::>(); - - let filter_mode = &req.tag_filter_mode.unwrap_or(FilterMode::Any); - let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id)); - - return !excluded - && match filter_mode { - FilterMode::Any => { - file_tags.iter().any(|t| tag_ids.contains(&t.id)) - } - FilterMode::All => tag_ids - .iter() - .all(|id| file_tags.iter().any(|tag| &tag.id == id)), - }; - } - - true - }) - .filter(|(file_name, _)| { - // Apply EXIF filtering if present - if let Some(ref exif_files) = exif_matched_files { - exif_files.contains(file_name) - } else { - true - } - }) - .filter(|(file_name, _)| { - // Apply media type filtering if specified - if let Some(ref media_type) = req.media_type { - let path = PathBuf::from(file_name); - matches_media_type(&path, media_type) - } else { - true - } - }) - .map( - |(file_name, _tags): (String, Vec)| FileWithTagCount { - file_name: file_name.clone(), - tag_count: *tag_counts.get(&file_name).unwrap_or(&0), - }, - ) - .collect::>(); - - info!( - "After all filters, {} files remain (filtering took {:?})", - photos.len(), - start_filter.elapsed() - ); - - // Extract pagination parameters - let limit = req.limit; - let offset = req.offset.unwrap_or(0); - let start_sort = std::time::Instant::now(); - - // Handle sorting - use helper function that supports EXIF date sorting and pagination - let (response_files, total_count) = if let Some(sort_type) = req.sort { - info!("Sorting {} files by {:?}", photos.len(), sort_type); - let mut exif_dao_guard = 
exif_dao.lock().expect("Unable to get ExifDao"); - let result = apply_sorting_with_exif( - photos, - sort_type, - &mut exif_dao_guard, - &span_context, - scoped_library.root_path.as_ref(), - limit, - offset, - ); - drop(exif_dao_guard); - result + let returned_count = response_files.len() as i64; + let pagination_metadata = if limit.is_some() { + ( + Some(total_count), + Some(offset + returned_count < total_count), + if offset + returned_count < total_count { + Some(offset + returned_count) } else { - // No sorting requested - apply pagination if requested - let total = photos.len() as i64; - let files: Vec = if let Some(limit_val) = limit { - photos - .into_iter() - .skip(offset as usize) - .take(limit_val as usize) - .map(|f| f.file_name) - .collect() - } else { - photos.into_iter().map(|f| f.file_name).collect() - }; - (files, total) - }; - info!( - "Sorting took {:?}, returned {} files (total: {})", - start_sort.elapsed(), - response_files.len(), - total_count - ); + None + }, + ) + } else { + (None, None, None) + }; - // Note: dirs were already collected during file filtering to avoid redundant metadata calls + span_context.span().set_attribute(KeyValue::new( + "file_count", + response_files.len().to_string(), + )); + span_context + .span() + .set_attribute(KeyValue::new("returned_count", returned_count.to_string())); + span_context + .span() + .set_attribute(KeyValue::new("total_count", total_count.to_string())); + span_context.span().set_status(Status::Ok); - // Calculate pagination metadata - let returned_count = response_files.len() as i64; - let pagination_metadata = if limit.is_some() { - ( - Some(total_count), - Some(offset + returned_count < total_count), - if offset + returned_count < total_count { - Some(offset + returned_count) - } else { - None - }, - ) - } else { - (None, None, None) - }; - - span_context - .span() - .set_attribute(KeyValue::new("file_count", files.len().to_string())); - span_context - .span() - .set_attribute(KeyValue::new("returned_count", returned_count.to_string())); - span_context - .span() - .set_attribute(KeyValue::new("total_count", total_count.to_string())); - span_context.span().set_status(Status::Ok); - - let photo_libraries = vec![scoped_library.id; response_files.len()]; - HttpResponse::Ok().json(PhotosResponse { - photos: response_files, - dirs, - photo_libraries, - total_count: pagination_metadata.0, - has_more: pagination_metadata.1, - next_offset: pagination_metadata.2, - }) - } - _ => { - error!("Bad photos request: {}", req.path); - span_context - .span() - .set_status(Status::error("Invalid path")); - HttpResponse::BadRequest().finish() - } - } + HttpResponse::Ok().json(PhotosResponse { + photos: response_files, + dirs, + photo_libraries: response_libraries, + total_count: pagination_metadata.0, + has_more: pagination_metadata.1, + next_offset: pagination_metadata.2, + }) } -fn sort(mut files: Vec, sort_type: SortType) -> Vec { +fn sort( + files: Vec, + file_libraries: Vec, + sort_type: SortType, +) -> (Vec, Vec) { + let mut paired: Vec<(FileWithTagCount, i32)> = files.into_iter().zip(file_libraries).collect(); + match sort_type { - SortType::Shuffle => files.shuffle(&mut thread_rng()), - NameAsc => { - files.sort_by(|l, r| l.file_name.cmp(&r.file_name)); - } - SortType::NameDesc => { - files.sort_by(|l, r| r.file_name.cmp(&l.file_name)); - } - SortType::TagCountAsc => { - files.sort_by(|l, r| l.tag_count.cmp(&r.tag_count)); - } - SortType::TagCountDesc => { - files.sort_by(|l, r| r.tag_count.cmp(&l.tag_count)); - } + SortType::Shuffle 
=> paired.shuffle(&mut thread_rng()), + NameAsc => paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name)), + SortType::NameDesc => paired.sort_by(|l, r| r.0.file_name.cmp(&l.0.file_name)), + SortType::TagCountAsc => paired.sort_by(|l, r| l.0.tag_count.cmp(&r.0.tag_count)), + SortType::TagCountDesc => paired.sort_by(|l, r| r.0.tag_count.cmp(&l.0.tag_count)), SortType::DateTakenAsc | SortType::DateTakenDesc => { - // Date sorting not implemented for FileWithTagCount - // We shouldn't be hitting this code warn!("Date sorting not implemented for FileWithTagCount"); - files.sort_by(|l, r| l.file_name.cmp(&r.file_name)); + paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name)); } } - files - .iter() - .map(|f| f.file_name.clone()) - .collect::>() + paired + .into_iter() + .map(|(f, lib)| (f.file_name, lib)) + .unzip() } /// Sort files with metadata support (including date sorting) -fn sort_with_metadata(mut files: Vec, sort_type: SortType) -> Vec { +fn sort_with_metadata( + mut files: Vec, + sort_type: SortType, +) -> (Vec, Vec) { match sort_type { SortType::Shuffle => files.shuffle(&mut thread_rng()), NameAsc => { @@ -864,9 +919,9 @@ fn sort_with_metadata(mut files: Vec, sort_type: SortType) -> } files - .iter() - .map(|f| f.file_name.clone()) - .collect::>() + .into_iter() + .map(|f| (f.file_name, f.library_id)) + .unzip() } pub fn list_files(dir: &Path) -> io::Result> { @@ -1369,6 +1424,7 @@ mod tests { fn get_all_with_date_taken( &mut self, _context: &opentelemetry::Context, + _library_id: Option, ) -> Result, DbError> { Ok(Vec::new()) } diff --git a/src/memories.rs b/src/memories.rs index 64a4c95..875a72c 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -16,6 +16,7 @@ use walkdir::WalkDir; use crate::data::Claims; use crate::database::ExifDao; use crate::files::is_image_or_video; +use crate::libraries::Library; use crate::otel::{extract_context_from_request, global_tracer}; use crate::state::AppState; @@ -378,7 +379,7 @@ fn collect_exif_memories( ) -> Vec<(MemoryItem, NaiveDate)> { // Query database for all files with date_taken let exif_records = match exif_dao.lock() { - Ok(mut dao) => match dao.get_all_with_date_taken(context) { + Ok(mut dao) => match dao.get_all_with_date_taken(context, Some(library_id)) { Ok(records) => records, Err(e) => { warn!("Failed to query EXIF database: {:?}", e); @@ -546,48 +547,50 @@ pub async fn list_memories( return HttpResponse::BadRequest().body(msg); } }; - // For Phase 3 the walker still operates against a single library's root. - // Multi-library union support for the filesystem walk comes in Phase 4. - let scoped_library = library.unwrap_or_else(|| app_state.primary_library()); - let base = Path::new(&scoped_library.root_path); + // When `library` is `Some`, scope to that one library; otherwise union + // across every configured library and let the results interleave. 
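// Both handlers derive the scan set the same way: a resolved `Some(lib)`
// means scoped mode, `None` means union mode over every configured library.
// A stand-alone sketch of that fan-out (the function name is illustrative):
fn candidate_libraries<'a>(filter: Option<&'a Library>, all: &'a [Library]) -> Vec<&'a Library> {
    match filter {
        Some(lib) => vec![lib],       // scoped: walk only the requested library
        None => all.iter().collect(), // union: walk every configured root
    }
}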
+ let libraries_to_scan: Vec<&Library> = match library { + Some(lib) => vec![lib], + None => app_state.libraries.iter().collect(), + }; - // Build the path excluder from base and env-configured exclusions - let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs); + let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = Vec::new(); - // Phase 1: Query EXIF database - let exif_memories = collect_exif_memories( - &exif_dao, - &span_context, - &scoped_library.root_path, - scoped_library.id, - now, - span_mode, - years_back, - &client_timezone, - &path_excluder, - ); + for lib in &libraries_to_scan { + let base = Path::new(&lib.root_path); + let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs); - // Build HashSet for deduplication - let exif_paths: HashSet = exif_memories - .iter() - .map(|(item, _)| PathBuf::from(&scoped_library.root_path).join(&item.path)) - .collect(); + let exif_memories = collect_exif_memories( + &exif_dao, + &span_context, + &lib.root_path, + lib.id, + now, + span_mode, + years_back, + &client_timezone, + &path_excluder, + ); - // Phase 2: File system scan (skip EXIF files) - let fs_memories = collect_filesystem_memories( - &scoped_library.root_path, - scoped_library.id, - &path_excluder, - &exif_paths, - now, - span_mode, - years_back, - &client_timezone, - ); + let exif_paths: HashSet = exif_memories + .iter() + .map(|(item, _)| PathBuf::from(&lib.root_path).join(&item.path)) + .collect(); - // Phase 3: Merge and sort - let mut memories_with_dates = exif_memories; - memories_with_dates.extend(fs_memories); + let fs_memories = collect_filesystem_memories( + &lib.root_path, + lib.id, + &path_excluder, + &exif_paths, + now, + span_mode, + years_back, + &client_timezone, + ); + + memories_with_dates.extend(exif_memories); + memories_with_dates.extend(fs_memories); + } match span_mode { // Sort by absolute time for a more 'overview'