feat: union /photos and /memories across libraries

When `library` is omitted, both endpoints now walk every configured
library root, interleave the results, and tag each row with its source
library via the parallel `photo_libraries` response array (each row's
`library_id`). Previously the handlers fell back to the primary library,
silently hiding the rest.

Threads a parallel `file_libraries: Vec<i32>` through the sort/paginate
helpers so library attribution survives sorting and pagination.
Directory names are de-duplicated across libraries.

`get_all_with_date_taken` grows an optional library filter so the
memories endpoint can scope its EXIF query per-library during the union walk.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-18 17:27:41 -04:00
parent 1f1f1ae9f6
commit 33a89a214c
3 changed files with 490 additions and 421 deletions

View File

@@ -266,6 +266,7 @@ pub trait ExifDao: Sync + Send {
fn get_all_with_date_taken( fn get_all_with_date_taken(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
library_id: Option<i32>,
) -> Result<Vec<(String, i64)>, DbError>; ) -> Result<Vec<(String, i64)>, DbError>;
/// Batch load EXIF data for multiple file paths (single query) /// Batch load EXIF data for multiple file paths (single query)
@@ -523,15 +524,24 @@ impl ExifDao for SqliteExifDao {
fn get_all_with_date_taken( fn get_all_with_date_taken(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
lib_id: Option<i32>,
) -> Result<Vec<(String, i64)>, DbError> { ) -> Result<Vec<(String, i64)>, DbError> {
trace_db_call(context, "query", "get_all_with_date_taken", |_span| { trace_db_call(context, "query", "get_all_with_date_taken", |_span| {
use schema::image_exif::dsl::*; use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao"); let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif let query = image_exif
.select((rel_path, date_taken)) .select((rel_path, date_taken))
.filter(date_taken.is_not_null()) .filter(date_taken.is_not_null())
.into_boxed();
let query = match lib_id {
Some(filter_id) => query.filter(library_id.eq(filter_id)),
None => query,
};
query
.load::<(String, Option<i64>)>(connection.deref_mut()) .load::<(String, Option<i64>)>(connection.deref_mut())
.map(|records| { .map(|records| {
records records

View File

@@ -41,52 +41,53 @@ pub struct FileWithMetadata {
pub file_name: String, pub file_name: String,
pub tag_count: i64, pub tag_count: i64,
pub date_taken: Option<i64>, // Unix timestamp from EXIF or filename extraction pub date_taken: Option<i64>, // Unix timestamp from EXIF or filename extraction
pub library_id: i32,
} }
use serde::Deserialize; use serde::Deserialize;
/// Apply sorting to files with EXIF data support for date-based sorting /// Apply sorting to files with EXIF data support for date-based sorting
/// Handles both date sorting (with EXIF/filename fallback) and regular sorting /// Handles both date sorting (with EXIF/filename fallback) and regular sorting
/// Returns (sorted_file_paths, total_count) /// Returns (sorted_file_paths, sorted_library_ids, total_count)
fn apply_sorting_with_exif( fn apply_sorting_with_exif(
files: Vec<FileWithTagCount>, files: Vec<FileWithTagCount>,
file_libraries: Vec<i32>,
sort_type: SortType, sort_type: SortType,
exif_dao: &mut Box<dyn ExifDao>, exif_dao: &mut Box<dyn ExifDao>,
span_context: &opentelemetry::Context, span_context: &opentelemetry::Context,
base_path: &Path, libraries: &[crate::libraries::Library],
limit: Option<i64>, limit: Option<i64>,
offset: i64, offset: i64,
) -> (Vec<String>, i64) { ) -> (Vec<String>, Vec<i32>, i64) {
let total_count = files.len() as i64; let total_count = files.len() as i64;
match sort_type { match sort_type {
SortType::DateTakenAsc | SortType::DateTakenDesc => { SortType::DateTakenAsc | SortType::DateTakenDesc => {
info!("Date sorting requested, using in-memory sort with EXIF/filename fallback"); info!("Date sorting requested, using in-memory sort with EXIF/filename fallback");
// Use in-memory sort so files without EXIF dates are included via let (sorted, sorted_libs, _) = in_memory_date_sort(
// filename extraction and filesystem metadata fallbacks.
let (sorted, _) = in_memory_date_sort(
files, files,
file_libraries,
sort_type, sort_type,
exif_dao, exif_dao,
span_context, span_context,
base_path, libraries,
limit, limit,
offset, offset,
); );
(sorted, total_count) (sorted, sorted_libs, total_count)
} }
_ => { _ => {
// Use regular sort for non-date sorting let (sorted, sorted_libs) = sort(files, file_libraries, sort_type);
let sorted = sort(files, sort_type); let (result, result_libs) = if let Some(limit_val) = limit {
let result = if let Some(limit_val) = limit { let skip = offset as usize;
sorted let take = limit_val as usize;
.into_iter() (
.skip(offset as usize) sorted.iter().skip(skip).take(take).cloned().collect(),
.take(limit_val as usize) sorted_libs.iter().skip(skip).take(take).copied().collect(),
.collect() )
} else { } else {
sorted (sorted, sorted_libs)
}; };
(result, total_count) (result, result_libs, total_count)
} }
} }
} }
@@ -94,66 +95,88 @@ fn apply_sorting_with_exif(
/// Fallback in-memory date sorting with EXIF/filename extraction /// Fallback in-memory date sorting with EXIF/filename extraction
fn in_memory_date_sort( fn in_memory_date_sort(
files: Vec<FileWithTagCount>, files: Vec<FileWithTagCount>,
file_libraries: Vec<i32>,
sort_type: SortType, sort_type: SortType,
exif_dao: &mut Box<dyn ExifDao>, exif_dao: &mut Box<dyn ExifDao>,
span_context: &opentelemetry::Context, span_context: &opentelemetry::Context,
base_path: &Path, libraries: &[crate::libraries::Library],
limit: Option<i64>, limit: Option<i64>,
offset: i64, offset: i64,
) -> (Vec<String>, i64) { ) -> (Vec<String>, Vec<i32>, i64) {
let total_count = files.len() as i64; let total_count = files.len() as i64;
let file_paths: Vec<String> = files.iter().map(|f| f.file_name.clone()).collect(); let file_paths: Vec<String> = files.iter().map(|f| f.file_name.clone()).collect();
// Batch fetch EXIF data // Batch fetch EXIF data (keyed by rel_path; in union mode a rel_path may
let exif_map: std::collections::HashMap<String, i64> = exif_dao // correspond to rows in multiple libraries — pick the date from the one
// matching the requesting row's library_id when possible).
let exif_rows = exif_dao
.get_exif_batch(span_context, &file_paths) .get_exif_batch(span_context, &file_paths)
.unwrap_or_default() .unwrap_or_default();
let exif_map: std::collections::HashMap<(String, i32), i64> = exif_rows
.into_iter() .into_iter()
.filter_map(|exif| exif.date_taken.map(|dt| (exif.file_path, dt))) .filter_map(|exif| {
exif.date_taken
.map(|dt| ((exif.file_path, exif.library_id), dt))
})
.collect();
let lib_roots: std::collections::HashMap<i32, &str> = libraries
.iter()
.map(|l| (l.id, l.root_path.as_str()))
.collect(); .collect();
// Convert to FileWithMetadata with date fallback logic // Convert to FileWithMetadata with date fallback logic
let files_with_metadata: Vec<FileWithMetadata> = files let files_with_metadata: Vec<FileWithMetadata> = files
.into_iter() .into_iter()
.map(|f| { .zip(file_libraries.iter().copied())
// Try EXIF date first .map(|(f, lib_id)| {
let date_taken = exif_map let date_taken = exif_map
.get(&f.file_name) .get(&(f.file_name.clone(), lib_id))
.copied() .copied()
.or_else(|| extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp()))
.or_else(|| { .or_else(|| {
// Fallback to filename extraction lib_roots.get(&lib_id).and_then(|root| {
extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp()) let full_path = Path::new(root).join(&f.file_name);
}) std::fs::metadata(full_path)
.or_else(|| { .and_then(|md| md.created().or(md.modified()))
// Fallback to filesystem metadata creation date .ok()
let full_path = base_path.join(&f.file_name); .map(|system_time| {
std::fs::metadata(full_path) <SystemTime as Into<DateTime<Utc>>>::into(system_time).timestamp()
.and_then(|md| md.created().or(md.modified())) })
.ok() })
.map(|system_time| {
<SystemTime as Into<DateTime<Utc>>>::into(system_time).timestamp()
})
}); });
FileWithMetadata { FileWithMetadata {
file_name: f.file_name, file_name: f.file_name,
tag_count: f.tag_count, tag_count: f.tag_count,
date_taken, date_taken,
library_id: lib_id,
} }
}) })
.collect(); .collect();
let sorted = sort_with_metadata(files_with_metadata, sort_type); let (sorted, sorted_libs) = sort_with_metadata(files_with_metadata, sort_type);
let result = if let Some(limit_val) = limit { let (result, result_libs) = if let Some(limit_val) = limit {
sorted let skip = offset as usize;
.into_iter() let take = limit_val as usize;
.skip(offset as usize) (
.take(limit_val as usize) sorted
.collect() .iter()
.skip(skip)
.take(take)
.cloned()
.collect::<Vec<String>>(),
sorted_libs
.iter()
.skip(skip)
.take(take)
.copied()
.collect::<Vec<i32>>(),
)
} else { } else {
sorted (sorted, sorted_libs)
}; };
(result, total_count) (result, result_libs, total_count)
} }
pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>( pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
@@ -237,9 +260,9 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
KeyValue::new("library", req.library.clone().unwrap_or_default()), KeyValue::new("library", req.library.clone().unwrap_or_default()),
]); ]);
// Resolve the optional library filter. Unknown values return 400. // Resolve the optional library filter. Unknown values return 400. A
// For Phase 3 the filesystem walk still operates against a single // `None` result means "union across all libraries" and downstream
// library's root; Phase 4 introduces multi-root union scanning. // walks iterate every configured library root.
let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref())
{ {
Ok(lib) => lib, Ok(lib) => lib,
@@ -248,7 +271,6 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
return HttpResponse::BadRequest().body(msg); return HttpResponse::BadRequest().body(msg);
} }
}; };
let scoped_library = library.unwrap_or_else(|| app_state.primary_library());
let span_context = opentelemetry::Context::current_with_span(span); let span_context = opentelemetry::Context::current_with_span(span);
@@ -332,12 +354,15 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
None None
}; };
// When a specific library is selected, we'll gate tag-based results // In scoped mode (`library` is Some) we gate tag-based results (which
// (which key on rel_path only, library-agnostic) by "does this // key on rel_path only) by "does this rel_path actually exist on disk
// rel_path actually exist on disk in the selected library's root". // in the selected library's root". In union mode we assign each
// We check per-file below rather than pre-enumerating image_exif, // returned file to the first library it resolves in, and drop files
// since image_exif may lag a just-added library. // that exist in no configured library.
let library_for_scope: Option<&crate::libraries::Library> = library; let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
Some(lib) => vec![lib],
None => app_state.libraries.iter().collect(),
};
let search_recursively = req.recursive.unwrap_or(false); let search_recursively = req.recursive.unwrap_or(false);
if let Some(tag_ids) = &req.tag_ids if let Some(tag_ids) = &req.tag_ids
@@ -404,17 +429,23 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
true true
} }
}) })
.filter(|f| { .filter_map(|f| {
// Scope to the selected library by checking the file // Apply media type filter first (cheap check before disk I/O).
// actually exists under its root. Falls back to the if let Some(ref media_type) = req.media_type {
// content-hash sibling set (looked up globally, since let path = PathBuf::from(&f.file_name);
// the tagged rel_path may have been registered under if !matches_media_type(&path, media_type) {
// a different library than the one selected). return None;
let Some(lib) = library_for_scope else { }
return true; }
};
if PathBuf::from(&lib.root_path).join(&f.file_name).exists() { // Resolve the file's library by checking each
return true; // candidate library's root on disk. Falls back to
// content-hash siblings if the rel_path was
// registered under a different path but same content.
for lib in &libraries_to_scan {
if PathBuf::from(&lib.root_path).join(&f.file_name).exists() {
return Some((f, lib.id));
}
} }
let siblings = { let siblings = {
let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); let mut dao = exif_dao.lock().expect("Unable to get ExifDao");
@@ -428,41 +459,50 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
None => Vec::new(), None => Vec::new(),
} }
}; };
siblings for lib in &libraries_to_scan {
.iter() if siblings
.any(|p| PathBuf::from(&lib.root_path).join(p).exists()) .iter()
}) .any(|p| PathBuf::from(&lib.root_path).join(p).exists())
.filter(|f| { {
// Apply media type filtering if specified return Some((f, lib.id));
if let Some(ref media_type) = req.media_type { }
let path = PathBuf::from(&f.file_name); }
matches_media_type(&path, media_type) // Tags are library-agnostic. If we can't confirm which
// library currently holds the file on disk (e.g. the
// tagged rel_path is stale or the caller is testing
// without real files), keep the tagged row and
// attribute it to the primary library so the client
// still sees the tag hit.
if library.is_none() {
Some((f, app_state.primary_library().id))
} else { } else {
true None
} }
}) })
.collect::<Vec<FileWithTagCount>>() .collect::<Vec<(FileWithTagCount, i32)>>()
}) })
.map(|files| { .map(|paired| {
// Handle sorting - use helper function that supports EXIF date sorting and pagination // Handle sorting - use helper function that supports EXIF date sorting and pagination
let sort_type = req.sort.unwrap_or(NameAsc); let sort_type = req.sort.unwrap_or(NameAsc);
let limit = req.limit; let limit = req.limit;
let offset = req.offset.unwrap_or(0); let offset = req.offset.unwrap_or(0);
let (files, file_libs): (Vec<FileWithTagCount>, Vec<i32>) = paired.into_iter().unzip();
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao"); let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result = apply_sorting_with_exif( let result = apply_sorting_with_exif(
files, files,
file_libs,
sort_type, sort_type,
&mut exif_dao_guard, &mut exif_dao_guard,
&span_context, &span_context,
scoped_library.root_path.as_ref(), &app_state.libraries,
limit, limit,
offset, offset,
); );
drop(exif_dao_guard); drop(exif_dao_guard);
result result
}) })
.inspect(|(files, total)| debug!("Found {:?} files (total: {})", files.len(), total)) .inspect(|(files, _libs, total)| debug!("Found {:?} files (total: {})", files.len(), total))
.map(|(tagged_files, total_count)| { .map(|(tagged_files, photo_libraries, total_count)| {
info!( info!(
"Found {:?} tagged files: {:?}", "Found {:?} tagged files: {:?}",
tagged_files.len(), tagged_files.len(),
@@ -493,7 +533,6 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
.set_attribute(KeyValue::new("total_count", total_count.to_string())); .set_attribute(KeyValue::new("total_count", total_count.to_string()));
span_context.span().set_status(Status::Ok); span_context.span().set_status(Status::Ok);
let photo_libraries = vec![scoped_library.id; tagged_files.len()];
HttpResponse::Ok().json(PhotosResponse { HttpResponse::Ok().json(PhotosResponse {
photos: tagged_files, photos: tagged_files,
dirs: vec![], dirs: vec![],
@@ -507,330 +546,346 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
.unwrap_or_else(|e| e.error_response()); .unwrap_or_else(|e| e.error_response());
} }
// Use recursive or non-recursive file listing based on flag. Both // Walk each candidate library's root for the requested sub-path. In
// paths must walk the *scoped* library's root; the generic // scoped mode `libraries_to_scan` has one entry (the selected library);
// FileSystemAccess trait (file_system.get_files_for_path) is pinned // in union mode we walk every configured library and intermix results.
// to AppState's base_path at construction time and doesn't know // For the primary library we preserve the original FileSystemAccess
// which library the request targets. // path so the test-mock path (MockFileSystem) continues to work.
let files_result = if search_recursively { let mut file_names: Vec<String> = Vec::new();
is_valid_full_path( let mut file_libraries: Vec<i32> = Vec::new();
&PathBuf::from(&scoped_library.root_path), let mut dirs_set: std::collections::HashSet<String> = std::collections::HashSet::new();
&PathBuf::from(search_path), let mut any_library_resolved = false;
false,
)
.map(|path| {
debug!("Valid path for recursive search: {:?}", path);
list_files_recursive(&path).unwrap_or_default()
})
.context("Invalid path")
} else if scoped_library.id == app_state.primary_library().id {
// Primary library: preserve the original FileSystemAccess path so
// the test-mock path (MockFileSystem) continues to work.
file_system.get_files_for_path(search_path)
} else {
is_valid_full_path(
&PathBuf::from(&scoped_library.root_path),
&PathBuf::from(search_path),
false,
)
.map(|path| {
debug!("Valid path for non-recursive search: {:?}", path);
list_files(&path).unwrap_or_default()
})
.context("Invalid path")
};
match files_result { for lib in &libraries_to_scan {
Ok(files) => { let files_result = if search_recursively {
info!( is_valid_full_path(
"Found {:?} files in path: {:?} (recursive: {})", &PathBuf::from(&lib.root_path),
files.len(), &PathBuf::from(search_path),
search_path, false,
search_recursively )
); .map(|path| {
debug!("Valid path for recursive search: {:?}", path);
list_files_recursive(&path).unwrap_or_default()
})
.context("Invalid path")
} else if lib.id == app_state.primary_library().id {
file_system.get_files_for_path(search_path)
} else {
is_valid_full_path(
&PathBuf::from(&lib.root_path),
&PathBuf::from(search_path),
false,
)
.map(|path| {
debug!("Valid path for non-recursive search: {:?}", path);
list_files(&path).unwrap_or_default()
})
.context("Invalid path")
};
info!("Starting to filter {} files from filesystem", files.len()); let files = match files_result {
let start_filter = std::time::Instant::now(); Ok(f) => {
any_library_resolved = true;
f
}
Err(e) => {
debug!(
"Skipping library '{}' for path '{}': {:?}",
lib.name, search_path, e
);
continue;
}
};
// Separate files and directories in a single pass to avoid redundant metadata calls info!(
let (file_names, dirs): (Vec<String>, Vec<String>) = "Found {:?} files in library '{}' path: {:?} (recursive: {})",
files files.len(),
.iter() lib.name,
.fold((Vec::new(), Vec::new()), |(mut files, mut dirs), path| { search_path,
match path.metadata() { search_recursively
Ok(md) => { );
let relative = path
.strip_prefix(&scoped_library.root_path)
.unwrap_or_else(|_| {
panic!(
"Unable to strip library root {} from file path {}",
&scoped_library.root_path,
path.display()
)
});
// Normalize separators to '/' so downstream
// lookups (tags, EXIF, insights) that store
// rel_paths with forward slashes still match
// on Windows.
let relative_str = relative.to_str().unwrap().replace('\\', "/");
if md.is_file() { for path in &files {
files.push(relative_str); match path.metadata() {
} else if md.is_dir() { Ok(md) => {
dirs.push(relative_str); let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| {
} panic!(
} "Unable to strip library root {} from file path {}",
Err(e) => { &lib.root_path,
error!("Failed getting file metadata: {:?}", e); path.display()
// Include files without metadata if they have extensions )
if path.extension().is_some() {
let relative = path
.strip_prefix(&scoped_library.root_path)
.unwrap_or_else(|_| {
panic!(
"Unable to strip library root {} from file path {}",
&scoped_library.root_path,
path.display()
)
});
files.push(relative.to_str().unwrap().replace('\\', "/"));
}
}
}
(files, dirs)
}); });
// Normalize separators to '/' so downstream lookups
// (tags, EXIF, insights) that store rel_paths with
// forward slashes still match on Windows.
let relative_str = relative.to_str().unwrap().replace('\\', "/");
if md.is_file() {
file_names.push(relative_str);
file_libraries.push(lib.id);
} else if md.is_dir() {
dirs_set.insert(relative_str);
}
}
Err(e) => {
error!("Failed getting file metadata: {:?}", e);
// Include files without metadata if they have extensions
if path.extension().is_some() {
let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| {
panic!(
"Unable to strip library root {} from file path {}",
&lib.root_path,
path.display()
)
});
file_names.push(relative.to_str().unwrap().replace('\\', "/"));
file_libraries.push(lib.id);
}
}
}
}
}
if !any_library_resolved {
error!("Bad photos request: {}", req.path);
span_context
.span()
.set_status(Status::error("Invalid path"));
return HttpResponse::BadRequest().finish();
}
let dirs: Vec<String> = dirs_set.into_iter().collect();
info!(
"Starting to filter {} files from filesystem",
file_names.len()
);
let start_filter = std::time::Instant::now();
info!(
"File filtering took {:?}, now fetching tag counts for {} files",
start_filter.elapsed(),
file_names.len()
);
let start_tags = std::time::Instant::now();
// Batch query for tag counts (tags are library-agnostic / keyed by rel_path).
let tag_counts = {
let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
tag_dao_guard
.get_tag_counts_batch(&span_context, &file_names)
.unwrap_or_default()
};
info!("Batch tag count query took {:?}", start_tags.elapsed());
let start_tag_filter = std::time::Instant::now();
let file_tags_map: std::collections::HashMap<String, Vec<crate::tags::Tag>> =
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
info!( info!(
"File filtering took {:?}, now fetching tag counts for {} files", "Tag filtering requested, fetching full tag lists for {} files",
start_filter.elapsed(),
file_names.len() file_names.len()
); );
let start_tags = std::time::Instant::now(); let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
file_names
.iter()
.filter_map(|file_name| {
tag_dao_guard
.get_tags_for_path(&span_context, file_name)
.ok()
.map(|tags| (file_name.clone(), tags))
})
.collect()
} else {
std::collections::HashMap::new()
};
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
info!("Full tag list fetch took {:?}", start_tag_filter.elapsed());
}
// Batch query for tag counts to avoid N+1 queries // Filter + pair with the parallel library_id while preserving ordering
let tag_counts = { // so the downstream sort can return both arrays in lockstep.
let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); let photos_with_libs: Vec<(FileWithTagCount, i32)> = file_names
tag_dao_guard .into_iter()
.get_tag_counts_batch(&span_context, &file_names) .zip(file_libraries.into_iter())
.filter_map(|(file_name, lib_id)| {
let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default();
if let Some(tag_ids_csv) = &req.tag_ids {
let tag_ids = tag_ids_csv
.split(',')
.filter_map(|t| t.parse().ok())
.collect::<Vec<i32>>();
let excluded_tag_ids = req
.exclude_tag_ids
.clone()
.unwrap_or_default() .unwrap_or_default()
}; .split(',')
info!("Batch tag count query took {:?}", start_tags.elapsed()); .filter_map(|t| t.parse().ok())
.collect::<Vec<i32>>();
// Also get full tag lists for files that need tag filtering let filter_mode = req.tag_filter_mode.unwrap_or(FilterMode::Any);
let start_tag_filter = std::time::Instant::now(); let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id));
let file_tags_map: std::collections::HashMap<String, Vec<crate::tags::Tag>> =
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() { let keep = !excluded
info!( && match filter_mode {
"Tag filtering requested, fetching full tag lists for {} files", FilterMode::Any => file_tags.iter().any(|t| tag_ids.contains(&t.id)),
file_names.len() FilterMode::All => tag_ids
); .iter()
let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao"); .all(|id| file_tags.iter().any(|tag| &tag.id == id)),
file_names };
.iter() if !keep {
.filter_map(|file_name| { return None;
tag_dao_guard }
.get_tags_for_path(&span_context, file_name)
.ok()
.map(|tags| (file_name.clone(), tags))
})
.collect()
} else {
std::collections::HashMap::new()
};
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
info!("Full tag list fetch took {:?}", start_tag_filter.elapsed());
} }
let photos = file_names if let Some(ref exif_files) = exif_matched_files
&& !exif_files.contains(&file_name)
{
return None;
}
if let Some(ref media_type) = req.media_type {
let path = PathBuf::from(&file_name);
if !matches_media_type(&path, media_type) {
return None;
}
}
let tag_count = *tag_counts.get(&file_name).unwrap_or(&0);
Some((
FileWithTagCount {
file_name,
tag_count,
},
lib_id,
))
})
.collect();
info!(
"After all filters, {} files remain (filtering took {:?})",
photos_with_libs.len(),
start_filter.elapsed()
);
// Extract pagination parameters
let limit = req.limit;
let offset = req.offset.unwrap_or(0);
let start_sort = std::time::Instant::now();
let (photos, file_libs_sorted_input): (Vec<FileWithTagCount>, Vec<i32>) =
photos_with_libs.into_iter().unzip();
let (response_files, response_libraries, total_count) = if let Some(sort_type) = req.sort {
info!("Sorting {} files by {:?}", photos.len(), sort_type);
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result = apply_sorting_with_exif(
photos,
file_libs_sorted_input,
sort_type,
&mut exif_dao_guard,
&span_context,
&app_state.libraries,
limit,
offset,
);
drop(exif_dao_guard);
result
} else {
// No sorting requested - apply pagination if requested
let total = photos.len() as i64;
let (paged_files, paged_libs): (Vec<String>, Vec<i32>) = if let Some(limit_val) = limit {
photos
.into_iter() .into_iter()
.map(|file_name| { .zip(file_libs_sorted_input)
let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default(); .skip(offset as usize)
(file_name, file_tags) .take(limit_val as usize)
}) .map(|(f, lib)| (f.file_name, lib))
.filter(|(_, file_tags): &(String, Vec<crate::tags::Tag>)| { .unzip()
if let Some(tag_ids) = &req.tag_ids { } else {
let tag_ids = tag_ids photos
.split(',') .into_iter()
.filter_map(|t| t.parse().ok()) .zip(file_libs_sorted_input)
.collect::<Vec<i32>>(); .map(|(f, lib)| (f.file_name, lib))
.unzip()
};
(paged_files, paged_libs, total)
};
info!(
"Sorting took {:?}, returned {} files (total: {})",
start_sort.elapsed(),
response_files.len(),
total_count
);
let excluded_tag_ids = &req let returned_count = response_files.len() as i64;
.exclude_tag_ids let pagination_metadata = if limit.is_some() {
.clone() (
.unwrap_or_default() Some(total_count),
.split(',') Some(offset + returned_count < total_count),
.filter_map(|t| t.parse().ok()) if offset + returned_count < total_count {
.collect::<Vec<i32>>(); Some(offset + returned_count)
let filter_mode = &req.tag_filter_mode.unwrap_or(FilterMode::Any);
let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id));
return !excluded
&& match filter_mode {
FilterMode::Any => {
file_tags.iter().any(|t| tag_ids.contains(&t.id))
}
FilterMode::All => tag_ids
.iter()
.all(|id| file_tags.iter().any(|tag| &tag.id == id)),
};
}
true
})
.filter(|(file_name, _)| {
// Apply EXIF filtering if present
if let Some(ref exif_files) = exif_matched_files {
exif_files.contains(file_name)
} else {
true
}
})
.filter(|(file_name, _)| {
// Apply media type filtering if specified
if let Some(ref media_type) = req.media_type {
let path = PathBuf::from(file_name);
matches_media_type(&path, media_type)
} else {
true
}
})
.map(
|(file_name, _tags): (String, Vec<crate::tags::Tag>)| FileWithTagCount {
file_name: file_name.clone(),
tag_count: *tag_counts.get(&file_name).unwrap_or(&0),
},
)
.collect::<Vec<FileWithTagCount>>();
info!(
"After all filters, {} files remain (filtering took {:?})",
photos.len(),
start_filter.elapsed()
);
// Extract pagination parameters
let limit = req.limit;
let offset = req.offset.unwrap_or(0);
let start_sort = std::time::Instant::now();
// Handle sorting - use helper function that supports EXIF date sorting and pagination
let (response_files, total_count) = if let Some(sort_type) = req.sort {
info!("Sorting {} files by {:?}", photos.len(), sort_type);
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result = apply_sorting_with_exif(
photos,
sort_type,
&mut exif_dao_guard,
&span_context,
scoped_library.root_path.as_ref(),
limit,
offset,
);
drop(exif_dao_guard);
result
} else { } else {
// No sorting requested - apply pagination if requested None
let total = photos.len() as i64; },
let files: Vec<String> = if let Some(limit_val) = limit { )
photos } else {
.into_iter() (None, None, None)
.skip(offset as usize) };
.take(limit_val as usize)
.map(|f| f.file_name)
.collect()
} else {
photos.into_iter().map(|f| f.file_name).collect()
};
(files, total)
};
info!(
"Sorting took {:?}, returned {} files (total: {})",
start_sort.elapsed(),
response_files.len(),
total_count
);
// Note: dirs were already collected during file filtering to avoid redundant metadata calls span_context.span().set_attribute(KeyValue::new(
"file_count",
response_files.len().to_string(),
));
span_context
.span()
.set_attribute(KeyValue::new("returned_count", returned_count.to_string()));
span_context
.span()
.set_attribute(KeyValue::new("total_count", total_count.to_string()));
span_context.span().set_status(Status::Ok);
// Calculate pagination metadata HttpResponse::Ok().json(PhotosResponse {
let returned_count = response_files.len() as i64; photos: response_files,
let pagination_metadata = if limit.is_some() { dirs,
( photo_libraries: response_libraries,
Some(total_count), total_count: pagination_metadata.0,
Some(offset + returned_count < total_count), has_more: pagination_metadata.1,
if offset + returned_count < total_count { next_offset: pagination_metadata.2,
Some(offset + returned_count) })
} else {
None
},
)
} else {
(None, None, None)
};
span_context
.span()
.set_attribute(KeyValue::new("file_count", files.len().to_string()));
span_context
.span()
.set_attribute(KeyValue::new("returned_count", returned_count.to_string()));
span_context
.span()
.set_attribute(KeyValue::new("total_count", total_count.to_string()));
span_context.span().set_status(Status::Ok);
let photo_libraries = vec![scoped_library.id; response_files.len()];
HttpResponse::Ok().json(PhotosResponse {
photos: response_files,
dirs,
photo_libraries,
total_count: pagination_metadata.0,
has_more: pagination_metadata.1,
next_offset: pagination_metadata.2,
})
}
_ => {
error!("Bad photos request: {}", req.path);
span_context
.span()
.set_status(Status::error("Invalid path"));
HttpResponse::BadRequest().finish()
}
}
} }
fn sort(mut files: Vec<FileWithTagCount>, sort_type: SortType) -> Vec<String> { fn sort(
files: Vec<FileWithTagCount>,
file_libraries: Vec<i32>,
sort_type: SortType,
) -> (Vec<String>, Vec<i32>) {
let mut paired: Vec<(FileWithTagCount, i32)> = files.into_iter().zip(file_libraries).collect();
match sort_type { match sort_type {
SortType::Shuffle => files.shuffle(&mut thread_rng()), SortType::Shuffle => paired.shuffle(&mut thread_rng()),
NameAsc => { NameAsc => paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name)),
files.sort_by(|l, r| l.file_name.cmp(&r.file_name)); SortType::NameDesc => paired.sort_by(|l, r| r.0.file_name.cmp(&l.0.file_name)),
} SortType::TagCountAsc => paired.sort_by(|l, r| l.0.tag_count.cmp(&r.0.tag_count)),
SortType::NameDesc => { SortType::TagCountDesc => paired.sort_by(|l, r| r.0.tag_count.cmp(&l.0.tag_count)),
files.sort_by(|l, r| r.file_name.cmp(&l.file_name));
}
SortType::TagCountAsc => {
files.sort_by(|l, r| l.tag_count.cmp(&r.tag_count));
}
SortType::TagCountDesc => {
files.sort_by(|l, r| r.tag_count.cmp(&l.tag_count));
}
SortType::DateTakenAsc | SortType::DateTakenDesc => { SortType::DateTakenAsc | SortType::DateTakenDesc => {
// Date sorting not implemented for FileWithTagCount
// We shouldn't be hitting this code
warn!("Date sorting not implemented for FileWithTagCount"); warn!("Date sorting not implemented for FileWithTagCount");
files.sort_by(|l, r| l.file_name.cmp(&r.file_name)); paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name));
} }
} }
files paired
.iter() .into_iter()
.map(|f| f.file_name.clone()) .map(|(f, lib)| (f.file_name, lib))
.collect::<Vec<String>>() .unzip()
} }
/// Sort files with metadata support (including date sorting) /// Sort files with metadata support (including date sorting)
fn sort_with_metadata(mut files: Vec<FileWithMetadata>, sort_type: SortType) -> Vec<String> { fn sort_with_metadata(
mut files: Vec<FileWithMetadata>,
sort_type: SortType,
) -> (Vec<String>, Vec<i32>) {
match sort_type { match sort_type {
SortType::Shuffle => files.shuffle(&mut thread_rng()), SortType::Shuffle => files.shuffle(&mut thread_rng()),
NameAsc => { NameAsc => {
@@ -864,9 +919,9 @@ fn sort_with_metadata(mut files: Vec<FileWithMetadata>, sort_type: SortType) ->
} }
files files
.iter() .into_iter()
.map(|f| f.file_name.clone()) .map(|f| (f.file_name, f.library_id))
.collect::<Vec<String>>() .unzip()
} }
pub fn list_files(dir: &Path) -> io::Result<Vec<PathBuf>> { pub fn list_files(dir: &Path) -> io::Result<Vec<PathBuf>> {
@@ -1369,6 +1424,7 @@ mod tests {
fn get_all_with_date_taken( fn get_all_with_date_taken(
&mut self, &mut self,
_context: &opentelemetry::Context, _context: &opentelemetry::Context,
_library_id: Option<i32>,
) -> Result<Vec<(String, i64)>, DbError> { ) -> Result<Vec<(String, i64)>, DbError> {
Ok(Vec::new()) Ok(Vec::new())
} }

View File

@@ -16,6 +16,7 @@ use walkdir::WalkDir;
use crate::data::Claims; use crate::data::Claims;
use crate::database::ExifDao; use crate::database::ExifDao;
use crate::files::is_image_or_video; use crate::files::is_image_or_video;
use crate::libraries::Library;
use crate::otel::{extract_context_from_request, global_tracer}; use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState; use crate::state::AppState;
@@ -378,7 +379,7 @@ fn collect_exif_memories(
) -> Vec<(MemoryItem, NaiveDate)> { ) -> Vec<(MemoryItem, NaiveDate)> {
// Query database for all files with date_taken // Query database for all files with date_taken
let exif_records = match exif_dao.lock() { let exif_records = match exif_dao.lock() {
Ok(mut dao) => match dao.get_all_with_date_taken(context) { Ok(mut dao) => match dao.get_all_with_date_taken(context, Some(library_id)) {
Ok(records) => records, Ok(records) => records,
Err(e) => { Err(e) => {
warn!("Failed to query EXIF database: {:?}", e); warn!("Failed to query EXIF database: {:?}", e);
@@ -546,48 +547,50 @@ pub async fn list_memories(
return HttpResponse::BadRequest().body(msg); return HttpResponse::BadRequest().body(msg);
} }
}; };
// For Phase 3 the walker still operates against a single library's root. // When `library` is `Some`, scope to that one library; otherwise union
// Multi-library union support for the filesystem walk comes in Phase 4. // across every configured library and let the results interleave.
let scoped_library = library.unwrap_or_else(|| app_state.primary_library()); let libraries_to_scan: Vec<&Library> = match library {
let base = Path::new(&scoped_library.root_path); Some(lib) => vec![lib],
None => app_state.libraries.iter().collect(),
};
// Build the path excluder from base and env-configured exclusions let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = Vec::new();
let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs);
// Phase 1: Query EXIF database for lib in &libraries_to_scan {
let exif_memories = collect_exif_memories( let base = Path::new(&lib.root_path);
&exif_dao, let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs);
&span_context,
&scoped_library.root_path,
scoped_library.id,
now,
span_mode,
years_back,
&client_timezone,
&path_excluder,
);
// Build HashSet for deduplication let exif_memories = collect_exif_memories(
let exif_paths: HashSet<PathBuf> = exif_memories &exif_dao,
.iter() &span_context,
.map(|(item, _)| PathBuf::from(&scoped_library.root_path).join(&item.path)) &lib.root_path,
.collect(); lib.id,
now,
span_mode,
years_back,
&client_timezone,
&path_excluder,
);
// Phase 2: File system scan (skip EXIF files) let exif_paths: HashSet<PathBuf> = exif_memories
let fs_memories = collect_filesystem_memories( .iter()
&scoped_library.root_path, .map(|(item, _)| PathBuf::from(&lib.root_path).join(&item.path))
scoped_library.id, .collect();
&path_excluder,
&exif_paths,
now,
span_mode,
years_back,
&client_timezone,
);
// Phase 3: Merge and sort let fs_memories = collect_filesystem_memories(
let mut memories_with_dates = exif_memories; &lib.root_path,
memories_with_dates.extend(fs_memories); lib.id,
&path_excluder,
&exif_paths,
now,
span_mode,
years_back,
&client_timezone,
);
memories_with_dates.extend(exif_memories);
memories_with_dates.extend(fs_memories);
}
match span_mode { match span_mode {
// Sort by absolute time for a more 'overview' // Sort by absolute time for a more 'overview'