feat: union /photos and /memories across libraries

When `library` is omitted, both endpoints now walk every configured
library root, interleave the results, and tag each row with its source
library via the parallel `photo_libraries` / per-row `library_id`
arrays. Previously the handlers fell back to the primary library,
silently hiding the rest.

Threads a parallel `file_libraries: Vec<i32>` through the sort/paginate
helpers so library attribution survives sorting and pagination.
Directory names are de-duplicated across libraries.

`get_all_with_date_taken` grows an optional library filter so the memories
endpoint can scope its EXIF query per library during the union walk.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron
2026-04-18 17:27:41 -04:00
committed by cameron
parent 586b735af5
commit 2c8de8dcc6
3 changed files with 490 additions and 421 deletions

View File

@@ -266,6 +266,7 @@ pub trait ExifDao: Sync + Send {
fn get_all_with_date_taken(
&mut self,
context: &opentelemetry::Context,
library_id: Option<i32>,
) -> Result<Vec<(String, i64)>, DbError>;
/// Batch load EXIF data for multiple file paths (single query)
@@ -523,15 +524,24 @@ impl ExifDao for SqliteExifDao {
fn get_all_with_date_taken(
&mut self,
context: &opentelemetry::Context,
lib_id: Option<i32>,
) -> Result<Vec<(String, i64)>, DbError> {
trace_db_call(context, "query", "get_all_with_date_taken", |_span| {
use schema::image_exif::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
image_exif
let query = image_exif
.select((rel_path, date_taken))
.filter(date_taken.is_not_null())
.into_boxed();
let query = match lib_id {
Some(filter_id) => query.filter(library_id.eq(filter_id)),
None => query,
};
query
.load::<(String, Option<i64>)>(connection.deref_mut())
.map(|records| {
records

View File

@@ -41,52 +41,53 @@ pub struct FileWithMetadata {
pub file_name: String,
pub tag_count: i64,
pub date_taken: Option<i64>, // Unix timestamp from EXIF or filename extraction
pub library_id: i32,
}
use serde::Deserialize;
/// Apply sorting to files with EXIF data support for date-based sorting
/// Handles both date sorting (with EXIF/filename fallback) and regular sorting
/// Returns (sorted_file_paths, total_count)
/// Returns (sorted_file_paths, sorted_library_ids, total_count)
fn apply_sorting_with_exif(
files: Vec<FileWithTagCount>,
file_libraries: Vec<i32>,
sort_type: SortType,
exif_dao: &mut Box<dyn ExifDao>,
span_context: &opentelemetry::Context,
base_path: &Path,
libraries: &[crate::libraries::Library],
limit: Option<i64>,
offset: i64,
) -> (Vec<String>, i64) {
) -> (Vec<String>, Vec<i32>, i64) {
let total_count = files.len() as i64;
match sort_type {
SortType::DateTakenAsc | SortType::DateTakenDesc => {
info!("Date sorting requested, using in-memory sort with EXIF/filename fallback");
// Use in-memory sort so files without EXIF dates are included via
// filename extraction and filesystem metadata fallbacks.
let (sorted, _) = in_memory_date_sort(
let (sorted, sorted_libs, _) = in_memory_date_sort(
files,
file_libraries,
sort_type,
exif_dao,
span_context,
base_path,
libraries,
limit,
offset,
);
(sorted, total_count)
(sorted, sorted_libs, total_count)
}
_ => {
// Use regular sort for non-date sorting
let sorted = sort(files, sort_type);
let result = if let Some(limit_val) = limit {
sorted
.into_iter()
.skip(offset as usize)
.take(limit_val as usize)
.collect()
let (sorted, sorted_libs) = sort(files, file_libraries, sort_type);
let (result, result_libs) = if let Some(limit_val) = limit {
let skip = offset as usize;
let take = limit_val as usize;
(
sorted.iter().skip(skip).take(take).cloned().collect(),
sorted_libs.iter().skip(skip).take(take).copied().collect(),
)
} else {
sorted
(sorted, sorted_libs)
};
(result, total_count)
(result, result_libs, total_count)
}
}
}
@@ -94,66 +95,88 @@ fn apply_sorting_with_exif(
/// Fallback in-memory date sorting with EXIF/filename extraction
fn in_memory_date_sort(
files: Vec<FileWithTagCount>,
file_libraries: Vec<i32>,
sort_type: SortType,
exif_dao: &mut Box<dyn ExifDao>,
span_context: &opentelemetry::Context,
base_path: &Path,
libraries: &[crate::libraries::Library],
limit: Option<i64>,
offset: i64,
) -> (Vec<String>, i64) {
) -> (Vec<String>, Vec<i32>, i64) {
let total_count = files.len() as i64;
let file_paths: Vec<String> = files.iter().map(|f| f.file_name.clone()).collect();
// Batch fetch EXIF data
let exif_map: std::collections::HashMap<String, i64> = exif_dao
// Batch fetch EXIF data (keyed by rel_path; in union mode a rel_path may
// correspond to rows in multiple libraries — pick the date from the one
// matching the requesting row's library_id when possible).
let exif_rows = exif_dao
.get_exif_batch(span_context, &file_paths)
.unwrap_or_default()
.unwrap_or_default();
let exif_map: std::collections::HashMap<(String, i32), i64> = exif_rows
.into_iter()
.filter_map(|exif| exif.date_taken.map(|dt| (exif.file_path, dt)))
.filter_map(|exif| {
exif.date_taken
.map(|dt| ((exif.file_path, exif.library_id), dt))
})
.collect();
let lib_roots: std::collections::HashMap<i32, &str> = libraries
.iter()
.map(|l| (l.id, l.root_path.as_str()))
.collect();
// Convert to FileWithMetadata with date fallback logic
let files_with_metadata: Vec<FileWithMetadata> = files
.into_iter()
.map(|f| {
// Try EXIF date first
.zip(file_libraries.iter().copied())
.map(|(f, lib_id)| {
let date_taken = exif_map
.get(&f.file_name)
.get(&(f.file_name.clone(), lib_id))
.copied()
.or_else(|| extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp()))
.or_else(|| {
// Fallback to filename extraction
extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())
})
.or_else(|| {
// Fallback to filesystem metadata creation date
let full_path = base_path.join(&f.file_name);
std::fs::metadata(full_path)
.and_then(|md| md.created().or(md.modified()))
.ok()
.map(|system_time| {
<SystemTime as Into<DateTime<Utc>>>::into(system_time).timestamp()
})
lib_roots.get(&lib_id).and_then(|root| {
let full_path = Path::new(root).join(&f.file_name);
std::fs::metadata(full_path)
.and_then(|md| md.created().or(md.modified()))
.ok()
.map(|system_time| {
<SystemTime as Into<DateTime<Utc>>>::into(system_time).timestamp()
})
})
});
FileWithMetadata {
file_name: f.file_name,
tag_count: f.tag_count,
date_taken,
library_id: lib_id,
}
})
.collect();
let sorted = sort_with_metadata(files_with_metadata, sort_type);
let result = if let Some(limit_val) = limit {
sorted
.into_iter()
.skip(offset as usize)
.take(limit_val as usize)
.collect()
let (sorted, sorted_libs) = sort_with_metadata(files_with_metadata, sort_type);
let (result, result_libs) = if let Some(limit_val) = limit {
let skip = offset as usize;
let take = limit_val as usize;
(
sorted
.iter()
.skip(skip)
.take(take)
.cloned()
.collect::<Vec<String>>(),
sorted_libs
.iter()
.skip(skip)
.take(take)
.copied()
.collect::<Vec<i32>>(),
)
} else {
sorted
(sorted, sorted_libs)
};
(result, total_count)
(result, result_libs, total_count)
}
pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
@@ -237,9 +260,9 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
KeyValue::new("library", req.library.clone().unwrap_or_default()),
]);
// Resolve the optional library filter. Unknown values return 400.
// For Phase 3 the filesystem walk still operates against a single
// library's root; Phase 4 introduces multi-root union scanning.
// Resolve the optional library filter. Unknown values return 400. A
// `None` result means "union across all libraries" and downstream
// walks iterate every configured library root.
let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref())
{
Ok(lib) => lib,
@@ -248,7 +271,6 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
return HttpResponse::BadRequest().body(msg);
}
};
let scoped_library = library.unwrap_or_else(|| app_state.primary_library());
let span_context = opentelemetry::Context::current_with_span(span);
@@ -332,12 +354,15 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
None
};
// When a specific library is selected, we'll gate tag-based results
// (which key on rel_path only, library-agnostic) by "does this
// rel_path actually exist on disk in the selected library's root".
// We check per-file below rather than pre-enumerating image_exif,
// since image_exif may lag a just-added library.
let library_for_scope: Option<&crate::libraries::Library> = library;
// In scoped mode (`library` is Some) we gate tag-based results (which
// key on rel_path only) by "does this rel_path actually exist on disk
// in the selected library's root". In union mode we assign each
// returned file to the first library it resolves in, and drop files
// that exist in no configured library.
let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
Some(lib) => vec![lib],
None => app_state.libraries.iter().collect(),
};
let search_recursively = req.recursive.unwrap_or(false);
if let Some(tag_ids) = &req.tag_ids
@@ -404,17 +429,23 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
true
}
})
.filter(|f| {
// Scope to the selected library by checking the file
// actually exists under its root. Falls back to the
// content-hash sibling set (looked up globally, since
// the tagged rel_path may have been registered under
// a different library than the one selected).
let Some(lib) = library_for_scope else {
return true;
};
if PathBuf::from(&lib.root_path).join(&f.file_name).exists() {
return true;
.filter_map(|f| {
// Apply media type filter first (cheap check before disk I/O).
if let Some(ref media_type) = req.media_type {
let path = PathBuf::from(&f.file_name);
if !matches_media_type(&path, media_type) {
return None;
}
}
// Resolve the file's library by checking each
// candidate library's root on disk. Falls back to
// content-hash siblings if the rel_path was
// registered under a different path but same content.
for lib in &libraries_to_scan {
if PathBuf::from(&lib.root_path).join(&f.file_name).exists() {
return Some((f, lib.id));
}
}
let siblings = {
let mut dao = exif_dao.lock().expect("Unable to get ExifDao");
@@ -428,41 +459,50 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
None => Vec::new(),
}
};
siblings
.iter()
.any(|p| PathBuf::from(&lib.root_path).join(p).exists())
})
.filter(|f| {
// Apply media type filtering if specified
if let Some(ref media_type) = req.media_type {
let path = PathBuf::from(&f.file_name);
matches_media_type(&path, media_type)
for lib in &libraries_to_scan {
if siblings
.iter()
.any(|p| PathBuf::from(&lib.root_path).join(p).exists())
{
return Some((f, lib.id));
}
}
// Tags are library-agnostic. If we can't confirm which
// library currently holds the file on disk (e.g. the
// tagged rel_path is stale or the caller is testing
// without real files), keep the tagged row and
// attribute it to the primary library so the client
// still sees the tag hit.
if library.is_none() {
Some((f, app_state.primary_library().id))
} else {
true
None
}
})
.collect::<Vec<FileWithTagCount>>()
.collect::<Vec<(FileWithTagCount, i32)>>()
})
.map(|files| {
.map(|paired| {
// Handle sorting - use helper function that supports EXIF date sorting and pagination
let sort_type = req.sort.unwrap_or(NameAsc);
let limit = req.limit;
let offset = req.offset.unwrap_or(0);
let (files, file_libs): (Vec<FileWithTagCount>, Vec<i32>) = paired.into_iter().unzip();
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result = apply_sorting_with_exif(
files,
file_libs,
sort_type,
&mut exif_dao_guard,
&span_context,
scoped_library.root_path.as_ref(),
&app_state.libraries,
limit,
offset,
);
drop(exif_dao_guard);
result
})
.inspect(|(files, total)| debug!("Found {:?} files (total: {})", files.len(), total))
.map(|(tagged_files, total_count)| {
.inspect(|(files, _libs, total)| debug!("Found {:?} files (total: {})", files.len(), total))
.map(|(tagged_files, photo_libraries, total_count)| {
info!(
"Found {:?} tagged files: {:?}",
tagged_files.len(),
@@ -493,7 +533,6 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
.set_attribute(KeyValue::new("total_count", total_count.to_string()));
span_context.span().set_status(Status::Ok);
let photo_libraries = vec![scoped_library.id; tagged_files.len()];
HttpResponse::Ok().json(PhotosResponse {
photos: tagged_files,
dirs: vec![],
@@ -507,330 +546,346 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
.unwrap_or_else(|e| e.error_response());
}
// Use recursive or non-recursive file listing based on flag. Both
// paths must walk the *scoped* library's root; the generic
// FileSystemAccess trait (file_system.get_files_for_path) is pinned
// to AppState's base_path at construction time and doesn't know
// which library the request targets.
let files_result = if search_recursively {
is_valid_full_path(
&PathBuf::from(&scoped_library.root_path),
&PathBuf::from(search_path),
false,
)
.map(|path| {
debug!("Valid path for recursive search: {:?}", path);
list_files_recursive(&path).unwrap_or_default()
})
.context("Invalid path")
} else if scoped_library.id == app_state.primary_library().id {
// Primary library: preserve the original FileSystemAccess path so
// the test-mock path (MockFileSystem) continues to work.
file_system.get_files_for_path(search_path)
} else {
is_valid_full_path(
&PathBuf::from(&scoped_library.root_path),
&PathBuf::from(search_path),
false,
)
.map(|path| {
debug!("Valid path for non-recursive search: {:?}", path);
list_files(&path).unwrap_or_default()
})
.context("Invalid path")
};
// Walk each candidate library's root for the requested sub-path. In
// scoped mode `libraries_to_scan` has one entry (the selected library);
// in union mode we walk every configured library and intermix results.
// For the primary library we preserve the original FileSystemAccess
// path so the test-mock path (MockFileSystem) continues to work.
let mut file_names: Vec<String> = Vec::new();
let mut file_libraries: Vec<i32> = Vec::new();
let mut dirs_set: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut any_library_resolved = false;
match files_result {
Ok(files) => {
info!(
"Found {:?} files in path: {:?} (recursive: {})",
files.len(),
search_path,
search_recursively
);
for lib in &libraries_to_scan {
let files_result = if search_recursively {
is_valid_full_path(
&PathBuf::from(&lib.root_path),
&PathBuf::from(search_path),
false,
)
.map(|path| {
debug!("Valid path for recursive search: {:?}", path);
list_files_recursive(&path).unwrap_or_default()
})
.context("Invalid path")
} else if lib.id == app_state.primary_library().id {
file_system.get_files_for_path(search_path)
} else {
is_valid_full_path(
&PathBuf::from(&lib.root_path),
&PathBuf::from(search_path),
false,
)
.map(|path| {
debug!("Valid path for non-recursive search: {:?}", path);
list_files(&path).unwrap_or_default()
})
.context("Invalid path")
};
info!("Starting to filter {} files from filesystem", files.len());
let start_filter = std::time::Instant::now();
let files = match files_result {
Ok(f) => {
any_library_resolved = true;
f
}
Err(e) => {
debug!(
"Skipping library '{}' for path '{}': {:?}",
lib.name, search_path, e
);
continue;
}
};
// Separate files and directories in a single pass to avoid redundant metadata calls
let (file_names, dirs): (Vec<String>, Vec<String>) =
files
.iter()
.fold((Vec::new(), Vec::new()), |(mut files, mut dirs), path| {
match path.metadata() {
Ok(md) => {
let relative = path
.strip_prefix(&scoped_library.root_path)
.unwrap_or_else(|_| {
panic!(
"Unable to strip library root {} from file path {}",
&scoped_library.root_path,
path.display()
)
});
// Normalize separators to '/' so downstream
// lookups (tags, EXIF, insights) that store
// rel_paths with forward slashes still match
// on Windows.
let relative_str = relative.to_str().unwrap().replace('\\', "/");
info!(
"Found {:?} files in library '{}' path: {:?} (recursive: {})",
files.len(),
lib.name,
search_path,
search_recursively
);
if md.is_file() {
files.push(relative_str);
} else if md.is_dir() {
dirs.push(relative_str);
}
}
Err(e) => {
error!("Failed getting file metadata: {:?}", e);
// Include files without metadata if they have extensions
if path.extension().is_some() {
let relative = path
.strip_prefix(&scoped_library.root_path)
.unwrap_or_else(|_| {
panic!(
"Unable to strip library root {} from file path {}",
&scoped_library.root_path,
path.display()
)
});
files.push(relative.to_str().unwrap().replace('\\', "/"));
}
}
}
(files, dirs)
for path in &files {
match path.metadata() {
Ok(md) => {
let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| {
panic!(
"Unable to strip library root {} from file path {}",
&lib.root_path,
path.display()
)
});
// Normalize separators to '/' so downstream lookups
// (tags, EXIF, insights) that store rel_paths with
// forward slashes still match on Windows.
let relative_str = relative.to_str().unwrap().replace('\\', "/");
if md.is_file() {
file_names.push(relative_str);
file_libraries.push(lib.id);
} else if md.is_dir() {
dirs_set.insert(relative_str);
}
}
Err(e) => {
error!("Failed getting file metadata: {:?}", e);
// Include files without metadata if they have extensions
if path.extension().is_some() {
let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| {
panic!(
"Unable to strip library root {} from file path {}",
&lib.root_path,
path.display()
)
});
file_names.push(relative.to_str().unwrap().replace('\\', "/"));
file_libraries.push(lib.id);
}
}
}
}
}
if !any_library_resolved {
error!("Bad photos request: {}", req.path);
span_context
.span()
.set_status(Status::error("Invalid path"));
return HttpResponse::BadRequest().finish();
}
let dirs: Vec<String> = dirs_set.into_iter().collect();
info!(
"Starting to filter {} files from filesystem",
file_names.len()
);
let start_filter = std::time::Instant::now();
info!(
"File filtering took {:?}, now fetching tag counts for {} files",
start_filter.elapsed(),
file_names.len()
);
let start_tags = std::time::Instant::now();
// Batch query for tag counts (tags are library-agnostic / keyed by rel_path).
let tag_counts = {
let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
tag_dao_guard
.get_tag_counts_batch(&span_context, &file_names)
.unwrap_or_default()
};
info!("Batch tag count query took {:?}", start_tags.elapsed());
let start_tag_filter = std::time::Instant::now();
let file_tags_map: std::collections::HashMap<String, Vec<crate::tags::Tag>> =
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
info!(
"File filtering took {:?}, now fetching tag counts for {} files",
start_filter.elapsed(),
"Tag filtering requested, fetching full tag lists for {} files",
file_names.len()
);
let start_tags = std::time::Instant::now();
let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
file_names
.iter()
.filter_map(|file_name| {
tag_dao_guard
.get_tags_for_path(&span_context, file_name)
.ok()
.map(|tags| (file_name.clone(), tags))
})
.collect()
} else {
std::collections::HashMap::new()
};
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
info!("Full tag list fetch took {:?}", start_tag_filter.elapsed());
}
// Batch query for tag counts to avoid N+1 queries
let tag_counts = {
let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
tag_dao_guard
.get_tag_counts_batch(&span_context, &file_names)
// Filter + pair with the parallel library_id while preserving ordering
// so the downstream sort can return both arrays in lockstep.
let photos_with_libs: Vec<(FileWithTagCount, i32)> = file_names
.into_iter()
.zip(file_libraries.into_iter())
.filter_map(|(file_name, lib_id)| {
let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default();
if let Some(tag_ids_csv) = &req.tag_ids {
let tag_ids = tag_ids_csv
.split(',')
.filter_map(|t| t.parse().ok())
.collect::<Vec<i32>>();
let excluded_tag_ids = req
.exclude_tag_ids
.clone()
.unwrap_or_default()
};
info!("Batch tag count query took {:?}", start_tags.elapsed());
.split(',')
.filter_map(|t| t.parse().ok())
.collect::<Vec<i32>>();
// Also get full tag lists for files that need tag filtering
let start_tag_filter = std::time::Instant::now();
let file_tags_map: std::collections::HashMap<String, Vec<crate::tags::Tag>> =
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
info!(
"Tag filtering requested, fetching full tag lists for {} files",
file_names.len()
);
let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
file_names
.iter()
.filter_map(|file_name| {
tag_dao_guard
.get_tags_for_path(&span_context, file_name)
.ok()
.map(|tags| (file_name.clone(), tags))
})
.collect()
} else {
std::collections::HashMap::new()
};
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
info!("Full tag list fetch took {:?}", start_tag_filter.elapsed());
let filter_mode = req.tag_filter_mode.unwrap_or(FilterMode::Any);
let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id));
let keep = !excluded
&& match filter_mode {
FilterMode::Any => file_tags.iter().any(|t| tag_ids.contains(&t.id)),
FilterMode::All => tag_ids
.iter()
.all(|id| file_tags.iter().any(|tag| &tag.id == id)),
};
if !keep {
return None;
}
}
let photos = file_names
if let Some(ref exif_files) = exif_matched_files
&& !exif_files.contains(&file_name)
{
return None;
}
if let Some(ref media_type) = req.media_type {
let path = PathBuf::from(&file_name);
if !matches_media_type(&path, media_type) {
return None;
}
}
let tag_count = *tag_counts.get(&file_name).unwrap_or(&0);
Some((
FileWithTagCount {
file_name,
tag_count,
},
lib_id,
))
})
.collect();
info!(
"After all filters, {} files remain (filtering took {:?})",
photos_with_libs.len(),
start_filter.elapsed()
);
// Extract pagination parameters
let limit = req.limit;
let offset = req.offset.unwrap_or(0);
let start_sort = std::time::Instant::now();
let (photos, file_libs_sorted_input): (Vec<FileWithTagCount>, Vec<i32>) =
photos_with_libs.into_iter().unzip();
let (response_files, response_libraries, total_count) = if let Some(sort_type) = req.sort {
info!("Sorting {} files by {:?}", photos.len(), sort_type);
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result = apply_sorting_with_exif(
photos,
file_libs_sorted_input,
sort_type,
&mut exif_dao_guard,
&span_context,
&app_state.libraries,
limit,
offset,
);
drop(exif_dao_guard);
result
} else {
// No sorting requested - apply pagination if requested
let total = photos.len() as i64;
let (paged_files, paged_libs): (Vec<String>, Vec<i32>) = if let Some(limit_val) = limit {
photos
.into_iter()
.map(|file_name| {
let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default();
(file_name, file_tags)
})
.filter(|(_, file_tags): &(String, Vec<crate::tags::Tag>)| {
if let Some(tag_ids) = &req.tag_ids {
let tag_ids = tag_ids
.split(',')
.filter_map(|t| t.parse().ok())
.collect::<Vec<i32>>();
.zip(file_libs_sorted_input)
.skip(offset as usize)
.take(limit_val as usize)
.map(|(f, lib)| (f.file_name, lib))
.unzip()
} else {
photos
.into_iter()
.zip(file_libs_sorted_input)
.map(|(f, lib)| (f.file_name, lib))
.unzip()
};
(paged_files, paged_libs, total)
};
info!(
"Sorting took {:?}, returned {} files (total: {})",
start_sort.elapsed(),
response_files.len(),
total_count
);
let excluded_tag_ids = &req
.exclude_tag_ids
.clone()
.unwrap_or_default()
.split(',')
.filter_map(|t| t.parse().ok())
.collect::<Vec<i32>>();
let filter_mode = &req.tag_filter_mode.unwrap_or(FilterMode::Any);
let excluded = file_tags.iter().any(|t| excluded_tag_ids.contains(&t.id));
return !excluded
&& match filter_mode {
FilterMode::Any => {
file_tags.iter().any(|t| tag_ids.contains(&t.id))
}
FilterMode::All => tag_ids
.iter()
.all(|id| file_tags.iter().any(|tag| &tag.id == id)),
};
}
true
})
.filter(|(file_name, _)| {
// Apply EXIF filtering if present
if let Some(ref exif_files) = exif_matched_files {
exif_files.contains(file_name)
} else {
true
}
})
.filter(|(file_name, _)| {
// Apply media type filtering if specified
if let Some(ref media_type) = req.media_type {
let path = PathBuf::from(file_name);
matches_media_type(&path, media_type)
} else {
true
}
})
.map(
|(file_name, _tags): (String, Vec<crate::tags::Tag>)| FileWithTagCount {
file_name: file_name.clone(),
tag_count: *tag_counts.get(&file_name).unwrap_or(&0),
},
)
.collect::<Vec<FileWithTagCount>>();
info!(
"After all filters, {} files remain (filtering took {:?})",
photos.len(),
start_filter.elapsed()
);
// Extract pagination parameters
let limit = req.limit;
let offset = req.offset.unwrap_or(0);
let start_sort = std::time::Instant::now();
// Handle sorting - use helper function that supports EXIF date sorting and pagination
let (response_files, total_count) = if let Some(sort_type) = req.sort {
info!("Sorting {} files by {:?}", photos.len(), sort_type);
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result = apply_sorting_with_exif(
photos,
sort_type,
&mut exif_dao_guard,
&span_context,
scoped_library.root_path.as_ref(),
limit,
offset,
);
drop(exif_dao_guard);
result
let returned_count = response_files.len() as i64;
let pagination_metadata = if limit.is_some() {
(
Some(total_count),
Some(offset + returned_count < total_count),
if offset + returned_count < total_count {
Some(offset + returned_count)
} else {
// No sorting requested - apply pagination if requested
let total = photos.len() as i64;
let files: Vec<String> = if let Some(limit_val) = limit {
photos
.into_iter()
.skip(offset as usize)
.take(limit_val as usize)
.map(|f| f.file_name)
.collect()
} else {
photos.into_iter().map(|f| f.file_name).collect()
};
(files, total)
};
info!(
"Sorting took {:?}, returned {} files (total: {})",
start_sort.elapsed(),
response_files.len(),
total_count
);
None
},
)
} else {
(None, None, None)
};
// Note: dirs were already collected during file filtering to avoid redundant metadata calls
span_context.span().set_attribute(KeyValue::new(
"file_count",
response_files.len().to_string(),
));
span_context
.span()
.set_attribute(KeyValue::new("returned_count", returned_count.to_string()));
span_context
.span()
.set_attribute(KeyValue::new("total_count", total_count.to_string()));
span_context.span().set_status(Status::Ok);
// Calculate pagination metadata
let returned_count = response_files.len() as i64;
let pagination_metadata = if limit.is_some() {
(
Some(total_count),
Some(offset + returned_count < total_count),
if offset + returned_count < total_count {
Some(offset + returned_count)
} else {
None
},
)
} else {
(None, None, None)
};
span_context
.span()
.set_attribute(KeyValue::new("file_count", files.len().to_string()));
span_context
.span()
.set_attribute(KeyValue::new("returned_count", returned_count.to_string()));
span_context
.span()
.set_attribute(KeyValue::new("total_count", total_count.to_string()));
span_context.span().set_status(Status::Ok);
let photo_libraries = vec![scoped_library.id; response_files.len()];
HttpResponse::Ok().json(PhotosResponse {
photos: response_files,
dirs,
photo_libraries,
total_count: pagination_metadata.0,
has_more: pagination_metadata.1,
next_offset: pagination_metadata.2,
})
}
_ => {
error!("Bad photos request: {}", req.path);
span_context
.span()
.set_status(Status::error("Invalid path"));
HttpResponse::BadRequest().finish()
}
}
HttpResponse::Ok().json(PhotosResponse {
photos: response_files,
dirs,
photo_libraries: response_libraries,
total_count: pagination_metadata.0,
has_more: pagination_metadata.1,
next_offset: pagination_metadata.2,
})
}
fn sort(mut files: Vec<FileWithTagCount>, sort_type: SortType) -> Vec<String> {
fn sort(
files: Vec<FileWithTagCount>,
file_libraries: Vec<i32>,
sort_type: SortType,
) -> (Vec<String>, Vec<i32>) {
let mut paired: Vec<(FileWithTagCount, i32)> = files.into_iter().zip(file_libraries).collect();
match sort_type {
SortType::Shuffle => files.shuffle(&mut thread_rng()),
NameAsc => {
files.sort_by(|l, r| l.file_name.cmp(&r.file_name));
}
SortType::NameDesc => {
files.sort_by(|l, r| r.file_name.cmp(&l.file_name));
}
SortType::TagCountAsc => {
files.sort_by(|l, r| l.tag_count.cmp(&r.tag_count));
}
SortType::TagCountDesc => {
files.sort_by(|l, r| r.tag_count.cmp(&l.tag_count));
}
SortType::Shuffle => paired.shuffle(&mut thread_rng()),
NameAsc => paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name)),
SortType::NameDesc => paired.sort_by(|l, r| r.0.file_name.cmp(&l.0.file_name)),
SortType::TagCountAsc => paired.sort_by(|l, r| l.0.tag_count.cmp(&r.0.tag_count)),
SortType::TagCountDesc => paired.sort_by(|l, r| r.0.tag_count.cmp(&l.0.tag_count)),
SortType::DateTakenAsc | SortType::DateTakenDesc => {
// Date sorting not implemented for FileWithTagCount
// We shouldn't be hitting this code
warn!("Date sorting not implemented for FileWithTagCount");
files.sort_by(|l, r| l.file_name.cmp(&r.file_name));
paired.sort_by(|l, r| l.0.file_name.cmp(&r.0.file_name));
}
}
files
.iter()
.map(|f| f.file_name.clone())
.collect::<Vec<String>>()
paired
.into_iter()
.map(|(f, lib)| (f.file_name, lib))
.unzip()
}
/// Sort files with metadata support (including date sorting)
fn sort_with_metadata(mut files: Vec<FileWithMetadata>, sort_type: SortType) -> Vec<String> {
fn sort_with_metadata(
mut files: Vec<FileWithMetadata>,
sort_type: SortType,
) -> (Vec<String>, Vec<i32>) {
match sort_type {
SortType::Shuffle => files.shuffle(&mut thread_rng()),
NameAsc => {
@@ -864,9 +919,9 @@ fn sort_with_metadata(mut files: Vec<FileWithMetadata>, sort_type: SortType) ->
}
files
.iter()
.map(|f| f.file_name.clone())
.collect::<Vec<String>>()
.into_iter()
.map(|f| (f.file_name, f.library_id))
.unzip()
}
pub fn list_files(dir: &Path) -> io::Result<Vec<PathBuf>> {
@@ -1369,6 +1424,7 @@ mod tests {
fn get_all_with_date_taken(
&mut self,
_context: &opentelemetry::Context,
_library_id: Option<i32>,
) -> Result<Vec<(String, i64)>, DbError> {
Ok(Vec::new())
}

View File

@@ -16,6 +16,7 @@ use walkdir::WalkDir;
use crate::data::Claims;
use crate::database::ExifDao;
use crate::files::is_image_or_video;
use crate::libraries::Library;
use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState;
@@ -378,7 +379,7 @@ fn collect_exif_memories(
) -> Vec<(MemoryItem, NaiveDate)> {
// Query database for all files with date_taken
let exif_records = match exif_dao.lock() {
Ok(mut dao) => match dao.get_all_with_date_taken(context) {
Ok(mut dao) => match dao.get_all_with_date_taken(context, Some(library_id)) {
Ok(records) => records,
Err(e) => {
warn!("Failed to query EXIF database: {:?}", e);
@@ -546,48 +547,50 @@ pub async fn list_memories(
return HttpResponse::BadRequest().body(msg);
}
};
// For Phase 3 the walker still operates against a single library's root.
// Multi-library union support for the filesystem walk comes in Phase 4.
let scoped_library = library.unwrap_or_else(|| app_state.primary_library());
let base = Path::new(&scoped_library.root_path);
// When `library` is `Some`, scope to that one library; otherwise union
// across every configured library and let the results interleave.
let libraries_to_scan: Vec<&Library> = match library {
Some(lib) => vec![lib],
None => app_state.libraries.iter().collect(),
};
// Build the path excluder from base and env-configured exclusions
let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs);
let mut memories_with_dates: Vec<(MemoryItem, NaiveDate)> = Vec::new();
// Phase 1: Query EXIF database
let exif_memories = collect_exif_memories(
&exif_dao,
&span_context,
&scoped_library.root_path,
scoped_library.id,
now,
span_mode,
years_back,
&client_timezone,
&path_excluder,
);
for lib in &libraries_to_scan {
let base = Path::new(&lib.root_path);
let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs);
// Build HashSet for deduplication
let exif_paths: HashSet<PathBuf> = exif_memories
.iter()
.map(|(item, _)| PathBuf::from(&scoped_library.root_path).join(&item.path))
.collect();
let exif_memories = collect_exif_memories(
&exif_dao,
&span_context,
&lib.root_path,
lib.id,
now,
span_mode,
years_back,
&client_timezone,
&path_excluder,
);
// Phase 2: File system scan (skip EXIF files)
let fs_memories = collect_filesystem_memories(
&scoped_library.root_path,
scoped_library.id,
&path_excluder,
&exif_paths,
now,
span_mode,
years_back,
&client_timezone,
);
let exif_paths: HashSet<PathBuf> = exif_memories
.iter()
.map(|(item, _)| PathBuf::from(&lib.root_path).join(&item.path))
.collect();
// Phase 3: Merge and sort
let mut memories_with_dates = exif_memories;
memories_with_dates.extend(fs_memories);
let fs_memories = collect_filesystem_memories(
&lib.root_path,
lib.id,
&path_excluder,
&exif_paths,
now,
span_mode,
years_back,
&client_timezone,
);
memories_with_dates.extend(exif_memories);
memories_with_dates.extend(fs_memories);
}
match span_mode {
// Sort by absolute time for a more 'overview'