From be483c9c1acd6385311ff7c62f1fc2828a3eadd2 Mon Sep 17 00:00:00 2001
From: Cameron
Date: Sun, 18 Jan 2026 19:17:10 -0500
Subject: [PATCH] Add database optimizations for photo search and pagination

Implement database-level sorting with composite indexes for efficient
date and tag queries. Add pagination metadata support and optimize tag
count queries using batch processing.
---
 .../down.sql        |   4 +
 .../up.sql          |  15 +
 src/data/mod.rs     |  14 +
 src/database/mod.rs |  72 +++
 src/files.rs        | 428 ++++++++++++++----
 src/main.rs         |   3 +
 src/tags.rs         |  79 ++++
 7 files changed, 519 insertions(+), 96 deletions(-)
 create mode 100644 migrations/2026-01-18-000000_optimize_photo_search/down.sql
 create mode 100644 migrations/2026-01-18-000000_optimize_photo_search/up.sql

diff --git a/migrations/2026-01-18-000000_optimize_photo_search/down.sql b/migrations/2026-01-18-000000_optimize_photo_search/down.sql
new file mode 100644
index 0000000..562c67b
--- /dev/null
+++ b/migrations/2026-01-18-000000_optimize_photo_search/down.sql
@@ -0,0 +1,4 @@
+-- Revert search performance optimization indexes
+
+DROP INDEX IF EXISTS idx_image_exif_date_path;
+DROP INDEX IF EXISTS idx_tagged_photo_count;
diff --git a/migrations/2026-01-18-000000_optimize_photo_search/up.sql b/migrations/2026-01-18-000000_optimize_photo_search/up.sql
new file mode 100644
index 0000000..61f327e
--- /dev/null
+++ b/migrations/2026-01-18-000000_optimize_photo_search/up.sql
@@ -0,0 +1,15 @@
+-- Add composite indexes for search performance optimization
+-- This migration addresses N+1 query issues and enables database-level sorting
+
+-- Covering index for date-sorted queries (supports ORDER BY + pagination)
+-- Enables efficient date-based sorting without loading all files into memory
+CREATE INDEX IF NOT EXISTS idx_image_exif_date_path
+    ON image_exif(date_taken DESC, file_path);
+
+-- Optimize batch tag count queries with GROUP BY
+-- Reduces N individual queries to a single batch query
+CREATE INDEX IF NOT EXISTS idx_tagged_photo_count
+    ON tagged_photo(photo_name, tag_id);
+
+-- Update query planner statistics to optimize query execution
+ANALYZE;
diff --git a/src/data/mod.rs b/src/data/mod.rs
index 4ef8f39..6c7460c 100644
--- a/src/data/mod.rs
+++ b/src/data/mod.rs
@@ -101,6 +101,16 @@ impl FromRequest for Claims {
 pub struct PhotosResponse {
     pub photos: Vec<String>,
     pub dirs: Vec<String>,
+
+    // Pagination metadata (only present when limit is set)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub total_count: Option<i64>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub has_more: Option<bool>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub next_offset: Option<i64>,
 }
 
 #[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
@@ -141,6 +151,10 @@ pub struct FilesRequest {
 
     // Media type filtering
     pub media_type: Option<MediaType>,
+
+    // Pagination parameters (optional - backward compatible)
+    pub limit: Option<i64>,
+    pub offset: Option<i64>,
 }
 
 #[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
diff --git a/src/database/mod.rs b/src/database/mod.rs
index 0bfd3f1..b93e396 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -296,6 +296,17 @@ pub trait ExifDao: Sync + Send {
         &mut self,
         context: &opentelemetry::Context,
     ) -> Result<Vec<ImageExif>, DbError>;
+
+    /// Get files sorted by date with optional pagination
+    /// Returns (sorted_file_paths, total_count)
+    fn get_files_sorted_by_date(
+        &mut self,
+        context: &opentelemetry::Context,
+        file_paths: &[String],
+        ascending: bool,
+        limit: Option<i64>,
+        offset: i64,
+    ) -> Result<(Vec<String>, i64), DbError>;
 }
 
 pub struct SqliteExifDao {
@@ -581,4 +592,65 @@ impl ExifDao for SqliteExifDao {
         })
         .map_err(|_| DbError::new(DbErrorKind::QueryError))
     }
+
+    fn get_files_sorted_by_date(
+        &mut self,
+        context: &opentelemetry::Context,
+        file_paths: &[String],
+        ascending: bool,
+        limit: Option<i64>,
+        offset: i64,
+    ) -> Result<(Vec<String>, i64), DbError> {
+        trace_db_call(context, "query", "get_files_sorted_by_date", |span| {
+            use diesel::dsl::count_star;
+            use opentelemetry::KeyValue;
+            use opentelemetry::trace::Span;
+            use schema::image_exif::dsl::*;
+
+            span.set_attributes(vec![
+                KeyValue::new("file_count", file_paths.len() as i64),
+                KeyValue::new("ascending", ascending.to_string()),
+                KeyValue::new("limit", limit.map(|l| l.to_string()).unwrap_or_default()),
+                KeyValue::new("offset", offset.to_string()),
+            ]);
+
+            if file_paths.is_empty() {
+                return Ok((Vec::new(), 0));
+            }
+
+            let connection = &mut *self.connection.lock().unwrap();
+
+            // Get total count of files that have EXIF data
+            let total_count: i64 = image_exif
+                .filter(file_path.eq_any(file_paths))
+                .select(count_star())
+                .first(connection)
+                .map_err(|_| anyhow::anyhow!("Count query error"))?;
+
+            // Build sorted query
+            let mut query = image_exif.filter(file_path.eq_any(file_paths)).into_boxed();
+
+            // Apply sorting
+            // Note: SQLite NULL handling varies - NULLs appear first for ASC, last for DESC by default
+            if ascending {
+                query = query.order(date_taken.asc());
+            } else {
+                query = query.order(date_taken.desc());
+            }
+
+            // Apply pagination if requested
+            if let Some(limit_val) = limit {
+                query = query.limit(limit_val).offset(offset);
+            }
+
+            // Execute and extract file paths
+            let results: Vec<String> = query
+                .select(file_path)
+                .load::<String>(connection)
+                .map_err(|_| anyhow::anyhow!("Query error"))?;
+
+            Ok((results, total_count))
+        })
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
 }
diff --git a/src/files.rs b/src/files.rs
index 8721568..bfcc754 100644
--- a/src/files.rs
+++ b/src/files.rs
@@ -47,69 +47,142 @@ use serde::Deserialize;
 /// Apply sorting to files with EXIF data support for date-based sorting
 /// Handles both date sorting (with EXIF/filename fallback) and regular sorting
+/// Returns (sorted_file_paths, total_count)
 fn apply_sorting_with_exif(
     files: Vec<FileWithTagCount>,
     sort_type: SortType,
     exif_dao: &mut Box<dyn ExifDao>,
     span_context: &opentelemetry::Context,
     base_path: &Path,
-) -> Vec<String> {
+    limit: Option<i64>,
+    offset: i64,
+) -> (Vec<String>, i64) {
+    let total_count = files.len() as i64;
+
     match sort_type {
         SortType::DateTakenAsc | SortType::DateTakenDesc => {
-            info!("Date sorting requested, fetching EXIF data");
+            info!("Date sorting requested, using database-level sorting");
 
             // Collect file paths for batch EXIF query
             let file_paths: Vec<String> = files.iter().map(|f| f.file_name.clone()).collect();
 
-            // Batch fetch EXIF data
-            let exif_map: std::collections::HashMap<String, i64> = exif_dao
-                .get_exif_batch(span_context, &file_paths)
-                .unwrap_or_default()
-                .into_iter()
-                .filter_map(|exif| exif.date_taken.map(|dt| (exif.file_path, dt)))
-                .collect();
-
-            // Convert to FileWithMetadata with date fallback logic
-            let files_with_metadata: Vec<FileWithMetadata> = files
-                .into_iter()
-                .map(|f| {
-                    // Try EXIF date first
-                    let date_taken = exif_map
-                        .get(&f.file_name)
-                        .copied()
-                        .or_else(|| {
-                            // Fallback to filename extraction
-                            extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())
-                        })
-                        .or_else(|| {
-                            // Fallback to filesystem metadata creation date
-                            let full_path = base_path.join(&f.file_name);
-                            std::fs::metadata(full_path)
-                                .and_then(|md| md.created().or(md.modified()))
-                                .ok()
-                                .map(|system_time| {
-                                    <std::time::SystemTime as Into<chrono::DateTime<chrono::Utc>>>::into(system_time)
-                                        .timestamp()
-                                })
-                        });
-
-                    FileWithMetadata {
-                        file_name: f.file_name,
-                        tag_count: f.tag_count,
-                        date_taken,
-                    }
-                })
-                .collect();
-
-            sort_with_metadata(files_with_metadata, sort_type)
+            // Try database-level sorting first (most efficient)
+            let ascending = sort_type == SortType::DateTakenAsc;
+            match exif_dao.get_files_sorted_by_date(
+                span_context,
+                &file_paths,
+                ascending,
+                limit,
+                offset,
+            ) {
+                Ok((sorted_files, db_total)) => {
+                    info!(
+                        "Database-level date sorting succeeded, returned {} files",
+                        sorted_files.len()
+                    );
+                    (sorted_files, db_total)
+                }
+                Err(e) => {
+                    warn!(
+                        "Database-level sorting failed: {:?}, falling back to in-memory sort",
+                        e
+                    );
+                    // Fallback to in-memory sorting with date extraction
+                    let (sorted, _) = in_memory_date_sort(
+                        files,
+                        sort_type,
+                        exif_dao,
+                        span_context,
+                        base_path,
+                        limit,
+                        offset,
+                    );
+                    (sorted, total_count)
+                }
+            }
         }
         _ => {
             // Use regular sort for non-date sorting
-            sort(files, sort_type)
+            let sorted = sort(files, sort_type);
+            let result = if let Some(limit_val) = limit {
+                sorted
+                    .into_iter()
+                    .skip(offset as usize)
+                    .take(limit_val as usize)
+                    .collect()
+            } else {
+                sorted
+            };
+            (result, total_count)
         }
     }
 }
 
+/// Fallback in-memory date sorting with EXIF/filename extraction
+fn in_memory_date_sort(
+    files: Vec<FileWithTagCount>,
+    sort_type: SortType,
+    exif_dao: &mut Box<dyn ExifDao>,
+    span_context: &opentelemetry::Context,
+    base_path: &Path,
+    limit: Option<i64>,
+    offset: i64,
+) -> (Vec<String>, i64) {
+    let total_count = files.len() as i64;
+    let file_paths: Vec<String> = files.iter().map(|f| f.file_name.clone()).collect();
+
+    // Batch fetch EXIF data
+    let exif_map: std::collections::HashMap<String, i64> = exif_dao
+        .get_exif_batch(span_context, &file_paths)
+        .unwrap_or_default()
+        .into_iter()
+        .filter_map(|exif| exif.date_taken.map(|dt| (exif.file_path, dt)))
+        .collect();
+
+    // Convert to FileWithMetadata with date fallback logic
+    let files_with_metadata: Vec<FileWithMetadata> = files
+        .into_iter()
+        .map(|f| {
+            // Try EXIF date first
+            let date_taken = exif_map
+                .get(&f.file_name)
+                .copied()
+                .or_else(|| {
+                    // Fallback to filename extraction
+                    extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())
+                })
+                .or_else(|| {
+                    // Fallback to filesystem metadata creation date
+                    let full_path = base_path.join(&f.file_name);
+                    std::fs::metadata(full_path)
+                        .and_then(|md| md.created().or(md.modified()))
+                        .ok()
+                        .map(|system_time| {
+                            <std::time::SystemTime as Into<chrono::DateTime<chrono::Utc>>>::into(system_time).timestamp()
+                        })
+                });
+
+            FileWithMetadata {
+                file_name: f.file_name,
+                tag_count: f.tag_count,
+                date_taken,
+            }
+        })
+        .collect();
+
+    let sorted = sort_with_metadata(files_with_metadata, sort_type);
+    let result = if let Some(limit_val) = limit {
+        sorted
+            .into_iter()
+            .skip(offset as usize)
+            .take(limit_val as usize)
+            .collect()
+    } else {
+        sorted
+    };
+    (result, total_count)
+}
+
 pub async fn list_photos(
     _: Claims,
     request: HttpRequest,
@@ -171,6 +244,23 @@ pub async fn list_photos(
                 .map(|mt| format!("{:?}", mt))
                 .unwrap_or_default(),
         ),
+        // Pagination parameters
+        KeyValue::new("pagination.enabled", req.limit.is_some().to_string()),
+        KeyValue::new(
+            "pagination.limit",
+            req.limit.map(|l| l.to_string()).unwrap_or_default(),
+        ),
+        KeyValue::new("pagination.offset", req.offset.unwrap_or(0).to_string()),
+        // Optimization flags
+        KeyValue::new("optimization.batch_tags", "true"),
+        KeyValue::new(
+            "optimization.db_sort",
+            matches!(
+                req.sort,
+                Some(SortType::DateTakenAsc | SortType::DateTakenDesc)
+            )
+            .to_string(),
+        ),
     ]);
 
     let span_context = opentelemetry::Context::current_with_span(span);
@@ -332,8 +422,10 @@ pub async fn list_photos(
                 .collect::<Vec<_>>()
         })
        .map(|files| {
-            // Handle sorting - use helper function that supports EXIF date sorting
+            // Handle sorting - use helper function that supports EXIF date sorting and pagination
             let sort_type = req.sort.unwrap_or(NameAsc);
+            let limit = req.limit;
+            let offset = req.offset.unwrap_or(0);
             let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
             let result = apply_sorting_with_exif(
                 files,
@@ -341,25 +433,50 @@ pub async fn list_photos(
                 &mut exif_dao_guard,
                 &span_context,
                 app_state.base_path.as_ref(),
+                limit,
+                offset,
             );
             drop(exif_dao_guard);
             result
         })
-        .inspect(|files| debug!("Found {:?} files", files.len()))
-        .map(|tagged_files: Vec<String>| {
+        .inspect(|(files, total)| debug!("Found {:?} files (total: {})", files.len(), total))
+        .map(|(tagged_files, total_count)| {
             info!(
                 "Found {:?} tagged files: {:?}",
                 tagged_files.len(),
                 tagged_files
             );
+
+            let returned_count = tagged_files.len() as i64;
+            let offset = req.offset.unwrap_or(0);
+            let pagination_metadata = if req.limit.is_some() {
+                (
+                    Some(total_count),
+                    Some(offset + returned_count < total_count),
+                    if offset + returned_count < total_count {
+                        Some(offset + returned_count)
+                    } else {
+                        None
+                    },
+                )
+            } else {
+                (None, None, None)
+            };
+
             span_context
                 .span()
                 .set_attribute(KeyValue::new("file_count", tagged_files.len().to_string()));
+            span_context
+                .span()
+                .set_attribute(KeyValue::new("total_count", total_count.to_string()));
             span_context.span().set_status(Status::Ok);
             HttpResponse::Ok().json(PhotosResponse {
                 photos: tagged_files,
                 dirs: vec![],
+                total_count: pagination_metadata.0,
+                has_more: pagination_metadata.1,
+                next_offset: pagination_metadata.2,
             })
         })
         .into_http_internal_err()
@@ -393,38 +510,99 @@ pub async fn list_photos(
         );
 
         info!("Starting to filter {} files from filesystem", files.len());
+        let start_filter = std::time::Instant::now();
 
-        let photos = files
-            .iter()
-            .filter(|&f| {
-                f.metadata().map_or_else(
-                    |e| {
-                        error!("Failed getting file metadata: {:?}", e);
-                        f.extension().is_some()
-                    },
-                    |md| md.is_file(),
-                )
-            })
-            .map(|path: &PathBuf| {
-                let relative = path.strip_prefix(&app_state.base_path).unwrap_or_else(|_| {
-                    panic!(
-                        "Unable to strip base path {} from file path {}",
-                        &app_state.base_path.path(),
-                        path.display()
-                    )
+        // Separate files and directories in a single pass to avoid redundant metadata calls
+        let (file_names, dirs): (Vec<String>, Vec<String>) =
+            files
+                .iter()
+                .fold((Vec::new(), Vec::new()), |(mut files, mut dirs), path| {
+                    match path.metadata() {
+                        Ok(md) => {
+                            let relative =
+                                path.strip_prefix(&app_state.base_path).unwrap_or_else(|_| {
+                                    panic!(
+                                        "Unable to strip base path {} from file path {}",
+                                        &app_state.base_path.path(),
+                                        path.display()
+                                    )
+                                });
+                            let relative_str = relative.to_str().unwrap().to_string();
+
+                            if md.is_file() {
+                                files.push(relative_str);
+                            } else if md.is_dir() {
+                                dirs.push(relative_str);
+                            }
+                        }
+                        Err(e) => {
+                            error!("Failed getting file metadata: {:?}", e);
+                            // Include files without metadata if they have extensions
+                            if path.extension().is_some() {
+                                let relative = path
+                                    .strip_prefix(&app_state.base_path)
+                                    .unwrap_or_else(|_| {
+                                        panic!(
+                                            "Unable to strip base path {} from file path {}",
+                                            &app_state.base_path.path(),
+                                            path.display()
+                                        )
+                                    });
+                                files.push(relative.to_str().unwrap().to_string());
+                            }
+                        }
+                    }
+                    (files, dirs)
                 });
-                relative.to_path_buf()
-            })
-            .map(|f| f.to_str().unwrap().to_string())
-            .map(|file_name| {
-                let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao");
-                let file_tags = tag_dao
-                    .get_tags_for_path(&span_context, &file_name)
-                    .unwrap_or_default();
+        info!(
+            "File filtering took {:?}, now fetching tag counts for {} files",
+            start_filter.elapsed(),
+            file_names.len()
+        );
+        let start_tags = std::time::Instant::now();
+
+        // Batch query for tag counts to avoid N+1 queries
+        let tag_counts = {
+            let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
+            tag_dao_guard
+                .get_tag_counts_batch(&span_context, &file_names)
+                .unwrap_or_default()
+        };
+        info!("Batch tag count query took {:?}", start_tags.elapsed());
+
+        // Also get full tag lists for files that need tag filtering
+        let start_tag_filter = std::time::Instant::now();
+        let file_tags_map: std::collections::HashMap<String, Vec<Tag>> =
+            if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
+                info!(
+                    "Tag filtering requested, fetching full tag lists for {} files",
+                    file_names.len()
+                );
+                let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
+                file_names
+                    .iter()
+                    .filter_map(|file_name| {
+                        tag_dao_guard
+                            .get_tags_for_path(&span_context, file_name)
+                            .ok()
+                            .map(|tags| (file_name.clone(), tags))
+                    })
+                    .collect()
+            } else {
+                std::collections::HashMap::new()
+            };
+        if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
+            info!("Full tag list fetch took {:?}", start_tag_filter.elapsed());
+        }
+
+        let photos = file_names
+            .into_iter()
+            .map(|file_name| {
+                let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default();
                 (file_name, file_tags)
             })
-            .filter(|(_, file_tags)| {
+            .filter(|(_, file_tags): &(String, Vec<Tag>)| {
                 if let Some(tag_ids) = &req.tag_ids {
                     let tag_ids = tag_ids
                         .split(',')
@@ -472,16 +650,28 @@ pub async fn list_photos(
                     true
                 }
             })
-            .map(|(file_name, tags)| FileWithTagCount {
-                file_name,
-                tag_count: tags.len() as i64,
-            })
+            .map(
+                |(file_name, _tags): (String, Vec<Tag>)| FileWithTagCount {
+                    file_name: file_name.clone(),
+                    tag_count: *tag_counts.get(&file_name).unwrap_or(&0),
+                },
+            )
             .collect::<Vec<_>>();
 
-        info!("After all filters, {} files remain", photos.len());
+        info!(
+            "After all filters, {} files remain (filtering took {:?})",
+            photos.len(),
+            start_filter.elapsed()
+        );
 
-        // Handle sorting - use helper function that supports EXIF date sorting
-        let response_files = if let Some(sort_type) = req.sort {
+        // Extract pagination parameters
+        let limit = req.limit;
+        let offset = req.offset.unwrap_or(0);
+        let start_sort = std::time::Instant::now();
+
+        // Handle sorting - use helper function that supports EXIF date sorting and pagination
+        let (response_files, total_count) = if let Some(sort_type) = req.sort {
+            info!("Sorting {} files by {:?}", photos.len(), sort_type);
             let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
             let result = apply_sorting_with_exif(
                 photos,
@@ -489,35 +679,68 @@ pub async fn list_photos(
                 &mut exif_dao_guard,
                 &span_context,
                 app_state.base_path.as_ref(),
+                limit,
+                offset,
             );
             drop(exif_dao_guard);
             result
         } else {
-            // No sorting requested
-            photos
-                .into_iter()
-                .map(|f| f.file_name)
-                .collect::<Vec<_>>()
+            // No sorting requested - apply pagination if requested
+            let total = photos.len() as i64;
+            let files: Vec<String> = if let Some(limit_val) = limit {
+                photos
+                    .into_iter()
+                    .skip(offset as usize)
+                    .take(limit_val as usize)
+                    .map(|f| f.file_name)
+                    .collect()
+            } else {
+                photos.into_iter().map(|f| f.file_name).collect()
+            };
+            (files, total)
         };
+        info!(
+            "Sorting took {:?}, returned {} files (total: {})",
+            start_sort.elapsed(),
+            response_files.len(),
+            total_count
+        );
 
-        let dirs = files
-            .iter()
-            .filter(|&f| f.metadata().is_ok_and(|md| md.is_dir()))
-            .map(|path: &PathBuf| {
-                let relative = path.strip_prefix(&app_state.base_path).unwrap();
-                relative.to_path_buf()
-            })
-            .map(|f| f.to_str().unwrap().to_string())
-            .collect::<Vec<_>>();
+        // Note: dirs were already collected during file filtering to avoid redundant metadata calls
+
+        // Calculate pagination metadata
+        let returned_count = response_files.len() as i64;
+        let pagination_metadata = if limit.is_some() {
+            (
+                Some(total_count),
+                Some(offset + returned_count < total_count),
+                if offset + returned_count < total_count {
+                    Some(offset + returned_count)
+                } else {
+                    None
+                },
+            )
+        } else {
+            (None, None, None)
+        };
 
         span_context
             .span()
             .set_attribute(KeyValue::new("file_count", files.len().to_string()));
+        span_context
+            .span()
+            .set_attribute(KeyValue::new("returned_count", returned_count.to_string()));
+        span_context
+            .span()
+            .set_attribute(KeyValue::new("total_count", total_count.to_string()));
         span_context.span().set_status(Status::Ok);
         HttpResponse::Ok().json(PhotosResponse {
             photos: response_files,
             dirs,
+            total_count: pagination_metadata.0,
+            has_more: pagination_metadata.1,
+            next_offset: pagination_metadata.2,
         })
     }
     _ => {
@@ -902,7 +1125,7 @@ mod tests {
             &mut self,
             _context: &opentelemetry::Context,
             data: crate::database::models::InsertImageExif,
-        ) -> Result<ImageExif, DbError> {
+        ) -> Result<crate::database::models::ImageExif, DbError> {
             // Return a dummy ImageExif for tests
             Ok(crate::database::models::ImageExif {
                 id: 1,
@@ -1020,6 +1243,19 @@ mod tests {
         ) -> Result<Vec<ImageExif>, DbError> {
             Ok(Vec::new())
         }
+
+        fn get_files_sorted_by_date(
+            &mut self,
+            _context: &opentelemetry::Context,
+            file_paths: &[String],
+            _ascending: bool,
+            _limit: Option<i64>,
+            _offset: i64,
+        ) -> Result<(Vec<String>, i64), DbError> {
+            // For tests, just return all files unsorted
+            let count = file_paths.len() as i64;
+            Ok((file_paths.to_vec(), count))
+        }
     }
 
     mod api {
diff --git a/src/main.rs b/src/main.rs
index b37f5fd..b7bc844 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -527,6 +527,9 @@ async fn favorites(
             HttpResponse::Ok().json(PhotosResponse {
                 photos: favorites,
                 dirs: Vec::new(),
+                total_count: None,
+                has_more: None,
+                next_offset: None,
             })
         }
         Ok(Err(e)) => {
diff --git a/src/tags.rs b/src/tags.rs
index ef23ec6..3a16c2f 100644
--- a/src/tags.rs
+++ b/src/tags.rs
@@ -322,6 +322,11 @@ pub trait TagDao {
         &mut self,
         context: &opentelemetry::Context,
     ) -> anyhow::Result<Vec<String>>;
+    fn get_tag_counts_batch(
+        &mut self,
+        context: &opentelemetry::Context,
+        file_paths: &[String],
+    ) -> anyhow::Result<std::collections::HashMap<String, i64>>;
 }
 
 pub struct SqliteTagDao {
@@ -642,6 +647,65 @@ impl TagDao for SqliteTagDao {
             .load(&mut self.connection)
             .with_context(|| "Unable to get photo names")
     }
+
+    fn get_tag_counts_batch(
+        &mut self,
+        context: &opentelemetry::Context,
+        file_paths: &[String],
+    ) -> anyhow::Result<std::collections::HashMap<String, i64>> {
+        use std::collections::HashMap;
+
+        trace_db_call(context, "query", "get_tag_counts_batch", |span| {
+            span.set_attribute(KeyValue::new("file_count", file_paths.len() as i64));
+
+            if file_paths.is_empty() {
+                return Ok(HashMap::new());
+            }
+
+            use diesel::dsl::*;
+            use diesel::sql_types::*;
+
+            // Build dynamic query with placeholders
+            let placeholders = std::iter::repeat_n("?", file_paths.len())
+                .collect::<Vec<_>>()
+                .join(", ");
+
+            let query_str = format!(
+                r#"
+                SELECT photo_name, COUNT(DISTINCT tag_id) as tag_count
+                FROM tagged_photo
+                WHERE photo_name IN ({})
+                GROUP BY photo_name
+                "#,
+                placeholders
+            );
+
+            let mut query = sql_query(query_str).into_boxed();
+
+            // Bind all file paths
+            for path in file_paths {
+                query = query.bind::<Text, _>(path);
+            }
+
+            #[derive(QueryableByName, Debug)]
+            struct TagCountRow {
+                #[diesel(sql_type = Text)]
+                photo_name: String,
+                #[diesel(sql_type = BigInt)]
+                tag_count: i64,
+            }
+
+            // Execute query and convert to HashMap
+            query
+                .load::<TagCountRow>(&mut self.connection)
+                .with_context(|| "Unable to get batch tag counts")
+                .map(|rows| {
+                    rows.into_iter()
+                        .map(|row| (row.photo_name, row.tag_count))
+                        .collect::<HashMap<String, i64>>()
+                })
+        })
+    }
 }
 
 #[cfg(test)]
@@ -818,6 +882,21 @@ mod tests {
         ) -> anyhow::Result<Vec<String>> {
             todo!()
         }
+
+        fn get_tag_counts_batch(
+            &mut self,
+            _context: &opentelemetry::Context,
+            file_paths: &[String],
+        ) -> anyhow::Result<std::collections::HashMap<String, i64>> {
+            use std::collections::HashMap;
+            let mut counts = HashMap::new();
+            for path in file_paths {
+                if let Some(tags) = self.tagged_photos.borrow().get(path) {
+                    counts.insert(path.clone(), tags.len() as i64);
+                }
+            }
+            Ok(counts)
+        }
     }
 
     #[actix_rt::test]
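
The has_more / next_offset computation above is duplicated in both response
paths of list_photos. A minimal standalone sketch of the same contract, using
a hypothetical pagination_metadata helper that is not part of this patch:

    // Hypothetical helper mirroring the patch's pagination logic; the name
    // and shape are illustrative, not part of the codebase.
    fn pagination_metadata(
        limit: Option<i64>,
        offset: i64,
        returned_count: i64,
        total_count: i64,
    ) -> (Option<i64>, Option<bool>, Option<i64>) {
        match limit {
            // No limit requested: the response carries no pagination metadata
            None => (None, None, None),
            Some(_) => {
                let end = offset + returned_count;
                let has_more = end < total_count;
                (
                    Some(total_count),
                    Some(has_more),
                    if has_more { Some(end) } else { None },
                )
            }
        }
    }

    fn main() {
        // First page: 100 items total, limit 40, offset 0 -> next_offset 40
        assert_eq!(
            pagination_metadata(Some(40), 0, 40, 100),
            (Some(100), Some(true), Some(40))
        );
        // Last page: offset 80 returns the final 20 items, so no next page
        assert_eq!(
            pagination_metadata(Some(40), 80, 20, 100),
            (Some(100), Some(false), None)
        );
        // Unpaginated request behaves exactly as before the patch
        assert_eq!(pagination_metadata(None, 0, 100, 100), (None, None, None));
    }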
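
The NULL-handling note in get_files_sorted_by_date can be checked directly:
SQLite treats NULL as smaller than any other value, so rows without date_taken
sort first under ASC and last under DESC. A quick sketch, using the rusqlite
crate rather than the project's Diesel setup purely so it runs standalone:

    use rusqlite::Connection;

    fn main() -> rusqlite::Result<()> {
        let conn = Connection::open_in_memory()?;
        conn.execute_batch(
            "CREATE TABLE image_exif (file_path TEXT, date_taken INTEGER);
             INSERT INTO image_exif VALUES
                 ('old.jpg', 1), ('new.jpg', 2), ('no_exif.jpg', NULL);",
        )?;
        let asc: Vec<String> = conn
            .prepare("SELECT file_path FROM image_exif ORDER BY date_taken ASC")?
            .query_map([], |row| row.get(0))?
            .collect::<Result<_, _>>()?;
        assert_eq!(asc, ["no_exif.jpg", "old.jpg", "new.jpg"]); // NULLs first
        let desc: Vec<String> = conn
            .prepare("SELECT file_path FROM image_exif ORDER BY date_taken DESC")?
            .query_map([], |row| row.get(0))?
            .collect::<Result<_, _>>()?;
        assert_eq!(desc, ["new.jpg", "old.jpg", "no_exif.jpg"]); // NULLs last
        Ok(())
    }

This is why the descending covering index still serves ascending queries: the
ordering is simply reversed, NULL placement included.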
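
get_tag_counts_batch replaces one get_tags_for_path round trip per file with a
single IN (...) / GROUP BY statement, which is the access pattern
idx_tagged_photo_count is built for. A self-contained sketch of the same
technique, again using rusqlite for brevity; the table and index mirror the
migration above, and files absent from the result default to a count of 0 on
the caller's side:

    use rusqlite::{params_from_iter, Connection};

    fn main() -> rusqlite::Result<()> {
        let conn = Connection::open_in_memory()?;
        conn.execute_batch(
            "CREATE TABLE tagged_photo (photo_name TEXT, tag_id INTEGER);
             CREATE INDEX idx_tagged_photo_count ON tagged_photo(photo_name, tag_id);
             INSERT INTO tagged_photo VALUES ('a.jpg', 1), ('a.jpg', 2), ('b.jpg', 1);",
        )?;

        let files = ["a.jpg", "b.jpg", "c.jpg"];
        // One statement with N placeholders instead of N separate queries
        let placeholders = vec!["?"; files.len()].join(", ");
        let sql = format!(
            "SELECT photo_name, COUNT(DISTINCT tag_id) FROM tagged_photo \
             WHERE photo_name IN ({placeholders}) GROUP BY photo_name"
        );
        let mut stmt = conn.prepare(&sql)?;
        let rows = stmt.query_map(params_from_iter(files.iter()), |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })?;
        for row in rows {
            let (name, count) = row?;
            println!("{name}: {count} tags"); // c.jpg is absent -> treated as 0
        }
        Ok(())
    }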