Add database optimizations for photo search and pagination

Implement database-level sorting with composite indexes for efficient date and tag queries. Add pagination metadata support and optimize tag count queries using batch processing.
This commit is contained in:
Cameron
2026-01-18 19:17:10 -05:00
parent 5e646dfc19
commit be483c9c1a
7 changed files with 519 additions and 96 deletions

View File

@@ -0,0 +1,4 @@
-- Revert search performance optimization indexes
DROP INDEX IF EXISTS idx_image_exif_date_path;
DROP INDEX IF EXISTS idx_tagged_photo_count;

View File

@@ -0,0 +1,15 @@
-- Add composite indexes for search performance optimization
-- This migration addresses N+1 query issues and enables database-level sorting
-- Covering index for date-sorted queries (supports ORDER BY + pagination)
-- Enables efficient date-based sorting without loading all files into memory
CREATE INDEX IF NOT EXISTS idx_image_exif_date_path
ON image_exif(date_taken DESC, file_path);
-- Optimize batch tag count queries with GROUP BY
-- Reduces N individual queries to a single batch query
CREATE INDEX IF NOT EXISTS idx_tagged_photo_count
ON tagged_photo(photo_name, tag_id);
-- Update query planner statistics to optimize query execution
ANALYZE;

View File

@@ -101,6 +101,16 @@ impl FromRequest for Claims {
pub struct PhotosResponse {
pub photos: Vec<String>,
pub dirs: Vec<String>,
// Pagination metadata (only present when limit is set)
#[serde(skip_serializing_if = "Option::is_none")]
pub total_count: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub has_more: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub next_offset: Option<i64>,
}
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
@@ -141,6 +151,10 @@ pub struct FilesRequest {
// Media type filtering
pub media_type: Option<MediaType>,
// Pagination parameters (optional - backward compatible)
pub limit: Option<i64>,
pub offset: Option<i64>,
}
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)]

View File

@@ -296,6 +296,17 @@ pub trait ExifDao: Sync + Send {
&mut self,
context: &opentelemetry::Context,
) -> Result<Vec<String>, DbError>;
/// Get files sorted by date with optional pagination
/// Returns (sorted_file_paths, total_count)
fn get_files_sorted_by_date(
&mut self,
context: &opentelemetry::Context,
file_paths: &[String],
ascending: bool,
limit: Option<i64>,
offset: i64,
) -> Result<(Vec<String>, i64), DbError>;
}
pub struct SqliteExifDao {
@@ -581,4 +592,65 @@ impl ExifDao for SqliteExifDao {
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_files_sorted_by_date(
&mut self,
context: &opentelemetry::Context,
file_paths: &[String],
ascending: bool,
limit: Option<i64>,
offset: i64,
) -> Result<(Vec<String>, i64), DbError> {
trace_db_call(context, "query", "get_files_sorted_by_date", |span| {
use diesel::dsl::count_star;
use opentelemetry::KeyValue;
use opentelemetry::trace::Span;
use schema::image_exif::dsl::*;
span.set_attributes(vec![
KeyValue::new("file_count", file_paths.len() as i64),
KeyValue::new("ascending", ascending.to_string()),
KeyValue::new("limit", limit.map(|l| l.to_string()).unwrap_or_default()),
KeyValue::new("offset", offset.to_string()),
]);
if file_paths.is_empty() {
return Ok((Vec::new(), 0));
}
let connection = &mut *self.connection.lock().unwrap();
// Get total count of files that have EXIF data
let total_count: i64 = image_exif
.filter(file_path.eq_any(file_paths))
.select(count_star())
.first(connection)
.map_err(|_| anyhow::anyhow!("Count query error"))?;
// Build sorted query
let mut query = image_exif.filter(file_path.eq_any(file_paths)).into_boxed();
// Apply sorting
// Note: SQLite NULL handling varies - NULLs appear first for ASC, last for DESC by default
if ascending {
query = query.order(date_taken.asc());
} else {
query = query.order(date_taken.desc());
}
// Apply pagination if requested
if let Some(limit_val) = limit {
query = query.limit(limit_val).offset(offset);
}
// Execute and extract file paths
let results: Vec<String> = query
.select(file_path)
.load::<String>(connection)
.map_err(|_| anyhow::anyhow!("Query error"))?;
Ok((results, total_count))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
}

View File

@@ -47,69 +47,142 @@ use serde::Deserialize;
/// Apply sorting to files with EXIF data support for date-based sorting
/// Handles both date sorting (with EXIF/filename fallback) and regular sorting
/// Returns (sorted_file_paths, total_count)
fn apply_sorting_with_exif(
files: Vec<FileWithTagCount>,
sort_type: SortType,
exif_dao: &mut Box<dyn ExifDao>,
span_context: &opentelemetry::Context,
base_path: &Path,
) -> Vec<String> {
limit: Option<i64>,
offset: i64,
) -> (Vec<String>, i64) {
let total_count = files.len() as i64;
match sort_type {
SortType::DateTakenAsc | SortType::DateTakenDesc => {
info!("Date sorting requested, fetching EXIF data");
info!("Date sorting requested, using database-level sorting");
// Collect file paths for batch EXIF query
let file_paths: Vec<String> = files.iter().map(|f| f.file_name.clone()).collect();
// Batch fetch EXIF data
let exif_map: std::collections::HashMap<String, i64> = exif_dao
.get_exif_batch(span_context, &file_paths)
.unwrap_or_default()
.into_iter()
.filter_map(|exif| exif.date_taken.map(|dt| (exif.file_path, dt)))
.collect();
// Convert to FileWithMetadata with date fallback logic
let files_with_metadata: Vec<FileWithMetadata> = files
.into_iter()
.map(|f| {
// Try EXIF date first
let date_taken = exif_map
.get(&f.file_name)
.copied()
.or_else(|| {
// Fallback to filename extraction
extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())
})
.or_else(|| {
// Fallback to filesystem metadata creation date
let full_path = base_path.join(&f.file_name);
std::fs::metadata(full_path)
.and_then(|md| md.created().or(md.modified()))
.ok()
.map(|system_time| {
<SystemTime as Into<DateTime<Utc>>>::into(system_time)
.timestamp()
})
});
FileWithMetadata {
file_name: f.file_name,
tag_count: f.tag_count,
date_taken,
}
})
.collect();
sort_with_metadata(files_with_metadata, sort_type)
// Try database-level sorting first (most efficient)
let ascending = sort_type == SortType::DateTakenAsc;
match exif_dao.get_files_sorted_by_date(
span_context,
&file_paths,
ascending,
limit,
offset,
) {
Ok((sorted_files, db_total)) => {
info!(
"Database-level date sorting succeeded, returned {} files",
sorted_files.len()
);
(sorted_files, db_total)
}
Err(e) => {
warn!(
"Database-level sorting failed: {:?}, falling back to in-memory sort",
e
);
// Fallback to in-memory sorting with date extraction
let (sorted, _) = in_memory_date_sort(
files,
sort_type,
exif_dao,
span_context,
base_path,
limit,
offset,
);
(sorted, total_count)
}
}
}
_ => {
// Use regular sort for non-date sorting
sort(files, sort_type)
let sorted = sort(files, sort_type);
let result = if let Some(limit_val) = limit {
sorted
.into_iter()
.skip(offset as usize)
.take(limit_val as usize)
.collect()
} else {
sorted
};
(result, total_count)
}
}
}
/// Fallback in-memory date sorting with EXIF/filename extraction
fn in_memory_date_sort(
files: Vec<FileWithTagCount>,
sort_type: SortType,
exif_dao: &mut Box<dyn ExifDao>,
span_context: &opentelemetry::Context,
base_path: &Path,
limit: Option<i64>,
offset: i64,
) -> (Vec<String>, i64) {
let total_count = files.len() as i64;
let file_paths: Vec<String> = files.iter().map(|f| f.file_name.clone()).collect();
// Batch fetch EXIF data
let exif_map: std::collections::HashMap<String, i64> = exif_dao
.get_exif_batch(span_context, &file_paths)
.unwrap_or_default()
.into_iter()
.filter_map(|exif| exif.date_taken.map(|dt| (exif.file_path, dt)))
.collect();
// Convert to FileWithMetadata with date fallback logic
let files_with_metadata: Vec<FileWithMetadata> = files
.into_iter()
.map(|f| {
// Try EXIF date first
let date_taken = exif_map
.get(&f.file_name)
.copied()
.or_else(|| {
// Fallback to filename extraction
extract_date_from_filename(&f.file_name).map(|dt| dt.timestamp())
})
.or_else(|| {
// Fallback to filesystem metadata creation date
let full_path = base_path.join(&f.file_name);
std::fs::metadata(full_path)
.and_then(|md| md.created().or(md.modified()))
.ok()
.map(|system_time| {
<SystemTime as Into<DateTime<Utc>>>::into(system_time).timestamp()
})
});
FileWithMetadata {
file_name: f.file_name,
tag_count: f.tag_count,
date_taken,
}
})
.collect();
let sorted = sort_with_metadata(files_with_metadata, sort_type);
let result = if let Some(limit_val) = limit {
sorted
.into_iter()
.skip(offset as usize)
.take(limit_val as usize)
.collect()
} else {
sorted
};
(result, total_count)
}
pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
_: Claims,
request: HttpRequest,
@@ -171,6 +244,23 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
.map(|mt| format!("{:?}", mt))
.unwrap_or_default(),
),
// Pagination parameters
KeyValue::new("pagination.enabled", req.limit.is_some().to_string()),
KeyValue::new(
"pagination.limit",
req.limit.map(|l| l.to_string()).unwrap_or_default(),
),
KeyValue::new("pagination.offset", req.offset.unwrap_or(0).to_string()),
// Optimization flags
KeyValue::new("optimization.batch_tags", "true"),
KeyValue::new(
"optimization.db_sort",
matches!(
req.sort,
Some(SortType::DateTakenAsc | SortType::DateTakenDesc)
)
.to_string(),
),
]);
let span_context = opentelemetry::Context::current_with_span(span);
@@ -332,8 +422,10 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
.collect::<Vec<FileWithTagCount>>()
})
.map(|files| {
// Handle sorting - use helper function that supports EXIF date sorting
// Handle sorting - use helper function that supports EXIF date sorting and pagination
let sort_type = req.sort.unwrap_or(NameAsc);
let limit = req.limit;
let offset = req.offset.unwrap_or(0);
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result = apply_sorting_with_exif(
files,
@@ -341,25 +433,50 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
&mut exif_dao_guard,
&span_context,
app_state.base_path.as_ref(),
limit,
offset,
);
drop(exif_dao_guard);
result
})
.inspect(|files| debug!("Found {:?} files", files.len()))
.map(|tagged_files: Vec<String>| {
.inspect(|(files, total)| debug!("Found {:?} files (total: {})", files.len(), total))
.map(|(tagged_files, total_count)| {
info!(
"Found {:?} tagged files: {:?}",
tagged_files.len(),
tagged_files
);
let returned_count = tagged_files.len() as i64;
let offset = req.offset.unwrap_or(0);
let pagination_metadata = if req.limit.is_some() {
(
Some(total_count),
Some(offset + returned_count < total_count),
if offset + returned_count < total_count {
Some(offset + returned_count)
} else {
None
},
)
} else {
(None, None, None)
};
span_context
.span()
.set_attribute(KeyValue::new("file_count", tagged_files.len().to_string()));
span_context
.span()
.set_attribute(KeyValue::new("total_count", total_count.to_string()));
span_context.span().set_status(Status::Ok);
HttpResponse::Ok().json(PhotosResponse {
photos: tagged_files,
dirs: vec![],
total_count: pagination_metadata.0,
has_more: pagination_metadata.1,
next_offset: pagination_metadata.2,
})
})
.into_http_internal_err()
@@ -393,38 +510,99 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
);
info!("Starting to filter {} files from filesystem", files.len());
let start_filter = std::time::Instant::now();
let photos = files
.iter()
.filter(|&f| {
f.metadata().map_or_else(
|e| {
error!("Failed getting file metadata: {:?}", e);
f.extension().is_some()
},
|md| md.is_file(),
)
})
.map(|path: &PathBuf| {
let relative = path.strip_prefix(&app_state.base_path).unwrap_or_else(|_| {
panic!(
"Unable to strip base path {} from file path {}",
&app_state.base_path.path(),
path.display()
)
// Separate files and directories in a single pass to avoid redundant metadata calls
let (file_names, dirs): (Vec<String>, Vec<String>) =
files
.iter()
.fold((Vec::new(), Vec::new()), |(mut files, mut dirs), path| {
match path.metadata() {
Ok(md) => {
let relative =
path.strip_prefix(&app_state.base_path).unwrap_or_else(|_| {
panic!(
"Unable to strip base path {} from file path {}",
&app_state.base_path.path(),
path.display()
)
});
let relative_str = relative.to_str().unwrap().to_string();
if md.is_file() {
files.push(relative_str);
} else if md.is_dir() {
dirs.push(relative_str);
}
}
Err(e) => {
error!("Failed getting file metadata: {:?}", e);
// Include files without metadata if they have extensions
if path.extension().is_some() {
let relative = path
.strip_prefix(&app_state.base_path)
.unwrap_or_else(|_| {
panic!(
"Unable to strip base path {} from file path {}",
&app_state.base_path.path(),
path.display()
)
});
files.push(relative.to_str().unwrap().to_string());
}
}
}
(files, dirs)
});
relative.to_path_buf()
})
.map(|f| f.to_str().unwrap().to_string())
.map(|file_name| {
let mut tag_dao = tag_dao.lock().expect("Unable to get TagDao");
let file_tags = tag_dao
.get_tags_for_path(&span_context, &file_name)
.unwrap_or_default();
info!(
"File filtering took {:?}, now fetching tag counts for {} files",
start_filter.elapsed(),
file_names.len()
);
let start_tags = std::time::Instant::now();
// Batch query for tag counts to avoid N+1 queries
let tag_counts = {
let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
tag_dao_guard
.get_tag_counts_batch(&span_context, &file_names)
.unwrap_or_default()
};
info!("Batch tag count query took {:?}", start_tags.elapsed());
// Also get full tag lists for files that need tag filtering
let start_tag_filter = std::time::Instant::now();
let file_tags_map: std::collections::HashMap<String, Vec<crate::tags::Tag>> =
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
info!(
"Tag filtering requested, fetching full tag lists for {} files",
file_names.len()
);
let mut tag_dao_guard = tag_dao.lock().expect("Unable to get TagDao");
file_names
.iter()
.filter_map(|file_name| {
tag_dao_guard
.get_tags_for_path(&span_context, file_name)
.ok()
.map(|tags| (file_name.clone(), tags))
})
.collect()
} else {
std::collections::HashMap::new()
};
if req.tag_ids.is_some() || req.exclude_tag_ids.is_some() {
info!("Full tag list fetch took {:?}", start_tag_filter.elapsed());
}
let photos = file_names
.into_iter()
.map(|file_name| {
let file_tags = file_tags_map.get(&file_name).cloned().unwrap_or_default();
(file_name, file_tags)
})
.filter(|(_, file_tags)| {
.filter(|(_, file_tags): &(String, Vec<crate::tags::Tag>)| {
if let Some(tag_ids) = &req.tag_ids {
let tag_ids = tag_ids
.split(',')
@@ -472,16 +650,28 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
true
}
})
.map(|(file_name, tags)| FileWithTagCount {
file_name,
tag_count: tags.len() as i64,
})
.map(
|(file_name, _tags): (String, Vec<crate::tags::Tag>)| FileWithTagCount {
file_name: file_name.clone(),
tag_count: *tag_counts.get(&file_name).unwrap_or(&0),
},
)
.collect::<Vec<FileWithTagCount>>();
info!("After all filters, {} files remain", photos.len());
info!(
"After all filters, {} files remain (filtering took {:?})",
photos.len(),
start_filter.elapsed()
);
// Handle sorting - use helper function that supports EXIF date sorting
let response_files = if let Some(sort_type) = req.sort {
// Extract pagination parameters
let limit = req.limit;
let offset = req.offset.unwrap_or(0);
let start_sort = std::time::Instant::now();
// Handle sorting - use helper function that supports EXIF date sorting and pagination
let (response_files, total_count) = if let Some(sort_type) = req.sort {
info!("Sorting {} files by {:?}", photos.len(), sort_type);
let mut exif_dao_guard = exif_dao.lock().expect("Unable to get ExifDao");
let result = apply_sorting_with_exif(
photos,
@@ -489,35 +679,68 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
&mut exif_dao_guard,
&span_context,
app_state.base_path.as_ref(),
limit,
offset,
);
drop(exif_dao_guard);
result
} else {
// No sorting requested
photos
.into_iter()
.map(|f| f.file_name)
.collect::<Vec<String>>()
// No sorting requested - apply pagination if requested
let total = photos.len() as i64;
let files: Vec<String> = if let Some(limit_val) = limit {
photos
.into_iter()
.skip(offset as usize)
.take(limit_val as usize)
.map(|f| f.file_name)
.collect()
} else {
photos.into_iter().map(|f| f.file_name).collect()
};
(files, total)
};
info!(
"Sorting took {:?}, returned {} files (total: {})",
start_sort.elapsed(),
response_files.len(),
total_count
);
let dirs = files
.iter()
.filter(|&f| f.metadata().is_ok_and(|md| md.is_dir()))
.map(|path: &PathBuf| {
let relative = path.strip_prefix(&app_state.base_path).unwrap();
relative.to_path_buf()
})
.map(|f| f.to_str().unwrap().to_string())
.collect::<Vec<String>>();
// Note: dirs were already collected during file filtering to avoid redundant metadata calls
// Calculate pagination metadata
let returned_count = response_files.len() as i64;
let pagination_metadata = if limit.is_some() {
(
Some(total_count),
Some(offset + returned_count < total_count),
if offset + returned_count < total_count {
Some(offset + returned_count)
} else {
None
},
)
} else {
(None, None, None)
};
span_context
.span()
.set_attribute(KeyValue::new("file_count", files.len().to_string()));
span_context
.span()
.set_attribute(KeyValue::new("returned_count", returned_count.to_string()));
span_context
.span()
.set_attribute(KeyValue::new("total_count", total_count.to_string()));
span_context.span().set_status(Status::Ok);
HttpResponse::Ok().json(PhotosResponse {
photos: response_files,
dirs,
total_count: pagination_metadata.0,
has_more: pagination_metadata.1,
next_offset: pagination_metadata.2,
})
}
_ => {
@@ -902,7 +1125,7 @@ mod tests {
&mut self,
_context: &opentelemetry::Context,
data: crate::database::models::InsertImageExif,
) -> Result<crate::database::models::ImageExif, crate::database::DbError> {
) -> Result<crate::database::models::ImageExif, DbError> {
// Return a dummy ImageExif for tests
Ok(crate::database::models::ImageExif {
id: 1,
@@ -1020,6 +1243,19 @@ mod tests {
) -> Result<Vec<String>, DbError> {
Ok(Vec::new())
}
fn get_files_sorted_by_date(
&mut self,
_context: &opentelemetry::Context,
file_paths: &[String],
_ascending: bool,
_limit: Option<i64>,
_offset: i64,
) -> Result<(Vec<String>, i64), DbError> {
// For tests, just return all files unsorted
let count = file_paths.len() as i64;
Ok((file_paths.to_vec(), count))
}
}
mod api {

View File

@@ -527,6 +527,9 @@ async fn favorites(
HttpResponse::Ok().json(PhotosResponse {
photos: favorites,
dirs: Vec::new(),
total_count: None,
has_more: None,
next_offset: None,
})
}
Ok(Err(e)) => {

View File

@@ -322,6 +322,11 @@ pub trait TagDao {
&mut self,
context: &opentelemetry::Context,
) -> anyhow::Result<Vec<String>>;
fn get_tag_counts_batch(
&mut self,
context: &opentelemetry::Context,
file_paths: &[String],
) -> anyhow::Result<std::collections::HashMap<String, i64>>;
}
pub struct SqliteTagDao {
@@ -642,6 +647,65 @@ impl TagDao for SqliteTagDao {
.load(&mut self.connection)
.with_context(|| "Unable to get photo names")
}
fn get_tag_counts_batch(
&mut self,
context: &opentelemetry::Context,
file_paths: &[String],
) -> anyhow::Result<std::collections::HashMap<String, i64>> {
use std::collections::HashMap;
trace_db_call(context, "query", "get_tag_counts_batch", |span| {
span.set_attribute(KeyValue::new("file_count", file_paths.len() as i64));
if file_paths.is_empty() {
return Ok(HashMap::new());
}
use diesel::dsl::*;
use diesel::sql_types::*;
// Build dynamic query with placeholders
let placeholders = std::iter::repeat_n("?", file_paths.len())
.collect::<Vec<_>>()
.join(", ");
let query_str = format!(
r#"
SELECT photo_name, COUNT(DISTINCT tag_id) as tag_count
FROM tagged_photo
WHERE photo_name IN ({})
GROUP BY photo_name
"#,
placeholders
);
let mut query = sql_query(query_str).into_boxed();
// Bind all file paths
for path in file_paths {
query = query.bind::<Text, _>(path);
}
#[derive(QueryableByName, Debug)]
struct TagCountRow {
#[diesel(sql_type = Text)]
photo_name: String,
#[diesel(sql_type = BigInt)]
tag_count: i64,
}
// Execute query and convert to HashMap
query
.load::<TagCountRow>(&mut self.connection)
.with_context(|| "Unable to get batch tag counts")
.map(|rows| {
rows.into_iter()
.map(|row| (row.photo_name, row.tag_count))
.collect::<HashMap<String, i64>>()
})
})
}
}
#[cfg(test)]
@@ -818,6 +882,21 @@ mod tests {
) -> anyhow::Result<Vec<String>> {
todo!()
}
fn get_tag_counts_batch(
&mut self,
_context: &opentelemetry::Context,
file_paths: &[String],
) -> anyhow::Result<std::collections::HashMap<String, i64>> {
use std::collections::HashMap;
let mut counts = HashMap::new();
for path in file_paths {
if let Some(tags) = self.tagged_photos.borrow().get(path) {
counts.insert(path.clone(), tags.len() as i64);
}
}
Ok(counts)
}
}
#[actix_rt::test]