perf: DB-backed recursive /photos + watcher reconciliation
Recursive listings now query image_exif instead of walking disk, taking union-mode /photos from ~17s to sub-second on a 10k-file library. The watcher's full scan prunes stale image_exif rows so the DB stays in parity with the filesystem when files are deleted externally. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -385,6 +385,28 @@ pub trait ExifDao: Sync + Send {
|
|||||||
context: &opentelemetry::Context,
|
context: &opentelemetry::Context,
|
||||||
hash: &str,
|
hash: &str,
|
||||||
) -> Result<Vec<String>, DbError>;
|
) -> Result<Vec<String>, DbError>;
|
||||||
|
|
||||||
|
/// List `(library_id, rel_path)` pairs for the given libraries, optionally
|
||||||
|
/// restricted to rows whose rel_path starts with `path_prefix`. When
|
||||||
|
/// `library_ids` is empty, rows from every library are returned. Used by
|
||||||
|
/// `/photos` recursive listing to skip the filesystem walk — the watcher
|
||||||
|
/// keeps image_exif in parity with disk via the reconciliation pass.
|
||||||
|
fn list_rel_paths_for_libraries(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
library_ids: &[i32],
|
||||||
|
path_prefix: Option<&str>,
|
||||||
|
) -> Result<Vec<(i32, String)>, DbError>;
|
||||||
|
|
||||||
|
/// Delete a single image_exif row scoped to `(library_id, rel_path)`.
|
||||||
|
/// Distinct from `delete_exif`, which matches on rel_path alone and
|
||||||
|
/// would clobber same-named files across libraries.
|
||||||
|
fn delete_exif_by_library(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
library_id: i32,
|
||||||
|
rel_path: &str,
|
||||||
|
) -> Result<(), DbError>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct SqliteExifDao {
|
pub struct SqliteExifDao {
|
||||||
@@ -933,6 +955,59 @@ impl ExifDao for SqliteExifDao {
|
|||||||
})
|
})
|
||||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn list_rel_paths_for_libraries(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
library_ids: &[i32],
|
||||||
|
path_prefix: Option<&str>,
|
||||||
|
) -> Result<Vec<(i32, String)>, DbError> {
|
||||||
|
trace_db_call(context, "query", "list_rel_paths_for_libraries", |_span| {
|
||||||
|
use schema::image_exif::dsl::*;
|
||||||
|
|
||||||
|
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||||
|
|
||||||
|
let mut query = image_exif.select((library_id, rel_path)).into_boxed();
|
||||||
|
|
||||||
|
if !library_ids.is_empty() {
|
||||||
|
query = query.filter(library_id.eq_any(library_ids.to_vec()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(prefix) = path_prefix.map(str::trim).filter(|s| !s.is_empty()) {
|
||||||
|
// Trailing slash normalization so "2024" matches "2024/..."
|
||||||
|
// without also matching "2024-archive/...".
|
||||||
|
let prefix = prefix.trim_end_matches('/');
|
||||||
|
let pattern = format!("{}/%", prefix.replace('%', "\\%").replace('_', "\\_"));
|
||||||
|
query = query.filter(rel_path.like(pattern).escape('\\'));
|
||||||
|
}
|
||||||
|
|
||||||
|
query
|
||||||
|
.load::<(i32, String)>(connection.deref_mut())
|
||||||
|
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn delete_exif_by_library(
|
||||||
|
&mut self,
|
||||||
|
context: &opentelemetry::Context,
|
||||||
|
library_id_val: i32,
|
||||||
|
rel_path_val: &str,
|
||||||
|
) -> Result<(), DbError> {
|
||||||
|
trace_db_call(context, "delete", "delete_exif_by_library", |_span| {
|
||||||
|
use schema::image_exif::dsl::*;
|
||||||
|
|
||||||
|
diesel::delete(
|
||||||
|
image_exif
|
||||||
|
.filter(library_id.eq(library_id_val))
|
||||||
|
.filter(rel_path.eq(rel_path_val)),
|
||||||
|
)
|
||||||
|
.execute(self.connection.lock().unwrap().deref_mut())
|
||||||
|
.map(|_| ())
|
||||||
|
.map_err(|_| anyhow::anyhow!("Delete error"))
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
225
src/files.rs
225
src/files.rs
@@ -546,91 +546,89 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
|
|||||||
.unwrap_or_else(|e| e.error_response());
|
.unwrap_or_else(|e| e.error_response());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Walk each candidate library's root for the requested sub-path. In
|
// In scoped mode `libraries_to_scan` has one entry (the selected library);
|
||||||
// scoped mode `libraries_to_scan` has one entry (the selected library);
|
// in union mode we enumerate every configured library and intermix results.
|
||||||
// in union mode we walk every configured library and intermix results.
|
//
|
||||||
// For the primary library we preserve the original FileSystemAccess
|
// Recursive mode pulls rel_paths from image_exif (kept in parity with disk
|
||||||
// path so the test-mock path (MockFileSystem) continues to work.
|
// by the watcher's full-scan reconciliation) instead of walking — a ~10k
|
||||||
|
// file library drops from multi-second to ~10ms for the listing itself.
|
||||||
|
// Non-recursive mode still walks because we need directory metadata for
|
||||||
|
// the `dirs` response and listing a single directory is cheap.
|
||||||
let mut file_names: Vec<String> = Vec::new();
|
let mut file_names: Vec<String> = Vec::new();
|
||||||
let mut file_libraries: Vec<i32> = Vec::new();
|
let mut file_libraries: Vec<i32> = Vec::new();
|
||||||
let mut dirs_set: std::collections::HashSet<String> = std::collections::HashSet::new();
|
let mut dirs_set: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||||
let mut any_library_resolved = false;
|
let mut any_library_resolved = false;
|
||||||
|
|
||||||
for lib in &libraries_to_scan {
|
if search_recursively {
|
||||||
let files_result = if search_recursively {
|
let start_db_list = std::time::Instant::now();
|
||||||
is_valid_full_path(
|
let lib_ids: Vec<i32> = libraries_to_scan.iter().map(|l| l.id).collect();
|
||||||
&PathBuf::from(&lib.root_path),
|
let trimmed = search_path.trim();
|
||||||
&PathBuf::from(search_path),
|
let prefix = if trimmed.is_empty() || trimmed == "/" {
|
||||||
false,
|
None
|
||||||
)
|
|
||||||
.map(|path| {
|
|
||||||
debug!("Valid path for recursive search: {:?}", path);
|
|
||||||
list_files_recursive(&path).unwrap_or_default()
|
|
||||||
})
|
|
||||||
.context("Invalid path")
|
|
||||||
} else if lib.id == app_state.primary_library().id {
|
|
||||||
file_system.get_files_for_path(search_path)
|
|
||||||
} else {
|
} else {
|
||||||
is_valid_full_path(
|
Some(trimmed)
|
||||||
&PathBuf::from(&lib.root_path),
|
|
||||||
&PathBuf::from(search_path),
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
.map(|path| {
|
|
||||||
debug!("Valid path for non-recursive search: {:?}", path);
|
|
||||||
list_files(&path).unwrap_or_default()
|
|
||||||
})
|
|
||||||
.context("Invalid path")
|
|
||||||
};
|
};
|
||||||
|
let rows = {
|
||||||
let files = match files_result {
|
let mut dao = exif_dao.lock().expect("Unable to get ExifDao");
|
||||||
Ok(f) => {
|
dao.list_rel_paths_for_libraries(&span_context, &lib_ids, prefix)
|
||||||
any_library_resolved = true;
|
.unwrap_or_else(|e| {
|
||||||
f
|
warn!("list_rel_paths_for_libraries failed: {:?}", e);
|
||||||
}
|
Vec::new()
|
||||||
Err(e) => {
|
})
|
||||||
debug!(
|
|
||||||
"Skipping library '{}' for path '{}': {:?}",
|
|
||||||
lib.name, search_path, e
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
"Found {:?} files in library '{}' path: {:?} (recursive: {})",
|
"DB-backed recursive listing: {} files across {} libraries in {:?}",
|
||||||
files.len(),
|
rows.len(),
|
||||||
lib.name,
|
lib_ids.len(),
|
||||||
search_path,
|
start_db_list.elapsed()
|
||||||
search_recursively
|
|
||||||
);
|
);
|
||||||
|
any_library_resolved = true;
|
||||||
|
for (lib_id, path) in rows {
|
||||||
|
file_libraries.push(lib_id);
|
||||||
|
file_names.push(path);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for lib in &libraries_to_scan {
|
||||||
|
let files_result = if lib.id == app_state.primary_library().id {
|
||||||
|
file_system.get_files_for_path(search_path)
|
||||||
|
} else {
|
||||||
|
is_valid_full_path(
|
||||||
|
&PathBuf::from(&lib.root_path),
|
||||||
|
&PathBuf::from(search_path),
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.map(|path| {
|
||||||
|
debug!("Valid path for non-recursive search: {:?}", path);
|
||||||
|
list_files(&path).unwrap_or_default()
|
||||||
|
})
|
||||||
|
.context("Invalid path")
|
||||||
|
};
|
||||||
|
|
||||||
for path in &files {
|
let files = match files_result {
|
||||||
match path.metadata() {
|
Ok(f) => {
|
||||||
Ok(md) => {
|
any_library_resolved = true;
|
||||||
let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| {
|
f
|
||||||
panic!(
|
|
||||||
"Unable to strip library root {} from file path {}",
|
|
||||||
&lib.root_path,
|
|
||||||
path.display()
|
|
||||||
)
|
|
||||||
});
|
|
||||||
// Normalize separators to '/' so downstream lookups
|
|
||||||
// (tags, EXIF, insights) that store rel_paths with
|
|
||||||
// forward slashes still match on Windows.
|
|
||||||
let relative_str = relative.to_str().unwrap().replace('\\', "/");
|
|
||||||
|
|
||||||
if md.is_file() {
|
|
||||||
file_names.push(relative_str);
|
|
||||||
file_libraries.push(lib.id);
|
|
||||||
} else if md.is_dir() {
|
|
||||||
dirs_set.insert(relative_str);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Failed getting file metadata: {:?}", e);
|
debug!(
|
||||||
// Include files without metadata if they have extensions
|
"Skipping library '{}' for path '{}': {:?}",
|
||||||
if path.extension().is_some() {
|
lib.name, search_path, e
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"Found {:?} files in library '{}' path: {:?} (recursive: {})",
|
||||||
|
files.len(),
|
||||||
|
lib.name,
|
||||||
|
search_path,
|
||||||
|
search_recursively
|
||||||
|
);
|
||||||
|
|
||||||
|
for path in &files {
|
||||||
|
match path.metadata() {
|
||||||
|
Ok(md) => {
|
||||||
let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| {
|
let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| {
|
||||||
panic!(
|
panic!(
|
||||||
"Unable to strip library root {} from file path {}",
|
"Unable to strip library root {} from file path {}",
|
||||||
@@ -638,8 +636,32 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
|
|||||||
path.display()
|
path.display()
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
file_names.push(relative.to_str().unwrap().replace('\\', "/"));
|
// Normalize separators to '/' so downstream lookups
|
||||||
file_libraries.push(lib.id);
|
// (tags, EXIF, insights) that store rel_paths with
|
||||||
|
// forward slashes still match on Windows.
|
||||||
|
let relative_str = relative.to_str().unwrap().replace('\\', "/");
|
||||||
|
|
||||||
|
if md.is_file() {
|
||||||
|
file_names.push(relative_str);
|
||||||
|
file_libraries.push(lib.id);
|
||||||
|
} else if md.is_dir() {
|
||||||
|
dirs_set.insert(relative_str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Failed getting file metadata: {:?}", e);
|
||||||
|
// Include files without metadata if they have extensions
|
||||||
|
if path.extension().is_some() {
|
||||||
|
let relative = path.strip_prefix(&lib.root_path).unwrap_or_else(|_| {
|
||||||
|
panic!(
|
||||||
|
"Unable to strip library root {} from file path {}",
|
||||||
|
&lib.root_path,
|
||||||
|
path.display()
|
||||||
|
)
|
||||||
|
});
|
||||||
|
file_names.push(relative.to_str().unwrap().replace('\\', "/"));
|
||||||
|
file_libraries.push(lib.id);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -943,43 +965,6 @@ pub fn list_files(dir: &Path) -> io::Result<Vec<PathBuf>> {
|
|||||||
Ok(files)
|
Ok(files)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn list_files_recursive(dir: &Path) -> io::Result<Vec<PathBuf>> {
|
|
||||||
let tracer = global_tracer();
|
|
||||||
let mut span = tracer.start("list_files_recursive");
|
|
||||||
let dir_name_string = dir.to_str().unwrap_or_default().to_string();
|
|
||||||
span.set_attribute(KeyValue::new("dir", dir_name_string));
|
|
||||||
info!("Recursively listing files in: {:?}", dir);
|
|
||||||
|
|
||||||
let mut result = Vec::new();
|
|
||||||
|
|
||||||
fn visit_dirs(dir: &Path, files: &mut Vec<PathBuf>) -> io::Result<()> {
|
|
||||||
if dir.is_dir() {
|
|
||||||
for entry in read_dir(dir)? {
|
|
||||||
let entry = entry?;
|
|
||||||
let path = entry.path();
|
|
||||||
|
|
||||||
if path.is_dir() {
|
|
||||||
visit_dirs(&path, files)?;
|
|
||||||
} else if is_image_or_video(&path) {
|
|
||||||
files.push(path);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
visit_dirs(dir, &mut result)?;
|
|
||||||
|
|
||||||
span.set_attribute(KeyValue::new("file_count", result.len().to_string()));
|
|
||||||
span.set_status(Status::Ok);
|
|
||||||
info!(
|
|
||||||
"Found {:?} files recursively in directory: {:?}",
|
|
||||||
result.len(),
|
|
||||||
dir
|
|
||||||
);
|
|
||||||
Ok(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_image_or_video(path: &Path) -> bool {
|
pub fn is_image_or_video(path: &Path) -> bool {
|
||||||
file_types::is_media_file(path)
|
file_types::is_media_file(path)
|
||||||
}
|
}
|
||||||
@@ -1567,6 +1552,24 @@ mod tests {
|
|||||||
) -> Result<Vec<String>, DbError> {
|
) -> Result<Vec<String>, DbError> {
|
||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn list_rel_paths_for_libraries(
|
||||||
|
&mut self,
|
||||||
|
_context: &opentelemetry::Context,
|
||||||
|
_library_ids: &[i32],
|
||||||
|
_path_prefix: Option<&str>,
|
||||||
|
) -> Result<Vec<(i32, String)>, DbError> {
|
||||||
|
Ok(vec![])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn delete_exif_by_library(
|
||||||
|
&mut self,
|
||||||
|
_context: &opentelemetry::Context,
|
||||||
|
_library_id: i32,
|
||||||
|
_rel_path: &str,
|
||||||
|
) -> Result<(), DbError> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mod api {
|
mod api {
|
||||||
|
|||||||
47
src/main.rs
47
src/main.rs
@@ -14,7 +14,10 @@ use prometheus::{self, IntGauge};
|
|||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use std::time::{Duration, SystemTime};
|
use std::time::{Duration, SystemTime};
|
||||||
use std::{collections::HashMap, io::prelude::*};
|
use std::{
|
||||||
|
collections::{HashMap, HashSet},
|
||||||
|
io::prelude::*,
|
||||||
|
};
|
||||||
use std::{env, fs::File};
|
use std::{env, fs::File};
|
||||||
use std::{
|
use std::{
|
||||||
io::ErrorKind,
|
io::ErrorKind,
|
||||||
@@ -1916,6 +1919,48 @@ fn process_new_files(
|
|||||||
info!("Processing thumbnails for new files...");
|
info!("Processing thumbnails for new files...");
|
||||||
create_thumbnails(std::slice::from_ref(library));
|
create_thumbnails(std::slice::from_ref(library));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reconciliation: on a full scan, prune image_exif rows whose rel_path no
|
||||||
|
// longer exists on disk for this library. Keeps the DB in parity so
|
||||||
|
// downstream DB-backed listings (e.g. recursive /photos) don't return
|
||||||
|
// phantom files. Skipped on quick scans — those only look at recently
|
||||||
|
// modified files and can't distinguish "missing" from "unchanged".
|
||||||
|
if modified_since.is_none() {
|
||||||
|
let disk_paths: HashSet<String> = files.iter().map(|(_, rel)| rel.clone()).collect();
|
||||||
|
let db_paths: Vec<String> = {
|
||||||
|
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||||
|
dao.get_rel_paths_for_library(&context, library.id)
|
||||||
|
.unwrap_or_else(|e| {
|
||||||
|
error!(
|
||||||
|
"Reconciliation: failed to load image_exif rel_paths for lib {}: {:?}",
|
||||||
|
library.id, e
|
||||||
|
);
|
||||||
|
Vec::new()
|
||||||
|
})
|
||||||
|
};
|
||||||
|
|
||||||
|
let stale: Vec<String> = db_paths
|
||||||
|
.into_iter()
|
||||||
|
.filter(|p| !disk_paths.contains(p))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if !stale.is_empty() {
|
||||||
|
info!(
|
||||||
|
"Reconciliation: pruning {} stale image_exif rows for library '{}'",
|
||||||
|
stale.len(),
|
||||||
|
library.name
|
||||||
|
);
|
||||||
|
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||||
|
for rel in &stale {
|
||||||
|
if let Err(e) = dao.delete_exif_by_library(&context, library.id, rel) {
|
||||||
|
warn!(
|
||||||
|
"Reconciliation: failed to delete {} (lib {}): {:?}",
|
||||||
|
rel, library.id, e
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
Reference in New Issue
Block a user