Tests and improved pattern-excluding behavior

This commit is contained in:
Cameron
2025-12-01 12:54:40 -05:00
parent f5c53d1e0e
commit a7d065aadc

View File

@@ -16,6 +16,83 @@ use crate::files::is_image_or_video;
use crate::otel::{extract_context_from_request, global_tracer}; use crate::otel::{extract_context_from_request, global_tracer};
use crate::state::AppState; use crate::state::AppState;
/// Helper that encapsulates path-exclusion semantics.
///
/// Holds two kinds of rules, both produced by [`PathExcluder::new`]:
/// directory rules (whole subtrees under the base directory) and component
/// patterns (exact names of a single directory or file).
#[derive(Debug)]
struct PathExcluder {
    /// Root that every rule is scoped to. Component patterns are only compared
    /// against the part of a path that lies below this directory.
    base: PathBuf,
    /// Directories, already joined onto `base`, whose whole subtree is excluded.
    excluded_dirs: Vec<PathBuf>,
    /// Names that exclude a path when they equal one of its components.
    excluded_patterns: Vec<String>,
}

impl PathExcluder {
    /// Build from a `base` path and the raw exclusion entries.
    ///
    /// Rules:
    /// - Entries starting with '/' are interpreted as "absolute under base"
    ///   (e.g. "/photos/private" -> base/photos/private). Every leading '/'
    ///   is dropped, because joining an absolute path onto `base` would
    ///   replace `base` instead of extending it. An entry made only of
    ///   slashes is ignored.
    /// - Every other entry is a component pattern: it excludes a path when it
    ///   is exactly equal to one directory or file name below `base`. It is
    ///   not a substring match. An entry that contains '/' without starting
    ///   with it can never equal a single component and so matches nothing.
    fn new(base: &Path, raw_excluded: &[String]) -> Self {
        let mut excluded_dirs = Vec::new();
        let mut excluded_patterns = Vec::new();

        for entry in raw_excluded {
            if entry.starts_with('/') {
                // Absolute under base
                let rel = entry.trim_start_matches('/');
                if !rel.is_empty() {
                    excluded_dirs.push(base.join(rel));
                }
            } else {
                // Component name matched anywhere under base
                excluded_patterns.push(entry.clone());
            }
        }

        debug!(
            "PathExcluder created. dirs={:?}, patterns={:?}",
            excluded_dirs, excluded_patterns
        );

        Self {
            base: base.to_path_buf(),
            excluded_dirs,
            excluded_patterns,
        }
    }

    /// Returns true if `path` should be excluded.
    ///
    /// A path is excluded when it lies inside one of the excluded directories,
    /// or when one of its components below `base` is equal to an excluded
    /// pattern. Components that are not valid UTF-8 never match a pattern.
    fn is_excluded(&self, path: &Path) -> bool {
        // Directory-based exclusions
        if let Some(rule) = self
            .excluded_dirs
            .iter()
            .find(|dir| path.starts_with(dir))
        {
            debug!("PathExcluder: excluded by dir: {:?} (rule: {:?})", path, rule);
            return true;
        }

        if self.excluded_patterns.is_empty() {
            return false;
        }

        // Pattern-based exclusions: match whole path components (dir or file name),
        // not substrings. Only the part below `base` is inspected so that the
        // location of `base` itself (e.g. "/tmp/library" with pattern "tmp")
        // cannot exclude the entire tree. Paths outside `base` are checked in full.
        let scoped = path.strip_prefix(&self.base).unwrap_or(path);
        for component in scoped.components() {
            if let Some(comp_str) = component.as_os_str().to_str() {
                if self.excluded_patterns.iter().any(|pat| pat == comp_str) {
                    debug!(
                        "PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})",
                        path, comp_str, self.excluded_patterns
                    );
                    return true;
                }
            }
        }

        false
    }
}
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)] #[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum MemoriesSpan { pub enum MemoriesSpan {
@@ -242,27 +319,8 @@ pub async fn list_memories(
let base = Path::new(&app_state.base_path); let base = Path::new(&app_state.base_path);
// Build a list of excluded directories and patterns, all scoped under base: // Build the path excluder from base and env-configured exclusions
// - entries starting with '/' are treated as absolute *under base* (e.g. "/foo/bar" -> base/foo/bar) let path_excluder = PathExcluder::new(base, &app_state.excluded_dirs);
// - entries without '/' are treated as substring patterns matched anywhere in the path
let mut excluded_dirs: Vec<PathBuf> = Vec::new();
let mut excluded_patterns: Vec<String> = Vec::new();
for dir in &app_state.excluded_dirs {
if dir.starts_with('/') {
// "Absolute under base": strip leading '/' and join with base
let rel = &dir[1..];
if !rel.is_empty() {
excluded_dirs.push(base.join(rel));
}
} else {
// Pure pattern (no '/'): match as substring anywhere in the path
excluded_patterns.push(dir.clone());
}
}
debug!("Excluded directories (under base): {:?}", excluded_dirs);
debug!("Excluded path patterns: {:?}", excluded_patterns);
let entries: Vec<_> = WalkDir::new(base) let entries: Vec<_> = WalkDir::new(base)
.into_iter() .into_iter()
@@ -270,28 +328,10 @@ pub async fn list_memories(
.filter(|e| { .filter(|e| {
let path = e.path(); let path = e.path();
// Skip excluded directories (all are under base) // Skip paths that should be excluded
if !excluded_dirs.is_empty() { if path_excluder.is_excluded(path) {
for excluded in &excluded_dirs {
if path.starts_with(excluded) {
debug!("Skipping excluded dir path: {:?}", path);
return false; return false;
} }
}
}
// Skip paths that match any of the relative patterns (substring match under base)
if !excluded_patterns.is_empty() {
if let Some(path_str) = path.to_str() {
if excluded_patterns.iter().any(|pat| path_str.contains(pat)) {
debug!(
"Skipping excluded pattern match: {:?} (patterns: {:?})",
path, excluded_patterns
);
return false;
}
}
}
true true
}) })
@@ -420,11 +460,9 @@ fn same_month_any_year(a: NaiveDate, b: NaiveDate) -> bool {
mod tests { mod tests {
use super::*; use super::*;
use chrono::Timelike; use chrono::Timelike;
use std::fs::File; use std::fs::{self, File};
use tempfile::tempdir; use tempfile::tempdir;
// Add new tests for our date extraction functionality
#[test] #[test]
fn test_extract_date_from_filename_screenshot_format() { fn test_extract_date_from_filename_screenshot_format() {
let filename = "Screenshot_2014-06-01-20-44-50.png"; let filename = "Screenshot_2014-06-01-20-44-50.png";
@@ -568,4 +606,138 @@ mod tests {
let dt_modified = DateTime::<Utc>::from_timestamp(modified.unwrap(), 0).unwrap(); let dt_modified = DateTime::<Utc>::from_timestamp(modified.unwrap(), 0).unwrap();
assert_eq!(dt_modified.year(), today.year()); assert_eq!(dt_modified.year(), today.year());
} }
}
#[test]
fn test_path_excluder_absolute_under_base() {
    let workspace = tempdir().unwrap();
    let base = workspace.path();

    // Layout created under the temporary base:
    //   photos/private/secret.jpg   <- covered by "/photos/private"
    //   photos/public/ok.jpg        <- no rule applies
    //   screenshots/img.png         <- covered by "/screenshots"
    let private_dir = base.join("photos/private");
    let public_dir = base.join("photos/public");
    let screenshots_dir = base.join("screenshots");
    for dir in &[&private_dir, &public_dir, &screenshots_dir] {
        fs::create_dir_all(dir).unwrap();
    }

    let secret = private_dir.join("secret.jpg");
    let ok = public_dir.join("ok.jpg");
    let shot = screenshots_dir.join("img.png");
    for file in &[&secret, &ok, &shot] {
        File::create(file).unwrap();
    }

    // Both rules start with '/', so they name directories relative to base.
    let rules = vec!["/photos/private".to_string(), "/screenshots".to_string()];
    let excluder = PathExcluder::new(base, &rules);

    assert!(excluder.is_excluded(&secret), "secret should be excluded");
    assert!(excluder.is_excluded(&shot), "screenshots should be excluded");
    assert!(
        !excluder.is_excluded(&ok),
        "public photo should NOT be excluded"
    );
}
#[test]
fn test_path_excluder_pattern_anywhere_under_base() {
    let workspace = tempdir().unwrap();
    let base = workspace.path();

    // Layout created under the temporary base:
    //   a/tmp_file.jpg   <- "tmp" is only part of the file name
    //   b/normal.jpg     <- no "tmp" anywhere
    //   c/tmp/file.jpg   <- a directory named exactly "tmp"
    let dir_a = base.join("a");
    let dir_b = base.join("b");
    let dir_c_tmp = base.join("c/tmp");
    for dir in &[&dir_a, &dir_b, &dir_c_tmp] {
        fs::create_dir_all(dir).unwrap();
    }

    let tmp_file = dir_a.join("tmp_file.jpg");
    let normal = dir_b.join("normal.jpg");
    let tmp_dir_file = dir_c_tmp.join("file.jpg");
    for file in &[&tmp_file, &normal, &tmp_dir_file] {
        File::create(file).unwrap();
    }

    // A pattern without a leading '/' must equal a whole path component.
    let rules = vec!["tmp".to_string()];
    let excluder = PathExcluder::new(base, &rules);

    assert!(
        !excluder.is_excluded(&tmp_file),
        "file with 'tmp' in name should NOT be excluded"
    );
    assert!(
        excluder.is_excluded(&tmp_dir_file),
        "file in directory with 'tmp' in path should be excluded"
    );
    assert!(
        !excluder.is_excluded(&normal),
        "file without 'tmp' in its path should NOT be excluded"
    );
}
#[test]
fn test_path_excluder_mixed_absolute_and_pattern() {
    let workspace = tempdir().unwrap();
    let base = workspace.path();

    // Layout created under the temporary base, with the expected verdict:
    //   photos/private/secret_tmp.jpg  -> excluded (directory rule)
    //   photos/private/secret.jpg      -> excluded (directory rule)
    //   photos/tmp/public.jpg          -> excluded (component named "tmp")
    //   photos/public/tmp_public.jpg   -> kept ("tmp" is only part of the name)
    //   other/keep.jpg                 -> kept (no rule applies)
    let private_dir = base.join("photos/private");
    let tmp_dir = base.join("photos/tmp");
    let public_dir = base.join("photos/public");
    let other_dir = base.join("other");
    for dir in &[&private_dir, &tmp_dir, &public_dir, &other_dir] {
        fs::create_dir_all(dir).unwrap();
    }

    let secret_tmp = private_dir.join("secret_tmp.jpg");
    let secret = private_dir.join("secret.jpg");
    let tmp_dir_file = tmp_dir.join("public.jpg");
    let tmp_in_name = public_dir.join("tmp_public.jpg");
    let keep = other_dir.join("keep.jpg");
    for file in &[&secret_tmp, &secret, &tmp_dir_file, &tmp_in_name, &keep] {
        File::create(file).unwrap();
    }

    // One directory rule ("/photos/private") plus one component pattern ("tmp").
    let rules = vec!["/photos/private".to_string(), "tmp".to_string()];
    let excluder = PathExcluder::new(base, &rules);

    // Everything below photos/private falls under the directory rule.
    assert!(excluder.is_excluded(&secret_tmp));
    assert!(excluder.is_excluded(&secret));

    // The directory photos/tmp is caught by the component pattern.
    assert!(excluder.is_excluded(&tmp_dir_file));

    // A file name that merely contains "tmp" is not a component match.
    assert!(!excluder.is_excluded(&tmp_in_name));

    // Nothing applies to other/keep.jpg.
    assert!(!excluder.is_excluded(&keep));
}
}