Merge pull request 'file_types: filter macOS AppleDouble + .DS_Store from media predicates' (#99) from feature/filter-fs-metadata into master
Reviewed-on: #99
This commit was merged in pull request #99.
This commit is contained in:
@@ -22,8 +22,38 @@ pub fn needs_ffmpeg_thumbnail(path: &Path) -> bool {
|
|||||||
/// Supported video file extensions
|
/// Supported video file extensions
|
||||||
pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"];
|
pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"];
|
||||||
|
|
||||||
|
/// Basenames that are filesystem metadata rather than real media; matched
/// exactly against the final path component. Extend this list if a new
/// platform sidecar appears (Windows `Thumbs.db` / `desktop.ini` belong
/// here too if those libraries land).
const METADATA_FILENAMES: &[&str] = &[".DS_Store"];

/// Returns `true` if the basename of `path` is a filesystem metadata
/// sidecar that should be invisible to every media predicate.
///
/// macOS writes `._<name>` AppleDouble companions when copying to
/// non-HFS volumes — each holds the extended attributes of `<name>`,
/// NOT a copy of the bytes. It has the same extension as the real file,
/// so a pure-extension match treats `._photo.jpg` as a JPEG, ships it to
/// the decoder, and accumulates failed rows: face_detections
/// `status='failed'`, clip_embedding `status='failed'`, plus a pointless
/// `image_exif` row whose `content_hash` will be the hash of the metadata
/// blob. The downstream noise (failed-row counts that never go to zero,
/// 422 bursts to Apollo, evictor timer reset by those 422s) is the
/// visible damage. `.DS_Store` is the per-directory version (Finder view
/// state) — no extension, but cheap to guard here too in case some future
/// predicate matches by content type.
///
/// The match is deliberately narrow: any basename starting with `._`, or
/// a basename listed in [`METADATA_FILENAMES`]. Other dotfiles (for
/// example `.cover.jpg`) are not treated as metadata.
///
/// Returns `false` when `path` has no final component (for example an
/// empty path or one ending in `..`).
pub fn is_filesystem_metadata(path: &Path) -> bool {
    let Some(raw_name) = path.file_name() else {
        return false;
    };
    // Compare on a lossy rendering instead of bailing out on names that are
    // not valid UTF-8. The extension of such a name can still decode cleanly,
    // so an extension-based predicate would otherwise accept a `._` sidecar
    // this guard refused to look at. The ASCII prefix `._` and the exact
    // names in METADATA_FILENAMES are unaffected by the replacement
    // characters the lossy conversion may insert.
    let lossy = raw_name.to_string_lossy();
    let name: &str = &lossy;
    name.starts_with("._") || METADATA_FILENAMES.contains(&name)
}
|
||||||
|
|
||||||
/// Check if a path has an image extension
|
/// Check if a path has an image extension
|
||||||
pub fn is_image_file(path: &Path) -> bool {
|
pub fn is_image_file(path: &Path) -> bool {
|
||||||
|
if is_filesystem_metadata(path) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
|
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
|
||||||
let ext_lower = ext.to_lowercase();
|
let ext_lower = ext.to_lowercase();
|
||||||
IMAGE_EXTENSIONS.contains(&ext_lower.as_str())
|
IMAGE_EXTENSIONS.contains(&ext_lower.as_str())
|
||||||
@@ -34,6 +64,9 @@ pub fn is_image_file(path: &Path) -> bool {
|
|||||||
|
|
||||||
/// Check if a path has a video extension
|
/// Check if a path has a video extension
|
||||||
pub fn is_video_file(path: &Path) -> bool {
|
pub fn is_video_file(path: &Path) -> bool {
|
||||||
|
if is_filesystem_metadata(path) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
|
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
|
||||||
let ext_lower = ext.to_lowercase();
|
let ext_lower = ext.to_lowercase();
|
||||||
VIDEO_EXTENSIONS.contains(&ext_lower.as_str())
|
VIDEO_EXTENSIONS.contains(&ext_lower.as_str())
|
||||||
@@ -98,4 +131,46 @@ mod tests {
|
|||||||
assert!(!is_media_file(Path::new("document.txt")));
|
assert!(!is_media_file(Path::new("document.txt")));
|
||||||
assert!(!is_media_file(Path::new("no_extension")));
|
assert!(!is_media_file(Path::new("no_extension")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_apple_double_excluded_from_media() {
    // Regression guard for the original bug: macOS AppleDouble sidecars
    // were being shipped by ImageApi to Apollo's CLIP/face decoders,
    // piling up failed rows and pinning Apollo's eviction timer with the
    // resulting 422 burst. Because the guard lives in the predicates,
    // every downstream walker (face_watch, backfill, clip_watch, watcher)
    // picks up the fix without changing its own filters.
    let image_sidecars = ["._photo.jpg", "dir/._photo.JPG", "a/b/._DSC_2182-S.jpg"];
    for sidecar in image_sidecars {
        assert!(
            !is_image_file(Path::new(sidecar)),
            "{sidecar} must not count as an image"
        );
    }

    let video_sidecar = Path::new("._video.mp4");
    assert!(!is_video_file(video_sidecar));

    let media_sidecar = Path::new("._photo.png");
    assert!(!is_media_file(media_sidecar));

    // A leading underscore without the dot is an ordinary file name, not
    // an AppleDouble companion, so it must still be accepted.
    let ordinary = Path::new("_photo.jpg");
    assert!(is_image_file(ordinary));
}
|
||||||
|
|
||||||
|
#[test]
fn test_ds_store_excluded() {
    // `.DS_Store` is Finder's per-directory metadata. It has no image
    // extension, so is_image_file would reject it anyway; the guard makes
    // the *reason* for the rejection explicit and protects a hypothetical
    // future caller that matches by basename.
    let bare = Path::new(".DS_Store");
    let nested = Path::new("some/dir/.DS_Store");
    let in_dir = Path::new("dir/.DS_Store");

    assert!(!is_image_file(bare));
    assert!(!is_video_file(bare));
    assert!(!is_media_file(nested));

    for candidate in [bare, in_dir] {
        assert!(
            is_filesystem_metadata(candidate),
            "{} must be recognised as filesystem metadata",
            candidate.display()
        );
    }
}
|
||||||
|
|
||||||
|
#[test]
fn test_dotfiles_other_than_apple_double_are_unaffected() {
    // The filter is intentionally limited to `._*` plus the exact
    // `.DS_Store` name rather than every dotfile: a user could plausibly
    // name a cover image `.cover.jpg`, and silently dropping it would be
    // surprising. If that assumption proves wrong, broaden the rule here;
    // until then, narrow + explicit beats broad + surprising.
    let hidden_cover = Path::new(".cover.jpg");

    let treated_as_metadata = is_filesystem_metadata(hidden_cover);
    let treated_as_image = is_image_file(hidden_cover);

    assert!(treated_as_image);
    assert!(!treated_as_metadata);
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user