use std::path::Path; use walkdir::DirEntry; /// Supported image file extensions pub const IMAGE_EXTENSIONS: &[&str] = &[ "jpg", "jpeg", "png", "webp", "tiff", "tif", "heif", "heic", "avif", "nef", "arw", ]; /// Extensions the `image` crate cannot decode — we fall back to ffmpeg to /// extract an embedded preview or decode the frame. pub const FFMPEG_THUMBNAIL_EXTENSIONS: &[&str] = &["heif", "heic", "nef", "arw"]; /// Returns true if thumbnail generation should go through ffmpeg instead of /// the `image` crate (RAW formats, HEIF/HEIC). pub fn needs_ffmpeg_thumbnail(path: &Path) -> bool { match path.extension().and_then(|e| e.to_str()) { Some(ext) => FFMPEG_THUMBNAIL_EXTENSIONS.contains(&ext.to_lowercase().as_str()), None => false, } } /// Supported video file extensions pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"]; /// Audio file extensions accepted as voice-clone references (TTS). Mirrors /// the formats Chatterbox can decode (wav/mp3/flac/m4a/aac/ogg). pub const AUDIO_EXTENSIONS: &[&str] = &["wav", "mp3", "flac", "m4a", "aac", "ogg", "oga", "opus"]; /// Filenames that are filesystem metadata, not real media — exact /// basename match. Extend if a new platform sidecar appears (Windows /// Thumbs.db / desktop.ini live here too if those libraries land). const METADATA_FILENAMES: &[&str] = &[".DS_Store"]; /// True if the basename is a filesystem metadata sidecar that should be /// invisible to every media predicate. /// /// macOS writes `._` AppleDouble companions when copying to /// non-HFS volumes — each holds the extended attributes of ``, /// NOT a copy of the bytes. Same extension as the real file, so a /// pure-extension match treats `._photo.jpg` as a JPEG, ships it to /// the decoder, and accumulates failed rows: face_detections /// `status='failed'`, clip_embedding `status='failed'`, plus a /// pointless `image_exif` row whose `content_hash` will be the hash /// of the metadata blob. The downstream noise (failed-row counts that /// never go to zero, 422 bursts to Apollo, evictor timer reset by /// those 422s) is the visible damage. `.DS_Store` is the per-directory /// version (Finder view state) — no extension, but cheap to guard /// here too in case some future predicate matches by content type. pub fn is_filesystem_metadata(path: &Path) -> bool { let Some(name) = path.file_name().and_then(|n| n.to_str()) else { return false; }; name.starts_with("._") || METADATA_FILENAMES.contains(&name) } /// Check if a path has an image extension pub fn is_image_file(path: &Path) -> bool { if is_filesystem_metadata(path) { return false; } if let Some(ext) = path.extension().and_then(|e| e.to_str()) { let ext_lower = ext.to_lowercase(); IMAGE_EXTENSIONS.contains(&ext_lower.as_str()) } else { false } } /// Check if a path has a video extension pub fn is_video_file(path: &Path) -> bool { if is_filesystem_metadata(path) { return false; } if let Some(ext) = path.extension().and_then(|e| e.to_str()) { let ext_lower = ext.to_lowercase(); VIDEO_EXTENSIONS.contains(&ext_lower.as_str()) } else { false } } /// Check if a path has an audio extension (voice-clone references) pub fn is_audio_file(path: &Path) -> bool { if is_filesystem_metadata(path) { return false; } if let Some(ext) = path.extension().and_then(|e| e.to_str()) { let ext_lower = ext.to_lowercase(); AUDIO_EXTENSIONS.contains(&ext_lower.as_str()) } else { false } } /// Check if a path has a supported media extension (image or video) pub fn is_media_file(path: &Path) -> bool { is_image_file(path) || is_video_file(path) } /// Check if a DirEntry is an image file (for walkdir usage) #[allow(dead_code)] pub fn direntry_is_image(entry: &DirEntry) -> bool { is_image_file(entry.path()) } /// Check if a DirEntry is a video file (for walkdir usage) #[allow(dead_code)] pub fn direntry_is_video(entry: &DirEntry) -> bool { is_video_file(entry.path()) } /// Check if a DirEntry is a media file (for walkdir usage) #[allow(dead_code)] pub fn direntry_is_media(entry: &DirEntry) -> bool { is_media_file(entry.path()) } #[cfg(test)] mod tests { use super::*; use std::path::Path; #[test] fn test_is_image_file() { assert!(is_image_file(Path::new("photo.jpg"))); assert!(is_image_file(Path::new("photo.JPG"))); assert!(is_image_file(Path::new("photo.png"))); assert!(is_image_file(Path::new("photo.nef"))); assert!(!is_image_file(Path::new("video.mp4"))); assert!(!is_image_file(Path::new("document.txt"))); } #[test] fn test_is_video_file() { assert!(is_video_file(Path::new("video.mp4"))); assert!(is_video_file(Path::new("video.MP4"))); assert!(is_video_file(Path::new("video.mov"))); assert!(is_video_file(Path::new("video.avi"))); assert!(!is_video_file(Path::new("photo.jpg"))); assert!(!is_video_file(Path::new("document.txt"))); } #[test] fn test_is_media_file() { assert!(is_media_file(Path::new("photo.jpg"))); assert!(is_media_file(Path::new("video.mp4"))); assert!(is_media_file(Path::new("photo.PNG"))); assert!(!is_media_file(Path::new("document.txt"))); assert!(!is_media_file(Path::new("no_extension"))); } #[test] fn test_apple_double_excluded_from_media() { // The bug-of-record: ImageApi was shipping macOS AppleDouble // sidecars to Apollo's CLIP/face decoders, accumulating failed // rows and pinning Apollo's eviction timer with the 422 burst. // Predicate-level guard means every downstream walker // (face_watch, backfill, clip_watch, watcher) inherits the fix // without touching their filters. assert!(!is_image_file(Path::new("._photo.jpg"))); assert!(!is_image_file(Path::new("dir/._photo.JPG"))); assert!(!is_image_file(Path::new("a/b/._DSC_2182-S.jpg"))); assert!(!is_video_file(Path::new("._video.mp4"))); assert!(!is_media_file(Path::new("._photo.png"))); // A real file that merely starts with "_" (no leading dot) is // not AppleDouble — must NOT be filtered. assert!(is_image_file(Path::new("_photo.jpg"))); } #[test] fn test_ds_store_excluded() { // Finder per-directory metadata. No image extension so // is_image_file would already say false; the guard makes the // predicate's *reason* explicit and covers a hypothetical // future caller matching by basename. assert!(!is_image_file(Path::new(".DS_Store"))); assert!(!is_video_file(Path::new(".DS_Store"))); assert!(!is_media_file(Path::new("some/dir/.DS_Store"))); assert!(is_filesystem_metadata(Path::new(".DS_Store"))); assert!(is_filesystem_metadata(Path::new("dir/.DS_Store"))); } #[test] fn test_dotfiles_other_than_apple_double_are_unaffected() { // We deliberately scope to `._*` + the exact .DS_Store name — // not all dotfiles — because a user could plausibly name a // cover image `.cover.jpg` and we shouldn't silently drop it. // If that turns out to be wrong, broaden here; for now, // narrow + explicit > broad + surprising. assert!(is_image_file(Path::new(".cover.jpg"))); assert!(!is_filesystem_metadata(Path::new(".cover.jpg"))); } }