Files
ImageApi/src/file_types.rs
T
Cameron Cordes 69268d03fe Add TTS endpoints backed by Chatterbox via llama-swap
LlamaCppClient gains text_to_speech (OpenAI /audio/speech), list_voices and
create_voice (voice library at the swap-root /upstream/<model>/voices
passthrough), plus a tts_model slot configured via LLAMA_SWAP_TTS_MODEL
(default "chatterbox").

New Claims-gated routes:
- POST /tts/speech        -> { audio_base64, format } for data: URI playback
- GET  /tts/voices        -> voice library passthrough
- POST /tts/voices/upload -> clone a voice from an uploaded clip (multipart)
- POST /tts/voices/from-library -> clone from a library file (ffmpeg-extracts
  audio from video; audio forwarded as-is)

Security: voice_name sanitized to [A-Za-z0-9_-] (it becomes an upstream
filename), 25 MB upload cap, library refs restricted to real audio/video,
path confined via is_valid_full_path. Adds is_audio_file + unit tests for the
sanitizer, mime guesser, and swap-root derivation.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 22:04:42 -04:00

194 lines
7.4 KiB
Rust

use std::path::Path;
use walkdir::DirEntry;
/// Supported image file extensions.
///
/// Entries are lowercase and carry no leading dot: [`is_image_file`]
/// lowercases a path's extension before looking it up in this list, so an
/// uppercase entry here would never match. The subset that also appears in
/// [`FFMPEG_THUMBNAIL_EXTENSIONS`] is thumbnailed through ffmpeg rather than
/// the `image` crate.
pub const IMAGE_EXTENSIONS: &[&str] = &[
"jpg", "jpeg", "png", "webp", "tiff", "tif", "heif", "heic", "avif", "nef", "arw",
];
/// Image extensions that the `image` crate is unable to decode.
///
/// Thumbnails for these are produced with ffmpeg instead, which either
/// extracts an embedded preview or decodes the frame. Entries are lowercase
/// because [`needs_ffmpeg_thumbnail`] lowercases the extension before the
/// lookup.
pub const FFMPEG_THUMBNAIL_EXTENSIONS: &[&str] = &["heif", "heic", "nef", "arw"];

/// Reports whether the thumbnail for `path` has to be generated by ffmpeg
/// rather than by the `image` crate (RAW formats, HEIF/HEIC).
///
/// Only the extension is inspected, and the comparison is case-insensitive.
/// A path with no extension, or with an extension that is not valid UTF-8,
/// yields `false`.
///
/// Note: this function does not consult `is_filesystem_metadata`; it answers
/// "which decoder", not "is this real media".
pub fn needs_ffmpeg_thumbnail(path: &Path) -> bool {
    let Some(raw_ext) = path.extension().and_then(|os| os.to_str()) else {
        return false;
    };
    let lowered = raw_ext.to_lowercase();
    FFMPEG_THUMBNAIL_EXTENSIONS
        .iter()
        .any(|known| *known == lowered.as_str())
}
/// Supported video file extensions.
///
/// Entries are lowercase and carry no leading dot: [`is_video_file`]
/// lowercases a path's extension before looking it up in this list.
pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"];
/// Audio file extensions accepted as voice-clone references (TTS). Mirrors
/// the formats Chatterbox can decode (wav/mp3/flac/m4a/aac/ogg).
///
/// Entries are lowercase and carry no leading dot: [`is_audio_file`]
/// lowercases a path's extension before looking it up in this list.
///
/// NOTE(review): `oga` and `opus` are accepted here but are not named in the
/// Chatterbox format list above — presumably they are treated as Ogg
/// variants; confirm the upstream actually decodes them.
pub const AUDIO_EXTENSIONS: &[&str] = &["wav", "mp3", "flac", "m4a", "aac", "ogg", "oga", "opus"];
/// Basenames that are filesystem bookkeeping rather than real media,
/// compared by exact match. New platform sidecars belong in this list
/// (Windows `Thumbs.db` / `desktop.ini` would go here too if such libraries
/// are ever indexed).
const METADATA_FILENAMES: &[&str] = &[".DS_Store"];

/// Reports whether `path`'s basename is a filesystem metadata sidecar, which
/// every media predicate should treat as invisible.
///
/// Two kinds of name are recognised:
///
/// * `._<name>` — AppleDouble companions that macOS writes when copying to a
///   non-HFS volume. Each one stores the extended attributes of `<name>`,
///   not its bytes, yet it carries the same extension. A purely
///   extension-based check would therefore take `._photo.jpg` for a JPEG and
///   hand it to the decoder, leaving behind failed rows (face_detections
///   `status='failed'`, clip_embedding `status='failed'`) and a useless
///   `image_exif` row whose `content_hash` is the hash of the metadata blob.
///   The visible fallout is failed-row counts that never reach zero, bursts
///   of 422s to Apollo, and an evictor timer that those 422s keep resetting.
/// * `.DS_Store` — Finder's per-directory view state. It has no media
///   extension, but guarding it here is cheap and protects any future
///   predicate that matches by content type instead.
///
/// Returns `false` when the path has no final component or the basename is
/// not valid UTF-8.
pub fn is_filesystem_metadata(path: &Path) -> bool {
    match path.file_name().and_then(|os| os.to_str()) {
        Some(basename) => {
            let is_apple_double = basename.starts_with("._");
            is_apple_double || METADATA_FILENAMES.iter().any(|known| *known == basename)
        }
        None => false,
    }
}
/// Reports whether `path` names an image file, judged by its extension.
///
/// The extension is lowercased and looked up in `IMAGE_EXTENSIONS`, so the
/// match is case-insensitive. Filesystem metadata sidecars (see
/// `is_filesystem_metadata`) are rejected even when their extension matches.
/// A path with no extension, or a non-UTF-8 one, yields `false`.
pub fn is_image_file(path: &Path) -> bool {
    // Sidecars can carry a real image extension, so rule them out first.
    if is_filesystem_metadata(path) {
        return false;
    }
    match path.extension().and_then(|os| os.to_str()) {
        Some(raw_ext) => {
            let lowered = raw_ext.to_lowercase();
            IMAGE_EXTENSIONS
                .iter()
                .any(|known| *known == lowered.as_str())
        }
        None => false,
    }
}
/// Reports whether `path` names a video file, judged by its extension.
///
/// The extension is lowercased and looked up in `VIDEO_EXTENSIONS`, so the
/// match is case-insensitive. Filesystem metadata sidecars (see
/// `is_filesystem_metadata`) are rejected even when their extension matches.
/// A path with no extension, or a non-UTF-8 one, yields `false`.
pub fn is_video_file(path: &Path) -> bool {
    // Sidecars can carry a real video extension, so rule them out first.
    if is_filesystem_metadata(path) {
        return false;
    }
    match path.extension().and_then(|os| os.to_str()) {
        Some(raw_ext) => {
            let lowered = raw_ext.to_lowercase();
            VIDEO_EXTENSIONS
                .iter()
                .any(|known| *known == lowered.as_str())
        }
        None => false,
    }
}
/// Reports whether `path` names an audio file usable as a voice-clone
/// reference, judged by its extension.
///
/// The extension is lowercased and looked up in `AUDIO_EXTENSIONS`, so the
/// match is case-insensitive. Filesystem metadata sidecars (see
/// `is_filesystem_metadata`) are rejected even when their extension matches.
/// A path with no extension, or a non-UTF-8 one, yields `false`.
pub fn is_audio_file(path: &Path) -> bool {
    // Sidecars can carry a real audio extension, so rule them out first.
    if is_filesystem_metadata(path) {
        return false;
    }
    match path.extension().and_then(|os| os.to_str()) {
        Some(raw_ext) => {
            let lowered = raw_ext.to_lowercase();
            AUDIO_EXTENSIONS
                .iter()
                .any(|known| *known == lowered.as_str())
        }
        None => false,
    }
}
/// Reports whether `path` has a supported media extension, meaning it is
/// accepted by either `is_image_file` or `is_video_file`.
///
/// Audio files are not counted as media by this predicate.
pub fn is_media_file(path: &Path) -> bool {
    if is_image_file(path) {
        return true;
    }
    is_video_file(path)
}
/// walkdir adapter: reports whether `entry` refers to an image file.
///
/// Delegates to `is_image_file` on the entry's path.
// NOTE(review): `dead_code` is allowed presumably because this helper may
// have no caller in some builds — confirm it is still wanted.
#[allow(dead_code)]
pub fn direntry_is_image(entry: &DirEntry) -> bool {
    let entry_path = entry.path();
    is_image_file(entry_path)
}
/// walkdir adapter: reports whether `entry` refers to a video file.
///
/// Delegates to `is_video_file` on the entry's path.
// NOTE(review): `dead_code` is allowed presumably because this helper may
// have no caller in some builds — confirm it is still wanted.
#[allow(dead_code)]
pub fn direntry_is_video(entry: &DirEntry) -> bool {
    let entry_path = entry.path();
    is_video_file(entry_path)
}
/// walkdir adapter: reports whether `entry` refers to a media file (image
/// or video).
///
/// Delegates to `is_media_file` on the entry's path.
// NOTE(review): `dead_code` is allowed presumably because this helper may
// have no caller in some builds — confirm it is still wanted.
#[allow(dead_code)]
pub fn direntry_is_media(entry: &DirEntry) -> bool {
    let entry_path = entry.path();
    is_media_file(entry_path)
}
#[cfg(test)]
mod tests {
    //! Unit tests for the extension-based media predicates and the
    //! filesystem-metadata guard.
    //!
    //! NOTE(review): nothing in this module exercises `is_audio_file` or
    //! `needs_ffmpeg_thumbnail` — consider adding cases for them.

    use super::*;
    use std::path::Path;

    /// Runs `is_image_file` on a string path.
    fn looks_like_image(raw: &str) -> bool {
        is_image_file(Path::new(raw))
    }

    /// Runs `is_video_file` on a string path.
    fn looks_like_video(raw: &str) -> bool {
        is_video_file(Path::new(raw))
    }

    /// Runs `is_media_file` on a string path.
    fn looks_like_media(raw: &str) -> bool {
        is_media_file(Path::new(raw))
    }

    /// Runs `is_filesystem_metadata` on a string path.
    fn looks_like_metadata(raw: &str) -> bool {
        is_filesystem_metadata(Path::new(raw))
    }

    #[test]
    fn test_is_image_file() {
        // Accepted, including an uppercase extension and a RAW format.
        assert!(looks_like_image("photo.jpg"));
        assert!(looks_like_image("photo.JPG"));
        assert!(looks_like_image("photo.png"));
        assert!(looks_like_image("photo.nef"));
        // Rejected: video and non-media.
        assert!(!looks_like_image("video.mp4"));
        assert!(!looks_like_image("document.txt"));
    }

    #[test]
    fn test_is_video_file() {
        // Accepted, including an uppercase extension.
        assert!(looks_like_video("video.mp4"));
        assert!(looks_like_video("video.MP4"));
        assert!(looks_like_video("video.mov"));
        assert!(looks_like_video("video.avi"));
        // Rejected: image and non-media.
        assert!(!looks_like_video("photo.jpg"));
        assert!(!looks_like_video("document.txt"));
    }

    #[test]
    fn test_is_media_file() {
        assert!(looks_like_media("photo.jpg"));
        assert!(looks_like_media("video.mp4"));
        assert!(looks_like_media("photo.PNG"));
        assert!(!looks_like_media("document.txt"));
        assert!(!looks_like_media("no_extension"));
    }

    #[test]
    fn test_apple_double_excluded_from_media() {
        // Regression guard for the bug of record: ImageApi used to ship
        // macOS AppleDouble sidecars to Apollo's CLIP/face decoders, which
        // piled up failed rows and pinned Apollo's eviction timer with the
        // resulting 422 burst. Because the guard lives in the predicates,
        // every downstream walker (face_watch, backfill, clip_watch,
        // watcher) inherits the fix without changing its own filters.
        assert!(!looks_like_image("._photo.jpg"));
        assert!(!looks_like_image("dir/._photo.JPG"));
        assert!(!looks_like_image("a/b/._DSC_2182-S.jpg"));
        assert!(!looks_like_video("._video.mp4"));
        assert!(!looks_like_media("._photo.png"));
        // A leading "_" without the dot is an ordinary file name, not
        // AppleDouble, so it must still be accepted.
        assert!(looks_like_image("_photo.jpg"));
    }

    #[test]
    fn test_ds_store_excluded() {
        // Finder's per-directory metadata. It has no image extension, so
        // is_image_file would say false anyway; the guard makes the reason
        // explicit and covers a hypothetical future caller that matches by
        // basename.
        assert!(!looks_like_image(".DS_Store"));
        assert!(!looks_like_video(".DS_Store"));
        assert!(!looks_like_media("some/dir/.DS_Store"));
        assert!(looks_like_metadata(".DS_Store"));
        assert!(looks_like_metadata("dir/.DS_Store"));
    }

    #[test]
    fn test_dotfiles_other_than_apple_double_are_unaffected() {
        // The filter is deliberately limited to `._*` plus the exact
        // `.DS_Store` name rather than all dotfiles: a user could plausibly
        // name a cover image `.cover.jpg`, and it should not be dropped
        // silently. Broaden here if that turns out to be wrong; for now,
        // narrow and explicit beats broad and surprising.
        assert!(looks_like_image(".cover.jpg"));
        assert!(!looks_like_metadata(".cover.jpg"));
    }
}