69268d03fe
LlamaCppClient gains text_to_speech (OpenAI /audio/speech), list_voices and
create_voice (voice library at the swap-root /upstream/<model>/voices
passthrough), plus a tts_model slot configured via LLAMA_SWAP_TTS_MODEL
(default "chatterbox").
New Claims-gated routes:
- POST /tts/speech -> { audio_base64, format } for data: URI playback
- GET /tts/voices -> voice library passthrough
- POST /tts/voices/upload -> clone a voice from an uploaded clip (multipart)
- POST /tts/voices/from-library -> clone from a library file (ffmpeg-extracts
audio from video; audio forwarded as-is)
Security: voice_name sanitized to [A-Za-z0-9_-] (it becomes an upstream
filename), 25 MB upload cap, library refs restricted to real audio/video,
path confined via is_valid_full_path. Adds is_audio_file + unit tests for the
sanitizer, mime guesser, and swap-root derivation.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
194 lines
7.4 KiB
Rust
194 lines
7.4 KiB
Rust
use std::path::Path;
|
|
use walkdir::DirEntry;
|
|
|
|
/// File extensions (without the leading dot) that this module treats as
/// images.
///
/// Lookups are case-insensitive on the path side; the entries themselves are
/// kept lowercase.
pub const IMAGE_EXTENSIONS: &[&str] = &[
    // Common raster formats.
    "jpg",
    "jpeg",
    "png",
    "webp",
    "tiff",
    "tif",
    // HEIF family and AVIF.
    "heif",
    "heic",
    "avif",
    // Camera RAW.
    "nef",
    "arw",
];
|
|
|
|
/// Image extensions that the `image` crate cannot decode.
///
/// Thumbnails for these go through ffmpeg instead, which either extracts an
/// embedded preview or decodes the frame.
pub const FFMPEG_THUMBNAIL_EXTENSIONS: &[&str] = &[
    // HEIF / HEIC.
    "heif",
    "heic",
    // Camera RAW.
    "nef",
    "arw",
];
|
|
|
|
/// Returns true if thumbnail generation should go through ffmpeg instead of
|
|
/// the `image` crate (RAW formats, HEIF/HEIC).
|
|
pub fn needs_ffmpeg_thumbnail(path: &Path) -> bool {
|
|
match path.extension().and_then(|e| e.to_str()) {
|
|
Some(ext) => FFMPEG_THUMBNAIL_EXTENSIONS.contains(&ext.to_lowercase().as_str()),
|
|
None => false,
|
|
}
|
|
}
|
|
|
|
/// File extensions (without the leading dot) that this module treats as
/// videos.
///
/// Lookups are case-insensitive on the path side; the entries themselves are
/// kept lowercase.
pub const VIDEO_EXTENSIONS: &[&str] = &[
    "mp4", //
    "mov", //
    "avi", //
    "mkv", //
];
|
|
|
|
/// Audio file extensions accepted as voice-clone references (TTS).
///
/// Based on the formats Chatterbox can decode (wav/mp3/flac/m4a/aac/ogg); the
/// list additionally carries the `oga` and `opus` extensions.
/// NOTE(review): `oga`/`opus` are presumably alternate extensions for
/// Ogg-container audio — confirm the upstream decoder accepts them.
pub const AUDIO_EXTENSIONS: &[&str] = &[
    "wav", //
    "mp3", //
    "flac", //
    "m4a", //
    "aac", //
    "ogg", //
    "oga", //
    "opus", //
];
|
|
|
|
/// Exact basenames of filesystem-metadata files that are not real media.
///
/// Matching is by whole basename. Add an entry when another platform's
/// sidecar needs hiding — Windows `Thumbs.db` / `desktop.ini` would belong
/// here too if such libraries are ever indexed.
const METADATA_FILENAMES: &[&str] = &[
    // macOS Finder per-directory view state.
    ".DS_Store",
];
|
|
|
|
/// True if the basename is a filesystem metadata sidecar that should be
|
|
/// invisible to every media predicate.
|
|
///
|
|
/// macOS writes `._<name>` AppleDouble companions when copying to
|
|
/// non-HFS volumes — each holds the extended attributes of `<name>`,
|
|
/// NOT a copy of the bytes. Same extension as the real file, so a
|
|
/// pure-extension match treats `._photo.jpg` as a JPEG, ships it to
|
|
/// the decoder, and accumulates failed rows: face_detections
|
|
/// `status='failed'`, clip_embedding `status='failed'`, plus a
|
|
/// pointless `image_exif` row whose `content_hash` will be the hash
|
|
/// of the metadata blob. The downstream noise (failed-row counts that
|
|
/// never go to zero, 422 bursts to Apollo, evictor timer reset by
|
|
/// those 422s) is the visible damage. `.DS_Store` is the per-directory
|
|
/// version (Finder view state) — no extension, but cheap to guard
|
|
/// here too in case some future predicate matches by content type.
|
|
pub fn is_filesystem_metadata(path: &Path) -> bool {
|
|
let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
|
|
return false;
|
|
};
|
|
name.starts_with("._") || METADATA_FILENAMES.contains(&name)
|
|
}
|
|
|
|
/// Check if a path has an image extension
|
|
pub fn is_image_file(path: &Path) -> bool {
|
|
if is_filesystem_metadata(path) {
|
|
return false;
|
|
}
|
|
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
|
|
let ext_lower = ext.to_lowercase();
|
|
IMAGE_EXTENSIONS.contains(&ext_lower.as_str())
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// Check if a path has a video extension
|
|
pub fn is_video_file(path: &Path) -> bool {
|
|
if is_filesystem_metadata(path) {
|
|
return false;
|
|
}
|
|
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
|
|
let ext_lower = ext.to_lowercase();
|
|
VIDEO_EXTENSIONS.contains(&ext_lower.as_str())
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// Check if a path has an audio extension (voice-clone references)
|
|
pub fn is_audio_file(path: &Path) -> bool {
|
|
if is_filesystem_metadata(path) {
|
|
return false;
|
|
}
|
|
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
|
|
let ext_lower = ext.to_lowercase();
|
|
AUDIO_EXTENSIONS.contains(&ext_lower.as_str())
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// Check if a path has a supported media extension (image or video)
|
|
pub fn is_media_file(path: &Path) -> bool {
|
|
is_image_file(path) || is_video_file(path)
|
|
}
|
|
|
|
/// Check if a DirEntry is an image file (for walkdir usage)
|
|
#[allow(dead_code)]
|
|
pub fn direntry_is_image(entry: &DirEntry) -> bool {
|
|
is_image_file(entry.path())
|
|
}
|
|
|
|
/// Check if a DirEntry is a video file (for walkdir usage)
|
|
#[allow(dead_code)]
|
|
pub fn direntry_is_video(entry: &DirEntry) -> bool {
|
|
is_video_file(entry.path())
|
|
}
|
|
|
|
/// Check if a DirEntry is a media file (for walkdir usage)
|
|
#[allow(dead_code)]
|
|
pub fn direntry_is_media(entry: &DirEntry) -> bool {
|
|
is_media_file(entry.path())
|
|
}
|
|
|
|
/// Unit tests for the extension-based media predicates and the
/// filesystem-metadata guard they share.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn test_is_image_file() {
        assert!(is_image_file(Path::new("photo.jpg")));
        assert!(is_image_file(Path::new("photo.JPG")));
        assert!(is_image_file(Path::new("photo.png")));
        assert!(is_image_file(Path::new("photo.nef")));
        assert!(!is_image_file(Path::new("video.mp4")));
        assert!(!is_image_file(Path::new("document.txt")));
    }

    #[test]
    fn test_is_video_file() {
        assert!(is_video_file(Path::new("video.mp4")));
        assert!(is_video_file(Path::new("video.MP4")));
        assert!(is_video_file(Path::new("video.mov")));
        assert!(is_video_file(Path::new("video.avi")));
        assert!(!is_video_file(Path::new("photo.jpg")));
        assert!(!is_video_file(Path::new("document.txt")));
    }

    #[test]
    fn test_is_audio_file() {
        assert!(is_audio_file(Path::new("voice.wav")));
        assert!(is_audio_file(Path::new("voice.MP3")));
        assert!(is_audio_file(Path::new("voice.flac")));
        assert!(is_audio_file(Path::new("dir/voice.opus")));
        assert!(!is_audio_file(Path::new("video.mp4")));
        assert!(!is_audio_file(Path::new("photo.jpg")));
        assert!(!is_audio_file(Path::new("document.txt")));
        assert!(!is_audio_file(Path::new("no_extension")));
    }

    #[test]
    fn test_is_media_file() {
        assert!(is_media_file(Path::new("photo.jpg")));
        assert!(is_media_file(Path::new("video.mp4")));
        assert!(is_media_file(Path::new("photo.PNG")));
        assert!(!is_media_file(Path::new("document.txt")));
        assert!(!is_media_file(Path::new("no_extension")));
    }

    #[test]
    fn test_audio_is_not_image_video_or_media() {
        // Audio is only a voice-clone reference type; the image/video/media
        // predicates must keep ignoring it.
        assert!(!is_image_file(Path::new("voice.wav")));
        assert!(!is_video_file(Path::new("voice.wav")));
        assert!(!is_media_file(Path::new("voice.wav")));
    }

    #[test]
    fn test_needs_ffmpeg_thumbnail() {
        assert!(needs_ffmpeg_thumbnail(Path::new("photo.heic")));
        assert!(needs_ffmpeg_thumbnail(Path::new("photo.HEIF")));
        assert!(needs_ffmpeg_thumbnail(Path::new("dir/photo.NEF")));
        assert!(needs_ffmpeg_thumbnail(Path::new("photo.arw")));
        assert!(!needs_ffmpeg_thumbnail(Path::new("photo.jpg")));
        assert!(!needs_ffmpeg_thumbnail(Path::new("video.mp4")));
        assert!(!needs_ffmpeg_thumbnail(Path::new("no_extension")));
    }

    #[test]
    fn test_apple_double_excluded_from_media() {
        // The bug-of-record: ImageApi was shipping macOS AppleDouble
        // sidecars to Apollo's CLIP/face decoders, accumulating failed
        // rows and pinning Apollo's eviction timer with the 422 burst.
        // Predicate-level guard means every downstream walker
        // (face_watch, backfill, clip_watch, watcher) inherits the fix
        // without touching their filters.
        assert!(!is_image_file(Path::new("._photo.jpg")));
        assert!(!is_image_file(Path::new("dir/._photo.JPG")));
        assert!(!is_image_file(Path::new("a/b/._DSC_2182-S.jpg")));
        assert!(!is_video_file(Path::new("._video.mp4")));
        assert!(!is_audio_file(Path::new("._voice.wav")));
        assert!(!is_media_file(Path::new("._photo.png")));
        // A real file that merely starts with "_" (no leading dot) is
        // not AppleDouble — must NOT be filtered.
        assert!(is_image_file(Path::new("_photo.jpg")));
    }

    #[test]
    fn test_ds_store_excluded() {
        // Finder per-directory metadata. No image extension so
        // is_image_file would already say false; the guard makes the
        // predicate's *reason* explicit and covers a hypothetical
        // future caller matching by basename.
        assert!(!is_image_file(Path::new(".DS_Store")));
        assert!(!is_video_file(Path::new(".DS_Store")));
        assert!(!is_audio_file(Path::new(".DS_Store")));
        assert!(!is_media_file(Path::new("some/dir/.DS_Store")));
        assert!(is_filesystem_metadata(Path::new(".DS_Store")));
        assert!(is_filesystem_metadata(Path::new("dir/.DS_Store")));
    }

    #[test]
    fn test_dotfiles_other_than_apple_double_are_unaffected() {
        // We deliberately scope to `._*` + the exact .DS_Store name —
        // not all dotfiles — because a user could plausibly name a
        // cover image `.cover.jpg` and we shouldn't silently drop it.
        // If that turns out to be wrong, broaden here; for now,
        // narrow + explicit > broad + surprising.
        assert!(is_image_file(Path::new(".cover.jpg")));
        assert!(!is_filesystem_metadata(Path::new(".cover.jpg")));
    }
}
|