diff --git a/CLAUDE.md b/CLAUDE.md index 70d4452..8a8dea4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -111,6 +111,15 @@ All database access goes through trait-based DAOs (e.g., `ExifDao`, `SqliteExifD 2. Creates 200x200 thumbnails in THUMBNAILS directory (mirrors source structure) 3. Videos: extracts frame at 3-second mark via ffmpeg 4. Images: uses `image` crate for JPEG/PNG processing +5. RAW formats (NEF/CR2/ARW/DNG/etc.): the `image` crate can't decode RAW + pixel data, so the pipeline pulls an embedded JPEG preview instead. Fast + path is `exif::read_jpeg_at_ifd` against IFD0 (PRIMARY) and IFD1 + (THUMBNAIL) — covers most older bodies and DNGs. Slow-path fallback shells + out to **`exiftool`** for `PreviewImage` / `JpgFromRaw` / `OtherImage`, + which reaches MakerNote / SubIFD-hosted previews kamadak-exif can't see + (e.g. Nikon's `PreviewIFD`, where modern Nikon bodies store the full-res + review JPEG). All candidates are pooled and the largest valid JPEG wins. + See `src/exif.rs::extract_embedded_jpeg_preview`. **File Watching:** Runs in background thread with two-tier strategy: @@ -364,6 +373,8 @@ Configurable env: ## Dependencies of Note +### Rust crates + - **actix-web**: HTTP framework - **diesel**: ORM for SQLite - **jsonwebtoken**: JWT implementation @@ -374,3 +385,18 @@ Configurable env: - **opentelemetry**: Distributed tracing - **bcrypt**: Password hashing - **infer**: Magic number file type detection + +### External binaries (must be on `PATH`) + +- **`ffmpeg`** — video thumbnail extraction (`StreamActor`, HLS pipeline) and + the HEIF/HEIC/NEF/ARW thumbnail fallback in `generate_image_thumbnail_ffmpeg`. + Required for any deploy that holds video or HEIF files. +- **`exiftool`** — optional but strongly recommended for RAW-heavy libraries. + The thumbnail pipeline shells out to it as the slow-path fallback for + embedded preview extraction (Nikon MakerNote `PreviewIFD`, Canon SubIFDs, + etc. — anything kamadak-exif's IFD0/IFD1 readers can't reach). 
Without + exiftool installed, RAWs whose preview lives outside IFD0/IFD1 will fall + through to ffmpeg, which often produces black thumbnails. Install via + package manager: `apt install libimage-exiftool-perl`, + `brew install exiftool`, `winget install OliverBetz.ExifTool`, or + `choco install exiftool`. diff --git a/README.md b/README.md index 31978d9..fceba81 100644 --- a/README.md +++ b/README.md @@ -28,14 +28,31 @@ Builds used in development: the `gyan.dev` full build on Windows, and distro `ff packages on Linux work fine. If HEIC thumbnails silently fail, check `ffmpeg -formats | grep heif` to confirm HEIF support. -### RAW photo thumbnails (no extra dependency) +### RAW photo thumbnails RAW formats (ARW, NEF, CR2, CR3, DNG, RAF, ORF, RW2, PEF, SRW, TIFF) are thumbnailed -by reading the embedded JPEG preview from the TIFF IFD1 using `kamadak-exif`. No -external RAW decoder (libraw / dcraw) is required. Files without an embedded preview -fall back to ffmpeg (works for most NEF files), and anything that still can't be -decoded is marked with a `.unsupported` sentinel in the thumbnail directory -so we don't retry it every scan. Delete those sentinels to force retries after a -tooling upgrade. +by reading an embedded JPEG preview out of the TIFF container — no external RAW +decoder (libraw / dcraw) is involved. The pipeline tries two layers in order and +keeps the largest valid JPEG: + +1. **Fast path (no extra dependency)** — `kamadak-exif` reads + `JPEGInterchangeFormat` from IFD0 / IFD1 directly. Covers older bodies and + most DNGs. +2. **`exiftool` fallback (recommended for RAW-heavy libraries)** — shells out + to extract `PreviewImage` / `JpgFromRaw` / `OtherImage`, which reaches + MakerNote and SubIFD-hosted previews kamadak-exif can't see (e.g. Nikon's + `PreviewIFD`, where modern Nikon bodies stash the full-res review JPEG). + If `exiftool` isn't on `PATH` this layer is skipped silently and only the + fast-path result is used. 
+ +Install `exiftool` via your package manager: +- macOS: `brew install exiftool` +- Linux (Debian/Ubuntu): `apt install libimage-exiftool-perl` +- Windows: `winget install OliverBetz.ExifTool` or `choco install exiftool` + +Files where neither layer produces a valid preview fall back to ffmpeg. Anything +that still can't be decoded is marked with a `.unsupported` sentinel in +the thumbnail directory so we don't retry it every scan. Delete those sentinels +(and any cached black thumbnails) to force retries after a tooling upgrade. ## Environment There are a handful of required environment variables to have the API run. diff --git a/src/exif.rs b/src/exif.rs index eca6eb7..b7f7112 100644 --- a/src/exif.rs +++ b/src/exif.rs @@ -1,6 +1,7 @@ use std::fs::File; use std::io::{BufReader, Read, Seek, SeekFrom}; use std::path::Path; +use std::process::Command; use anyhow::{Result, anyhow}; use exif::{In, Reader, Tag, Value}; @@ -28,7 +29,7 @@ pub struct ExifData { /// TIFF-based RAW formats where `JPEGInterchangeFormat` offsets are /// absolute file offsets (the file itself is a TIFF container). -fn is_tiff_raw(path: &Path) -> bool { +pub fn is_tiff_raw(path: &Path) -> bool { matches!( path.extension() .and_then(|e| e.to_str()) @@ -40,26 +41,18 @@ fn is_tiff_raw(path: &Path) -> bool { ) } -/// Returns the bytes of the embedded JPEG thumbnail in a TIFF-based RAW or -/// TIFF file. Used to thumbnail formats whose RAW pixel data can't be decoded -/// by our normal tools (e.g. Sony ARW). Returns `None` if no preview is -/// present, the file isn't a TIFF container, or the data doesn't look like -/// a valid JPEG. 
-pub fn extract_embedded_jpeg_preview(path: &Path) -> Option> { - if !is_tiff_raw(path) { - return None; - } - - let file = File::open(path).ok()?; - let mut bufreader = BufReader::new(file); - let exif = Reader::new().read_from_container(&mut bufreader).ok()?; - +/// Read the JPEG bytes pointed to by `JPEGInterchangeFormat` / +/// `JPEGInterchangeFormatLength` in a single IFD. Returns `None` on any +/// failure: tags missing, length zero, file read failure, or bytes that +/// don't start with the JPEG SOI marker (some MakerNote pointers reference +/// TIFF-wrapped previews or other non-JPEG payloads we can't load). +fn read_jpeg_at_ifd(exif: &exif::Exif, path: &Path, ifd: In) -> Option> { let offset = exif - .get_field(Tag::JPEGInterchangeFormat, In::THUMBNAIL)? + .get_field(Tag::JPEGInterchangeFormat, ifd)? .value .get_uint(0)?; let length = exif - .get_field(Tag::JPEGInterchangeFormatLength, In::THUMBNAIL)? + .get_field(Tag::JPEGInterchangeFormatLength, ifd)? .value .get_uint(0)?; if length == 0 { @@ -71,8 +64,6 @@ pub fn extract_embedded_jpeg_preview(path: &Path) -> Option> { let mut buf = vec![0u8; length as usize]; file.read_exact(&mut buf).ok()?; - // JPEG SOI marker sanity check — MakerNote offsets sometimes point at - // TIFF-wrapped previews or other non-JPEG data. if buf.len() < 2 || buf[0] != 0xFF || buf[1] != 0xD8 { return None; } @@ -80,6 +71,95 @@ pub fn extract_embedded_jpeg_preview(path: &Path) -> Option> { Some(buf) } +/// Tags exiftool exposes for embedded JPEG previews, in priority order. The +/// largest valid JPEG returned by any of them wins. Different camera makers +/// stash their largest preview under different names: Nikon's full-res +/// preview lives under `PreviewImage` in the MakerNote `PreviewIFD`, Canon / +/// Sony often expose theirs as `JpgFromRaw`, and `OtherImage` is a catch-all +/// some sub-IFD chains use. 
+const EXIFTOOL_PREVIEW_TAGS: &[&str] = &["PreviewImage", "JpgFromRaw", "OtherImage"]; + +/// Shell out to `exiftool -b -` for one tag. Returns the response bytes +/// only if exiftool succeeded AND the bytes start with the JPEG SOI marker +/// (some MakerNote tags hold TIFF-wrapped previews or other non-JPEG payloads +/// we can't load). +fn extract_exiftool_tag(path: &Path, tag: &str) -> Option> { + let output = Command::new("exiftool") + .arg("-b") + .arg(format!("-{}", tag)) + .arg(path) + .output() + .ok()?; + + if !output.status.success() { + return None; + } + let bytes = output.stdout; + if bytes.len() < 2 || bytes[0] != 0xFF || bytes[1] != 0xD8 { + return None; + } + Some(bytes) +} + +/// Try each EXIFTOOL_PREVIEW_TAGS in turn and return the largest valid JPEG. +/// If `exiftool` isn't on PATH the very first spawn returns `None` and we +/// silently bail — callers fall back to whatever the IFD0/IFD1 fast path +/// found. +fn extract_preview_via_exiftool(path: &Path) -> Option> { + let mut best: Option> = None; + for &tag in EXIFTOOL_PREVIEW_TAGS { + let Some(bytes) = extract_exiftool_tag(path, tag) else { + continue; + }; + match &best { + None => best = Some(bytes), + Some(b) if b.len() < bytes.len() => best = Some(bytes), + _ => {} + } + } + best +} + +/// Returns the bytes of the embedded JPEG preview in a TIFF-based RAW or +/// TIFF file. Used to thumbnail formats whose RAW pixel data can't be decoded +/// by our normal tools (e.g. Sony ARW), and to serve a usable full-size +/// image for clients that can't decode the RAW container directly. Returns +/// `None` if no preview is present, the file isn't a TIFF container, or the +/// data doesn't look like a valid JPEG. +/// +/// Strategy: +/// 1. Fast path: read `JPEGInterchangeFormat` from IFD0 (PRIMARY) and IFD1 +/// (THUMBNAIL) directly via kamadak-exif. No subprocess, no external +/// dependency. +/// 2. 
Slow path: shell out to `exiftool -b -<TAG>` for each of +/// `PreviewImage` / `JpgFromRaw` / `OtherImage`. kamadak-exif can't +/// reach SubIFDs or MakerNote sub-IFDs, but most modern Nikon bodies +/// stash their large preview JPEG in the Nikon MakerNote's PreviewIFD; +/// Canon / Sony often use `JpgFromRaw` in a SubIFD chain. Skipped +/// gracefully if exiftool isn't on PATH. +/// +/// All candidates are pooled and the largest valid JPEG wins, so a deploy +/// without exiftool degrades to "fast-path only" behavior rather than +/// breaking outright. +/// +/// NOTE(review): the kamadak-exif container read is unwrapped with `?` +/// before `extract_preview_via_exiftool` is called, so a file that +/// kamadak-exif fails to open or parse returns `None` without the exiftool +/// path being tried. +pub fn extract_embedded_jpeg_preview(path: &Path) -> Option> { + if !is_tiff_raw(path) { + return None; + } + + let file = File::open(path).ok()?; + let mut bufreader = BufReader::new(file); + let exif = Reader::new().read_from_container(&mut bufreader).ok()?; + + let primary = read_jpeg_at_ifd(&exif, path, In::PRIMARY); + let thumbnail = read_jpeg_at_ifd(&exif, path, In::THUMBNAIL); + let exiftool = extract_preview_via_exiftool(path); + + [primary, thumbnail, exiftool] + .into_iter() + .flatten() + .max_by_key(|v| v.len()) +} + pub fn supports_exif(path: &Path) -> bool { if let Some(ext) = path.extension() { let ext_lower = ext.to_string_lossy().to_lowercase(); diff --git a/src/main.rs b/src/main.rs index 5e87129..12a4003 100644 --- a/src/main.rs +++ b/src/main.rs @@ -215,6 +215,23 @@ async fn get_image( } } + // Full-size requests for RAW formats (NEF/CR2/ARW/etc.) can't just + // NamedFile-stream the original bytes — browsers won't decode the + // RAW container, so an `<img>` tag lands as a broken image. Serve + // the embedded JPEG preview instead (typically the camera's in-body + // review JPEG, ~1–2 MP). Falls through to NamedFile if no preview is + // available, which preserves the historical behavior for callers + // that genuinely want the original bytes. 
+ if image_size == PhotoSize::Full && exif::is_tiff_raw(&path) { + if let Some(preview) = exif::extract_embedded_jpeg_preview(&path) { + span.set_status(Status::Ok); + return HttpResponse::Ok() + .content_type("image/jpeg") + .insert_header(("Cache-Control", "public, max-age=3600")) + .body(preview); + } + } + if let Ok(file) = NamedFile::open(&path) { span.set_status(Status::Ok); // Enable ETag and set cache headers for full images (1 hour cache)