From 00b3c80141fe51b7948e75b821ea21d7323e899d Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Tue, 28 Apr 2026 16:52:10 +0000 Subject: [PATCH 1/2] RAW: try IFD0 + IFD1 for embedded preview, serve at full size The thumbnail pipeline's embedded-JPEG extractor only checked IFD1 (THUMBNAIL), which on many Nikon NEFs is missing or zero-length even when IFD0 (PRIMARY) carries a perfectly good 1-2 MP reduced-resolution preview the camera writes for in-body review. The previous behavior produced black thumbs on disk: the buggy IFD1 pointer resolved to a short byte sequence that happened to satisfy the SOI sanity check, image::load_from_memory accepted it, and the resize path quietly wrote a black JPEG. Now both IFDs are checked and the larger valid JPEG wins. Format-agnostic: applies to every TIFF-based RAW (NEF / ARW / CR2 / DNG / RAF / ORF / RW2 / PEF / SRW / TIFF). is_tiff_raw is now pub so main.rs can gate its full-size handler on it. Also extends the /image handler so size=full requests for RAW formats serve the embedded preview as image/jpeg instead of NamedFile-streaming the original RAW bytes - browsers can't decode a .nef container, so <img> would otherwise land as a broken image. Falls through to NamedFile if no preview is present, preserving the historical behavior for callers that genuinely want the original bytes. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/exif.rs | 61 ++++++++++++++++++++++++++++++++++++----------------- src/main.rs | 17 +++++++++++++++ 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/src/exif.rs b/src/exif.rs index eca6eb7..6b14cca 100644 --- a/src/exif.rs +++ b/src/exif.rs @@ -28,7 +28,7 @@ pub struct ExifData { /// TIFF-based RAW formats where `JPEGInterchangeFormat` offsets are /// absolute file offsets (the file itself is a TIFF container). 
-fn is_tiff_raw(path: &Path) -> bool { +pub fn is_tiff_raw(path: &Path) -> bool { matches!( path.extension() .and_then(|e| e.to_str()) @@ -40,26 +40,18 @@ fn is_tiff_raw(path: &Path) -> bool { ) } -/// Returns the bytes of the embedded JPEG thumbnail in a TIFF-based RAW or -/// TIFF file. Used to thumbnail formats whose RAW pixel data can't be decoded -/// by our normal tools (e.g. Sony ARW). Returns `None` if no preview is -/// present, the file isn't a TIFF container, or the data doesn't look like -/// a valid JPEG. -pub fn extract_embedded_jpeg_preview(path: &Path) -> Option<Vec<u8>> { - if !is_tiff_raw(path) { - return None; - } - - let file = File::open(path).ok()?; - let mut bufreader = BufReader::new(file); - let exif = Reader::new().read_from_container(&mut bufreader).ok()?; - +/// Read the JPEG bytes pointed to by `JPEGInterchangeFormat` / +/// `JPEGInterchangeFormatLength` in a single IFD. Returns `None` on any +/// failure: tags missing, length zero, file read failure, or bytes that +/// don't start with the JPEG SOI marker (some MakerNote pointers reference +/// TIFF-wrapped previews or other non-JPEG payloads we can't load). +fn read_jpeg_at_ifd(exif: &exif::Exif, path: &Path, ifd: In) -> Option<Vec<u8>> { let offset = exif - .get_field(Tag::JPEGInterchangeFormat, In::THUMBNAIL)? + .get_field(Tag::JPEGInterchangeFormat, ifd)? .value .get_uint(0)?; let length = exif - .get_field(Tag::JPEGInterchangeFormatLength, In::THUMBNAIL)? + .get_field(Tag::JPEGInterchangeFormatLength, ifd)? .value .get_uint(0)?; if length == 0 { @@ -71,8 +63,6 @@ pub fn extract_embedded_jpeg_preview(path: &Path) -> Option<Vec<u8>> { let mut buf = vec![0u8; length as usize]; file.read_exact(&mut buf).ok()?; - // JPEG SOI marker sanity check — MakerNote offsets sometimes point at - // TIFF-wrapped previews or other non-JPEG data. 
if buf.len() < 2 || buf[0] != 0xFF || buf[1] != 0xD8 { return None; } @@ -80,6 +70,39 @@ pub fn extract_embedded_jpeg_preview(path: &Path) -> Option<Vec<u8>> { Some(buf) } +/// Returns the bytes of the embedded JPEG preview in a TIFF-based RAW or +/// TIFF file. Used to thumbnail formats whose RAW pixel data can't be decoded +/// by our normal tools (e.g. Sony ARW), and to serve a usable full-size +/// image for clients that can't decode the RAW container directly. Returns +/// `None` if no preview is present, the file isn't a TIFF container, or the +/// data doesn't look like a valid JPEG. +/// +/// Both IFD0 (PRIMARY) and IFD1 (THUMBNAIL) are checked, preferring the +/// larger valid JPEG. Conventions vary by camera: most modern Nikon NEFs +/// expose the larger reduced-resolution preview (~1–2 MP) via IFD0 and a +/// small chip via IFD1; some bodies leave one or the other empty or zero- +/// length, and an earlier THUMBNAIL-only implementation produced black +/// thumbnails for any NEF whose IFD1 thumbnail was missing or corrupted. +pub fn extract_embedded_jpeg_preview(path: &Path) -> Option<Vec<u8>> { + if !is_tiff_raw(path) { + return None; + } + + let file = File::open(path).ok()?; + let mut bufreader = BufReader::new(file); + let exif = Reader::new().read_from_container(&mut bufreader).ok()?; + + let primary = read_jpeg_at_ifd(&exif, path, In::PRIMARY); + let thumbnail = read_jpeg_at_ifd(&exif, path, In::THUMBNAIL); + + match (primary, thumbnail) { + (Some(p), Some(t)) => Some(if p.len() >= t.len() { p } else { t }), + (Some(p), None) => Some(p), + (None, Some(t)) => Some(t), + (None, None) => None, + } +} + pub fn supports_exif(path: &Path) -> bool { if let Some(ext) = path.extension() { let ext_lower = ext.to_string_lossy().to_lowercase(); diff --git a/src/main.rs b/src/main.rs index 5e87129..12a4003 100644 --- a/src/main.rs +++ b/src/main.rs @@ -215,6 +215,23 @@ async fn get_image( } } + // Full-size requests for RAW formats (NEF/CR2/ARW/etc.) 
can't just + // NamedFile-stream the original bytes — browsers won't decode the + // RAW container, so a `<img>` lands as a broken image. Serve + // the embedded JPEG preview instead (typically the camera's in-body + // review JPEG, ~1–2 MP). Falls through to NamedFile if no preview is + // available, which preserves the historical behavior for callers + // that genuinely want the original bytes. + if image_size == PhotoSize::Full && exif::is_tiff_raw(&path) { + if let Some(preview) = exif::extract_embedded_jpeg_preview(&path) { + span.set_status(Status::Ok); + return HttpResponse::Ok() + .content_type("image/jpeg") + .insert_header(("Cache-Control", "public, max-age=3600")) + .body(preview); + } + } + if let Ok(file) = NamedFile::open(&path) { span.set_status(Status::Ok); // Enable ETag and set cache headers for full images (1 hour cache) From 6521a328bf464071e46307bc3c33d40b000fb5cc Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Tue, 28 Apr 2026 17:13:36 +0000 Subject: [PATCH 2/2] RAW preview: exiftool fallback for MakerNote / SubIFD previews MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kamadak-exif's In::PRIMARY / In::THUMBNAIL only address IFD0 and IFD1. On modern Nikon NEFs the full-res review JPEG lives in the MakerNote's PreviewIFD (and many Canon CR2s / DNGs put theirs in a SubIFD chain) — both unreachable through the existing reader, so the previous patch still produced no preview for those files and the pipeline fell through to ffmpeg, which writes black frames when it can't decode the RAW. Add a slow-path layer in extract_embedded_jpeg_preview that shells out to exiftool for PreviewImage / JpgFromRaw / OtherImage (one process per tag). All candidates from both layers are pooled and the largest valid JPEG wins. exiftool not on PATH degrades to fast-path-only behavior rather than breaking — the fallback is a strict superset. 
Documented the new optional dependency in README.md and CLAUDE.md with install commands for apt / brew / winget / choco. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 26 +++++++++++++++++ README.md | 31 +++++++++++++++----- src/exif.rs | 81 +++++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 119 insertions(+), 19 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 70d4452..8a8dea4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -111,6 +111,15 @@ All database access goes through trait-based DAOs (e.g., `ExifDao`, `SqliteExifD 2. Creates 200x200 thumbnails in THUMBNAILS directory (mirrors source structure) 3. Videos: extracts frame at 3-second mark via ffmpeg 4. Images: uses `image` crate for JPEG/PNG processing +5. RAW formats (NEF/CR2/ARW/DNG/etc.): the `image` crate can't decode RAW + pixel data, so the pipeline pulls an embedded JPEG preview instead. Fast + path is `exif::read_jpeg_at_ifd` against IFD0 (PRIMARY) and IFD1 + (THUMBNAIL) — covers most older bodies and DNGs. Slow-path fallback shells + out to **`exiftool`** for `PreviewImage` / `JpgFromRaw` / `OtherImage`, + which reaches MakerNote / SubIFD-hosted previews kamadak-exif can't see + (e.g. Nikon's `PreviewIFD`, where modern Nikon bodies store the full-res + review JPEG). All candidates are pooled and the largest valid JPEG wins. + See `src/exif.rs::extract_embedded_jpeg_preview`. 
**File Watching:** Runs in background thread with two-tier strategy: @@ -364,6 +373,8 @@ Configurable env: ## Dependencies of Note +### Rust crates + - **actix-web**: HTTP framework - **diesel**: ORM for SQLite - **jsonwebtoken**: JWT implementation @@ -374,3 +385,18 @@ Configurable env: - **opentelemetry**: Distributed tracing - **bcrypt**: Password hashing - **infer**: Magic number file type detection + +### External binaries (must be on `PATH`) + +- **`ffmpeg`** — video thumbnail extraction (`StreamActor`, HLS pipeline) and + the HEIF/HEIC/NEF/ARW thumbnail fallback in `generate_image_thumbnail_ffmpeg`. + Required for any deploy that holds video or HEIF files. +- **`exiftool`** — optional but strongly recommended for RAW-heavy libraries. + The thumbnail pipeline shells out to it as the slow-path fallback for + embedded preview extraction (Nikon MakerNote `PreviewIFD`, Canon SubIFDs, + etc. — anything kamadak-exif's IFD0/IFD1 readers can't reach). Without + exiftool installed, RAWs whose preview lives outside IFD0/IFD1 will fall + through to ffmpeg, which often produces black thumbnails. Install via + package manager: `apt install libimage-exiftool-perl`, + `brew install exiftool`, `winget install OliverBetz.ExifTool`, or + `choco install exiftool`. diff --git a/README.md b/README.md index 31978d9..fceba81 100644 --- a/README.md +++ b/README.md @@ -28,14 +28,31 @@ Builds used in development: the `gyan.dev` full build on Windows, and distro `ff packages on Linux work fine. If HEIC thumbnails silently fail, check `ffmpeg -formats | grep heif` to confirm HEIF support. -### RAW photo thumbnails (no extra dependency) +### RAW photo thumbnails RAW formats (ARW, NEF, CR2, CR3, DNG, RAF, ORF, RW2, PEF, SRW, TIFF) are thumbnailed -by reading the embedded JPEG preview from the TIFF IFD1 using `kamadak-exif`. No -external RAW decoder (libraw / dcraw) is required. 
Files without an embedded preview -fall back to ffmpeg (works for most NEF files), and anything that still can't be -decoded is marked with a `.unsupported` sentinel in the thumbnail directory -so we don't retry it every scan. Delete those sentinels to force retries after a -tooling upgrade. +by reading an embedded JPEG preview out of the TIFF container — no external RAW +decoder (libraw / dcraw) is involved. The pipeline tries two layers in order and +keeps the largest valid JPEG: + +1. **Fast path (no extra dependency)** — `kamadak-exif` reads + `JPEGInterchangeFormat` from IFD0 / IFD1 directly. Covers older bodies and + most DNGs. +2. **`exiftool` fallback (recommended for RAW-heavy libraries)** — shells out + to extract `PreviewImage` / `JpgFromRaw` / `OtherImage`, which reaches + MakerNote and SubIFD-hosted previews kamadak-exif can't see (e.g. Nikon's + `PreviewIFD`, where modern Nikon bodies stash the full-res review JPEG). + If `exiftool` isn't on `PATH` this layer is skipped silently and only the + fast-path result is used. + +Install `exiftool` via your package manager: +- macOS: `brew install exiftool` +- Linux (Debian/Ubuntu): `apt install libimage-exiftool-perl` +- Windows: `winget install OliverBetz.ExifTool` or `choco install exiftool` + +Files where neither layer produces a valid preview fall back to ffmpeg. Anything +that still can't be decoded is marked with a `.unsupported` sentinel in +the thumbnail directory so we don't retry it every scan. Delete those sentinels +(and any cached black thumbnails) to force retries after a tooling upgrade. ## Environment There are a handful of required environment variables to have the API run. 
diff --git a/src/exif.rs b/src/exif.rs index 6b14cca..b7f7112 100644 --- a/src/exif.rs +++ b/src/exif.rs @@ -1,6 +1,7 @@ use std::fs::File; use std::io::{BufReader, Read, Seek, SeekFrom}; use std::path::Path; +use std::process::Command; use anyhow::{Result, anyhow}; use exif::{In, Reader, Tag, Value}; @@ -70,6 +71,55 @@ fn read_jpeg_at_ifd(exif: &exif::Exif, path: &Path, ifd: In) -> Option<Vec<u8>> Some(buf) } +/// Tags exiftool exposes for embedded JPEG previews, in priority order. The +/// largest valid JPEG returned by any of them wins. Different camera makers +/// stash their largest preview under different names: Nikon's full-res +/// preview lives under `PreviewImage` in the MakerNote `PreviewIFD`, Canon / +/// Sony often expose theirs as `JpgFromRaw`, and `OtherImage` is a catch-all +/// some sub-IFD chains use. +const EXIFTOOL_PREVIEW_TAGS: &[&str] = &["PreviewImage", "JpgFromRaw", "OtherImage"]; + +/// Shell out to `exiftool -b -<Tag>` for one tag. Returns the response bytes +/// only if exiftool succeeded AND the bytes start with the JPEG SOI marker +/// (some MakerNote tags hold TIFF-wrapped previews or other non-JPEG payloads +/// we can't load). +fn extract_exiftool_tag(path: &Path, tag: &str) -> Option<Vec<u8>> { + let output = Command::new("exiftool") + .arg("-b") + .arg(format!("-{}", tag)) + .arg(path) + .output() + .ok()?; + + if !output.status.success() { + return None; + } + let bytes = output.stdout; + if bytes.len() < 2 || bytes[0] != 0xFF || bytes[1] != 0xD8 { + return None; + } + Some(bytes) +} + +/// Try each EXIFTOOL_PREVIEW_TAGS in turn and return the largest valid JPEG. +/// If `exiftool` isn't on PATH the very first spawn returns `None` and we +/// silently bail — callers fall back to whatever the IFD0/IFD1 fast path +/// found. 
+fn extract_preview_via_exiftool(path: &Path) -> Option<Vec<u8>> { + let mut best: Option<Vec<u8>> = None; + for &tag in EXIFTOOL_PREVIEW_TAGS { + let Some(bytes) = extract_exiftool_tag(path, tag) else { + continue; + }; + match &best { + None => best = Some(bytes), + Some(b) if b.len() < bytes.len() => best = Some(bytes), + _ => {} + } + } + best +} + /// Returns the bytes of the embedded JPEG preview in a TIFF-based RAW or /// TIFF file. Used to thumbnail formats whose RAW pixel data can't be decoded /// by our normal tools (e.g. Sony ARW), and to serve a usable full-size @@ -77,12 +127,20 @@ fn read_jpeg_at_ifd(exif: &exif::Exif, path: &Path, ifd: In) -> Option<Vec<u8>> /// `None` if no preview is present, the file isn't a TIFF container, or the /// data doesn't look like a valid JPEG. /// -/// Both IFD0 (PRIMARY) and IFD1 (THUMBNAIL) are checked, preferring the -/// larger valid JPEG. Conventions vary by camera: most modern Nikon NEFs -/// expose the larger reduced-resolution preview (~1–2 MP) via IFD0 and a -/// small chip via IFD1; some bodies leave one or the other empty or zero- -/// length, and an earlier THUMBNAIL-only implementation produced black -/// thumbnails for any NEF whose IFD1 thumbnail was missing or corrupted. +/// Strategy: +/// 1. Fast path: read `JPEGInterchangeFormat` from IFD0 (PRIMARY) and IFD1 +/// (THUMBNAIL) directly via kamadak-exif. No subprocess, no external +/// dependency. +/// 2. Slow path: shell out to `exiftool -b -<Tag>` for each of +/// `PreviewImage` / `JpgFromRaw` / `OtherImage`. kamadak-exif can't +/// reach SubIFDs or MakerNote sub-IFDs, but most modern Nikon bodies +/// stash their large preview JPEG in the Nikon MakerNote's PreviewIFD; +/// Canon / Sony often use `JpgFromRaw` in a SubIFD chain. Skipped +/// gracefully if exiftool isn't on PATH. +/// +/// All candidates are pooled and the largest valid JPEG wins, so a deploy +/// without exiftool degrades to "fast-path only" behavior rather than +/// breaking outright. 
pub fn extract_embedded_jpeg_preview(path: &Path) -> Option<Vec<u8>> { if !is_tiff_raw(path) { return None; } @@ -94,13 +152,12 @@ pub fn extract_embedded_jpeg_preview(path: &Path) -> Option<Vec<u8>> { let primary = read_jpeg_at_ifd(&exif, path, In::PRIMARY); let thumbnail = read_jpeg_at_ifd(&exif, path, In::THUMBNAIL); + let exiftool = extract_preview_via_exiftool(path); - match (primary, thumbnail) { - (Some(p), Some(t)) => Some(if p.len() >= t.len() { p } else { t }), - (Some(p), None) => Some(p), - (None, Some(t)) => Some(t), - (None, None) => None, - } + [primary, thumbnail, exiftool] + .into_iter() + .flatten() + .max_by_key(|v| v.len()) } pub fn supports_exif(path: &Path) -> bool {