Files
ImageApi/src/exif.rs
Cameron Cordes 16d6586b7d exif: GET /image/exif/full — exiftool dump for the DETAILS modal
The curated `image_exif` columns are a small slice of what exiftool
can read (camera/lens/GPS/capture/dates). Apollo's DETAILS modal wants
to surface everything — white balance, metering, MakerNotes, IPTC,
ICC profile, Composite tags, the lot — for an operator inspecting a
photo's provenance.

`read_full_exif_via_exiftool(path)` shells out to `exiftool -j -G -n`:
JSON output, group-prefixed keys (`EXIF:Make`, `MakerNotes:LensInfo`),
numeric values (callers can reformat). Spawned via web::block to keep
it off the actix worker — RAW with rich MakerNotes can take a few
seconds.

The endpoint is on-demand only; the indexer / file watcher does NOT
call it. Falls back to 503 with a clear message when exiftool isn't
on PATH so Apollo can render an "install exiftool" hint. Multi-library
union resolution mirrors set_image_gps / get_file_metadata.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 19:42:41 -04:00

580 lines
20 KiB
Rust

use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::path::Path;
use std::process::Command;
use anyhow::{Result, anyhow};
use exif::{In, Reader, Tag, Value};
use image::DynamicImage;
use log::debug;
use serde::{Deserialize, Serialize};
/// Curated subset of EXIF metadata extracted from a single image file.
///
/// Every field is optional: a tag that is absent or cannot be decoded is
/// left as `None`, and `Default` yields a value with all fields `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ExifData {
/// Camera manufacturer, from the EXIF `Make` tag.
pub camera_make: Option<String>,
/// Camera model, from the EXIF `Model` tag.
pub camera_model: Option<String>,
/// Lens description, from the EXIF `LensModel` tag.
pub lens_model: Option<String>,
/// Image width, from `PixelXDimension` or `ImageWidth`.
pub width: Option<i32>,
/// Image height, from `PixelYDimension` or `ImageLength`.
pub height: Option<i32>,
/// Raw EXIF `Orientation` value; see `apply_orientation` for the mapping.
pub orientation: Option<i32>,
/// Latitude in signed decimal degrees (negative for an `S` reference).
pub gps_latitude: Option<f64>,
/// Longitude in signed decimal degrees (negative for a `W` reference).
pub gps_longitude: Option<f64>,
/// Altitude from `GPSAltitude`, negated when `GPSAltitudeRef` is 1.
pub gps_altitude: Option<f64>,
/// `FocalLength` rational converted to a float.
pub focal_length: Option<f64>,
/// `FNumber` rational converted to a float.
pub aperture: Option<f64>,
/// `ExposureTime` rendered as `"num/denom"` when below one, otherwise as a
/// decimal with two fractional digits.
pub shutter_speed: Option<String>,
/// ISO value, from `PhotographicSensitivity` or `ISOSpeed`.
pub iso: Option<i32>,
/// Capture time as a Unix timestamp in seconds. EXIF datetimes carry no
/// time zone; the value is computed as if the stored time were UTC.
pub date_taken: Option<i64>,
}
/// Reports whether `path` has the extension of a TIFF file or of a RAW format
/// treated here as a TIFF container, i.e. one where `JPEGInterchangeFormat`
/// offsets are taken as absolute file offsets.
///
/// The comparison ignores case. A path without an extension, or whose
/// extension is not valid UTF-8, yields `false`.
pub fn is_tiff_raw(path: &Path) -> bool {
    const TIFF_LIKE: [&str; 11] = [
        "tiff", "tif", "nef", "cr2", "arw", "dng", "raf", "orf", "rw2", "pef", "srw",
    ];
    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| TIFF_LIKE.contains(&ext.to_lowercase().as_str()))
}
/// Read the JPEG bytes pointed to by `JPEGInterchangeFormat` /
/// `JPEGInterchangeFormatLength` in a single IFD.
///
/// Returns `None` on any failure: tags missing, length zero, the
/// offset/length pair pointing outside the file, file read failure, or bytes
/// that don't start with the JPEG SOI marker (some MakerNote pointers
/// reference TIFF-wrapped previews or other non-JPEG payloads we can't load).
fn read_jpeg_at_ifd(exif: &exif::Exif, path: &Path, ifd: In) -> Option<Vec<u8>> {
    let offset = exif
        .get_field(Tag::JPEGInterchangeFormat, ifd)?
        .value
        .get_uint(0)?;
    let length = exif
        .get_field(Tag::JPEGInterchangeFormatLength, ifd)?
        .value
        .get_uint(0)?;
    if length == 0 {
        return None;
    }

    let mut file = File::open(path).ok()?;

    // Offset and length come straight from file metadata we don't control.
    // Check them against the real file size *before* allocating, so a corrupt
    // or hostile length (up to 4 GiB) can't force a huge zeroed buffer that
    // `read_exact` would reject anyway.
    let file_len = file.metadata().ok()?.len();
    let end = u64::from(offset).checked_add(u64::from(length))?;
    if end > file_len {
        return None;
    }

    file.seek(SeekFrom::Start(u64::from(offset))).ok()?;
    // `length` is a u32, so widening to usize is lossless on 32/64-bit targets.
    let mut buf = vec![0u8; length as usize];
    file.read_exact(&mut buf).ok()?;

    // JPEG streams begin with the SOI marker FF D8.
    if buf.starts_with(&[0xFF, 0xD8]) {
        Some(buf)
    } else {
        None
    }
}
/// Shell out to `exiftool -j -G -n <path>` and return the per-file tag map.
///
/// `-j` requests JSON; the response is an array with one element per input
/// path. `-G` prefixes each key with the group name (`EXIF:Make`,
/// `MakerNotes:LensInfo`, `File:FileSize`, …) so a UI can group the dump.
/// `-n` returns numeric / raw values rather than exiftool's pretty-printed
/// human strings, which keeps the output stable for clients that want to
/// reformat (e.g. divide a focal-length numerator/denominator).
///
/// Returns:
/// - `Ok(Some(value))` — the parsed JSON object for this file.
/// - `Ok(None)` — exiftool ran but the array was empty or its first element
///   was not a JSON object.
/// - `Err(_)` — the spawn failed (e.g. exiftool isn't on PATH), exiftool
///   exited with a non-zero status, or its stdout was not a JSON array.
///   Caller surfaces a 503 / 422.
///
/// Used by `GET /image/exif/full` to power Apollo's DETAILS modal "FULL
/// EXIF" pane. Per-file shell-out is fine for this on-demand surface;
/// the indexer does NOT call this on the hot path (kamadak-exif covers
/// the indexed columns; exiftool is the slow-path preview helper).
pub fn read_full_exif_via_exiftool(path: &Path) -> Result<Option<serde_json::Value>> {
    let output = Command::new("exiftool")
        .arg("-j")
        .arg("-G")
        .arg("-n")
        .arg(path)
        .output()
        .map_err(|e| anyhow!("exiftool spawn failed (is it on PATH?): {}", e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow!(
            "exiftool exited with {}: {}",
            output.status,
            stderr.trim()
        ));
    }

    let parsed: serde_json::Value = serde_json::from_slice(&output.stdout)
        .map_err(|e| anyhow!("exiftool returned non-JSON output: {}", e))?;

    // `-j` wraps the result in an array. Take ownership of it so the first
    // entry can be moved out instead of deep-cloning a potentially large dump.
    let serde_json::Value::Array(entries) = parsed else {
        return Err(anyhow!("expected JSON array from exiftool -j"));
    };

    // Honour the documented contract: anything other than an object is `None`.
    Ok(entries
        .into_iter()
        .next()
        .filter(serde_json::Value::is_object))
}
/// Tags exiftool exposes for embedded JPEG previews. Every tag is tried and
/// the largest valid JPEG returned by any of them wins; the order here only
/// acts as a tie-break (on equal size the earlier tag's bytes are kept).
/// Different camera makers stash their largest preview under different
/// names: Nikon's full-res preview lives under `PreviewImage` in the
/// MakerNote `PreviewIFD`, Canon / Sony often expose theirs as `JpgFromRaw`,
/// and `OtherImage` is a catch-all some sub-IFD chains use.
const EXIFTOOL_PREVIEW_TAGS: &[&str] = &["PreviewImage", "JpgFromRaw", "OtherImage"];
/// Runs `exiftool -b -<tag> <path>` and hands back the binary payload of that
/// single tag.
///
/// Yields `None` when the process cannot be spawned, when it exits
/// unsuccessfully, or when its stdout does not begin with the JPEG SOI
/// marker (`FF D8`) — some MakerNote tags carry TIFF-wrapped previews or
/// other non-JPEG payloads that we cannot load.
fn extract_exiftool_tag(path: &Path, tag: &str) -> Option<Vec<u8>> {
    let mut cmd = Command::new("exiftool");
    cmd.arg("-b").arg(format!("-{tag}")).arg(path);

    let result = cmd.output().ok()?;
    if result.status.success() && result.stdout.starts_with(&[0xFF, 0xD8]) {
        Some(result.stdout)
    } else {
        None
    }
}
/// Queries exiftool for every tag in `EXIFTOOL_PREVIEW_TAGS`, in order, and
/// keeps the largest JPEG payload found. On equal sizes the earlier tag's
/// bytes are retained.
///
/// When `exiftool` isn't on PATH each individual attempt simply produces
/// nothing, so the result is `None` and callers fall back to whatever the
/// IFD0/IFD1 fast path found.
fn extract_preview_via_exiftool(path: &Path) -> Option<Vec<u8>> {
    EXIFTOOL_PREVIEW_TAGS
        .iter()
        .filter_map(|tag| extract_exiftool_tag(path, tag))
        .reduce(|largest, candidate| {
            // Strictly-greater keeps the first of two equally sized payloads.
            if candidate.len() > largest.len() {
                candidate
            } else {
                largest
            }
        })
}
/// Returns the bytes of the embedded JPEG preview in a TIFF-based RAW or
/// TIFF file. Used to thumbnail formats whose RAW pixel data can't be decoded
/// by our normal tools (e.g. Sony ARW), and to serve a usable full-size
/// image for clients that can't decode the RAW container directly. Returns
/// `None` if no preview is present, the file isn't a TIFF container, or the
/// data doesn't look like a valid JPEG.
///
/// Strategy:
/// 1. Fast path: read `JPEGInterchangeFormat` from IFD0 (PRIMARY) and IFD1
/// (THUMBNAIL) directly via kamadak-exif. No subprocess, no external
/// dependency.
/// 2. Slow path: shell out to `exiftool -b -<tag>` for each of
/// `PreviewImage` / `JpgFromRaw` / `OtherImage`. kamadak-exif can't
/// reach SubIFDs or MakerNote sub-IFDs, but most modern Nikon bodies
/// stash their large preview JPEG in the Nikon MakerNote's PreviewIFD;
/// Canon / Sony often use `JpgFromRaw` in a SubIFD chain. Skipped
/// gracefully if exiftool isn't on PATH.
///
/// All candidates are pooled and the largest valid JPEG wins, so a deploy
/// without exiftool degrades to "fast-path only" behavior rather than
/// breaking outright.
pub fn extract_embedded_jpeg_preview(path: &Path) -> Option<Vec<u8>> {
if !is_tiff_raw(path) {
return None;
}
let file = File::open(path).ok()?;
let mut bufreader = BufReader::new(file);
let exif = Reader::new().read_from_container(&mut bufreader).ok()?;
let primary = read_jpeg_at_ifd(&exif, path, In::PRIMARY);
let thumbnail = read_jpeg_at_ifd(&exif, path, In::THUMBNAIL);
let exiftool = extract_preview_via_exiftool(path);
[primary, thumbnail, exiftool]
.into_iter()
.flatten()
.max_by_key(|v| v.len())
}
/// Writes a GPS position into the file's metadata in place by invoking
/// exiftool with only the four GPS latitude/longitude tags.
///
/// exiftool is run with `-overwrite_original`, so no `.orig` sidecar is left
/// behind (backing up for rollback is the caller's responsibility), and with
/// `-P`.
///
/// Latitude and longitude are handed over as non-negative decimals, with the
/// hemisphere carried by the `*Ref` tags (`N`/`S`, `E`/`W`). exiftool accepts
/// signed decimals too, but the explicit ref form is unambiguous across
/// exiftool versions and matches what cameras write.
///
/// # Errors
///
/// - the extension is not accepted by `supports_exif`;
/// - `lat` is outside `-90..=90` or `lon` outside `-180..=180` (NaN is
///   rejected as well);
/// - exiftool cannot be spawned (e.g. it isn't on PATH);
/// - exiftool exits with a non-zero status; its trimmed stderr is included
///   in the error message.
pub fn write_gps(path: &Path, lat: f64, lon: f64) -> Result<()> {
    if !supports_exif(path) {
        return Err(anyhow!(
            "Format does not support EXIF GPS write: {}",
            path.display()
        ));
    }

    let lat_in_range = (-90.0..=90.0).contains(&lat);
    let lon_in_range = (-180.0..=180.0).contains(&lon);
    if !(lat_in_range && lon_in_range) {
        return Err(anyhow!("GPS coordinates out of range: {}, {}", lat, lon));
    }

    let lat_ref = if lat >= 0.0 { "N" } else { "S" };
    let lon_ref = if lon >= 0.0 { "E" } else { "W" };

    let tag_args = [
        "-overwrite_original".to_string(),
        "-P".to_string(),
        format!("-GPSLatitude={}", lat.abs()),
        format!("-GPSLatitudeRef={}", lat_ref),
        format!("-GPSLongitude={}", lon.abs()),
        format!("-GPSLongitudeRef={}", lon_ref),
    ];

    let output = Command::new("exiftool")
        .args(&tag_args)
        .arg(path)
        .output()
        .map_err(|e| anyhow!("exiftool spawn failed (is it on PATH?): {}", e))?;

    if output.status.success() {
        return Ok(());
    }

    let stderr = String::from_utf8_lossy(&output.stderr);
    Err(anyhow!(
        "exiftool failed (exit {}): {}",
        output.status.code().unwrap_or(-1),
        stderr.trim()
    ))
}
/// Extension-based check for file types this module treats as EXIF-capable.
///
/// The extension is compared case-insensitively (non-UTF-8 extensions are
/// converted lossily first). Paths without an extension are rejected.
pub fn supports_exif(path: &Path) -> bool {
    const EXIF_EXTENSIONS: &[&str] = &[
        // JPEG formats
        "jpg", "jpeg",
        // TIFF and RAW formats based on TIFF
        "tiff", "tif", "nef", "cr2", "cr3", "arw", "dng", "raf", "orf", "rw2", "pef", "srw",
        // HEIF and variants
        "heif", "heic", "avif",
        // PNG
        "png",
        // WebP
        "webp",
    ];

    path.extension()
        .map(|ext| ext.to_string_lossy().to_lowercase())
        .is_some_and(|ext| EXIF_EXTENSIONS.contains(&ext.as_str()))
}
pub fn extract_exif_from_path(path: &Path) -> Result<ExifData> {
debug!("Extracting EXIF from: {:?}", path);
if !supports_exif(path) {
return Err(anyhow!("File type does not support EXIF"));
}
let file = File::open(path)?;
let mut bufreader = BufReader::new(file);
let exifreader = Reader::new();
let exif = exifreader.read_from_container(&mut bufreader)?;
let mut data = ExifData::default();
for field in exif.fields() {
match field.tag {
Tag::Make => {
data.camera_make = get_string_value(field);
}
Tag::Model => {
data.camera_model = get_string_value(field);
}
Tag::LensModel => {
data.lens_model = get_string_value(field);
}
Tag::PixelXDimension | Tag::ImageWidth => {
if data.width.is_none() {
data.width = get_u32_value(field).map(|v| v as i32);
}
}
Tag::PixelYDimension | Tag::ImageLength => {
if data.height.is_none() {
data.height = get_u32_value(field).map(|v| v as i32);
}
}
Tag::Orientation => {
data.orientation = get_u32_value(field).map(|v| v as i32);
}
Tag::FocalLength => {
data.focal_length = get_rational_value(field);
}
Tag::FNumber => {
data.aperture = get_rational_value(field);
}
Tag::ExposureTime => {
data.shutter_speed = get_rational_string(field);
}
Tag::PhotographicSensitivity | Tag::ISOSpeed => {
if data.iso.is_none() {
data.iso = get_u32_value(field).map(|v| v as i32);
}
}
Tag::DateTime | Tag::DateTimeOriginal => {
if data.date_taken.is_none() {
data.date_taken = parse_exif_datetime(field);
}
}
_ => {}
}
}
// Extract GPS coordinates
if let Some(lat) = extract_gps_coordinate(&exif, Tag::GPSLatitude, Tag::GPSLatitudeRef) {
data.gps_latitude = Some(lat);
}
if let Some(lon) = extract_gps_coordinate(&exif, Tag::GPSLongitude, Tag::GPSLongitudeRef) {
data.gps_longitude = Some(lon);
}
if let Some(alt) = extract_gps_altitude(&exif) {
data.gps_altitude = Some(alt);
}
debug!("Extracted EXIF data: {:?}", data);
Ok(data)
}
/// Read just the EXIF Orientation tag (1..=8) from a file. Cheaper than a
/// full `extract_exif_from_path` when the caller only needs orientation —
/// e.g. the thumbnail pipeline, which has to bake the rotation into the
/// resized pixels because the saved thumb has no EXIF chunk for the browser
/// to apply.
pub fn read_orientation(path: &Path) -> Option<i32> {
let file = File::open(path).ok()?;
let mut reader = BufReader::new(file);
let exif = Reader::new().read_from_container(&mut reader).ok()?;
let field = exif.get_field(Tag::Orientation, In::PRIMARY)?;
get_u32_value(field).map(|v| v as i32)
}
/// Bakes an EXIF Orientation value into the pixels of `img` and returns the
/// upright result.
///
/// The transform is expressed as an optional rotation followed by an
/// optional mirror:
///
/// | value | rotation  | mirror |
/// |-------|-----------|--------|
/// | 1     | —         | —      |
/// | 2     | —         | flipH  |
/// | 3     | rot180    | —      |
/// | 4     | —         | flipV  |
/// | 5     | rot90CW   | flipH  |
/// | 6     | rot90CW   | —      |
/// | 7     | rot270CW  | flipH  |
/// | 8     | rot270CW  | —      |
///
/// Any other value (missing tag, garbage) leaves the image untouched.
pub fn apply_orientation(img: DynamicImage, orientation: i32) -> DynamicImage {
    // Step 1: rotation component.
    let rotated = match orientation {
        3 => img.rotate180(),
        5 | 6 => img.rotate90(),
        7 | 8 => img.rotate270(),
        _ => img,
    };

    // Step 2: mirror component, applied after the rotation.
    match orientation {
        2 | 5 | 7 => rotated.fliph(),
        4 => rotated.flipv(),
        _ => rotated,
    }
}
/// Renders a field as text.
///
/// For ASCII-typed values the first component is decoded as UTF-8 with
/// trailing NUL bytes stripped (`None` if there is no component or it is not
/// valid UTF-8). Every other value type falls back to the field's display
/// representation, with an empty rendering mapped to `None`.
fn get_string_value(field: &exif::Field) -> Option<String> {
    if let Value::Ascii(components) = &field.value {
        let raw = components.first()?;
        return std::str::from_utf8(raw)
            .ok()
            .map(|text| text.trim_end_matches('\0').to_string());
    }

    let rendered = field.display_value().to_string();
    if rendered.is_empty() {
        None
    } else {
        Some(rendered)
    }
}
/// Returns the first component of an unsigned-integer field as `u32`.
///
/// Accepts BYTE, SHORT and LONG values. BYTE matters for tags such as
/// `GPSAltitudeRef`, which are single bytes and would otherwise be silently
/// dropped. Any other value type, or an empty value, yields `None`.
fn get_u32_value(field: &exif::Field) -> Option<u32> {
    match &field.value {
        Value::Byte(vec) => vec.first().map(|&v| u32::from(v)),
        Value::Short(vec) => vec.first().map(|&v| u32::from(v)),
        Value::Long(vec) => vec.first().copied(),
        _ => None,
    }
}
/// Converts the first component of an unsigned RATIONAL field to `f64`.
///
/// Yields `None` for non-rational values, an empty value, or a zero
/// denominator.
fn get_rational_value(field: &exif::Field) -> Option<f64> {
    let Value::Rational(parts) = &field.value else {
        return None;
    };
    parts
        .first()
        .filter(|r| r.denom != 0)
        .map(|r| r.num as f64 / r.denom as f64)
}
/// Formats the first component of an unsigned RATIONAL field for display.
///
/// A value below one is kept as the fraction it was stored as
/// (`"num/denom"`, not reduced); anything else is printed as a decimal with
/// two fractional digits. Non-rational values, empty values and zero
/// denominators yield `None`.
fn get_rational_string(field: &exif::Field) -> Option<String> {
    let first = match &field.value {
        Value::Rational(parts) => parts.first()?,
        _ => return None,
    };

    match (first.num, first.denom) {
        (_, 0) => None,
        (n, d) if n < d => Some(format!("{}/{}", n, d)),
        (n, d) => Some(format!("{:.2}", n as f64 / d as f64)),
    }
}
/// Parses an EXIF datetime field (`"YYYY:MM:DD HH:MM:SS"`) into a Unix
/// timestamp in seconds.
///
/// EXIF datetimes are local times with no zone information. The timestamp is
/// computed as if the value were UTC, and the client displays it as-is.
/// Returns `None` when the field has no text or the text doesn't match the
/// format.
fn parse_exif_datetime(field: &exif::Field) -> Option<i64> {
    use chrono::NaiveDateTime;

    let text = get_string_value(field)?;
    let naive = NaiveDateTime::parse_from_str(&text, "%Y:%m:%d %H:%M:%S").ok()?;
    Some(naive.and_utc().timestamp())
}
/// Reads a GPS latitude or longitude from the primary IFD as signed decimal
/// degrees.
///
/// `coord_tag` must hold at least three unsigned rationals (degrees,
/// minutes, seconds) and `ref_tag` the hemisphere letter; a reference
/// starting with `S` or `W` makes the result negative.
///
/// Returns `None` when either tag is missing, the coordinate is not a
/// rational triple, the reference has no text, or any of the three
/// denominators is zero. The last case matches the other rational helpers
/// in this file and prevents a `0/0` placeholder from surfacing as
/// `Some(NaN)` / `Some(inf)`.
fn extract_gps_coordinate(exif: &exif::Exif, coord_tag: Tag, ref_tag: Tag) -> Option<f64> {
    let coord_field = exif.get_field(coord_tag, In::PRIMARY)?;
    let ref_field = exif.get_field(ref_tag, In::PRIMARY)?;

    let Value::Rational(parts) = &coord_field.value else {
        return None;
    };
    let [deg, min, sec, ..] = parts.as_slice() else {
        return None;
    };
    if deg.denom == 0 || min.denom == 0 || sec.denom == 0 {
        return None;
    }

    let degrees = f64::from(deg.num) / f64::from(deg.denom);
    let minutes = f64::from(min.num) / f64::from(min.denom);
    let seconds = f64::from(sec.num) / f64::from(sec.denom);
    let magnitude = degrees + (minutes / 60.0) + (seconds / 3600.0);

    let reference = get_string_value(ref_field)?;
    let sign = if reference.starts_with('S') || reference.starts_with('W') {
        -1.0
    } else {
        1.0
    };
    Some(magnitude * sign)
}
fn extract_gps_altitude(exif: &exif::Exif) -> Option<f64> {
let alt_field = exif.get_field(Tag::GPSAltitude, In::PRIMARY)?;
match &alt_field.value {
Value::Rational(vec) => {
if let Some(rational) = vec.first() {
if rational.denom == 0 {
None
} else {
let altitude = rational.num as f64 / rational.denom as f64;
// Check if below sea level
if let Some(ref_field) = exif.get_field(Tag::GPSAltitudeRef, In::PRIMARY)
&& let Some(ref_val) = get_u32_value(ref_field)
&& ref_val == 1
{
return Some(-altitude);
}
Some(altitude)
}
} else {
None
}
}
_ => None,
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: does `supports_exif` accept a file with this name?
    fn accepts(file_name: &str) -> bool {
        supports_exif(Path::new(file_name))
    }

    #[test]
    fn test_supports_exif_jpeg() {
        assert!(accepts("test.jpg"));
        assert!(accepts("test.jpeg"));
        assert!(accepts("test.JPG"));
    }

    #[test]
    fn test_supports_exif_raw_formats() {
        // Nikon
        assert!(accepts("test.nef"));
        assert!(accepts("test.NEF"));
        // Canon
        assert!(accepts("test.cr2"));
        assert!(accepts("test.cr3"));
        // Sony
        assert!(accepts("test.arw"));
        // Adobe DNG
        assert!(accepts("test.dng"));
    }

    #[test]
    fn test_supports_exif_tiff() {
        assert!(accepts("test.tiff"));
        assert!(accepts("test.tif"));
        assert!(accepts("test.TIFF"));
    }

    #[test]
    fn test_supports_exif_heif() {
        assert!(accepts("test.heif"));
        assert!(accepts("test.heic"));
        assert!(accepts("test.avif"));
    }

    #[test]
    fn test_supports_exif_png_webp() {
        assert!(accepts("test.png"));
        assert!(accepts("test.PNG"));
        assert!(accepts("test.webp"));
        assert!(accepts("test.WEBP"));
    }

    #[test]
    fn test_supports_exif_unsupported() {
        assert!(!accepts("test.mp4"));
        assert!(!accepts("test.mov"));
        assert!(!accepts("test.txt"));
        assert!(!accepts("test.gif"));
    }

    #[test]
    fn test_supports_exif_no_extension() {
        assert!(!accepts("test"));
    }
}