feat: add content_hash backfill + register every media file
Adds blake3 content hashing as the basis for derivative dedup (thumbnails, HLS) across libraries. Hashes are computed inline by the watcher on ingest, and by a new `backfill_hashes` binary for historical rows.

Key changes:

- `content_hash` and `size_bytes` are now populated on new image_exif rows; a new ExifDao surface (`get_rows_missing_hash`, `backfill_content_hash`, `find_by_content_hash`) supports backfill and future hash-keyed lookups.
- The watcher now registers every image/video in image_exif, not just files with parseable EXIF. EXIF becomes optional enrichment; videos and other non-EXIF files still get a hashed row. This also makes DB-indexed sort/filter cover the full library.
- `/image` thumbnail serving looks up the hash-keyed path first, then falls back to the legacy mirrored layout.
- The upload flow accepts a `?library=` query param and hashes uploaded files.
- `store_exif` logs the underlying Diesel error on insert failure, so constraint violations surface instead of hiding behind a generic InsertError.
- A new migration normalizes rel_path separators to forward slash across all tables, deduplicating any rows that collide after normalization. Fixes spurious UNIQUE violations from mixed backslash/forward-slash paths on Windows ingest.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
src/bin/backfill_hashes.rs | 184 lines (new file)
@@ -0,0 +1,184 @@
//! Backfill `image_exif.content_hash` + `size_bytes` for rows that were
//! ingested before hash computation was wired into the watcher.
//!
//! The watcher computes hashes for new files as they're ingested, so this
//! binary is a one-shot tool for the historical backlog. Safe to re-run;
//! only rows with NULL content_hash are processed.

use std::path::Path;
use std::sync::{Arc, Mutex};
use std::time::Instant;

use clap::Parser;
use rayon::prelude::*;

use image_api::content_hash;
use image_api::database::{ExifDao, SqliteExifDao, connect};
use image_api::libraries::{self, Library};

#[derive(Parser, Debug)]
#[command(name = "backfill_hashes")]
#[command(about = "Compute content_hash for image_exif rows missing one")]
struct Args {
    /// Max rows to hash per batch. The process loops until no rows remain.
    #[arg(long, default_value_t = 500)]
    batch_size: i64,

    /// Rayon parallelism override. 0 uses the default thread pool size.
    #[arg(long, default_value_t = 0)]
    parallelism: usize,

    /// Dry-run: log what would be hashed without writing to the DB.
    #[arg(long)]
    dry_run: bool,
}

fn main() -> anyhow::Result<()> {
    env_logger::init();
    dotenv::dotenv().ok();

    let args = Args::parse();
    if args.parallelism > 0 {
        rayon::ThreadPoolBuilder::new()
            .num_threads(args.parallelism)
            .build_global()
            .expect("Unable to configure rayon thread pool");
    }

    // Resolve libraries (patch placeholder if still unset) so we can map
    // library_id back to a root_path on disk.
    let base_path = dotenv::var("BASE_PATH").ok();
    let mut seed_conn = connect();
    if let Some(base) = base_path.as_deref() {
        libraries::seed_or_patch_from_env(&mut seed_conn, base);
    }
    let libs = libraries::load_all(&mut seed_conn);
    drop(seed_conn);
    if libs.is_empty() {
        anyhow::bail!("No libraries configured; cannot backfill hashes");
    }
    let libs_by_id: std::collections::HashMap<i32, Library> =
        libs.into_iter().map(|lib| (lib.id, lib)).collect();
    println!(
        "Configured libraries: {}",
        libs_by_id
            .values()
            .map(|l| format!("{} -> {}", l.name, l.root_path))
            .collect::<Vec<_>>()
            .join(", ")
    );

    let dao: Arc<Mutex<Box<dyn ExifDao>>> =
        Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
    let ctx = opentelemetry::Context::new();

    let mut total_hashed = 0u64;
    let mut total_missing = 0u64;
    let mut total_errors = 0u64;
    let start = Instant::now();

    loop {
        let rows = {
            let mut guard = dao.lock().expect("Unable to lock ExifDao");
            guard
                .get_rows_missing_hash(&ctx, args.batch_size)
                .map_err(|e| anyhow::anyhow!("DB error: {:?}", e))?
        };
        if rows.is_empty() {
            break;
        }
        println!("Processing batch of {} rows", rows.len());

        // Compute hashes in parallel (I/O-bound; rayon helps on local disks,
        // throttled by network on SMB mounts — use --parallelism to tune).
        let results: Vec<(i32, String, Option<content_hash::FileIdentity>)> = rows
            .into_par_iter()
            .map(|(library_id, rel_path)| {
                let abs = libs_by_id
                    .get(&library_id)
                    .map(|lib| Path::new(&lib.root_path).join(&rel_path));
                match abs {
                    Some(abs_path) if abs_path.exists() => {
                        match content_hash::compute(&abs_path) {
                            Ok(id) => (library_id, rel_path, Some(id)),
                            Err(e) => {
                                eprintln!("hash error for {}: {:?}", abs_path.display(), e);
                                (library_id, rel_path, None)
                            }
                        }
                    }
                    Some(_) => (library_id, rel_path, None), // file missing on disk
                    None => {
                        eprintln!("Row refers to unknown library_id {}", library_id);
                        (library_id, rel_path, None)
                    }
                }
            })
            .collect();

        // Persist sequentially — SQLite writes serialize anyway.
        if !args.dry_run {
            let mut guard = dao.lock().expect("Unable to lock ExifDao");
            for (library_id, rel_path, ident) in &results {
                match ident {
                    Some(id) => {
                        match guard.backfill_content_hash(
                            &ctx,
                            *library_id,
                            rel_path,
                            &id.content_hash,
                            id.size_bytes,
                        ) {
                            Ok(_) => total_hashed += 1,
                            Err(e) => {
                                eprintln!("persist error for {}: {:?}", rel_path, e);
                                total_errors += 1;
                            }
                        }
                    }
                    None => {
                        total_missing += 1;
                    }
                }
            }
        } else {
            for (_, rel_path, ident) in &results {
                match ident {
                    Some(id) => {
                        println!(
                            "[dry-run] {} -> {} ({} bytes)",
                            rel_path, id.content_hash, id.size_bytes
                        );
                        total_hashed += 1;
                    }
                    None => {
                        total_missing += 1;
                    }
                }
            }
            println!(
                "[dry-run] processed one batch of {}. Stopping — a real run would continue \
                 until no NULL content_hash rows remain.",
                results.len()
            );
            break;
        }

        let elapsed = start.elapsed().as_secs_f64().max(0.001);
        let rate = total_hashed as f64 / elapsed;
        println!(
            " hashed={} missing={} errors={} ({:.1} files/sec)",
            total_hashed, total_missing, total_errors, rate
        );
    }

    println!();
    println!(
        "Done. hashed={}, skipped (missing on disk)={}, errors={}, elapsed={:.1}s",
        total_hashed,
        total_missing,
        total_errors,
        start.elapsed().as_secs_f64()
    );
    Ok(())
}
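Note (not part of this diff): with clap's default kebab-case long flags, a dry run over the backlog would look something like

    cargo run --release --bin backfill_hashes -- --batch-size 1000 --parallelism 4 --dry-run

run from an environment where the database is reachable. The binary reads BASE_PATH itself; the Diesel connection setup behind `connect()` is assumed to come from the project's usual env (e.g. DATABASE_URL).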
@@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> {
     let context = opentelemetry::Context::new();

     let relative_path = match path.strip_prefix(&base) {
-        Ok(p) => p.to_str().unwrap().to_string(),
+        Ok(p) => p.to_str().unwrap().replace('\\', "/"),
         Err(_) => {
             eprintln!(
                 "Error: Could not create relative path for {}",
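Note (not part of this diff): the same `.replace('\\', "/")` normalization recurs in the upload and watcher hunks further down. A hypothetical helper capturing the invariant the commit relies on (rel_path is stored with forward slashes on every OS) would look like:

use std::path::Path;

/// Canonical DB form of a path relative to a library root: forward
/// slashes regardless of OS, so Windows and Unix ingest agree on
/// rel_path. Illustrative sketch; the diff inlines this at each call site.
fn normalize_rel_path(path: &Path, base: &Path) -> Option<String> {
    let rel = path.strip_prefix(base).ok()?;
    Some(rel.to_str()?.replace('\\', "/"))
}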
src/content_hash.rs | 103 lines (new file)
@@ -0,0 +1,103 @@
//! Content-based file identity used to dedup derivative outputs
//! (thumbnails, HLS segments) across libraries.
//!
//! Hashes are computed with blake3 streaming so that network-mounted
//! libraries don't need to load whole files into memory. The result is
//! a 64-character hex string; we shard derivative directories on the
//! first two characters to keep any single directory's fanout bounded.

use std::fs::File;
use std::io::{self, Read};
use std::path::{Path, PathBuf};

/// Size of the read buffer used when streaming a file through blake3.
/// 1 MiB trades a bit of RSS for fewer syscalls on slow network mounts.
const HASH_BUFFER_SIZE: usize = 1024 * 1024;

/// Hash identity of a file, together with its byte length.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FileIdentity {
    pub content_hash: String,
    pub size_bytes: i64,
}

/// Stream a file through blake3 and return the hex-encoded digest + size.
pub fn compute(path: &Path) -> io::Result<FileIdentity> {
    let mut file = File::open(path)?;
    let size_bytes = file.metadata()?.len() as i64;

    let mut hasher = blake3::Hasher::new();
    let mut buf = vec![0u8; HASH_BUFFER_SIZE];
    loop {
        let n = file.read(&mut buf)?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }

    Ok(FileIdentity {
        content_hash: hasher.finalize().to_hex().to_string(),
        size_bytes,
    })
}

/// Hash-keyed thumbnail path: `<thumbs_dir>/<hash[..2]>/<hash>.jpg`.
/// Generation and serving both consult this first; the legacy mirrored
/// path acts as a fallback for pre-backfill rows.
pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
    let shard = shard_prefix(hash);
    thumbs_dir.join(shard).join(format!("{}.jpg", hash))
}

/// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`.
/// The playlist lives at `playlist.m3u8` inside this directory and its
/// segments are co-located so HLS relative references Just Work.
pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf {
    let shard = shard_prefix(hash);
    video_dir.join(shard).join(hash)
}

fn shard_prefix(hash: &str) -> &str {
    let end = hash.char_indices().nth(2).map(|(i, _)| i).unwrap_or(hash.len());
    &hash[..end]
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn identical_content_yields_identical_hash() {
        let dir = tempfile::tempdir().unwrap();
        let a = dir.path().join("a.bin");
        let b = dir.path().join("b.bin");
        std::fs::write(&a, b"hello world").unwrap();
        std::fs::write(&b, b"hello world").unwrap();
        let ha = compute(&a).unwrap();
        let hb = compute(&b).unwrap();
        assert_eq!(ha, hb);
        assert_eq!(ha.size_bytes, 11);
    }

    #[test]
    fn different_content_yields_different_hash() {
        let dir = tempfile::tempdir().unwrap();
        let a = dir.path().join("a.bin");
        let b = dir.path().join("b.bin");
        std::fs::write(&a, b"aaa").unwrap();
        std::fs::write(&b, b"bbb").unwrap();
        assert_ne!(compute(&a).unwrap(), compute(&b).unwrap());
    }

    #[test]
    fn derivative_paths_shard_by_first_two_hex() {
        let thumbs = Path::new("/tmp/thumbs");
        let p = thumbnail_path(thumbs, "abcdef0123");
        assert_eq!(p, PathBuf::from("/tmp/thumbs/ab/abcdef0123.jpg"));

        let video = Path::new("/tmp/video");
        let d = hls_dir(video, "1234deadbeef");
        assert_eq!(d, PathBuf::from("/tmp/video/12/1234deadbeef"));
    }
}
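Note (not part of this diff): a minimal sketch of how a generator is expected to consume this module; `generate_thumbnail` is illustrative, not a function in this codebase.

use std::io;
use std::path::{Path, PathBuf};

use image_api::content_hash;

fn ensure_thumb(source: &Path, thumbs_dir: &Path) -> io::Result<PathBuf> {
    // Hash once; the derivative is keyed by content, not by library path.
    let id = content_hash::compute(source)?;
    let out = content_hash::thumbnail_path(thumbs_dir, &id.content_hash);
    if !out.exists() {
        // generate_thumbnail(source, &out)?; // hypothetical generation step
    }
    Ok(out)
}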
@@ -312,6 +312,35 @@ pub trait ExifDao: Sync + Send {
         base_path: &str,
         recursive: bool,
     ) -> Result<Vec<(String, f64, f64, Option<i64>)>, DbError>;
+
+    /// Return rows that still lack a `content_hash`, oldest first. Used by
+    /// the `backfill_hashes` binary to batch through the historical
+    /// backlog. Returns `(library_id, rel_path)` tuples so the caller can
+    /// resolve each file on disk.
+    fn get_rows_missing_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        limit: i64,
+    ) -> Result<Vec<(i32, String)>, DbError>;
+
+    /// Persist the computed blake3 hash + file size for an existing row.
+    fn backfill_content_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        rel_path: &str,
+        hash: &str,
+        size_bytes: i64,
+    ) -> Result<(), DbError>;
+
+    /// Return the first EXIF row with the given content hash (any library).
+    /// Used by thumbnail/HLS generation to detect pre-existing derivatives
+    /// from another library before regenerating.
+    fn find_by_content_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        hash: &str,
+    ) -> Result<Option<ImageExif>, DbError>;
 }

 pub struct SqliteExifDao {
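Note (not part of this diff): a caller-side sketch of the dedup check the `find_by_content_hash` doc comment describes; the function name and wiring are illustrative.

fn derivative_already_known(
    dao: &mut dyn ExifDao,
    ctx: &opentelemetry::Context,
    hash: &str,
) -> Result<bool, DbError> {
    // Any row with this hash, in any library, means the hash-keyed
    // thumbnail/HLS output may already exist on disk.
    Ok(dao.find_by_content_hash(ctx, hash)?.is_some())
}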
@@ -346,13 +375,21 @@ impl ExifDao for SqliteExifDao {
         diesel::insert_into(image_exif)
             .values(&exif_data)
             .execute(connection.deref_mut())
-            .map_err(|_| anyhow::anyhow!("Insert error"))?;
+            .map_err(|e| {
+                log::warn!(
+                    "image_exif insert failed (lib={}, rel_path={:?}): {}",
+                    exif_data.library_id,
+                    exif_data.file_path,
+                    e
+                );
+                anyhow::anyhow!("Insert error: {}", e)
+            })?;

         image_exif
             .filter(library_id.eq(exif_data.library_id))
             .filter(rel_path.eq(&exif_data.file_path))
             .first::<ImageExif>(connection.deref_mut())
-            .map_err(|_| anyhow::anyhow!("Query error"))
+            .map_err(|e| anyhow::anyhow!("Post-insert lookup failed: {}", e))
     })
     .map_err(|_| DbError::new(DbErrorKind::InsertError))
 }
@@ -672,4 +709,70 @@ impl ExifDao for SqliteExifDao {
         })
         .map_err(|_| DbError::new(DbErrorKind::QueryError))
     }
+
+    fn get_rows_missing_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        limit: i64,
+    ) -> Result<Vec<(i32, String)>, DbError> {
+        trace_db_call(context, "query", "get_rows_missing_hash", |_span| {
+            use schema::image_exif::dsl::*;
+
+            let mut connection = self.connection.lock().expect("Unable to get ExifDao");
+
+            image_exif
+                .filter(content_hash.is_null())
+                .select((library_id, rel_path))
+                .order(id.asc())
+                .limit(limit)
+                .load::<(i32, String)>(connection.deref_mut())
+                .map_err(|_| anyhow::anyhow!("Query error"))
+        })
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
+    fn backfill_content_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id_val: i32,
+        rel_path_val: &str,
+        hash: &str,
+        size_val: i64,
+    ) -> Result<(), DbError> {
+        trace_db_call(context, "update", "backfill_content_hash", |_span| {
+            use schema::image_exif::dsl::*;
+
+            let mut connection = self.connection.lock().expect("Unable to get ExifDao");
+
+            diesel::update(
+                image_exif
+                    .filter(library_id.eq(library_id_val))
+                    .filter(rel_path.eq(rel_path_val)),
+            )
+            .set((content_hash.eq(hash), size_bytes.eq(size_val)))
+            .execute(connection.deref_mut())
+            .map(|_| ())
+            .map_err(|_| anyhow::anyhow!("Update error"))
+        })
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+    }
+
+    fn find_by_content_hash(
+        &mut self,
+        context: &opentelemetry::Context,
+        hash: &str,
+    ) -> Result<Option<ImageExif>, DbError> {
+        trace_db_call(context, "query", "find_by_content_hash", |_span| {
+            use schema::image_exif::dsl::*;
+
+            let mut connection = self.connection.lock().expect("Unable to get ExifDao");
+
+            image_exif
+                .filter(content_hash.eq(hash))
+                .first::<ImageExif>(connection.deref_mut())
+                .optional()
+                .map_err(|_| anyhow::anyhow!("Query error"))
+        })
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
 }
src/files.rs | 27 lines changed
@@ -1360,6 +1360,33 @@ mod tests {
     ) -> Result<Vec<(String, f64, f64, Option<i64>)>, DbError> {
         todo!()
     }
+
+    fn get_rows_missing_hash(
+        &mut self,
+        _context: &opentelemetry::Context,
+        _limit: i64,
+    ) -> Result<Vec<(i32, String)>, DbError> {
+        Ok(Vec::new())
+    }
+
+    fn backfill_content_hash(
+        &mut self,
+        _context: &opentelemetry::Context,
+        _library_id: i32,
+        _rel_path: &str,
+        _hash: &str,
+        _size_bytes: i64,
+    ) -> Result<(), DbError> {
+        Ok(())
+    }
+
+    fn find_by_content_hash(
+        &mut self,
+        _context: &opentelemetry::Context,
+        _hash: &str,
+    ) -> Result<Option<crate::database::models::ImageExif>, DbError> {
+        Ok(None)
+    }
 }

 mod api {
@@ -4,6 +4,7 @@ extern crate diesel;
 pub mod ai;
 pub mod auth;
 pub mod cleanup;
+pub mod content_hash;
 pub mod data;
 pub mod database;
 pub mod error;
src/main.rs | 198 lines changed
@@ -61,6 +61,7 @@ mod error;
 mod exif;
 mod file_types;
 mod files;
+mod content_hash;
 mod geo;
 mod libraries;
 mod state;
@@ -96,6 +97,7 @@ async fn get_image(
     request: HttpRequest,
     req: web::Query<ThumbnailRequest>,
     app_state: Data<AppState>,
+    exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
 ) -> impl Responder {
     let tracer = global_tracer();
     let context = extract_context_from_request(&request);
@@ -108,16 +110,45 @@ async fn get_image(
     let relative_path = path
         .strip_prefix(&app_state.base_path)
         .expect("Error stripping base path prefix from thumbnail");
+    let relative_path_str = relative_path.to_string_lossy().replace('\\', "/");

     let thumbs = &app_state.thumbnail_path;
-    let mut thumb_path = Path::new(&thumbs).join(relative_path);
+    let legacy_thumb_path = Path::new(&thumbs).join(relative_path);

-    // If it's a video and GIF format is requested, try to serve GIF thumbnail
+    // Gif thumbnails are a separate lookup (video GIF previews).
+    // Dual-lookup for gif is out of scope; preserve existing flow.
     if req.format == Some(ThumbnailFormat::Gif) && is_video_file(&path) {
-        thumb_path = Path::new(&app_state.gif_path).join(relative_path);
-        thumb_path.set_extension("gif");
+        let mut gif_path = Path::new(&app_state.gif_path).join(relative_path);
+        gif_path.set_extension("gif");
+        trace!("Gif thumbnail path: {:?}", gif_path);
+        if let Ok(file) = NamedFile::open(&gif_path) {
+            span.set_status(Status::Ok);
+            return file
+                .use_etag(true)
+                .use_last_modified(true)
+                .prefer_utf8(true)
+                .into_response(&request);
+        }
     }

+    // Resolve the hash-keyed thumbnail (if the row already has a
+    // content_hash) and fall back to the legacy mirrored path.
+    let hash_thumb_path: Option<PathBuf> = {
+        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+        match dao.get_exif(&context, &relative_path_str) {
+            Ok(Some(row)) => row
+                .content_hash
+                .as_deref()
+                .map(|h| content_hash::thumbnail_path(Path::new(thumbs), h)),
+            _ => None,
+        }
+    };
+    let thumb_path = hash_thumb_path
+        .as_ref()
+        .filter(|p| p.exists())
+        .cloned()
+        .unwrap_or_else(|| legacy_thumb_path.clone());

     // Handle circular thumbnail request
     if req.shape == Some(ThumbnailShape::Circle) {
         match create_circular_thumbnail(&thumb_path, thumbs).await {
@@ -141,8 +172,6 @@ async fn get_image(
     trace!("Thumbnail path: {:?}", thumb_path);
     if let Ok(file) = NamedFile::open(&thumb_path) {
         span.set_status(Status::Ok);
-        // The NamedFile will automatically set the correct content-type
-        // Enable ETag and set cache headers for thumbnails (1 day cache)
         return file
             .use_etag(true)
             .use_last_modified(true)
@@ -406,11 +435,23 @@ async fn upload_image(
         .expect("Error stripping library root prefix")
         .to_str()
         .unwrap()
-        .to_string();
+        .replace('\\', "/");

     match exif::extract_exif_from_path(&uploaded_path) {
         Ok(exif_data) => {
             let timestamp = Utc::now().timestamp();
+            let (content_hash, size_bytes) =
+                match content_hash::compute(&uploaded_path) {
+                    Ok(id) => (Some(id.content_hash), Some(id.size_bytes)),
+                    Err(e) => {
+                        warn!(
+                            "Failed to hash uploaded {}: {:?}",
+                            uploaded_path.display(),
+                            e
+                        );
+                        (None, None)
+                    }
+                };
             let insert_exif = InsertImageExif {
                 library_id: target_library.id,
                 file_path: relative_path.clone(),
@@ -430,8 +471,8 @@ async fn upload_image(
                 date_taken: exif_data.date_taken,
                 created_time: timestamp,
                 last_modified: timestamp,
-                content_hash: None,
-                size_bytes: None,
+                content_hash,
+                size_bytes,
             };

             if let Ok(mut dao) = exif_dao.lock() {
@@ -1566,11 +1607,13 @@ fn process_new_files(
         .filter(|entry| is_image(entry) || is_video(entry))
         .filter_map(|entry| {
             let file_path = entry.path().to_path_buf();
+            // Canonical rel_path is forward-slash regardless of OS so DB
+            // comparisons against the batch EXIF lookup line up.
             let relative_path = file_path
                 .strip_prefix(base_path)
                 .ok()?
                 .to_str()?
-                .to_string();
+                .replace('\\', "/");
             Some((file_path, relative_path))
         })
         .collect();
@@ -1600,82 +1643,107 @@ fn process_new_files(
     };

     let mut new_files_found = false;
-    let mut files_needing_exif = Vec::new();
+    let mut files_needing_row = Vec::new();

-    // Check each file for missing thumbnail or EXIF data
+    // Register every image/video file in image_exif. Rows without EXIF
+    // still carry library_id, rel_path, content_hash, and size_bytes so
+    // derivative dedup and DB-indexed sort/filter work for every file,
+    // not just photos with parseable EXIF.
     for (file_path, relative_path) in &files {
         // Check if thumbnail exists
         let thumb_path = thumbnail_directory.join(relative_path);
         let needs_thumbnail = !thumb_path.exists();
+        let needs_row = !existing_exif_paths.contains_key(relative_path);

-        // Check if EXIF data exists (for supported files)
-        let needs_exif = if exif::supports_exif(file_path) {
-            !existing_exif_paths.contains_key(relative_path)
-        } else {
-            false
-        };
-
-        if needs_thumbnail || needs_exif {
+        if needs_thumbnail || needs_row {
             new_files_found = true;

             if needs_thumbnail {
                 info!("New file detected (missing thumbnail): {}", relative_path);
             }

-            if needs_exif {
-                files_needing_exif.push((file_path.clone(), relative_path.clone()));
+            if needs_row {
+                files_needing_row.push((file_path.clone(), relative_path.clone()));
             }
         }
     }

-    // Process EXIF data for files that need it
-    if !files_needing_exif.is_empty() {
+    if !files_needing_row.is_empty() {
         info!(
-            "Processing EXIF data for {} files",
-            files_needing_exif.len()
+            "Registering {} new files in image_exif",
+            files_needing_row.len()
         );

-        for (file_path, relative_path) in files_needing_exif {
-            match exif::extract_exif_from_path(&file_path) {
-                Ok(exif_data) => {
-                    let timestamp = Utc::now().timestamp();
-                    let insert_exif = InsertImageExif {
-                        library_id: library.id,
-                        file_path: relative_path.clone(),
-                        camera_make: exif_data.camera_make,
-                        camera_model: exif_data.camera_model,
-                        lens_model: exif_data.lens_model,
-                        width: exif_data.width,
-                        height: exif_data.height,
-                        orientation: exif_data.orientation,
-                        gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
-                        gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
-                        gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
-                        focal_length: exif_data.focal_length.map(|v| v as f32),
-                        aperture: exif_data.aperture.map(|v| v as f32),
-                        shutter_speed: exif_data.shutter_speed,
-                        iso: exif_data.iso,
-                        date_taken: exif_data.date_taken,
-                        created_time: timestamp,
-                        last_modified: timestamp,
-                        content_hash: None,
-                        size_bytes: None,
-                    };
-
-                    let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
-                    if let Err(e) = dao.store_exif(&context, insert_exif) {
-                        error!("Failed to store EXIF data for {}: {:?}", relative_path, e);
-                    } else {
-                        debug!("EXIF data stored for {}", relative_path);
-                    }
-                }
-                Err(e) => {
-                    debug!(
-                        "No EXIF data or error extracting from {}: {:?}",
-                        file_path.display(),
-                        e
-                    );
-                }
-            }
+        for (file_path, relative_path) in files_needing_row {
+            let timestamp = Utc::now().timestamp();
+
+            // Hash + size from filesystem metadata — always attempted so
+            // every file gets a content_hash, even when EXIF is absent.
+            let (content_hash, size_bytes) = match content_hash::compute(&file_path) {
+                Ok(id) => (Some(id.content_hash), Some(id.size_bytes)),
+                Err(e) => {
+                    warn!("Failed to hash {}: {:?}", file_path.display(), e);
+                    (None, None)
+                }
+            };
+
+            // EXIF is best-effort enrichment. When extraction fails (or the
+            // file type doesn't support EXIF) we still store a row with all
+            // EXIF fields NULL; the file remains visible to sort-by-date
+            // and tag queries via its rel_path and filesystem timestamps.
+            let exif_fields = if exif::supports_exif(&file_path) {
+                match exif::extract_exif_from_path(&file_path) {
+                    Ok(data) => Some(data),
+                    Err(e) => {
+                        debug!(
+                            "No EXIF or parse error for {}: {:?}",
+                            file_path.display(),
+                            e
+                        );
+                        None
+                    }
+                }
+            } else {
+                None
+            };
+
+            let insert_exif = InsertImageExif {
+                library_id: library.id,
+                file_path: relative_path.clone(),
+                camera_make: exif_fields.as_ref().and_then(|e| e.camera_make.clone()),
+                camera_model: exif_fields.as_ref().and_then(|e| e.camera_model.clone()),
+                lens_model: exif_fields.as_ref().and_then(|e| e.lens_model.clone()),
+                width: exif_fields.as_ref().and_then(|e| e.width),
+                height: exif_fields.as_ref().and_then(|e| e.height),
+                orientation: exif_fields.as_ref().and_then(|e| e.orientation),
+                gps_latitude: exif_fields
+                    .as_ref()
+                    .and_then(|e| e.gps_latitude.map(|v| v as f32)),
+                gps_longitude: exif_fields
+                    .as_ref()
+                    .and_then(|e| e.gps_longitude.map(|v| v as f32)),
+                gps_altitude: exif_fields
+                    .as_ref()
+                    .and_then(|e| e.gps_altitude.map(|v| v as f32)),
+                focal_length: exif_fields
+                    .as_ref()
+                    .and_then(|e| e.focal_length.map(|v| v as f32)),
+                aperture: exif_fields
+                    .as_ref()
+                    .and_then(|e| e.aperture.map(|v| v as f32)),
+                shutter_speed: exif_fields.as_ref().and_then(|e| e.shutter_speed.clone()),
+                iso: exif_fields.as_ref().and_then(|e| e.iso),
+                date_taken: exif_fields.as_ref().and_then(|e| e.date_taken),
+                created_time: timestamp,
+                last_modified: timestamp,
+                content_hash,
+                size_bytes,
+            };
+
+            let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+            if let Err(e) = dao.store_exif(&context, insert_exif) {
+                error!("Failed to register {} in image_exif: {:?}", relative_path, e);
+            } else {
+                debug!("Registered {} in image_exif", relative_path);
+            }
         }
     }