feat: add content_hash backfill + register every media file
Adds blake3 content hashing as the basis for derivative dedup (thumbnails, HLS) across libraries. Computed inline by the watcher on ingest and by a new `backfill_hashes` binary for historical rows. Key changes: - `content_hash` and `size_bytes` are now populated on new image_exif rows; a new ExifDao surface (`get_rows_missing_hash`, `backfill_content_hash`, `find_by_content_hash`) supports backfill and future hash-keyed lookups. - The watcher now registers every image/video in image_exif, not just files with parseable EXIF. EXIF becomes optional enrichment; videos and other non-EXIF files still get a hashed row. This also makes DB-indexed sort/filter cover the full library. - The `/image` thumbnail route now looks up the hash-keyed path first, then falls back to the legacy mirrored layout. - Upload flow accepts `?library=` query param + hashes uploaded files. - `store_exif` logs the underlying Diesel error on insert failure so constraint violations surface instead of hiding behind a generic InsertError. - New migration normalizes rel_path separators to forward slash across all tables, deduplicating any rows that collide after normalization. Fixes spurious UNIQUE violations from mixed backslash/forward-slash paths on Windows ingest. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
198
src/main.rs
198
src/main.rs
@@ -61,6 +61,7 @@ mod error;
|
||||
mod exif;
|
||||
mod file_types;
|
||||
mod files;
|
||||
mod content_hash;
|
||||
mod geo;
|
||||
mod libraries;
|
||||
mod state;
|
||||
@@ -96,6 +97,7 @@ async fn get_image(
|
||||
request: HttpRequest,
|
||||
req: web::Query<ThumbnailRequest>,
|
||||
app_state: Data<AppState>,
|
||||
exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
|
||||
) -> impl Responder {
|
||||
let tracer = global_tracer();
|
||||
let context = extract_context_from_request(&request);
|
||||
@@ -108,16 +110,45 @@ async fn get_image(
|
||||
let relative_path = path
|
||||
.strip_prefix(&app_state.base_path)
|
||||
.expect("Error stripping base path prefix from thumbnail");
|
||||
let relative_path_str = relative_path.to_string_lossy().replace('\\', "/");
|
||||
|
||||
let thumbs = &app_state.thumbnail_path;
|
||||
let mut thumb_path = Path::new(&thumbs).join(relative_path);
|
||||
let legacy_thumb_path = Path::new(&thumbs).join(relative_path);
|
||||
|
||||
// If it's a video and GIF format is requested, try to serve GIF thumbnail
|
||||
// Gif thumbnails are a separate lookup (video GIF previews).
|
||||
// Dual-lookup for gif is out of scope; preserve existing flow.
|
||||
if req.format == Some(ThumbnailFormat::Gif) && is_video_file(&path) {
|
||||
thumb_path = Path::new(&app_state.gif_path).join(relative_path);
|
||||
thumb_path.set_extension("gif");
|
||||
let mut gif_path = Path::new(&app_state.gif_path).join(relative_path);
|
||||
gif_path.set_extension("gif");
|
||||
trace!("Gif thumbnail path: {:?}", gif_path);
|
||||
if let Ok(file) = NamedFile::open(&gif_path) {
|
||||
span.set_status(Status::Ok);
|
||||
return file
|
||||
.use_etag(true)
|
||||
.use_last_modified(true)
|
||||
.prefer_utf8(true)
|
||||
.into_response(&request);
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve the hash-keyed thumbnail (if the row already has a
|
||||
// content_hash) and fall back to the legacy mirrored path.
|
||||
let hash_thumb_path: Option<PathBuf> = {
|
||||
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||
match dao.get_exif(&context, &relative_path_str) {
|
||||
Ok(Some(row)) => row
|
||||
.content_hash
|
||||
.as_deref()
|
||||
.map(|h| content_hash::thumbnail_path(Path::new(thumbs), h)),
|
||||
_ => None,
|
||||
}
|
||||
};
|
||||
let thumb_path = hash_thumb_path
|
||||
.as_ref()
|
||||
.filter(|p| p.exists())
|
||||
.cloned()
|
||||
.unwrap_or_else(|| legacy_thumb_path.clone());
|
||||
|
||||
// Handle circular thumbnail request
|
||||
if req.shape == Some(ThumbnailShape::Circle) {
|
||||
match create_circular_thumbnail(&thumb_path, thumbs).await {
|
||||
@@ -141,8 +172,6 @@ async fn get_image(
|
||||
trace!("Thumbnail path: {:?}", thumb_path);
|
||||
if let Ok(file) = NamedFile::open(&thumb_path) {
|
||||
span.set_status(Status::Ok);
|
||||
// The NamedFile will automatically set the correct content-type
|
||||
// Enable ETag and set cache headers for thumbnails (1 day cache)
|
||||
return file
|
||||
.use_etag(true)
|
||||
.use_last_modified(true)
|
||||
@@ -406,11 +435,23 @@ async fn upload_image(
|
||||
.expect("Error stripping library root prefix")
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
.replace('\\', "/");
|
||||
|
||||
match exif::extract_exif_from_path(&uploaded_path) {
|
||||
Ok(exif_data) => {
|
||||
let timestamp = Utc::now().timestamp();
|
||||
let (content_hash, size_bytes) =
|
||||
match content_hash::compute(&uploaded_path) {
|
||||
Ok(id) => (Some(id.content_hash), Some(id.size_bytes)),
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to hash uploaded {}: {:?}",
|
||||
uploaded_path.display(),
|
||||
e
|
||||
);
|
||||
(None, None)
|
||||
}
|
||||
};
|
||||
let insert_exif = InsertImageExif {
|
||||
library_id: target_library.id,
|
||||
file_path: relative_path.clone(),
|
||||
@@ -430,8 +471,8 @@ async fn upload_image(
|
||||
date_taken: exif_data.date_taken,
|
||||
created_time: timestamp,
|
||||
last_modified: timestamp,
|
||||
content_hash: None,
|
||||
size_bytes: None,
|
||||
content_hash,
|
||||
size_bytes,
|
||||
};
|
||||
|
||||
if let Ok(mut dao) = exif_dao.lock() {
|
||||
@@ -1566,11 +1607,13 @@ fn process_new_files(
|
||||
.filter(|entry| is_image(entry) || is_video(entry))
|
||||
.filter_map(|entry| {
|
||||
let file_path = entry.path().to_path_buf();
|
||||
// Canonical rel_path is forward-slash regardless of OS so DB
|
||||
// comparisons against the batch EXIF lookup line up.
|
||||
let relative_path = file_path
|
||||
.strip_prefix(base_path)
|
||||
.ok()?
|
||||
.to_str()?
|
||||
.to_string();
|
||||
.replace('\\', "/");
|
||||
Some((file_path, relative_path))
|
||||
})
|
||||
.collect();
|
||||
@@ -1600,82 +1643,107 @@ fn process_new_files(
|
||||
};
|
||||
|
||||
let mut new_files_found = false;
|
||||
let mut files_needing_exif = Vec::new();
|
||||
let mut files_needing_row = Vec::new();
|
||||
|
||||
// Check each file for missing thumbnail or EXIF data
|
||||
// Register every image/video file in image_exif. Rows without EXIF
|
||||
// still carry library_id, rel_path, content_hash, and size_bytes so
|
||||
// derivative dedup and DB-indexed sort/filter work for every file,
|
||||
// not just photos with parseable EXIF.
|
||||
for (file_path, relative_path) in &files {
|
||||
// Check if thumbnail exists
|
||||
let thumb_path = thumbnail_directory.join(relative_path);
|
||||
let needs_thumbnail = !thumb_path.exists();
|
||||
let needs_row = !existing_exif_paths.contains_key(relative_path);
|
||||
|
||||
// Check if EXIF data exists (for supported files)
|
||||
let needs_exif = if exif::supports_exif(file_path) {
|
||||
!existing_exif_paths.contains_key(relative_path)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if needs_thumbnail || needs_exif {
|
||||
if needs_thumbnail || needs_row {
|
||||
new_files_found = true;
|
||||
|
||||
if needs_thumbnail {
|
||||
info!("New file detected (missing thumbnail): {}", relative_path);
|
||||
}
|
||||
|
||||
if needs_exif {
|
||||
files_needing_exif.push((file_path.clone(), relative_path.clone()));
|
||||
if needs_row {
|
||||
files_needing_row.push((file_path.clone(), relative_path.clone()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process EXIF data for files that need it
|
||||
if !files_needing_exif.is_empty() {
|
||||
if !files_needing_row.is_empty() {
|
||||
info!(
|
||||
"Processing EXIF data for {} files",
|
||||
files_needing_exif.len()
|
||||
"Registering {} new files in image_exif",
|
||||
files_needing_row.len()
|
||||
);
|
||||
|
||||
for (file_path, relative_path) in files_needing_exif {
|
||||
match exif::extract_exif_from_path(&file_path) {
|
||||
Ok(exif_data) => {
|
||||
let timestamp = Utc::now().timestamp();
|
||||
let insert_exif = InsertImageExif {
|
||||
library_id: library.id,
|
||||
file_path: relative_path.clone(),
|
||||
camera_make: exif_data.camera_make,
|
||||
camera_model: exif_data.camera_model,
|
||||
lens_model: exif_data.lens_model,
|
||||
width: exif_data.width,
|
||||
height: exif_data.height,
|
||||
orientation: exif_data.orientation,
|
||||
gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
|
||||
gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
|
||||
gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
|
||||
focal_length: exif_data.focal_length.map(|v| v as f32),
|
||||
aperture: exif_data.aperture.map(|v| v as f32),
|
||||
shutter_speed: exif_data.shutter_speed,
|
||||
iso: exif_data.iso,
|
||||
date_taken: exif_data.date_taken,
|
||||
created_time: timestamp,
|
||||
last_modified: timestamp,
|
||||
content_hash: None,
|
||||
size_bytes: None,
|
||||
};
|
||||
for (file_path, relative_path) in files_needing_row {
|
||||
let timestamp = Utc::now().timestamp();
|
||||
|
||||
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||
if let Err(e) = dao.store_exif(&context, insert_exif) {
|
||||
error!("Failed to store EXIF data for {}: {:?}", relative_path, e);
|
||||
} else {
|
||||
debug!("EXIF data stored for {}", relative_path);
|
||||
// Hash + size from filesystem metadata — always attempted so
|
||||
// every file gets a content_hash, even when EXIF is absent.
|
||||
let (content_hash, size_bytes) = match content_hash::compute(&file_path) {
|
||||
Ok(id) => (Some(id.content_hash), Some(id.size_bytes)),
|
||||
Err(e) => {
|
||||
warn!("Failed to hash {}: {:?}", file_path.display(), e);
|
||||
(None, None)
|
||||
}
|
||||
};
|
||||
|
||||
// EXIF is best-effort enrichment. When extraction fails (or the
|
||||
// file type doesn't support EXIF) we still store a row with all
|
||||
// EXIF fields NULL; the file remains visible to sort-by-date
|
||||
// and tag queries via its rel_path and filesystem timestamps.
|
||||
let exif_fields = if exif::supports_exif(&file_path) {
|
||||
match exif::extract_exif_from_path(&file_path) {
|
||||
Ok(data) => Some(data),
|
||||
Err(e) => {
|
||||
debug!(
|
||||
"No EXIF or parse error for {}: {:?}",
|
||||
file_path.display(),
|
||||
e
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
debug!(
|
||||
"No EXIF data or error extracting from {}: {:?}",
|
||||
file_path.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let insert_exif = InsertImageExif {
|
||||
library_id: library.id,
|
||||
file_path: relative_path.clone(),
|
||||
camera_make: exif_fields.as_ref().and_then(|e| e.camera_make.clone()),
|
||||
camera_model: exif_fields.as_ref().and_then(|e| e.camera_model.clone()),
|
||||
lens_model: exif_fields.as_ref().and_then(|e| e.lens_model.clone()),
|
||||
width: exif_fields.as_ref().and_then(|e| e.width),
|
||||
height: exif_fields.as_ref().and_then(|e| e.height),
|
||||
orientation: exif_fields.as_ref().and_then(|e| e.orientation),
|
||||
gps_latitude: exif_fields
|
||||
.as_ref()
|
||||
.and_then(|e| e.gps_latitude.map(|v| v as f32)),
|
||||
gps_longitude: exif_fields
|
||||
.as_ref()
|
||||
.and_then(|e| e.gps_longitude.map(|v| v as f32)),
|
||||
gps_altitude: exif_fields
|
||||
.as_ref()
|
||||
.and_then(|e| e.gps_altitude.map(|v| v as f32)),
|
||||
focal_length: exif_fields
|
||||
.as_ref()
|
||||
.and_then(|e| e.focal_length.map(|v| v as f32)),
|
||||
aperture: exif_fields
|
||||
.as_ref()
|
||||
.and_then(|e| e.aperture.map(|v| v as f32)),
|
||||
shutter_speed: exif_fields.as_ref().and_then(|e| e.shutter_speed.clone()),
|
||||
iso: exif_fields.as_ref().and_then(|e| e.iso),
|
||||
date_taken: exif_fields.as_ref().and_then(|e| e.date_taken),
|
||||
created_time: timestamp,
|
||||
last_modified: timestamp,
|
||||
content_hash,
|
||||
size_bytes,
|
||||
};
|
||||
|
||||
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
|
||||
if let Err(e) = dao.store_exif(&context, insert_exif) {
|
||||
error!("Failed to register {} in image_exif: {:?}", relative_path, e);
|
||||
} else {
|
||||
debug!("Registered {} in image_exif", relative_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user