duplicates: perceptual hash + soft-mark resolution + upload 409
Adds pHash + dHash columns alongside the existing blake3 content_hash so
near-duplicates (re-encoded, resized, format-converted copies) become
queryable. /duplicates/{exact,perceptual} return groups; /duplicates/
{resolve,unresolve} flip a duplicate_of_hash soft-mark on losing rows
and union perceptual-only tag sets onto the survivor. The default
/photos listing filters duplicate_of_hash IS NULL so demoted siblings
stop cluttering the grid; include_duplicates=true opts back in for
Apollo's review modal. Upload now hashes bytes pre-write and returns
409 with the canonical sibling when a file's bytes already exist.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
 src/main.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
@@ -62,6 +62,8 @@ use opentelemetry::{KeyValue, global};
 mod ai;
 mod auth;
 mod content_hash;
+mod perceptual_hash;
+mod duplicates;
 mod data;
 mod database;
 mod error;
@@ -530,6 +532,11 @@ async fn set_image_gps(
             .ok()
             .map(|c| c.content_hash),
         size_bytes: content_hash::compute(&full_path).ok().map(|c| c.size_bytes),
+        // GPS-update path doesn't touch perceptual hashes either; columns
+        // ignored by update_exif. Compute best-effort so a new file lands
+        // with a usable signal; failure just leaves prior values in place.
+        phash_64: perceptual_hash::compute(&full_path).map(|h| h.phash_64),
+        dhash_64: perceptual_hash::compute(&full_path).map(|h| h.dhash_64),
     };

     let updated = {
@@ -652,6 +659,39 @@ async fn upload_image(
         &full_path.to_str().unwrap().to_string(),
         true,
     ) {
+        // Pre-write content-hash check: if these exact bytes already
+        // exist anywhere in any library (and aren't themselves
+        // soft-marked as duplicates), don't write the file. Return
+        // 409 with the canonical sibling so the mobile app can show
+        // a friendly "already in your library" toast.
+        let upload_hash = blake3::Hasher::new()
+            .update(&file_content)
+            .finalize()
+            .to_hex()
+            .to_string();
+        {
+            let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+            if let Ok(Some(existing)) =
+                dao.find_by_content_hash(&span_context, &upload_hash)
+            {
+                if existing.duplicate_of_hash.is_none() {
+                    let library_name = libraries::load_all(&mut crate::database::connect())
+                        .into_iter()
+                        .find(|l| l.id == existing.library_id)
+                        .map(|l| l.name);
+                    span.set_status(Status::Ok);
+                    return HttpResponse::Conflict().json(serde_json::json!({
+                        "duplicate_of": {
+                            "library_id": existing.library_id,
+                            "rel_path": existing.file_path,
+                        },
+                        "content_hash": upload_hash,
+                        "library_name": library_name,
+                    }));
+                }
+            }
+        }
+
         let context =
             opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());
         tracer
@@ -710,6 +750,7 @@ async fn upload_image(
             (None, None)
         }
     };
+    let perceptual = perceptual_hash::compute(&uploaded_path);
     let insert_exif = InsertImageExif {
         library_id: target_library.id,
         file_path: relative_path.clone(),
@@ -731,6 +772,8 @@ async fn upload_image(
         last_modified: timestamp,
         content_hash,
         size_bytes,
+        phash_64: perceptual.map(|h| h.phash_64),
+        dhash_64: perceptual.map(|h| h.dhash_64),
     };

     if let Ok(mut dao) = exif_dao.lock() {
@@ -1661,6 +1704,7 @@ fn main() -> std::io::Result<()> {
             .add_feature(add_tag_services::<_, SqliteTagDao>)
             .add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>)
             .add_feature(faces::add_face_services::<_, faces::SqliteFaceDao>)
+            .add_feature(duplicates::add_duplicate_services)
             .app_data(app_data.clone())
             .app_data::<Data<RealFileSystem>>(Data::new(RealFileSystem::new(
                 app_data.base_path.clone(),
@@ -2309,6 +2353,12 @@ fn process_new_files(
         }
     };

+    // Perceptual hashes (pHash + dHash). Best-effort — None for
+    // videos and decode failures. Drives near-duplicate detection
+    // in the Apollo duplicates surface; failure here is non-fatal
+    // and never blocks indexing.
+    let perceptual = perceptual_hash::compute(&file_path);
+
     // EXIF is best-effort enrichment. When extraction fails (or the
     // file type doesn't support EXIF) we still store a row with all
     // EXIF fields NULL; the file remains visible to sort-by-date
@@ -2360,6 +2410,8 @@ fn process_new_files(
         last_modified: timestamp,
         content_hash,
         size_bytes,
+        phash_64: perceptual.map(|h| h.phash_64),
+        dhash_64: perceptual.map(|h| h.dhash_64),
     };

     let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
Reference in New Issue
Block a user