duplicates: perceptual hash + soft-mark resolution + upload 409

Adds pHash + dHash columns alongside the existing blake3 content_hash so
near-duplicates (re-encoded, resized, format-converted copies) become
queryable. /duplicates/{exact,perceptual} return groups;
/duplicates/{resolve,unresolve} flip a duplicate_of_hash soft-mark on
losing rows and union perceptual-only tag sets onto the survivor. The default
/photos listing filters duplicate_of_hash IS NULL so demoted siblings
stop cluttering the grid; include_duplicates=true opts back in for
Apollo's review modal. Upload now hashes bytes pre-write and returns
409 with the canonical sibling when a file's bytes already exist.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-03 17:36:01 -04:00
parent 4340b164eb
commit 7584cd8792
14 changed files with 1852 additions and 1 deletions

View File

@@ -583,9 +583,10 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
} else {
Some(trimmed)
};
let include_duplicates = req.include_duplicates.unwrap_or(false);
let rows = {
let mut dao = exif_dao.lock().expect("Unable to get ExifDao");
dao.list_rel_paths_for_libraries(&span_context, &lib_ids, prefix)
dao.list_rel_paths_for_libraries(&span_context, &lib_ids, prefix, include_duplicates)
.unwrap_or_else(|e| {
warn!("list_rel_paths_for_libraries failed: {:?}", e);
Vec::new()
@@ -1503,6 +1504,10 @@ mod tests {
last_modified: data.last_modified,
content_hash: data.content_hash.clone(),
size_bytes: data.size_bytes,
phash_64: data.phash_64,
dhash_64: data.dhash_64,
duplicate_of_hash: None,
duplicate_decided_at: None,
})
}
@@ -1542,6 +1547,10 @@ mod tests {
last_modified: data.last_modified,
content_hash: data.content_hash.clone(),
size_bytes: data.size_bytes,
phash_64: data.phash_64,
dhash_64: data.dhash_64,
duplicate_of_hash: None,
duplicate_decided_at: None,
})
}
@@ -1689,6 +1698,7 @@ mod tests {
_context: &opentelemetry::Context,
_library_ids: &[i32],
_path_prefix: Option<&str>,
_include_duplicates: bool,
) -> Result<Vec<(i32, String)>, DbError> {
Ok(vec![])
}
@@ -1719,6 +1729,82 @@ mod tests {
) -> Result<Vec<(i32, String)>, DbError> {
Ok(Vec::new())
}
/// Stub implementation: neither `_context` nor `_limit` is consulted, and
/// the call always succeeds with an empty list.
///
/// NOTE(review): the `(i32, String)` pairs are presumably
/// `(library_id, rel_path)` — confirm against the trait definition.
fn get_rows_missing_perceptual_hash(
    &mut self,
    _context: &opentelemetry::Context,
    _limit: i64,
) -> Result<Vec<(i32, String)>, DbError> {
    // Nothing is ever reported as missing a perceptual hash here.
    Ok(vec![])
}
/// Stub implementation: accepts a library id, relative path and the two
/// optional 64-bit hash values, ignores all of them, and reports success.
///
/// No state is read or written by this implementation.
fn backfill_perceptual_hash(
    &mut self,
    _context: &opentelemetry::Context,
    _library_id: i32,
    _rel_path: &str,
    _phash_64: Option<i64>,
    _dhash_64: Option<i64>,
) -> Result<(), DbError> {
    // Deliberate no-op: every argument is discarded.
    Ok(())
}
/// Stub implementation: always returns an empty set of `DuplicateRow`s.
///
/// The optional library filter and the `_include_resolved` flag are
/// accepted only to match the expected signature; neither affects the
/// result.
fn list_duplicates_exact(
    &mut self,
    _context: &opentelemetry::Context,
    _library_id: Option<i32>,
    _include_resolved: bool,
) -> Result<Vec<crate::database::DuplicateRow>, DbError> {
    // No rows are ever produced by this implementation.
    Ok(vec![])
}
/// Stub implementation: always returns an empty candidate list.
///
/// `_library_id` and `_include_resolved` are ignored, so the result is the
/// same for every combination of arguments.
fn list_perceptual_candidates(
    &mut self,
    _context: &opentelemetry::Context,
    _library_id: Option<i32>,
    _include_resolved: bool,
) -> Result<Vec<crate::database::DuplicateRow>, DbError> {
    // No candidates are ever produced by this implementation.
    Ok(vec![])
}
/// Stub implementation: every lookup succeeds but finds nothing.
///
/// Returns `Ok(None)` regardless of the library id or relative path that
/// was asked for.
fn lookup_duplicate_row(
    &mut self,
    _context: &opentelemetry::Context,
    _library_id: i32,
    _rel_path: &str,
) -> Result<Option<crate::database::DuplicateRow>, DbError> {
    // There is no backing store to search, so the answer is always "absent".
    Ok(None)
}
/// Stub implementation: accepts a row identity (`_library_id`,
/// `_rel_path`), a survivor hash and a decision timestamp, then reports
/// success without recording anything.
fn set_duplicate_of(
    &mut self,
    _context: &opentelemetry::Context,
    _library_id: i32,
    _rel_path: &str,
    _survivor_hash: &str,
    _decided_at: i64,
) -> Result<(), DbError> {
    // Deliberate no-op: every argument is discarded.
    Ok(())
}
/// Stub implementation: reports success for any `_library_id` /
/// `_rel_path` pair without modifying any state.
fn clear_duplicate_of(
    &mut self,
    _context: &opentelemetry::Context,
    _library_id: i32,
    _rel_path: &str,
) -> Result<(), DbError> {
    // Deliberate no-op: there is nothing to clear.
    Ok(())
}
/// Stub implementation: takes the survivor hash, the demoted hash and the
/// survivor's relative path, ignores all three, and reports success.
///
/// No tag data is read or merged by this implementation.
fn union_perceptual_tags(
    &mut self,
    _context: &opentelemetry::Context,
    _survivor_hash: &str,
    _demoted_hash: &str,
    _survivor_rel_path: &str,
) -> Result<(), DbError> {
    // Deliberate no-op: every argument is discarded.
    Ok(())
}
}
mod api {