duplicates: perceptual hash + soft-mark resolution + upload 409
Adds pHash + dHash columns alongside the existing blake3 content_hash so
near-duplicates (re-encoded, resized, format-converted copies) become
queryable. /duplicates/{exact,perceptual} return groups; /duplicates/
{resolve,unresolve} flip a duplicate_of_hash soft-mark on losing rows
and union perceptual-only tag sets onto the survivor. The default
/photos listing filters duplicate_of_hash IS NULL so demoted siblings
stop cluttering the grid; include_duplicates=true opts back in for
Apollo's review modal. Upload now hashes bytes pre-write and returns
409 with the canonical sibling when a file's bytes already exist.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -9,6 +9,25 @@ use crate::database::models::{
|
||||
};
|
||||
use crate::otel::trace_db_call;
|
||||
|
||||
/// Wire shape for a single member of a duplicate group, returned by
|
||||
/// `list_duplicates_*` and `lookup_duplicate_row`. Carries everything
|
||||
/// the Apollo modal needs to render a member tile and its meta line —
|
||||
/// thumbnails are derived from `(library_id, rel_path)` upstream.
|
||||
#[derive(Debug, Clone, serde::Serialize)]
|
||||
pub struct DuplicateRow {
|
||||
pub library_id: i32,
|
||||
pub rel_path: String,
|
||||
pub content_hash: String,
|
||||
pub size_bytes: Option<i64>,
|
||||
pub date_taken: Option<i64>,
|
||||
pub width: Option<i32>,
|
||||
pub height: Option<i32>,
|
||||
pub phash_64: Option<i64>,
|
||||
pub dhash_64: Option<i64>,
|
||||
pub duplicate_of_hash: Option<String>,
|
||||
pub duplicate_decided_at: Option<i64>,
|
||||
}
|
||||
|
||||
pub mod calendar_dao;
|
||||
pub mod daily_summary_dao;
|
||||
pub mod insights_dao;
|
||||
@@ -377,6 +396,104 @@ pub trait ExifDao: Sync + Send {
|
||||
size_bytes: i64,
|
||||
) -> Result<(), DbError>;
|
||||
|
||||
/// Return image rows that have a `content_hash` but no `phash_64`,
|
||||
/// oldest first. Used by the `backfill_perceptual_hash` binary.
|
||||
/// Filters by image extension at the DB layer to avoid ever asking
|
||||
/// `image_hasher` to decode a video. Returns `(library_id, rel_path)`.
|
||||
fn get_rows_missing_perceptual_hash(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
limit: i64,
|
||||
) -> Result<Vec<(i32, String)>, DbError>;
|
||||
|
||||
/// Persist computed perceptual hashes (pHash + dHash) for an
|
||||
/// existing image_exif row. Either column may be left NULL by
|
||||
/// passing `None`, but in practice the binary computes both or
|
||||
/// neither — `image_hasher` either decodes the image and produces
|
||||
/// both signals, or fails entirely.
|
||||
fn backfill_perceptual_hash(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id: i32,
|
||||
rel_path: &str,
|
||||
phash_64: Option<i64>,
|
||||
dhash_64: Option<i64>,
|
||||
) -> Result<(), DbError>;
|
||||
|
||||
/// Group exact-hash duplicates: rows whose `content_hash` appears
|
||||
/// more than once across the (optionally library-scoped) corpus.
|
||||
/// Returns one [`DuplicateRow`] per member; callers group by
|
||||
/// `content_hash`. When `include_resolved=false`, rows already
|
||||
/// soft-marked (`duplicate_of_hash IS NOT NULL`) are excluded so
|
||||
/// the modal doesn't re-surface decisions the user already made.
|
||||
fn list_duplicates_exact(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id: Option<i32>,
|
||||
include_resolved: bool,
|
||||
) -> Result<Vec<DuplicateRow>, DbError>;
|
||||
|
||||
/// Return all rows with a non-null `phash_64` (optionally library-
|
||||
/// scoped), used by the perceptual-cluster routine in
|
||||
/// [`crate::main`] to single-link cluster via Hamming distance.
|
||||
/// Each returned row is a *distinct content_hash* — exact duplicates
|
||||
/// are collapsed at the DB layer so the in-memory clusterer doesn't
|
||||
/// rediscover them.
|
||||
fn list_perceptual_candidates(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id: Option<i32>,
|
||||
include_resolved: bool,
|
||||
) -> Result<Vec<DuplicateRow>, DbError>;
|
||||
|
||||
/// Look up a single row's metadata by `(library_id, rel_path)`. Used
|
||||
/// by the resolve endpoint to map the request payload to the
|
||||
/// underlying `content_hash` before writing the soft-mark. Returns
|
||||
/// `Ok(None)` if the file doesn't exist in `image_exif`.
|
||||
fn lookup_duplicate_row(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id: i32,
|
||||
rel_path: &str,
|
||||
) -> Result<Option<DuplicateRow>, DbError>;
|
||||
|
||||
/// Soft-mark a file as a duplicate of `survivor_hash`. Sets
|
||||
/// `duplicate_of_hash` and `duplicate_decided_at` on the row(s)
|
||||
/// matching `(library_id, rel_path)`. The file stays on disk; the
|
||||
/// default `/photos` listing hides it because of the
|
||||
/// `duplicate_of_hash IS NULL` filter.
|
||||
fn set_duplicate_of(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id: i32,
|
||||
rel_path: &str,
|
||||
survivor_hash: &str,
|
||||
decided_at: i64,
|
||||
) -> Result<(), DbError>;
|
||||
|
||||
/// Reverse a soft-mark: clears `duplicate_of_hash` and
|
||||
/// `duplicate_decided_at`. Used by the modal's UNRESOLVE chip.
|
||||
fn clear_duplicate_of(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id: i32,
|
||||
rel_path: &str,
|
||||
) -> Result<(), DbError>;
|
||||
|
||||
/// Union the tags from `demoted_hash` onto `survivor_hash`. Used at
|
||||
/// resolve time for *perceptual* duplicates (different content_hashes,
|
||||
/// independent tag sets) so the user doesn't lose their tagging work
|
||||
/// when promoting a survivor. Idempotent: a tag already on the survivor
|
||||
/// is left alone. Exact duplicates (same content_hash) don't need this
|
||||
/// because their tag rows are already shared.
|
||||
fn union_perceptual_tags(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
survivor_hash: &str,
|
||||
demoted_hash: &str,
|
||||
survivor_rel_path: &str,
|
||||
) -> Result<(), DbError>;
|
||||
|
||||
/// Return the first EXIF row with the given content hash (any library).
|
||||
/// Used by thumbnail/HLS generation to detect pre-existing derivatives
|
||||
/// from another library before regenerating.
|
||||
@@ -440,11 +557,17 @@ pub trait ExifDao: Sync + Send {
|
||||
/// `library_ids` is empty, rows from every library are returned. Used by
|
||||
/// `/photos` recursive listing to skip the filesystem walk — the watcher
|
||||
/// keeps image_exif in parity with disk via the reconciliation pass.
|
||||
///
|
||||
/// `include_duplicates=false` filters out rows soft-marked with
|
||||
/// `duplicate_of_hash IS NOT NULL` so the default photo listing hides
|
||||
/// demoted siblings; the Apollo duplicates modal passes `true` to
|
||||
/// see both survivors and demoted members inside a group.
|
||||
fn list_rel_paths_for_libraries(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_ids: &[i32],
|
||||
path_prefix: Option<&str>,
|
||||
include_duplicates: bool,
|
||||
) -> Result<Vec<(i32, String)>, DbError>;
|
||||
|
||||
/// Delete a single image_exif row scoped to `(library_id, rel_path)`.
|
||||
@@ -1077,6 +1200,7 @@ impl ExifDao for SqliteExifDao {
|
||||
context: &opentelemetry::Context,
|
||||
library_ids: &[i32],
|
||||
path_prefix: Option<&str>,
|
||||
include_duplicates: bool,
|
||||
) -> Result<Vec<(i32, String)>, DbError> {
|
||||
trace_db_call(context, "query", "list_rel_paths_for_libraries", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
@@ -1097,6 +1221,10 @@ impl ExifDao for SqliteExifDao {
|
||||
query = query.filter(rel_path.like(pattern).escape('\\'));
|
||||
}
|
||||
|
||||
if !include_duplicates {
|
||||
query = query.filter(duplicate_of_hash.is_null());
|
||||
}
|
||||
|
||||
query
|
||||
.load::<(i32, String)>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
@@ -1168,6 +1296,421 @@ impl ExifDao for SqliteExifDao {
|
||||
)
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_rows_missing_perceptual_hash(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
limit: i64,
|
||||
) -> Result<Vec<(i32, String)>, DbError> {
|
||||
trace_db_call(
|
||||
context,
|
||||
"query",
|
||||
"get_rows_missing_perceptual_hash",
|
||||
|_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
// Image-only filter via extension. Videos and decode-failures
|
||||
// would always come back NULL otherwise and the binary would
|
||||
// grind through them on every run. The list mirrors the file
|
||||
// formats `image` 0.25 / `image_hasher` 3.x can decode.
|
||||
image_exif
|
||||
.filter(content_hash.is_not_null())
|
||||
.filter(phash_64.is_null())
|
||||
.filter(
|
||||
rel_path
|
||||
.like("%.jpg")
|
||||
.or(rel_path.like("%.jpeg"))
|
||||
.or(rel_path.like("%.JPG"))
|
||||
.or(rel_path.like("%.JPEG"))
|
||||
.or(rel_path.like("%.png"))
|
||||
.or(rel_path.like("%.PNG"))
|
||||
.or(rel_path.like("%.webp"))
|
||||
.or(rel_path.like("%.WEBP"))
|
||||
.or(rel_path.like("%.tif"))
|
||||
.or(rel_path.like("%.tiff"))
|
||||
.or(rel_path.like("%.TIF"))
|
||||
.or(rel_path.like("%.TIFF"))
|
||||
.or(rel_path.like("%.avif"))
|
||||
.or(rel_path.like("%.AVIF")),
|
||||
)
|
||||
.select((library_id, rel_path))
|
||||
.order(id.asc())
|
||||
.limit(limit)
|
||||
.load::<(i32, String)>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
},
|
||||
)
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn backfill_perceptual_hash(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id_val: i32,
|
||||
rel_path_val: &str,
|
||||
phash_val: Option<i64>,
|
||||
dhash_val: Option<i64>,
|
||||
) -> Result<(), DbError> {
|
||||
trace_db_call(context, "update", "backfill_perceptual_hash", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
diesel::update(
|
||||
image_exif
|
||||
.filter(library_id.eq(library_id_val))
|
||||
.filter(rel_path.eq(rel_path_val)),
|
||||
)
|
||||
.set((phash_64.eq(phash_val), dhash_64.eq(dhash_val)))
|
||||
.execute(connection.deref_mut())
|
||||
.map(|_| ())
|
||||
.map_err(|_| anyhow::anyhow!("Update error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||
}
|
||||
|
||||
fn list_duplicates_exact(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id_filter: Option<i32>,
|
||||
include_resolved: bool,
|
||||
) -> Result<Vec<DuplicateRow>, DbError> {
|
||||
trace_db_call(context, "query", "list_duplicates_exact", |_span| {
|
||||
// Sub-select the content_hashes that appear more than once
|
||||
// (optionally library-scoped), then load the full member rows
|
||||
// for those hashes ordered by hash + library + path so the
|
||||
// caller can stream-group without buffering the full dataset.
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
// Step 1: hashes with count > 1.
|
||||
let dup_hashes: Vec<String> = {
|
||||
use schema::image_exif::dsl::*;
|
||||
let mut q = image_exif
|
||||
.filter(content_hash.is_not_null())
|
||||
.group_by(content_hash)
|
||||
.select(content_hash.assume_not_null())
|
||||
.having(diesel::dsl::count_star().gt(1))
|
||||
.into_boxed();
|
||||
if let Some(lib) = library_id_filter {
|
||||
q = q.filter(library_id.eq(lib));
|
||||
}
|
||||
q.load::<String>(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))?
|
||||
};
|
||||
|
||||
if dup_hashes.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
// Step 2: every member row for those hashes.
|
||||
use schema::image_exif::dsl::*;
|
||||
let mut q = image_exif
|
||||
.filter(content_hash.eq_any(&dup_hashes))
|
||||
.select((
|
||||
library_id,
|
||||
rel_path,
|
||||
content_hash.assume_not_null(),
|
||||
size_bytes,
|
||||
date_taken,
|
||||
width,
|
||||
height,
|
||||
phash_64,
|
||||
dhash_64,
|
||||
duplicate_of_hash,
|
||||
duplicate_decided_at,
|
||||
))
|
||||
.order((content_hash.asc(), library_id.asc(), rel_path.asc()))
|
||||
.into_boxed();
|
||||
if let Some(lib) = library_id_filter {
|
||||
q = q.filter(library_id.eq(lib));
|
||||
}
|
||||
if !include_resolved {
|
||||
q = q.filter(duplicate_of_hash.is_null());
|
||||
}
|
||||
|
||||
let rows: Vec<(
|
||||
i32,
|
||||
String,
|
||||
String,
|
||||
Option<i64>,
|
||||
Option<i64>,
|
||||
Option<i32>,
|
||||
Option<i32>,
|
||||
Option<i64>,
|
||||
Option<i64>,
|
||||
Option<String>,
|
||||
Option<i64>,
|
||||
)> = q
|
||||
.load(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))?;
|
||||
|
||||
Ok(rows
|
||||
.into_iter()
|
||||
.map(|r| DuplicateRow {
|
||||
library_id: r.0,
|
||||
rel_path: r.1,
|
||||
content_hash: r.2,
|
||||
size_bytes: r.3,
|
||||
date_taken: r.4,
|
||||
width: r.5,
|
||||
height: r.6,
|
||||
phash_64: r.7,
|
||||
dhash_64: r.8,
|
||||
duplicate_of_hash: r.9,
|
||||
duplicate_decided_at: r.10,
|
||||
})
|
||||
.collect())
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn list_perceptual_candidates(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id_filter: Option<i32>,
|
||||
include_resolved: bool,
|
||||
) -> Result<Vec<DuplicateRow>, DbError> {
|
||||
trace_db_call(context, "query", "list_perceptual_candidates", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
// For perceptual candidates we want one canonical row per
|
||||
// distinct content_hash — exact dups are clustered by the
|
||||
// exact-dup query and would only pollute the perceptual
|
||||
// graph with zero-distance edges. Diesel doesn't have a
|
||||
// clean `DISTINCT ON`, so we load every row and dedup
|
||||
// client-side keyed on content_hash. The result set is small
|
||||
// (only rows with a phash) and the cost is negligible vs
|
||||
// the BK-tree clustering that follows.
|
||||
let mut q = image_exif
|
||||
.filter(content_hash.is_not_null())
|
||||
.filter(phash_64.is_not_null())
|
||||
.select((
|
||||
library_id,
|
||||
rel_path,
|
||||
content_hash.assume_not_null(),
|
||||
size_bytes,
|
||||
date_taken,
|
||||
width,
|
||||
height,
|
||||
phash_64,
|
||||
dhash_64,
|
||||
duplicate_of_hash,
|
||||
duplicate_decided_at,
|
||||
))
|
||||
.order((content_hash.asc(), library_id.asc(), rel_path.asc()))
|
||||
.into_boxed();
|
||||
|
||||
if let Some(lib) = library_id_filter {
|
||||
q = q.filter(library_id.eq(lib));
|
||||
}
|
||||
if !include_resolved {
|
||||
q = q.filter(duplicate_of_hash.is_null());
|
||||
}
|
||||
|
||||
let rows: Vec<(
|
||||
i32,
|
||||
String,
|
||||
String,
|
||||
Option<i64>,
|
||||
Option<i64>,
|
||||
Option<i32>,
|
||||
Option<i32>,
|
||||
Option<i64>,
|
||||
Option<i64>,
|
||||
Option<String>,
|
||||
Option<i64>,
|
||||
)> = q
|
||||
.load(connection.deref_mut())
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))?;
|
||||
|
||||
// Dedup keyed on content_hash, keeping the first occurrence
|
||||
// (deterministic by the SQL ORDER BY: lowest library_id,
|
||||
// then lexicographically smallest rel_path).
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
let mut out = Vec::with_capacity(rows.len());
|
||||
for r in rows {
|
||||
if seen.insert(r.2.clone()) {
|
||||
out.push(DuplicateRow {
|
||||
library_id: r.0,
|
||||
rel_path: r.1,
|
||||
content_hash: r.2,
|
||||
size_bytes: r.3,
|
||||
date_taken: r.4,
|
||||
width: r.5,
|
||||
height: r.6,
|
||||
phash_64: r.7,
|
||||
dhash_64: r.8,
|
||||
duplicate_of_hash: r.9,
|
||||
duplicate_decided_at: r.10,
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(out)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn lookup_duplicate_row(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id_val: i32,
|
||||
rel_path_val: &str,
|
||||
) -> Result<Option<DuplicateRow>, DbError> {
|
||||
trace_db_call(context, "query", "lookup_duplicate_row", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
image_exif
|
||||
.filter(library_id.eq(library_id_val))
|
||||
.filter(rel_path.eq(rel_path_val))
|
||||
.filter(content_hash.is_not_null())
|
||||
.select((
|
||||
library_id,
|
||||
rel_path,
|
||||
content_hash.assume_not_null(),
|
||||
size_bytes,
|
||||
date_taken,
|
||||
width,
|
||||
height,
|
||||
phash_64,
|
||||
dhash_64,
|
||||
duplicate_of_hash,
|
||||
duplicate_decided_at,
|
||||
))
|
||||
.first::<(
|
||||
i32,
|
||||
String,
|
||||
String,
|
||||
Option<i64>,
|
||||
Option<i64>,
|
||||
Option<i32>,
|
||||
Option<i32>,
|
||||
Option<i64>,
|
||||
Option<i64>,
|
||||
Option<String>,
|
||||
Option<i64>,
|
||||
)>(connection.deref_mut())
|
||||
.optional()
|
||||
.map(|opt| {
|
||||
opt.map(|r| DuplicateRow {
|
||||
library_id: r.0,
|
||||
rel_path: r.1,
|
||||
content_hash: r.2,
|
||||
size_bytes: r.3,
|
||||
date_taken: r.4,
|
||||
width: r.5,
|
||||
height: r.6,
|
||||
phash_64: r.7,
|
||||
dhash_64: r.8,
|
||||
duplicate_of_hash: r.9,
|
||||
duplicate_decided_at: r.10,
|
||||
})
|
||||
})
|
||||
.map_err(|_| anyhow::anyhow!("Query error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn set_duplicate_of(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id_val: i32,
|
||||
rel_path_val: &str,
|
||||
survivor_hash: &str,
|
||||
decided_at: i64,
|
||||
) -> Result<(), DbError> {
|
||||
trace_db_call(context, "update", "set_duplicate_of", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
diesel::update(
|
||||
image_exif
|
||||
.filter(library_id.eq(library_id_val))
|
||||
.filter(rel_path.eq(rel_path_val)),
|
||||
)
|
||||
.set((
|
||||
duplicate_of_hash.eq(survivor_hash),
|
||||
duplicate_decided_at.eq(decided_at),
|
||||
))
|
||||
.execute(connection.deref_mut())
|
||||
.map(|_| ())
|
||||
.map_err(|_| anyhow::anyhow!("Update error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||
}
|
||||
|
||||
fn clear_duplicate_of(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
library_id_val: i32,
|
||||
rel_path_val: &str,
|
||||
) -> Result<(), DbError> {
|
||||
trace_db_call(context, "update", "clear_duplicate_of", |_span| {
|
||||
use schema::image_exif::dsl::*;
|
||||
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
diesel::update(
|
||||
image_exif
|
||||
.filter(library_id.eq(library_id_val))
|
||||
.filter(rel_path.eq(rel_path_val)),
|
||||
)
|
||||
.set((
|
||||
duplicate_of_hash.eq::<Option<String>>(None),
|
||||
duplicate_decided_at.eq::<Option<i64>>(None),
|
||||
))
|
||||
.execute(connection.deref_mut())
|
||||
.map(|_| ())
|
||||
.map_err(|_| anyhow::anyhow!("Update error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||
}
|
||||
|
||||
fn union_perceptual_tags(
|
||||
&mut self,
|
||||
context: &opentelemetry::Context,
|
||||
survivor_hash: &str,
|
||||
demoted_hash: &str,
|
||||
survivor_rel_path: &str,
|
||||
) -> Result<(), DbError> {
|
||||
trace_db_call(context, "update", "union_perceptual_tags", |_span| {
|
||||
// INSERT OR IGNORE handles two relevant uniqueness paths:
|
||||
// - tagged_photo (rel_path, tag_id) is the historical key,
|
||||
// so existing tag rows under the survivor's path collide
|
||||
// and stay put.
|
||||
// - The (rel_path, tag_id) collision is the one that
|
||||
// matters for idempotence; (content_hash, tag_id) at the
|
||||
// bytes level isn't enforced by SQLite but the read path
|
||||
// dedups on it, so an extra row would be cosmetic.
|
||||
// Tags whose rel_path differs are inserted, picking up the
|
||||
// survivor's content_hash so they live under the right bytes.
|
||||
let mut connection = self.connection.lock().expect("Unable to get ExifDao");
|
||||
|
||||
diesel::sql_query(
|
||||
"INSERT OR IGNORE INTO tagged_photo (rel_path, tag_id, created_time, content_hash) \
|
||||
SELECT ?, tag_id, strftime('%s','now'), ? \
|
||||
FROM tagged_photo \
|
||||
WHERE content_hash = ? \
|
||||
AND tag_id NOT IN ( \
|
||||
SELECT tag_id FROM tagged_photo WHERE content_hash = ? \
|
||||
)",
|
||||
)
|
||||
.bind::<diesel::sql_types::Text, _>(survivor_rel_path)
|
||||
.bind::<diesel::sql_types::Text, _>(survivor_hash)
|
||||
.bind::<diesel::sql_types::Text, _>(demoted_hash)
|
||||
.bind::<diesel::sql_types::Text, _>(survivor_hash)
|
||||
.execute(connection.deref_mut())
|
||||
.map(|_| ())
|
||||
.map_err(|_| anyhow::anyhow!("Tag union error"))
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -1204,6 +1747,8 @@ mod exif_dao_tests {
|
||||
last_modified: 0,
|
||||
content_hash: None,
|
||||
size_bytes: None,
|
||||
phash_64: None,
|
||||
dhash_64: None,
|
||||
},
|
||||
)
|
||||
.expect("insert exif row");
|
||||
|
||||
@@ -59,6 +59,10 @@ pub struct InsertImageExif {
|
||||
pub last_modified: i64,
|
||||
pub content_hash: Option<String>,
|
||||
pub size_bytes: Option<i64>,
|
||||
/// 64-bit pHash (DCT) packed as i64. NULL for videos and decode failures.
|
||||
pub phash_64: Option<i64>,
|
||||
/// 64-bit dHash (gradient). NULL for videos and decode failures.
|
||||
pub dhash_64: Option<i64>,
|
||||
}
|
||||
|
||||
// Field order matches the post-migration column order in `image_exif`.
|
||||
@@ -86,6 +90,14 @@ pub struct ImageExif {
|
||||
pub last_modified: i64,
|
||||
pub content_hash: Option<String>,
|
||||
pub size_bytes: Option<i64>,
|
||||
pub phash_64: Option<i64>,
|
||||
pub dhash_64: Option<i64>,
|
||||
/// When non-null, this row is a soft-marked duplicate of the file
|
||||
/// whose `content_hash` matches this value. The default `/photos`
|
||||
/// listing filters such rows out.
|
||||
pub duplicate_of_hash: Option<String>,
|
||||
/// Unix seconds at which the resolve was committed.
|
||||
pub duplicate_decided_at: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Insertable)]
|
||||
|
||||
@@ -121,6 +121,10 @@ diesel::table! {
|
||||
last_modified -> BigInt,
|
||||
content_hash -> Nullable<Text>,
|
||||
size_bytes -> Nullable<BigInt>,
|
||||
phash_64 -> Nullable<BigInt>,
|
||||
dhash_64 -> Nullable<BigInt>,
|
||||
duplicate_of_hash -> Nullable<Text>,
|
||||
duplicate_decided_at -> Nullable<BigInt>,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user