8503ef7884
Pure mechanical cleanup of accumulated drift in files outside the
HLS-content-hash branch's main change set. No behavior change.
- `cargo fmt` on every previously-misformatted file
(`ai/insight_generator.rs`, `database/knowledge_dao.rs`,
`faces.rs`, `knowledge.rs`, `libraries.rs`).
- `cargo clippy --fix`:
- `needless_borrow`: `&library` → `library` in `handlers/image.rs`
(two sites in the photo-listing path).
- Manual clippy pass for warnings clippy emits but can't auto-apply:
- `field_reassign_with_default` in `database/reconcile.rs::run` —
consolidated into a struct-literal initializer.
- `needless_range_loop` in `database/knowledge_dao.rs::union_perceptual_tags`
— inner `for b in (a+1)..indices.len() { let ib = indices[b]; ... }`
becomes `for &ib in &indices[a + 1..] { ... }`.
- Doc-list indentation: continuation lines under nested bullets in
`database/mod.rs::get_memories_in_window` and
`database/knowledge_dao.rs::build_entity_graph` realigned to the
list-item content column.
Deliberately not touched (each deserves its own focused commit, with
testing, rather than getting bundled into a sweep):
- 4× `deprecated count_distinct` in `faces.rs` — diesel API migration
to `AggregateExpressionMethods::aggregate_distinct` may shift result
types; needs verification against the existing stats queries.
- `await_holding_lock` in `knowledge.rs:807` — `std::sync::Mutex` held
across `ollama.generate(...).await`. Genuine concurrency bug; fix
requires understanding the surrounding flow before just dropping
the guard.
- 2× `type_complexity` in `database/mod.rs` — cosmetic, would need a
`type` alias and corresponding callers updated.
- Dead `total_deleted` on `library_maintenance::GcStats` and
`file_scan::enumerate_indexable_files` — both are public surface
retained for future use; deletion is a separate decision.
All 707 tests still pass. Release build clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
381 lines
14 KiB
Rust
381 lines
14 KiB
Rust
//! Reconciliation pass for hash-keyed derived data.
|
|
//!
|
|
//! As `backfill_unhashed_backlog` populates `image_exif.content_hash`
|
|
//! for legacy rows, we want the matching `tagged_photo` and
|
|
//! `photo_insights` rows — which were inserted before the hash was
|
|
//! known — to inherit the hash too. Otherwise reads keep falling back
|
|
//! to the rel_path path even when a hash is now available.
|
|
//!
|
|
//! Two passes:
|
|
//! 1. **Hash backfill** — for every `tagged_photo` / `photo_insights`
|
|
//! row with NULL `content_hash`, look up the matching
|
|
//! `image_exif.content_hash` and write it. SQL-only; idempotent;
|
|
//! a no-op once everything is hashed.
|
|
//! 2. **Insight scalar merge** — when multiple `photo_insights` rows
|
|
//! share a `content_hash` with `is_current = true`, only the
|
|
//! earliest `generated_at` keeps `is_current = true` (per the
|
|
//! "earliest wins" rule in CLAUDE.md → "Multi-library data
|
|
//! model"). Others are demoted, not deleted, so they remain
|
|
//! visible in history endpoints.
|
|
//!
|
|
//! Tags are set-valued under the policy (union on read), so there's no
|
|
//! analogous "collapse" pass — duplicate `(tag_id, content_hash)` rows
|
|
//! across libraries are harmless and correctly de-duped at read time
|
|
//! by the existing `DISTINCT` queries.
|
|
//!
|
|
//! The pass operates on the database alone — no filesystem access —
|
|
//! so it doesn't need the library availability gate.
|
|
|
|
// The lib doesn't call into this module directly — the watcher (in the
|
|
// bin) does. Dead-code analysis at the lib level can't see that, so
|
|
// suppress at the module level. Tests still exercise every function.
|
|
#![allow(dead_code)]
|
|
|
|
use diesel::prelude::*;
|
|
use diesel::sql_query;
|
|
use diesel::sqlite::SqliteConnection;
|
|
use log::{debug, info, warn};
|
|
|
|
/// Outcome of a reconciliation tick. Tracked so the watcher can log
|
|
/// progress when something changed and stay quiet when nothing did.
|
|
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
|
|
pub struct ReconcileStats {
|
|
pub tagged_photo_hashes_filled: usize,
|
|
pub photo_insights_hashes_filled: usize,
|
|
pub photo_insights_demoted: usize,
|
|
}
|
|
|
|
impl ReconcileStats {
|
|
pub fn changed(&self) -> bool {
|
|
self.tagged_photo_hashes_filled > 0
|
|
|| self.photo_insights_hashes_filled > 0
|
|
|| self.photo_insights_demoted > 0
|
|
}
|
|
}
|
|
|
|
/// Run the reconciliation pass. Idempotent — safe to call on every
|
|
/// watcher tick. Errors are logged but never propagated; reconciliation
|
|
/// is best-effort and a transient DB hiccup must not stall the watcher.
|
|
pub fn run(conn: &mut SqliteConnection) -> ReconcileStats {
|
|
let stats = ReconcileStats {
|
|
tagged_photo_hashes_filled: match backfill_tagged_photo_hashes(conn) {
|
|
Ok(n) => n,
|
|
Err(e) => {
|
|
warn!("reconcile: tagged_photo hash backfill failed: {:?}", e);
|
|
0
|
|
}
|
|
},
|
|
photo_insights_hashes_filled: match backfill_photo_insights_hashes(conn) {
|
|
Ok(n) => n,
|
|
Err(e) => {
|
|
warn!("reconcile: photo_insights hash backfill failed: {:?}", e);
|
|
0
|
|
}
|
|
},
|
|
photo_insights_demoted: match collapse_insight_currents(conn) {
|
|
Ok(n) => n,
|
|
Err(e) => {
|
|
warn!("reconcile: photo_insights scalar merge failed: {:?}", e);
|
|
0
|
|
}
|
|
},
|
|
};
|
|
|
|
if stats.changed() {
|
|
info!(
|
|
"reconcile: filled {} tagged_photo hash(es), {} photo_insights hash(es); demoted {} non-current insight row(s)",
|
|
stats.tagged_photo_hashes_filled,
|
|
stats.photo_insights_hashes_filled,
|
|
stats.photo_insights_demoted,
|
|
);
|
|
} else {
|
|
debug!("reconcile: no changes this tick");
|
|
}
|
|
|
|
stats
|
|
}
|
|
|
|
/// Populate `tagged_photo.content_hash` for any row that still has
|
|
/// NULL by joining on `rel_path` against `image_exif`. tagged_photo
|
|
/// doesn't carry `library_id`, so a path that exists under multiple
|
|
/// libraries with different content is genuinely ambiguous; we pick
|
|
/// any non-null hash for that path. Same trade-off as the migration
|
|
/// backfill — see `migrations/2026-05-01-000000_hash_keyed_derived_data`.
|
|
fn backfill_tagged_photo_hashes(conn: &mut SqliteConnection) -> QueryResult<usize> {
|
|
sql_query(
|
|
"UPDATE tagged_photo \
|
|
SET content_hash = ( \
|
|
SELECT content_hash FROM image_exif \
|
|
WHERE image_exif.rel_path = tagged_photo.rel_path \
|
|
AND image_exif.content_hash IS NOT NULL \
|
|
LIMIT 1 \
|
|
) \
|
|
WHERE content_hash IS NULL \
|
|
AND EXISTS ( \
|
|
SELECT 1 FROM image_exif \
|
|
WHERE image_exif.rel_path = tagged_photo.rel_path \
|
|
AND image_exif.content_hash IS NOT NULL \
|
|
)",
|
|
)
|
|
.execute(conn)
|
|
}
|
|
|
|
/// Populate `photo_insights.content_hash` from `image_exif`, keyed on
|
|
/// `(library_id, rel_path)`. Unambiguous because photo_insights carries
|
|
/// library_id.
|
|
fn backfill_photo_insights_hashes(conn: &mut SqliteConnection) -> QueryResult<usize> {
|
|
sql_query(
|
|
"UPDATE photo_insights \
|
|
SET content_hash = ( \
|
|
SELECT content_hash FROM image_exif \
|
|
WHERE image_exif.library_id = photo_insights.library_id \
|
|
AND image_exif.rel_path = photo_insights.rel_path \
|
|
AND image_exif.content_hash IS NOT NULL \
|
|
LIMIT 1 \
|
|
) \
|
|
WHERE content_hash IS NULL \
|
|
AND EXISTS ( \
|
|
SELECT 1 FROM image_exif \
|
|
WHERE image_exif.library_id = photo_insights.library_id \
|
|
AND image_exif.rel_path = photo_insights.rel_path \
|
|
AND image_exif.content_hash IS NOT NULL \
|
|
)",
|
|
)
|
|
.execute(conn)
|
|
}
|
|
|
|
/// Scalar-merge step: when multiple rows share a `content_hash` and
|
|
/// claim `is_current = true`, demote all but the earliest by
|
|
/// `generated_at` (ties broken by lowest id, deterministic).
|
|
///
|
|
/// Demoted rows keep their data — only `is_current` flips. Clients that
|
|
/// hit `/insights/history` still see the full sequence; only the
|
|
/// "current" pointer is unique per hash.
|
|
fn collapse_insight_currents(conn: &mut SqliteConnection) -> QueryResult<usize> {
|
|
sql_query(
|
|
"UPDATE photo_insights \
|
|
SET is_current = 0 \
|
|
WHERE is_current = 1 \
|
|
AND content_hash IS NOT NULL \
|
|
AND id NOT IN ( \
|
|
SELECT MIN(p2.id) FROM photo_insights p2 \
|
|
WHERE p2.is_current = 1 \
|
|
AND p2.content_hash = photo_insights.content_hash \
|
|
AND p2.generated_at = ( \
|
|
SELECT MIN(p3.generated_at) FROM photo_insights p3 \
|
|
WHERE p3.is_current = 1 \
|
|
AND p3.content_hash = p2.content_hash \
|
|
) \
|
|
)",
|
|
)
|
|
.execute(conn)
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::database::test::in_memory_db_connection;
|
|
|
|
fn ensure_library(conn: &mut SqliteConnection, library_id: i32) {
|
|
// Migration seeds library id=1; tests that reference id>1 must
|
|
// create those rows themselves, otherwise FK enforcement (added
|
|
// in the tags-edit migration) rejects image_exif inserts.
|
|
diesel::sql_query(
|
|
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
|
|
VALUES (?, 'test-' || ?, '/tmp/test-' || ?, 0)",
|
|
)
|
|
.bind::<diesel::sql_types::Integer, _>(library_id)
|
|
.bind::<diesel::sql_types::Integer, _>(library_id)
|
|
.bind::<diesel::sql_types::Integer, _>(library_id)
|
|
.execute(conn)
|
|
.unwrap();
|
|
}
|
|
|
|
fn insert_image_exif(
|
|
conn: &mut SqliteConnection,
|
|
library_id: i32,
|
|
rel_path: &str,
|
|
content_hash: Option<&str>,
|
|
) {
|
|
use crate::database::schema::image_exif;
|
|
ensure_library(conn, library_id);
|
|
diesel::sql_query(
|
|
"INSERT INTO image_exif (library_id, rel_path, created_time, last_modified, content_hash) \
|
|
VALUES (?, ?, 0, 0, ?)",
|
|
)
|
|
.bind::<diesel::sql_types::Integer, _>(library_id)
|
|
.bind::<diesel::sql_types::Text, _>(rel_path)
|
|
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(content_hash)
|
|
.execute(conn)
|
|
.unwrap();
|
|
// Keep clippy happy that the import is used.
|
|
let _ = image_exif::table;
|
|
}
|
|
|
|
fn insert_tagged_photo(conn: &mut SqliteConnection, rel_path: &str, tag_id: i32) {
|
|
diesel::sql_query(
|
|
"INSERT INTO tagged_photo (rel_path, tag_id, created_time) VALUES (?, ?, 0)",
|
|
)
|
|
.bind::<diesel::sql_types::Text, _>(rel_path)
|
|
.bind::<diesel::sql_types::Integer, _>(tag_id)
|
|
.execute(conn)
|
|
.unwrap();
|
|
}
|
|
|
|
fn insert_tag(conn: &mut SqliteConnection, id: i32, name: &str) {
|
|
diesel::sql_query("INSERT INTO tags (id, name, created_time) VALUES (?, ?, 0)")
|
|
.bind::<diesel::sql_types::Integer, _>(id)
|
|
.bind::<diesel::sql_types::Text, _>(name)
|
|
.execute(conn)
|
|
.unwrap();
|
|
}
|
|
|
|
fn insert_insight(
|
|
conn: &mut SqliteConnection,
|
|
library_id: i32,
|
|
rel_path: &str,
|
|
generated_at: i64,
|
|
is_current: bool,
|
|
) -> i32 {
|
|
ensure_library(conn, library_id);
|
|
diesel::sql_query(
|
|
"INSERT INTO photo_insights (library_id, rel_path, title, summary, generated_at, model_version, is_current, backend) \
|
|
VALUES (?, ?, 't', 's', ?, 'v', ?, 'local')",
|
|
)
|
|
.bind::<diesel::sql_types::Integer, _>(library_id)
|
|
.bind::<diesel::sql_types::Text, _>(rel_path)
|
|
.bind::<diesel::sql_types::BigInt, _>(generated_at)
|
|
.bind::<diesel::sql_types::Bool, _>(is_current)
|
|
.execute(conn)
|
|
.unwrap();
|
|
diesel::sql_query("SELECT last_insert_rowid() AS id")
|
|
.get_result::<TestId>(conn)
|
|
.map(|r| r.id)
|
|
.unwrap()
|
|
}
|
|
|
|
#[derive(QueryableByName)]
|
|
struct TestId {
|
|
#[diesel(sql_type = diesel::sql_types::Integer)]
|
|
id: i32,
|
|
}
|
|
|
|
#[derive(QueryableByName, Debug)]
|
|
struct HashOnly {
|
|
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
|
|
content_hash: Option<String>,
|
|
}
|
|
|
|
#[derive(QueryableByName, Debug)]
|
|
struct CurrentRow {
|
|
#[diesel(sql_type = diesel::sql_types::Integer)]
|
|
id: i32,
|
|
#[diesel(sql_type = diesel::sql_types::Bool)]
|
|
is_current: bool,
|
|
}
|
|
|
|
#[test]
|
|
fn backfill_fills_tagged_photo_hash_when_image_exif_has_one() {
|
|
let mut conn = in_memory_db_connection();
|
|
insert_tag(&mut conn, 1, "vacation");
|
|
insert_tagged_photo(&mut conn, "trip/IMG.jpg", 1);
|
|
// No image_exif row yet — backfill no-op.
|
|
let stats = run(&mut conn);
|
|
assert_eq!(stats.tagged_photo_hashes_filled, 0);
|
|
|
|
// image_exif row appears with a hash; next reconcile fills it.
|
|
insert_image_exif(&mut conn, 1, "trip/IMG.jpg", Some("hashabc"));
|
|
let stats = run(&mut conn);
|
|
assert_eq!(stats.tagged_photo_hashes_filled, 1);
|
|
|
|
let row = diesel::sql_query(
|
|
"SELECT content_hash FROM tagged_photo WHERE rel_path = 'trip/IMG.jpg'",
|
|
)
|
|
.get_result::<HashOnly>(&mut conn)
|
|
.unwrap();
|
|
assert_eq!(row.content_hash.as_deref(), Some("hashabc"));
|
|
|
|
// Idempotent: a second run is a no-op.
|
|
let stats = run(&mut conn);
|
|
assert_eq!(stats.tagged_photo_hashes_filled, 0);
|
|
}
|
|
|
|
#[test]
|
|
fn backfill_skips_tagged_photo_when_image_exif_has_no_hash() {
|
|
let mut conn = in_memory_db_connection();
|
|
insert_tag(&mut conn, 1, "vacation");
|
|
insert_tagged_photo(&mut conn, "trip/IMG.jpg", 1);
|
|
// image_exif exists but its hash is null.
|
|
insert_image_exif(&mut conn, 1, "trip/IMG.jpg", None);
|
|
|
|
let stats = run(&mut conn);
|
|
assert_eq!(stats.tagged_photo_hashes_filled, 0);
|
|
}
|
|
|
|
#[test]
|
|
fn backfill_fills_photo_insights_hash_scoped_by_library() {
|
|
let mut conn = in_memory_db_connection();
|
|
// Row in library 1 only — must not be filled by a hash from
|
|
// library 2's same-rel_path entry.
|
|
insert_image_exif(&mut conn, 1, "shared.jpg", Some("hash-lib1"));
|
|
let id1 = insert_insight(&mut conn, 1, "shared.jpg", 100, true);
|
|
|
|
let stats = run(&mut conn);
|
|
assert_eq!(stats.photo_insights_hashes_filled, 1);
|
|
|
|
let row = diesel::sql_query("SELECT content_hash FROM photo_insights WHERE id = ?")
|
|
.bind::<diesel::sql_types::Integer, _>(id1)
|
|
.get_result::<HashOnly>(&mut conn)
|
|
.unwrap();
|
|
assert_eq!(row.content_hash.as_deref(), Some("hash-lib1"));
|
|
}
|
|
|
|
#[test]
|
|
fn collapse_keeps_earliest_is_current_per_hash() {
|
|
let mut conn = in_memory_db_connection();
|
|
// Two libraries, same content_hash via image_exif. Insights
|
|
// were generated independently in each library, both currently
|
|
// is_current = true. The earlier one wins.
|
|
insert_image_exif(&mut conn, 1, "a.jpg", Some("h1"));
|
|
insert_image_exif(&mut conn, 2, "a.jpg", Some("h1"));
|
|
let earlier = insert_insight(&mut conn, 1, "a.jpg", 100, true);
|
|
let later = insert_insight(&mut conn, 2, "a.jpg", 200, true);
|
|
|
|
// First pass fills the content_hash; second collapses.
|
|
let stats = run(&mut conn);
|
|
assert_eq!(stats.photo_insights_hashes_filled, 2);
|
|
assert_eq!(stats.photo_insights_demoted, 1);
|
|
|
|
let rows = diesel::sql_query("SELECT id, is_current FROM photo_insights ORDER BY id")
|
|
.get_results::<CurrentRow>(&mut conn)
|
|
.unwrap();
|
|
let earlier_row = rows.iter().find(|r| r.id == earlier).unwrap();
|
|
let later_row = rows.iter().find(|r| r.id == later).unwrap();
|
|
assert!(
|
|
earlier_row.is_current,
|
|
"earlier insight should remain current"
|
|
);
|
|
assert!(!later_row.is_current, "later insight should be demoted");
|
|
|
|
// Idempotent.
|
|
let stats = run(&mut conn);
|
|
assert_eq!(stats.photo_insights_demoted, 0);
|
|
}
|
|
|
|
#[test]
|
|
fn collapse_does_not_demote_a_solo_current_row() {
|
|
let mut conn = in_memory_db_connection();
|
|
insert_image_exif(&mut conn, 1, "a.jpg", Some("h1"));
|
|
let solo = insert_insight(&mut conn, 1, "a.jpg", 100, true);
|
|
|
|
let stats = run(&mut conn);
|
|
assert_eq!(stats.photo_insights_demoted, 0);
|
|
|
|
let row = diesel::sql_query("SELECT id, is_current FROM photo_insights WHERE id = ?")
|
|
.bind::<diesel::sql_types::Integer, _>(solo)
|
|
.get_result::<CurrentRow>(&mut conn)
|
|
.unwrap();
|
|
assert!(row.is_current);
|
|
}
|
|
}
|