//! Reconciliation pass for hash-keyed derived data. //! //! As `backfill_unhashed_backlog` populates `image_exif.content_hash` //! for legacy rows, we want the matching `tagged_photo` and //! `photo_insights` rows — which were inserted before the hash was //! known — to inherit the hash too. Otherwise reads keep falling back //! to the rel_path path even when a hash is now available. //! //! Two passes: //! 1. **Hash backfill** — for every `tagged_photo` / `photo_insights` //! row with NULL `content_hash`, look up the matching //! `image_exif.content_hash` and write it. SQL-only; idempotent; //! a no-op once everything is hashed. //! 2. **Insight scalar merge** — when multiple `photo_insights` rows //! share a `content_hash` with `is_current = true`, only the //! earliest `generated_at` keeps `is_current = true` (per the //! "earliest wins" rule in CLAUDE.md → "Multi-library data //! model"). Others are demoted, not deleted, so they remain //! visible in history endpoints. //! //! Tags are set-valued under the policy (union on read), so there's no //! analogous "collapse" pass — duplicate `(tag_id, content_hash)` rows //! across libraries are harmless and correctly de-duped at read time //! by the existing `DISTINCT` queries. //! //! The pass operates on the database alone — no filesystem access — //! so it doesn't need the library availability gate. // The lib doesn't call into this module directly — the watcher (in the // bin) does. Dead-code analysis at the lib level can't see that, so // suppress at the module level. Tests still exercise every function. #![allow(dead_code)] use diesel::prelude::*; use diesel::sql_query; use diesel::sqlite::SqliteConnection; use log::{debug, info, warn}; /// Outcome of a reconciliation tick. Tracked so the watcher can log /// progress when something changed and stay quiet when nothing did. 
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct ReconcileStats {
    pub tagged_photo_hashes_filled: usize,
    pub photo_insights_hashes_filled: usize,
    pub photo_insights_demoted: usize,
}

impl ReconcileStats {
    /// True when this tick touched at least one row in any of the
    /// three sub-passes.
    pub fn changed(&self) -> bool {
        // Destructure so a newly added counter field triggers a
        // compile error here instead of being silently ignored.
        let ReconcileStats {
            tagged_photo_hashes_filled,
            photo_insights_hashes_filled,
            photo_insights_demoted,
        } = *self;
        tagged_photo_hashes_filled > 0
            || photo_insights_hashes_filled > 0
            || photo_insights_demoted > 0
    }
}

/// Run the reconciliation pass. Idempotent — safe to call on every
/// watcher tick. Errors are logged but never propagated; reconciliation
/// is best-effort and a transient DB hiccup must not stall the watcher.
pub fn run(conn: &mut SqliteConnection) -> ReconcileStats {
    // Each sub-pass degrades to "0 rows touched" on error so the
    // remaining passes still get their chance to run.
    let tagged_photo_hashes_filled = backfill_tagged_photo_hashes(conn).unwrap_or_else(|e| {
        warn!("reconcile: tagged_photo hash backfill failed: {:?}", e);
        0
    });
    let photo_insights_hashes_filled = backfill_photo_insights_hashes(conn).unwrap_or_else(|e| {
        warn!("reconcile: photo_insights hash backfill failed: {:?}", e);
        0
    });
    let photo_insights_demoted = collapse_insight_currents(conn).unwrap_or_else(|e| {
        warn!("reconcile: photo_insights scalar merge failed: {:?}", e);
        0
    });

    let stats = ReconcileStats {
        tagged_photo_hashes_filled,
        photo_insights_hashes_filled,
        photo_insights_demoted,
    };

    // Loud only when something actually moved; quiet ticks stay at debug.
    if stats.changed() {
        info!(
            "reconcile: filled {} tagged_photo hash(es), {} photo_insights hash(es); demoted {} non-current insight row(s)",
            stats.tagged_photo_hashes_filled,
            stats.photo_insights_hashes_filled,
            stats.photo_insights_demoted,
        );
    } else {
        debug!("reconcile: no changes this tick");
    }
    stats
}

/// Populate `tagged_photo.content_hash` for any row that still has
/// NULL by joining on `rel_path` against `image_exif`. tagged_photo
/// doesn't carry `library_id`, so a path that exists under multiple
/// libraries with different content is genuinely ambiguous; we pick
/// any non-null hash for that path. Same trade-off as the migration
/// backfill — see `migrations/2026-05-01-000000_hash_keyed_derived_data`.
fn backfill_tagged_photo_hashes(conn: &mut SqliteConnection) -> QueryResult { sql_query( "UPDATE tagged_photo \ SET content_hash = ( \ SELECT content_hash FROM image_exif \ WHERE image_exif.rel_path = tagged_photo.rel_path \ AND image_exif.content_hash IS NOT NULL \ LIMIT 1 \ ) \ WHERE content_hash IS NULL \ AND EXISTS ( \ SELECT 1 FROM image_exif \ WHERE image_exif.rel_path = tagged_photo.rel_path \ AND image_exif.content_hash IS NOT NULL \ )", ) .execute(conn) } /// Populate `photo_insights.content_hash` from `image_exif`, keyed on /// `(library_id, rel_path)`. Unambiguous because photo_insights carries /// library_id. fn backfill_photo_insights_hashes(conn: &mut SqliteConnection) -> QueryResult { sql_query( "UPDATE photo_insights \ SET content_hash = ( \ SELECT content_hash FROM image_exif \ WHERE image_exif.library_id = photo_insights.library_id \ AND image_exif.rel_path = photo_insights.rel_path \ AND image_exif.content_hash IS NOT NULL \ LIMIT 1 \ ) \ WHERE content_hash IS NULL \ AND EXISTS ( \ SELECT 1 FROM image_exif \ WHERE image_exif.library_id = photo_insights.library_id \ AND image_exif.rel_path = photo_insights.rel_path \ AND image_exif.content_hash IS NOT NULL \ )", ) .execute(conn) } /// Scalar-merge step: when multiple rows share a `content_hash` and /// claim `is_current = true`, demote all but the earliest by /// `generated_at` (ties broken by lowest id, deterministic). /// /// Demoted rows keep their data — only `is_current` flips. Clients that /// hit `/insights/history` still see the full sequence; only the /// "current" pointer is unique per hash. 
fn collapse_insight_currents(conn: &mut SqliteConnection) -> QueryResult { sql_query( "UPDATE photo_insights \ SET is_current = 0 \ WHERE is_current = 1 \ AND content_hash IS NOT NULL \ AND id NOT IN ( \ SELECT MIN(p2.id) FROM photo_insights p2 \ WHERE p2.is_current = 1 \ AND p2.content_hash = photo_insights.content_hash \ AND p2.generated_at = ( \ SELECT MIN(p3.generated_at) FROM photo_insights p3 \ WHERE p3.is_current = 1 \ AND p3.content_hash = p2.content_hash \ ) \ )", ) .execute(conn) } #[cfg(test)] mod tests { use super::*; use crate::database::test::in_memory_db_connection; fn ensure_library(conn: &mut SqliteConnection, library_id: i32) { // Migration seeds library id=1; tests that reference id>1 must // create those rows themselves, otherwise FK enforcement (added // in the tags-edit migration) rejects image_exif inserts. diesel::sql_query( "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ VALUES (?, 'test-' || ?, '/tmp/test-' || ?, 0)", ) .bind::(library_id) .bind::(library_id) .bind::(library_id) .execute(conn) .unwrap(); } fn insert_image_exif( conn: &mut SqliteConnection, library_id: i32, rel_path: &str, content_hash: Option<&str>, ) { use crate::database::schema::image_exif; ensure_library(conn, library_id); diesel::sql_query( "INSERT INTO image_exif (library_id, rel_path, created_time, last_modified, content_hash) \ VALUES (?, ?, 0, 0, ?)", ) .bind::(library_id) .bind::(rel_path) .bind::, _>(content_hash) .execute(conn) .unwrap(); // Keep clippy happy that the import is used. 
let _ = image_exif::table; } fn insert_tagged_photo(conn: &mut SqliteConnection, rel_path: &str, tag_id: i32) { diesel::sql_query( "INSERT INTO tagged_photo (rel_path, tag_id, created_time) VALUES (?, ?, 0)", ) .bind::(rel_path) .bind::(tag_id) .execute(conn) .unwrap(); } fn insert_tag(conn: &mut SqliteConnection, id: i32, name: &str) { diesel::sql_query("INSERT INTO tags (id, name, created_time) VALUES (?, ?, 0)") .bind::(id) .bind::(name) .execute(conn) .unwrap(); } fn insert_insight( conn: &mut SqliteConnection, library_id: i32, rel_path: &str, generated_at: i64, is_current: bool, ) -> i32 { ensure_library(conn, library_id); diesel::sql_query( "INSERT INTO photo_insights (library_id, rel_path, title, summary, generated_at, model_version, is_current, backend) \ VALUES (?, ?, 't', 's', ?, 'v', ?, 'local')", ) .bind::(library_id) .bind::(rel_path) .bind::(generated_at) .bind::(is_current) .execute(conn) .unwrap(); diesel::sql_query("SELECT last_insert_rowid() AS id") .get_result::(conn) .map(|r| r.id) .unwrap() } #[derive(QueryableByName)] struct TestId { #[diesel(sql_type = diesel::sql_types::Integer)] id: i32, } #[derive(QueryableByName, Debug)] struct HashOnly { #[diesel(sql_type = diesel::sql_types::Nullable)] content_hash: Option, } #[derive(QueryableByName, Debug)] struct CurrentRow { #[diesel(sql_type = diesel::sql_types::Integer)] id: i32, #[diesel(sql_type = diesel::sql_types::Bool)] is_current: bool, } #[test] fn backfill_fills_tagged_photo_hash_when_image_exif_has_one() { let mut conn = in_memory_db_connection(); insert_tag(&mut conn, 1, "vacation"); insert_tagged_photo(&mut conn, "trip/IMG.jpg", 1); // No image_exif row yet — backfill no-op. let stats = run(&mut conn); assert_eq!(stats.tagged_photo_hashes_filled, 0); // image_exif row appears with a hash; next reconcile fills it. 
insert_image_exif(&mut conn, 1, "trip/IMG.jpg", Some("hashabc")); let stats = run(&mut conn); assert_eq!(stats.tagged_photo_hashes_filled, 1); let row = diesel::sql_query( "SELECT content_hash FROM tagged_photo WHERE rel_path = 'trip/IMG.jpg'", ) .get_result::(&mut conn) .unwrap(); assert_eq!(row.content_hash.as_deref(), Some("hashabc")); // Idempotent: a second run is a no-op. let stats = run(&mut conn); assert_eq!(stats.tagged_photo_hashes_filled, 0); } #[test] fn backfill_skips_tagged_photo_when_image_exif_has_no_hash() { let mut conn = in_memory_db_connection(); insert_tag(&mut conn, 1, "vacation"); insert_tagged_photo(&mut conn, "trip/IMG.jpg", 1); // image_exif exists but its hash is null. insert_image_exif(&mut conn, 1, "trip/IMG.jpg", None); let stats = run(&mut conn); assert_eq!(stats.tagged_photo_hashes_filled, 0); } #[test] fn backfill_fills_photo_insights_hash_scoped_by_library() { let mut conn = in_memory_db_connection(); // Row in library 1 only — must not be filled by a hash from // library 2's same-rel_path entry. insert_image_exif(&mut conn, 1, "shared.jpg", Some("hash-lib1")); let id1 = insert_insight(&mut conn, 1, "shared.jpg", 100, true); let stats = run(&mut conn); assert_eq!(stats.photo_insights_hashes_filled, 1); let row = diesel::sql_query( "SELECT content_hash FROM photo_insights WHERE id = ?", ) .bind::(id1) .get_result::(&mut conn) .unwrap(); assert_eq!(row.content_hash.as_deref(), Some("hash-lib1")); } #[test] fn collapse_keeps_earliest_is_current_per_hash() { let mut conn = in_memory_db_connection(); // Two libraries, same content_hash via image_exif. Insights // were generated independently in each library, both currently // is_current = true. The earlier one wins. 
insert_image_exif(&mut conn, 1, "a.jpg", Some("h1")); insert_image_exif(&mut conn, 2, "a.jpg", Some("h1")); let earlier = insert_insight(&mut conn, 1, "a.jpg", 100, true); let later = insert_insight(&mut conn, 2, "a.jpg", 200, true); // First pass fills the content_hash; second collapses. let stats = run(&mut conn); assert_eq!(stats.photo_insights_hashes_filled, 2); assert_eq!(stats.photo_insights_demoted, 1); let rows = diesel::sql_query( "SELECT id, is_current FROM photo_insights ORDER BY id", ) .get_results::(&mut conn) .unwrap(); let earlier_row = rows.iter().find(|r| r.id == earlier).unwrap(); let later_row = rows.iter().find(|r| r.id == later).unwrap(); assert!(earlier_row.is_current, "earlier insight should remain current"); assert!(!later_row.is_current, "later insight should be demoted"); // Idempotent. let stats = run(&mut conn); assert_eq!(stats.photo_insights_demoted, 0); } #[test] fn collapse_does_not_demote_a_solo_current_row() { let mut conn = in_memory_db_connection(); insert_image_exif(&mut conn, 1, "a.jpg", Some("h1")); let solo = insert_insight(&mut conn, 1, "a.jpg", 100, true); let stats = run(&mut conn); assert_eq!(stats.photo_insights_demoted, 0); let row = diesel::sql_query( "SELECT id, is_current FROM photo_insights WHERE id = ?", ) .bind::(solo) .get_result::(&mut conn) .unwrap(); assert!(row.is_current); } }