-- Phase B of the multi-library data-model rollout: add a nullable
-- `content_hash` column to derived/user-intent tables that should follow
-- the bytes rather than the path. Reads will prefer hash-key joins and
-- fall back to rel_path while the column is null. A separate
-- reconciliation pass collapses duplicates as the column populates.
--
-- See CLAUDE.md → "Multi-library data model" for the policy. The
-- reference implementation is `face_detections`, which has been
-- hash-keyed since it was introduced.
--
-- Tables in this migration:
--   * tagged_photo   — user-intent (tags follow the bytes)
--   * photo_insights — intrinsic to bytes (LLM-generated description)
--
-- favorites is the natural third candidate but its DAO is barely used in
-- v1 and the row count is tiny; deferring lets this migration stay
-- focused on the high-volume tables that drive cross-library overhead.
--
-- Formatting note: `--` comments run to the end of the physical line, so
-- every statement below must start on its own line. A statement that
-- shares a line with a preceding comment is silently skipped.

-- ---------------------------------------------------------------------------
-- tagged_photo
-- ---------------------------------------------------------------------------

ALTER TABLE tagged_photo ADD COLUMN content_hash TEXT;

-- Backfill: for each tagged_photo row, find the content_hash for its
-- rel_path. tagged_photo doesn't carry a library_id, so a rel_path that
-- exists under multiple libraries with different content is genuinely
-- ambiguous. We resolve the ambiguity deterministically by taking the
-- matching image_exif row with the lowest library_id (content_hash is the
-- tie-breaker); without the ORDER BY, LIMIT 1 would return an arbitrary
-- row and the backfill could differ between environments. The
-- reconciliation pass at runtime cleans up any rows that resolve
-- differently once a hash is known per library.
--
-- Rows with no hashed image_exif match keep content_hash = NULL (the
-- scalar subquery yields NULL when it returns no row).
UPDATE tagged_photo
SET content_hash = (
    SELECT image_exif.content_hash
    FROM image_exif
    WHERE image_exif.rel_path = tagged_photo.rel_path
      AND image_exif.content_hash IS NOT NULL
    ORDER BY
        image_exif.library_id,
        image_exif.content_hash
    LIMIT 1
)
WHERE tagged_photo.content_hash IS NULL;

-- Hash-key index. Partial (only non-null rows) to keep the index small
-- during the transitional window where most rows are still null.
CREATE INDEX idx_tagged_photo_content_hash
    ON tagged_photo (content_hash)
    WHERE content_hash IS NOT NULL;

-- ---------------------------------------------------------------------------
-- photo_insights
-- ---------------------------------------------------------------------------

ALTER TABLE photo_insights ADD COLUMN content_hash TEXT;

-- Backfill keyed on (library_id, rel_path) — photo_insights already
-- carries library_id, so the resolution is expected to be unambiguous.
-- NOTE(review): this presumes image_exif holds at most one hashed row per
-- (library_id, rel_path) — confirm against the image_exif schema. The
-- ORDER BY is a cheap safeguard that keeps the result reproducible even
-- if that assumption does not hold.
UPDATE photo_insights
SET content_hash = (
    SELECT image_exif.content_hash
    FROM image_exif
    WHERE image_exif.library_id = photo_insights.library_id
      AND image_exif.rel_path = photo_insights.rel_path
      AND image_exif.content_hash IS NOT NULL
    ORDER BY image_exif.content_hash
    LIMIT 1
)
WHERE photo_insights.content_hash IS NULL;

-- Same partial hash-key index as tagged_photo: only non-null rows are
-- indexed.
CREATE INDEX idx_photo_insights_content_hash
    ON photo_insights (content_hash)
    WHERE content_hash IS NOT NULL;