-- ImageApi/migrations/2026-05-01-000000_hash_keyed_derived_data/up.sql

-- Phase B of the multi-library data-model rollout: add a nullable
-- `content_hash` column to derived/user-intent tables that should follow
-- the bytes rather than the path. Reads will prefer hash-key joins and
-- fall back to rel_path while the column is null. A separate
-- reconciliation pass collapses duplicates as the column populates.
--
-- See CLAUDE.md → "Multi-library data model" for the policy. The
-- reference implementation is `face_detections`, which has been
-- hash-keyed since it was introduced.
--
-- Tables in this migration:
--   * tagged_photo   — user-intent (tags follow the bytes)
--   * photo_insights — intrinsic to bytes (LLM-generated description)
--
-- favorites is the natural third candidate but its DAO is barely used in
-- v1 and the row count is tiny; deferring lets this migration stay
-- focused on the high-volume tables that drive cross-library overhead.

-- ---------------------------------------------------------------------------
-- tagged_photo
-- ---------------------------------------------------------------------------
-- Add the hash key. Nullable on purpose: rows whose rel_path has no hashed
-- image_exif match keep NULL, and readers fall back to rel_path for them.
ALTER TABLE tagged_photo ADD COLUMN content_hash TEXT;

-- Backfill: for each tagged_photo row, find the content_hash for its
-- rel_path. tagged_photo doesn't carry a library_id, so a rel_path that
-- exists under multiple libraries with different content is genuinely
-- ambiguous. To keep the result reproducible across runs and query plans,
-- the tie is broken explicitly: the match from the lowest library_id wins
-- (content_hash is a secondary sort key in case one library holds more than
-- one hashed row for the path). Without the ORDER BY, LIMIT 1 would return
-- whichever row the planner happened to visit first. The reconciliation
-- pass at runtime cleans up any rows that resolve differently once a hash
-- is known per library.
UPDATE tagged_photo
SET content_hash = (
    SELECT image_exif.content_hash
    FROM image_exif
    WHERE image_exif.rel_path = tagged_photo.rel_path
      AND image_exif.content_hash IS NOT NULL
    ORDER BY image_exif.library_id, image_exif.content_hash
    LIMIT 1
)
-- Every row is NULL right after the ADD COLUMN; the guard just ensures the
-- statement never overwrites a hash that is already present.
WHERE content_hash IS NULL;

-- Hash-key index. Partial (only non-null rows) to keep the index small
-- during the transitional window where most rows are still null.
CREATE INDEX idx_tagged_photo_content_hash
    ON tagged_photo (content_hash)
    WHERE content_hash IS NOT NULL;

-- ---------------------------------------------------------------------------
-- photo_insights
-- ---------------------------------------------------------------------------
-- New nullable hash key; a row keeps NULL until a hashed image_exif match
-- is found for it.
ALTER TABLE photo_insights
    ADD COLUMN content_hash TEXT;

-- Populate the new column from image_exif. photo_insights carries its own
-- library_id, so the lookup is keyed on (library_id, rel_path) rather than
-- rel_path alone; this is expected to identify a single image_exif row
-- (NOTE(review): presumes at most one image_exif row per library/path —
-- confirm against the image_exif schema). Rows with no hashed match are
-- assigned NULL by the scalar subquery, i.e. they are left unchanged.
UPDATE photo_insights
   SET content_hash = (
           SELECT ie.content_hash
             FROM image_exif AS ie
            WHERE ie.library_id = photo_insights.library_id
              AND ie.rel_path = photo_insights.rel_path
              AND ie.content_hash IS NOT NULL
            LIMIT 1
       )
 WHERE photo_insights.content_hash IS NULL;

-- Partial index: only rows that already have a hash are indexed.
CREATE INDEX idx_photo_insights_content_hash
    ON photo_insights (content_hash)
 WHERE content_hash IS NOT NULL;