-- ImageApi/migrations/2026-05-10-000300_entity_facts_provenance/up.sql

-- Track which model + backend generated each fact so the curator
-- can audit which configurations produce trustworthy knowledge.
--
-- photo_insights already carries `model_version` + `backend`, and
-- entity_facts.source_insight_id links to it — but:
--   1. source_insight_id is only set after an insight is stored
--      (post-loop), so chat-continuation facts and facts whose insight
--      was regenerated lose the link.
--   2. JOINing for every read is more friction than just embedding the
--      provenance on the fact row itself.
--   3. Manual facts (POST /knowledge/facts) have no insight at all and
--      need to record "manual" as their provenance.
--
-- Two nullable TEXT columns are enough for the audit use case:
-- created_by_model (e.g. "qwen2.5:7b", "anthropic/claude-sonnet-4") and
-- created_by_backend ("local", "hybrid", "manual"). Pre-existing rows
-- keep both columns NULL: legacy facts predate this tracking and cannot
-- be back-filled reliably from training_messages without burning compute.

-- Provenance columns on entity_facts. Both are plain TEXT with no
-- NOT NULL constraint and no DEFAULT, so rows that already exist read
-- back NULL for each of them.
-- One ADD COLUMN per statement: engines such as SQLite accept only a
-- single column per ALTER TABLE, and the statement order fixes the
-- resulting column order (model first, then backend).
ALTER TABLE entity_facts
    ADD COLUMN created_by_model TEXT;
ALTER TABLE entity_facts
    ADD COLUMN created_by_backend TEXT;

-- Lookup indexes for audit queries that filter facts by the model or
-- the backend that produced them. Each index is partial: the
-- IS NOT NULL predicate keeps rows without recorded provenance (the
-- legacy rows) out of the index entirely.
CREATE INDEX idx_entity_facts_created_by_model ON entity_facts (created_by_model)
WHERE created_by_model IS NOT NULL;

CREATE INDEX idx_entity_facts_created_by_backend ON entity_facts (created_by_backend)
WHERE created_by_backend IS NOT NULL;