-- Provenance columns for entity_facts.
--
-- Purpose: record the model and the backend that produced each fact, so
-- the curator can audit which configurations yield trustworthy knowledge.
--
-- Why not simply read this through photo_insights?
-- photo_insights already stores `model_version` + `backend`, and
-- entity_facts.source_insight_id points at it. That link is not enough:
--   1. source_insight_id is only populated once an insight has been stored
--      (post-loop). Chat-continuation facts, and facts whose insight was
--      later regenerated, therefore lose the link.
--   2. Requiring a JOIN on every read is more friction than keeping the
--      provenance directly on the fact row.
--   3. Manual facts (POST /knowledge/facts) have no insight at all and
--      still need to record "manual" as their provenance.
--
-- Shape: two nullable TEXT columns cover the audit use case.
--   created_by_model   -- e.g. "qwen2.5:7b", "anthropic/claude-sonnet-4"
--   created_by_backend -- "local", "hybrid", or "manual"
--
-- Rows that existed before this migration keep NULL in both columns:
-- legacy facts predate this tracking and cannot be back-filled reliably
-- from training_messages without burning compute.

ALTER TABLE entity_facts
    ADD COLUMN created_by_model TEXT;

ALTER TABLE entity_facts
    ADD COLUMN created_by_backend TEXT;

-- Audit lookups such as "show me all facts from model X" are served by
-- one index per provenance column. Both indexes are partial (they skip
-- NULLs) so the legacy rows do not bloat them.
CREATE INDEX idx_entity_facts_created_by_model
    ON entity_facts (created_by_model)
    WHERE created_by_model IS NOT NULL;

CREATE INDEX idx_entity_facts_created_by_backend
    ON entity_facts (created_by_backend)
    WHERE created_by_backend IS NOT NULL;