feature/knowledge-curation #91

Merged
cameron merged 19 commits from feature/knowledge-curation into master 2026-05-12 15:40:57 +00:00
2 changed files with 49 additions and 0 deletions
Showing only changes of commit fb078b4906 - Show all commits

View File

@@ -0,0 +1,6 @@
-- Down migration for the entity_type canonicalization: deliberately a
-- no-op.
--
-- The change cannot be reversed. The up migration collapsed several
-- raw entity_type strings into canonical forms, and no per-row record
-- of the original value was kept. Leaving the rewritten values in
-- place is acceptable because they are still semantically correct.
--
-- Re-applying the up migration after this "rollback" is safe: each of
-- its UPDATEs only touches rows whose entity_type differs from the
-- value it would be rewritten to, so already-normalized rows are
-- skipped.
--
-- `SELECT 1` is a placeholder statement -- presumably so the migration
-- runner has something valid to execute; confirm against the runner's
-- requirements before removing it.
SELECT 1;

View File

@@ -0,0 +1,43 @@
-- Data migration: normalize `entities.entity_type`.
--
-- Rows written before `normalize_entity_type` was wired into
-- upsert_entity still carry raw type strings, which pollute
-- client-side filters. This script applies the same synonym map as
-- `src/database/knowledge_dao.rs::normalize_entity_type`:
--
--   canonical | accepted spellings (matched after trim + lowercase)
--   ----------+------------------------------------------------------
--   person    | person, people, human, individual, contact
--   place     | place, location, venue, site, area, landmark
--   event     | event, occasion, activity, celebration
--   thing     | thing, object, item, product
--
-- Custom types that are not in the map (e.g. "friend", "family") have
-- no canonical form; they only get trimmed and lowercased so that
-- case/whitespace variants of the same custom type collapse.
--
-- Conflict handling: every statement is `UPDATE OR IGNORE`, so a row
-- whose rewrite would violate UNIQUE(name, entity_type) is skipped
-- rather than failing the migration or deleting data. Example:
-- ("Sarah", "person") + ("Sarah", "Person") -- the second row keeps
-- its original value and is left for the curator to merge through the
-- curation UI.
--
-- Idempotence: each statement excludes rows that already hold the
-- value it would write, so re-running the script does not touch
-- already-normalized rows.
--
-- NOTE(review): SQLite's built-in LOWER() folds ASCII letters only and
-- one-argument TRIM() strips spaces only. Confirm that this matches
-- what the Rust normalizer does for non-ASCII names or tab/newline
-- padding.

-- 1/5: person
UPDATE OR IGNORE entities
   SET entity_type = 'person'
 WHERE entity_type <> 'person'
   AND LOWER(TRIM(entity_type)) IN (
           'person', 'people', 'human', 'individual', 'contact'
       );

-- 2/5: place
UPDATE OR IGNORE entities
   SET entity_type = 'place'
 WHERE entity_type <> 'place'
   AND LOWER(TRIM(entity_type)) IN (
           'place', 'location', 'venue', 'site', 'area', 'landmark'
       );

-- 3/5: event
UPDATE OR IGNORE entities
   SET entity_type = 'event'
 WHERE entity_type <> 'event'
   AND LOWER(TRIM(entity_type)) IN (
           'event', 'occasion', 'activity', 'celebration'
       );

-- 4/5: thing
UPDATE OR IGNORE entities
   SET entity_type = 'thing'
 WHERE entity_type <> 'thing'
   AND LOWER(TRIM(entity_type)) IN (
           'thing', 'object', 'item', 'product'
       );

-- 5/5: catch-all sweep. Any value that is still not in trimmed,
-- lowercase form ("Friend" vs "friend") is rewritten to that form.
-- This also reaches synonym rows that were skipped above because of a
-- collision: e.g. "People" next to an existing "person" row becomes
-- "people" here, as long as that rewrite does not itself collide.
UPDATE OR IGNORE entities
   SET entity_type = LOWER(TRIM(entity_type))
 WHERE entity_type <> LOWER(TRIM(entity_type));