diff --git a/migrations/2026-05-11-000000_normalize_entity_types/down.sql b/migrations/2026-05-11-000000_normalize_entity_types/down.sql
new file mode 100644
index 0000000..4c1a2f2
--- /dev/null
+++ b/migrations/2026-05-11-000000_normalize_entity_types/down.sql
@@ -0,0 +1,6 @@
+-- Irreversible: we collapsed multiple raw entity_type strings to
+-- canonical forms and don't have a per-row record of the original.
+-- The down migration is intentionally a no-op (the rewritten values
+-- are still semantically correct), and the up migration is safe to
+-- re-run because every UPDATE is conditional on `!= canonical`.
+SELECT 1;
diff --git a/migrations/2026-05-11-000000_normalize_entity_types/up.sql b/migrations/2026-05-11-000000_normalize_entity_types/up.sql
new file mode 100644
index 0000000..def6ab4
--- /dev/null
+++ b/migrations/2026-05-11-000000_normalize_entity_types/up.sql
@@ -0,0 +1,46 @@
+-- Canonicalize `entities.entity_type` so legacy rows from before
+-- `normalize_entity_type` landed in upsert_entity stop polluting
+-- client-side filters. Mirrors the synonym map in
+-- `src/database/knowledge_dao.rs::normalize_entity_type`:
+--   person ← person | people | human | individual | contact
+--   place  ← place | location | venue | site | area | landmark
+--   event  ← event | occasion | activity | celebration
+--   thing  ← thing | object | item | product
+-- Types outside the synonym set (e.g. "friend", "family") are not
+-- recognized as canonical and get a lowercase+trim pass instead, so
+-- at minimum case variants collapse.
+--
+-- `UPDATE OR IGNORE` skips rows that would violate UNIQUE(name,
+-- entity_type) after the rewrite. Two rows like ("Sarah", "person")
+-- and ("Sarah", "Person") would otherwise collide — the duplicate
+-- survives unchanged so the curator can merge it via the curation
+-- UI rather than have the migration silently delete data. A skipped
+-- synonym duplicate such as ("Sarah", "People") is not canonicalized
+-- either, but the final sweep still lowercases and trims it (to
+-- "people") when that does not itself collide.
+
+-- person: fold every case/padding variant and synonym that is not yet canonical.
+UPDATE OR IGNORE entities SET entity_type = 'person'
+WHERE entity_type <> 'person'
+  AND LOWER(TRIM(entity_type)) IN ('person', 'people', 'human', 'individual', 'contact');
+
+-- place: same fold for the location-like synonyms.
+UPDATE OR IGNORE entities SET entity_type = 'place'
+WHERE entity_type <> 'place'
+  AND LOWER(TRIM(entity_type)) IN ('place', 'location', 'venue', 'site', 'area', 'landmark');
+
+-- event: same fold for the occasion-like synonyms.
+UPDATE OR IGNORE entities SET entity_type = 'event'
+WHERE entity_type <> 'event'
+  AND LOWER(TRIM(entity_type)) IN ('event', 'occasion', 'activity', 'celebration');
+
+-- thing: same fold for the object-like synonyms.
+UPDATE OR IGNORE entities SET entity_type = 'thing'
+WHERE entity_type <> 'thing'
+  AND LOWER(TRIM(entity_type)) IN ('thing', 'object', 'item', 'product');
+
+-- Last pass: any value that is still not in lowercase+trimmed form (custom
+-- types such as "Friend", or rows skipped above) is rewritten to that form.
+-- OR IGNORE again leaves a row untouched when the result would collide.
+UPDATE OR IGNORE entities SET entity_type = LOWER(TRIM(entity_type))
+WHERE LOWER(TRIM(entity_type)) <> entity_type;