feature/knowledge-curation #91
@@ -0,0 +1,6 @@
|
||||
-- Irreversible: we collapsed multiple raw entity_type strings to
|
||||
-- canonical forms and don't have a per-row record of the original.
|
||||
-- The down migration is intentionally a no-op (the rewritten values
|
||||
-- are still semantically correct), and the up migration is safe to
|
||||
-- re-run because every UPDATE is conditional on `!= canonical`.
|
||||
-- Intentional no-op: reads no table and changes no data.
SELECT 1;
|
||||
43
migrations/2026-05-11-000000_normalize_entity_types/up.sql
Normal file
43
migrations/2026-05-11-000000_normalize_entity_types/up.sql
Normal file
@@ -0,0 +1,43 @@
|
||||
-- Canonicalize `entities.entity_type` so legacy rows from before
|
||||
-- `normalize_entity_type` landed in upsert_entity stop polluting
|
||||
-- client-side filters. Mirrors the synonym map in
|
||||
-- `src/database/knowledge_dao.rs::normalize_entity_type`:
|
||||
-- person ← person | people | human | individual | contact
|
||||
-- place ← place | location | venue | site | area | landmark
|
||||
-- event ← event | occasion | activity | celebration
|
||||
-- thing ← thing | object | item | product
|
||||
-- Types outside the synonym set (e.g. "friend", "family") are not
|
||||
-- recognized as canonical and get a lowercase+trim pass instead, so
|
||||
-- at minimum case variants collapse.
|
||||
--
|
||||
-- `UPDATE OR IGNORE` skips rows that would violate UNIQUE(name,
|
||||
-- entity_type) after the rewrite. Two rows like ("Sarah", "person")
|
||||
-- + ("Sarah", "Person") would otherwise collide — the duplicate
|
||||
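-- is skipped rather than deleted, so the curator can merge it via the
-- curation UI instead of the migration silently dropping data. Note
-- that a skipped row may still be lowercased/trimmed by the final
-- sweep when that rewrite does not itself collide.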
|
||||
|
||||
-- Fold every "person" synonym (compared after LOWER(TRIM(...))) into
-- the canonical 'person'. Rows already equal to 'person' are excluded,
-- and OR IGNORE skips any row whose rewrite would violate a constraint
-- instead of aborting the statement.
UPDATE OR IGNORE entities
   SET entity_type = 'person'
 WHERE entity_type <> 'person'
   AND LOWER(TRIM(entity_type)) IN (
           'person',
           'people',
           'human',
           'individual',
           'contact'
       );
|
||||
|
||||
-- Fold every "place" synonym (compared after LOWER(TRIM(...))) into
-- the canonical 'place'. Rows already equal to 'place' are excluded,
-- and OR IGNORE skips any row whose rewrite would violate a constraint
-- instead of aborting the statement.
UPDATE OR IGNORE entities
   SET entity_type = 'place'
 WHERE entity_type <> 'place'
   AND LOWER(TRIM(entity_type)) IN (
           'place',
           'location',
           'venue',
           'site',
           'area',
           'landmark'
       );
|
||||
|
||||
-- Fold every "event" synonym (compared after LOWER(TRIM(...))) into
-- the canonical 'event'. Rows already equal to 'event' are excluded,
-- and OR IGNORE skips any row whose rewrite would violate a constraint
-- instead of aborting the statement.
UPDATE OR IGNORE entities
   SET entity_type = 'event'
 WHERE entity_type <> 'event'
   AND LOWER(TRIM(entity_type)) IN (
           'event',
           'occasion',
           'activity',
           'celebration'
       );
|
||||
|
||||
-- Fold every "thing" synonym (compared after LOWER(TRIM(...))) into
-- the canonical 'thing'. Rows already equal to 'thing' are excluded,
-- and OR IGNORE skips any row whose rewrite would violate a constraint
-- instead of aborting the statement.
UPDATE OR IGNORE entities
   SET entity_type = 'thing'
 WHERE entity_type <> 'thing'
   AND LOWER(TRIM(entity_type)) IN (
           'thing',
           'object',
           'item',
           'product'
       );
|
||||
|
||||
-- Final sweep: rewrite every entity_type that differs from its own
-- LOWER(TRIM(...)) form, so case/whitespace variants of a custom type
-- (e.g. "Friend" vs "friend") collapse to one spelling. OR IGNORE
-- leaves a row untouched when the rewritten value would violate a
-- constraint.
-- NOTE(review): the predicate is not limited to custom types, so a
-- synonym row skipped by an earlier statement (e.g. "People") is also
-- lowercased here when that does not collide — confirm this is intended.
UPDATE OR IGNORE entities
   SET entity_type = LOWER(TRIM(entity_type))
 WHERE LOWER(TRIM(entity_type)) <> entity_type;
|
||||
Reference in New Issue
Block a user