From 85f3716379992a7e0e70f0822674f8a25cf34ce9 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Sun, 10 May 2026 19:47:06 -0400 Subject: [PATCH] knowledge: fact supersession + photo-date valid_from MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Phase-2 followups in one commit since they're coupled at the write path: * Agent populates valid_from from the source photo's date_taken when calling store_fact. Loose semantics — date_taken is *evidence at that date*, not strictly when the fact started being true — but gives the curator a calendar anchor and pairs with supersession to close intervals cleanly. valid_until stays NULL (a single photo can't tell us when something stopped). Honours the existing upsert_fact dedup (corroborated facts keep their first-recorded valid_from). * Supersession: new column entity_facts.superseded_by INTEGER (migration 2026-05-10-000200), new status value 'superseded', new DAO method supersede_fact, new HTTP endpoint POST /knowledge/facts/{id}/supersede. Marking an old fact as replaced by a new one atomically: flips status to 'superseded', sets superseded_by, and stamps valid_until from the new fact's valid_from (when not already set). delete_fact clears dangling supersession pointers in the same transaction so the column never points at a missing row — no FK because SQLite can't ALTER ADD with REFERENCES, but the DAO maintains the invariant. Pairs with conflict detection from the previous slice: once the old fact's valid_until is closed, its interval no longer overlaps the new fact's, so they stop flagging — the supersede action resolves the conflict. Two tests pin the contract: supersede stamps valid_until from new.valid_from while respecting an existing valid_until, and deleting the supersedeR clears the dangling pointer while leaving the old fact's 'superseded' status in place for history. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../down.sql | 2 + .../up.sql | 31 +++ src/ai/insight_generator.rs | 19 +- src/database/knowledge_dao.rs | 218 +++++++++++++++++- src/database/models.rs | 5 + src/database/schema.rs | 1 + src/knowledge.rs | 40 ++++ 7 files changed, 307 insertions(+), 9 deletions(-) create mode 100644 migrations/2026-05-10-000200_entity_facts_superseded_by/down.sql create mode 100644 migrations/2026-05-10-000200_entity_facts_superseded_by/up.sql diff --git a/migrations/2026-05-10-000200_entity_facts_superseded_by/down.sql b/migrations/2026-05-10-000200_entity_facts_superseded_by/down.sql new file mode 100644 index 0000000..1e09629 --- /dev/null +++ b/migrations/2026-05-10-000200_entity_facts_superseded_by/down.sql @@ -0,0 +1,2 @@ +DROP INDEX IF EXISTS idx_entity_facts_superseded_by; +ALTER TABLE entity_facts DROP COLUMN superseded_by; diff --git a/migrations/2026-05-10-000200_entity_facts_superseded_by/up.sql b/migrations/2026-05-10-000200_entity_facts_superseded_by/up.sql new file mode 100644 index 0000000..be04ae0 --- /dev/null +++ b/migrations/2026-05-10-000200_entity_facts_superseded_by/up.sql @@ -0,0 +1,31 @@ +-- Add a supersession pointer to entity_facts. +-- +-- Status alone is a one-way trapdoor: 'rejected' loses the link +-- between the rejected fact and the one that replaced it. For +-- evolving facts (Cameron's relationship, employer, address) the +-- curator wants to *replace* a stale fact with a new one and keep +-- the history readable: "from 2018 until 2022 this was true, then +-- it became this other thing". +-- +-- A nullable INTEGER column pointing at another entity_facts.id — +-- no FK constraint because SQLite can't ALTER ADD COLUMN with REFs; +-- the DAO's delete_fact clears dangling pointers in the same +-- transaction as the parent delete to keep the column honest. +-- +-- A status of 'superseded' on the old fact (alongside the existing +-- active / reviewed / rejected) signals "replaced by a newer +-- claim". Read paths already filter 'rejected' out of the active +-- view; the curation UI will treat 'superseded' the same way for +-- conflict detection so they don't keep flagging. +-- +-- Pairs with the valid-time columns from 2026-05-10-000100: the +-- supersede action auto-stamps the old fact's `valid_until` from +-- the new fact's `valid_from`, closing the interval cleanly. + +ALTER TABLE entity_facts ADD COLUMN superseded_by INTEGER; + +-- Helpful index for "show me what superseded this fact" walks +-- (rare today; cheap to add now while the table is small). +CREATE INDEX idx_entity_facts_superseded_by + ON entity_facts(superseded_by) + WHERE superseded_by IS NOT NULL; diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 1f9dfe7..60bc50c 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -2672,6 +2672,19 @@ Return ONLY the summary, nothing else."#, file_path ); + // Anchor the fact in valid-time using the source photo's + // `date_taken` (Apollo's naive-as-UTC convention is fine + // here — we only care about calendar ordering, not absolute + // UTC). The semantic stretch: a photo *evidences* the fact at + // that date — the fact may have started earlier — so this is + // best read as "no later than this it started being true", + // not a strict lower bound. Still useful: gives the curator a + // calendar anchor and lets supersession (next slice) close + // intervals cleanly when a newer fact arrives. valid_until + // stays NULL — a single photo can't tell us when something + // *stopped* being true. + let valid_from = self.fetch_exif(file_path).and_then(|e| e.date_taken); + let fact = InsertEntityFact { subject_entity_id, predicate, @@ -2684,11 +2697,9 @@ Return ONLY the summary, nothing else."#, created_at: chrono::Utc::now().timestamp(), persona_id: persona_id.to_string(), user_id, - // The agentic loop doesn't yet derive valid-time from the - // photo's date_taken. Left NULL for now; Phase 2's - // supersession + a future agent tool will populate these. - valid_from: None, + valid_from, valid_until: None, + superseded_by: None, }; let mut kdao = self diff --git a/src/database/knowledge_dao.rs b/src/database/knowledge_dao.rs index cd6a9bb..57518fe 100644 --- a/src/database/knowledge_dao.rs +++ b/src/database/knowledge_dao.rs @@ -226,6 +226,21 @@ pub trait KnowledgeDao: Sync + Send { fn delete_fact(&mut self, cx: &opentelemetry::Context, id: i32) -> Result<(), DbError>; + /// Mark an old fact as superseded by a new one. Atomically: + /// - reads the new fact's valid_from + /// - sets old.superseded_by = new_id + /// - sets old.status = 'superseded' + /// - stamps old.valid_until = new.valid_from (if not already + /// set; otherwise leaves it) + /// + /// Returns the updated old fact. Errors if either id is missing. + fn supersede_fact( + &mut self, + cx: &opentelemetry::Context, + old_id: i32, + new_id: i32, + ) -> Result, DbError>; + // --- Photo links --- fn upsert_photo_link( &mut self, @@ -1131,12 +1146,96 @@ impl KnowledgeDao for SqliteKnowledgeDao { trace_db_call(cx, "delete", "delete_fact", |_span| { use schema::entity_facts::dsl::*; let mut conn = self.connection.lock().expect("KnowledgeDao lock"); - diesel::delete(entity_facts.filter(id.eq(fact_id))) - .execute(conn.deref_mut()) - .map(|_| ()) - .map_err(|e| anyhow::anyhow!("Delete error: {}", e)) + // Clear dangling supersession pointers from any fact this + // one had retired — there's no FK on superseded_by (SQLite + // can't ALTER ADD with REFERENCES) so we do it manually. + // Sibling rows lose the pointer but stay 'superseded' — + // the user's historical correction survives the cleanup. + conn.transaction::<(), diesel::result::Error, _>(|conn| { + diesel::update(entity_facts.filter(superseded_by.eq(fact_id))) + .set(superseded_by.eq::>(None)) + .execute(conn)?; + diesel::delete(entity_facts.filter(id.eq(fact_id))).execute(conn)?; + Ok(()) + }) + .map_err(|e| anyhow::anyhow!("Delete error: {}", e)) + }) + .map_err(|e| { + log::warn!("delete_fact({}) failed: {}", fact_id, e); + DbError::new(DbErrorKind::QueryError) + }) + } + + fn supersede_fact( + &mut self, + cx: &opentelemetry::Context, + old_id: i32, + new_id: i32, + ) -> Result, DbError> { + trace_db_call(cx, "update", "supersede_fact", |_span| { + use schema::entity_facts::dsl::*; + let mut conn = self.connection.lock().expect("KnowledgeDao lock"); + + if old_id == new_id { + return Err(anyhow::anyhow!( + "supersede_fact: old_id and new_id must differ" + )); + } + + conn.transaction::, diesel::result::Error, _>( + |conn| { + // Pull the new fact's valid_from so we can close + // the old fact's interval at the same point. + let new_fact: Option = entity_facts + .filter(id.eq(new_id)) + .first::(conn) + .optional()?; + let Some(new_fact) = new_fact else { + return Ok(None); + }; + + // Verify the old fact exists before touching it — + // returning None lets the handler 404 cleanly. + let old_fact: Option = entity_facts + .filter(id.eq(old_id)) + .first::(conn) + .optional()?; + if old_fact.is_none() { + return Ok(None); + } + + // Only stamp valid_until if the user hasn't + // already set it — respecting hand-curated bounds. + let target_valid_until = old_fact + .as_ref() + .and_then(|f| f.valid_until) + .or(new_fact.valid_from); + + diesel::update(entity_facts.filter(id.eq(old_id))) + .set(( + status.eq("superseded"), + superseded_by.eq(Some(new_id)), + valid_until.eq(target_valid_until), + )) + .execute(conn)?; + + entity_facts + .filter(id.eq(old_id)) + .first::(conn) + .optional() + }, + ) + .map_err(|e| anyhow::anyhow!("Supersede error: {}", e)) + }) + .map_err(|e| { + log::warn!( + "supersede_fact(old={}, new={}) failed: {}", + old_id, + new_id, + e + ); + DbError::new(DbErrorKind::UpdateError) }) - .map_err(|_| DbError::new(DbErrorKind::QueryError)) } // ----------------------------------------------------------------------- @@ -1370,6 +1469,7 @@ mod tests { user_id, valid_from: None, valid_until: None, + superseded_by: None, }, ) .unwrap(); @@ -1590,6 +1690,7 @@ mod tests { user_id: alice, valid_from: None, valid_until: None, + superseded_by: None, }, ); assert!( @@ -1598,6 +1699,112 @@ mod tests { ); } + #[test] + fn supersede_fact_links_and_stamps_valid_until() { + // Supersession: marking an old fact as replaced by a new one + // flips its status to 'superseded', points superseded_by at + // the new fact, and stamps valid_until from the new fact's + // valid_from (when not already set). Pre-existing valid_until + // on the old fact is respected. + let cx = opentelemetry::Context::new(); + let conn = connection_with_fks_on(); + let alice = create_user(&conn, "alice"); + create_persona_row(&conn, alice, "default"); + + let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); + let cameron = make_entity(&mut dao, "Cameron"); + let old = add_fact( + &mut dao, + cameron.id, + "is_in_relationship_with", + "X", + alice, + "default", + ); + // The new fact carries a valid_from we expect to be stamped + // onto the old fact's valid_until. + let new = add_fact( + &mut dao, + cameron.id, + "is_in_relationship_with", + "Y", + alice, + "default", + ); + dao.update_fact( + &cx, + new.id, + FactPatch { + predicate: None, + object_value: None, + status: None, + confidence: None, + valid_from: Some(Some(1640995200)), // 2022-01-01 + valid_until: None, + }, + ) + .unwrap(); + + let updated = dao + .supersede_fact(&cx, old.id, new.id) + .unwrap() + .expect("supersede returned None"); + + assert_eq!(updated.status, "superseded"); + assert_eq!(updated.superseded_by, Some(new.id)); + assert_eq!(updated.valid_until, Some(1640995200)); + } + + #[test] + fn delete_fact_clears_dangling_supersession_pointers() { + // Deleting the newer fact (the supersedeR) leaves the older + // fact's superseded_by dangling — the DAO clears it back to + // NULL in the same transaction so the column never points at + // a missing row. The old fact's status stays 'superseded' + // because the historical correction is still meaningful. + let cx = opentelemetry::Context::new(); + let conn = connection_with_fks_on(); + let alice = create_user(&conn, "alice"); + create_persona_row(&conn, alice, "default"); + + let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); + let cameron = make_entity(&mut dao, "Cameron"); + let old = add_fact(&mut dao, cameron.id, "lives_in", "NYC", alice, "default"); + let new = add_fact(&mut dao, cameron.id, "lives_in", "SF", alice, "default"); + + dao.supersede_fact(&cx, old.id, new.id).unwrap().unwrap(); + dao.delete_fact(&cx, new.id).unwrap(); + + let rehydrated = dao + .list_facts( + &cx, + FactFilter { + entity_id: Some(cameron.id), + // "all" — the old fact is 'superseded' now, so the + // default 'active' scope would skip it. + status: Some("all".to_string()), + predicate: None, + persona: PersonaFilter::Single { + user_id: alice, + persona_id: "default".to_string(), + }, + limit: 10, + offset: 0, + }, + ) + .unwrap() + .0; + let old_row = rehydrated.iter().find(|f| f.id == old.id).unwrap(); + assert_eq!( + old_row.superseded_by, None, + "dangling supersession pointer should be cleared" + ); + assert_eq!( + old_row.status, "superseded", + "historical status should survive the supersederr delete" + ); + } + #[test] fn update_fact_can_set_and_clear_valid_time() { // FactPatch.valid_from / valid_until are Option> @@ -1715,6 +1922,7 @@ mod tests { user_id: alice, valid_from: None, valid_until: None, + superseded_by: None, }, ) .unwrap(); diff --git a/src/database/models.rs b/src/database/models.rs index 6ee5f02..4f21110 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -258,6 +258,10 @@ pub struct InsertEntityFact { /// 2026-05-10-000100. pub valid_from: Option, pub valid_until: Option, + /// Points at the entity_facts.id that replaced this one. Set by + /// the supersede endpoint; status flips to 'superseded' in the + /// same transaction. See migration 2026-05-10-000200. + pub superseded_by: Option, } #[derive(Serialize, Queryable, Clone, Debug)] @@ -276,6 +280,7 @@ pub struct EntityFact { pub user_id: i32, pub valid_from: Option, pub valid_until: Option, + pub superseded_by: Option, } #[derive(Insertable)] diff --git a/src/database/schema.rs b/src/database/schema.rs index 1260e5b..57326af 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -61,6 +61,7 @@ diesel::table! { user_id -> Integer, valid_from -> Nullable, valid_until -> Nullable, + superseded_by -> Nullable, } } diff --git a/src/knowledge.rs b/src/knowledge.rs index 9d4f349..8e8f601 100644 --- a/src/knowledge.rs +++ b/src/knowledge.rs @@ -115,6 +115,10 @@ pub struct FactDetail { /// recorded it). See migration 2026-05-10-000100. pub valid_from: Option, pub valid_until: Option, + /// Points at the entity_facts.id that replaced this one (Phase 2 + /// supersession, migration 2026-05-10-000200). Only set when + /// status == 'superseded'. + pub superseded_by: Option, /// Set when another active fact has the same subject+predicate, /// a different object, AND their valid-time intervals overlap. /// Detected at read time by the get_entity handler grouping @@ -228,6 +232,12 @@ pub struct FactPatchRequest { pub valid_until: Option>, } +#[derive(Deserialize)] +pub struct SupersedeRequest { + /// The id of the new fact that replaces the path-params one. + pub by_fact_id: i32, +} + #[derive(Deserialize)] pub struct FactCreateRequest { pub subject_entity_id: i32, @@ -296,6 +306,10 @@ where .route(web::patch().to(patch_fact::)) .route(web::delete().to(delete_fact::)), ) + .service( + web::resource("/facts/{id}/supersede") + .route(web::post().to(supersede_fact::)), + ) .service(web::resource("/recent").route(web::get().to(get_recent::))), ) } @@ -417,6 +431,7 @@ async fn get_entity( created_at: f.created_at, valid_from: f.valid_from, valid_until: f.valid_until, + superseded_by: f.superseded_by, in_conflict: false, }); } @@ -752,6 +767,7 @@ async fn create_fact( user_id, valid_from: body.valid_from, valid_until: body.valid_until, + superseded_by: None, }; match dao.upsert_fact(&cx, insert) { @@ -815,6 +831,30 @@ async fn delete_fact( } } +async fn supersede_fact( + _claims: Claims, + id: web::Path, + body: web::Json, + dao: web::Data>, +) -> impl Responder { + let cx = opentelemetry::Context::current(); + let old_id = id.into_inner(); + if old_id == body.by_fact_id { + return HttpResponse::BadRequest() + .json(serde_json::json!({"error": "old_id and by_fact_id must differ"})); + } + let mut dao = dao.lock().expect("Unable to lock KnowledgeDao"); + match dao.supersede_fact(&cx, old_id, body.by_fact_id) { + Ok(Some(fact)) => HttpResponse::Ok().json(fact), + Ok(None) => HttpResponse::NotFound() + .json(serde_json::json!({"error": "Old or new fact not found"})), + Err(e) => { + log::error!("supersede_fact error: {:?}", e); + HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"})) + } + } +} + async fn get_recent( req: HttpRequest, claims: Claims,