diff --git a/migrations/2026-05-10-000100_entity_facts_valid_time/down.sql b/migrations/2026-05-10-000100_entity_facts_valid_time/down.sql new file mode 100644 index 0000000..63bc68d --- /dev/null +++ b/migrations/2026-05-10-000100_entity_facts_valid_time/down.sql @@ -0,0 +1,5 @@ +-- SQLite can drop columns since 3.35 (March 2021); embedded +-- libsqlite3-sys is well past that. Drop in reverse insert order so +-- a partial down still leaves the schema valid. +ALTER TABLE entity_facts DROP COLUMN valid_until; +ALTER TABLE entity_facts DROP COLUMN valid_from; diff --git a/migrations/2026-05-10-000100_entity_facts_valid_time/up.sql b/migrations/2026-05-10-000100_entity_facts_valid_time/up.sql new file mode 100644 index 0000000..4f0bb01 --- /dev/null +++ b/migrations/2026-05-10-000100_entity_facts_valid_time/up.sql @@ -0,0 +1,25 @@ +-- Add valid-time columns to entity_facts. +-- +-- entity_facts already has created_at — *transaction time*, the +-- moment WE recorded the fact. That's not the same as the real-world +-- period the fact was true. "Cameron is_in_relationship_with X" was +-- only true during a window; recording it in 2026 doesn't make it +-- true today. Without the distinction, every former relationship, +-- former job, former address reads as currently-true. +-- +-- Adding two BIGINT NULL columns: valid_from / valid_until (unix +-- seconds). NULL means "unbounded on that side" — `valid_from IS +-- NULL` reads as "always-true-back-to-the-beginning", +-- `valid_until IS NULL` as "still-true-now-or-unknown". Both NULL = +-- temporal validity unknown (current state of all legacy rows). +-- +-- Conflict detection refines accordingly: same-predicate facts with +-- different objects stop flagging when their intervals are disjoint +-- ("lives_in NYC 2018-2020" and "lives_in SF 2020-present" are both +-- valid, just at different times). 
+ +ALTER TABLE entity_facts ADD COLUMN valid_from BIGINT; +ALTER TABLE entity_facts ADD COLUMN valid_until BIGINT; + +-- Optional partial index for time-bounded scans. Skipped for now — +-- conflict detection runs per-entity (small N) and doesn't need it. diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 49528f7..1f9dfe7 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -2684,6 +2684,11 @@ Return ONLY the summary, nothing else."#, created_at: chrono::Utc::now().timestamp(), persona_id: persona_id.to_string(), user_id, + // The agentic loop doesn't yet derive valid-time from the + // photo's date_taken. Left NULL for now; Phase 2's + // supersession + a future agent tool will populate these. + valid_from: None, + valid_until: None, }; let mut kdao = self diff --git a/src/database/knowledge_dao.rs b/src/database/knowledge_dao.rs index 70ff711..cd6a9bb 100644 --- a/src/database/knowledge_dao.rs +++ b/src/database/knowledge_dao.rs @@ -101,6 +101,15 @@ pub struct FactPatch { pub object_value: Option<String>, pub status: Option<String>, pub confidence: Option<f64>, + /// Real-world valid-time bounds. Outer Some = "patch this column"; + /// inner Some(val) = set to that unix-seconds value; inner None = + /// clear back to NULL ("unbounded"). The double-Option lets the + /// HTTP layer distinguish "field omitted" (leave alone) from + /// "field sent as null" (clear) — needed for these specifically + /// because there's no sentinel string-empty equivalent like the + /// other fields have.
+ pub valid_from: Option<Option<i64>>, + pub valid_until: Option<Option<i64>>, } pub struct RecentActivity { @@ -1074,6 +1083,18 @@ impl KnowledgeDao for SqliteKnowledgeDao { .execute(conn.deref_mut()) .map_err(|e| anyhow::anyhow!("Update error: {}", e))?; } + if let Some(new_from) = patch.valid_from { + diesel::update(entity_facts.filter(id.eq(fact_id))) + .set(valid_from.eq(new_from)) + .execute(conn.deref_mut()) + .map_err(|e| anyhow::anyhow!("Update error: {}", e))?; + } + if let Some(new_until) = patch.valid_until { + diesel::update(entity_facts.filter(id.eq(fact_id))) + .set(valid_until.eq(new_until)) + .execute(conn.deref_mut()) + .map_err(|e| anyhow::anyhow!("Update error: {}", e))?; + } entity_facts .filter(id.eq(fact_id)) @@ -1347,6 +1368,8 @@ mod tests { created_at: 0, persona_id: persona_id.to_string(), user_id, + valid_from: None, + valid_until: None, }, ) .unwrap(); @@ -1565,6 +1588,8 @@ mod tests { created_at: 0, persona_id: "ghost".to_string(), user_id: alice, + valid_from: None, + valid_until: None, }, ); assert!( @@ -1573,6 +1598,87 @@ mod tests { ); } + #[test] + fn update_fact_can_set_and_clear_valid_time() { + // FactPatch.valid_from / valid_until are Option<Option<i64>> + // so PATCH can distinguish "leave alone" (None) from "set to + // value" (Some(Some(n))) and "clear back to NULL" (Some(None)). + let cx = opentelemetry::Context::new(); + let conn = connection_with_fks_on(); + let alice = create_user(&conn, "alice"); + create_persona_row(&conn, alice, "default"); + + let mut dao = SqliteKnowledgeDao::from_connection(conn.clone()); + let cameron = make_entity(&mut dao, "Cameron"); + let fact = add_fact( + &mut dao, + cameron.id, + "is_in_relationship_with", + "Alex", + alice, + "default", + ); + assert_eq!(fact.valid_from, None); + assert_eq!(fact.valid_until, None); + + // Set both bounds.
+ let updated = dao + .update_fact( + &cx, + fact.id, + FactPatch { + predicate: None, + object_value: None, + status: None, + confidence: None, + valid_from: Some(Some(1577836800)), // 2020-01-01 + valid_until: Some(Some(1640995200)), // 2022-01-01 + }, + ) + .unwrap() + .unwrap(); + assert_eq!(updated.valid_from, Some(1577836800)); + assert_eq!(updated.valid_until, Some(1640995200)); + + // Leave alone: omit both — values persist. + let still = dao + .update_fact( + &cx, + fact.id, + FactPatch { + predicate: None, + object_value: None, + status: None, + confidence: None, + valid_from: None, + valid_until: None, + }, + ) + .unwrap() + .unwrap(); + assert_eq!(still.valid_from, Some(1577836800)); + assert_eq!(still.valid_until, Some(1640995200)); + + // Clear valid_until back to NULL (relationship ongoing again). + let cleared = dao + .update_fact( + &cx, + fact.id, + FactPatch { + predicate: None, + object_value: None, + status: None, + confidence: None, + valid_from: None, + valid_until: Some(None), + }, + ) + .unwrap() + .unwrap(); + assert_eq!(cleared.valid_from, Some(1577836800)); + assert_eq!(cleared.valid_until, None); + } + #[test] fn delete_entity_clears_relational_facts_that_would_violate_check() { // entity_facts has a CHECK that at least one of object_entity_id / @@ -1607,6 +1713,8 @@ mod tests { created_at: 0, persona_id: "default".to_string(), user_id: alice, + valid_from: None, + valid_until: None, }, ) .unwrap(); diff --git a/src/database/models.rs b/src/database/models.rs index 479b631..6ee5f02 100644 --- a/src/database/models.rs +++ b/src/database/models.rs @@ -249,6 +249,15 @@ pub struct InsertEntityFact { /// persona must not see each other's facts. Always paired with /// `persona_id` — they're a unit. pub user_id: i32, + /// Real-world period the fact is/was true (unix seconds). 
NULL on + /// either side = unbounded — `valid_from IS NULL` reads as + /// "always-true-back-to-the-beginning", `valid_until IS NULL` as + /// "still-true-now-or-unknown". Distinguishes valid time from + /// transaction time (`created_at` is when we recorded the fact, + /// not when it was true in the world). See migration + /// 2026-05-10-000100. + pub valid_from: Option<i64>, + pub valid_until: Option<i64>, } #[derive(Serialize, Queryable, Clone, Debug)] @@ -265,6 +274,8 @@ pub struct EntityFact { pub created_at: i64, pub persona_id: String, pub user_id: i32, + pub valid_from: Option<i64>, + pub valid_until: Option<i64>, } #[derive(Insertable)] diff --git a/src/database/schema.rs b/src/database/schema.rs index d93d583..1260e5b 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -59,6 +59,8 @@ diesel::table! { created_at -> BigInt, persona_id -> Text, user_id -> Integer, + valid_from -> Nullable<BigInt>, + valid_until -> Nullable<BigInt>, } } diff --git a/src/knowledge.rs b/src/knowledge.rs index 256c55d..9d4f349 100644 --- a/src/knowledge.rs +++ b/src/knowledge.rs @@ -109,13 +109,20 @@ pub struct FactDetail { pub source_photo: Option<String>, pub source_insight_id: Option<i32>, pub created_at: i64, - /// Set when another active fact has the same subject+predicate but - /// a different object. Detected at read time (no schema change) by - /// the get_entity handler grouping facts by predicate. Some - /// predicates are legitimately multi-valued ("tagged_in", - /// "friend_of") so this is a *signal* for the curator, not a hard - /// invariant. Stale-data correction is the common case (Alice - /// lives_in NYC AND SF — one of these is wrong). + /// Real-world valid-time interval. NULL on either side means + /// unbounded; both NULL = "always true" / validity unknown. + /// Distinct from `created_at` (transaction time — when we + /// recorded it). See migration 2026-05-10-000100.
+ pub valid_from: Option<i64>, + pub valid_until: Option<i64>, + /// Set when another active fact has the same subject+predicate, + /// a different object, AND their valid-time intervals overlap. + /// Detected at read time by the get_entity handler grouping + /// facts by predicate. Some predicates are legitimately + /// multi-valued ("tagged_in", "friend_of") so this is a *signal* + /// for the curator, not a hard invariant. The interval check + /// keeps "lives_in NYC 2018-2020" + "lives_in SF 2020-present" + /// from false-positive flagging. #[serde(skip_serializing_if = "std::ops::Not::not")] pub in_conflict: bool, } @@ -197,12 +204,28 @@ pub struct EntityPatchRequest { pub confidence: Option<f64>, } +/// Serde helper for the "tri-state" pattern: distinguish "field +/// omitted" from "field sent as null". Used for nullable columns +/// where we want PATCH to support both "leave alone" and "set NULL". +fn deserialize_optional_nullable_i64<'de, D>(d: D) -> Result<Option<Option<i64>>, D::Error> +where + D: serde::Deserializer<'de>, +{ + Ok(Some(Option::<i64>::deserialize(d)?)) +} + #[derive(Deserialize)] pub struct FactPatchRequest { pub predicate: Option<String>, pub object_value: Option<String>, pub status: Option<String>, pub confidence: Option<f64>, + /// Tri-state: missing = leave alone, null = clear to NULL, number + /// = set. See `deserialize_optional_nullable_i64`.
+ #[serde(default, deserialize_with = "deserialize_optional_nullable_i64")] + pub valid_from: Option<Option<i64>>, + #[serde(default, deserialize_with = "deserialize_optional_nullable_i64")] + pub valid_until: Option<Option<i64>>, } #[derive(Deserialize)] @@ -213,6 +236,8 @@ pub struct FactCreateRequest { pub object_value: Option<String>, pub source_photo: Option<String>, pub confidence: Option<f64>, + pub valid_from: Option<i64>, + pub valid_until: Option<i64>, } #[derive(Deserialize)] @@ -390,17 +415,28 @@ async fn get_entity( source_photo: f.source_photo, source_insight_id: f.source_insight_id, created_at: f.created_at, + valid_from: f.valid_from, + valid_until: f.valid_until, in_conflict: false, }); } // Conflict detection: within the active set, group by predicate; - // any predicate group with more than one distinct object (entity - // id or value) flags all its members. Some predicates are - // legitimately multi-valued (e.g. "tagged_in", "friend_of") so - // this is a curator hint, not a hard rule — `in_conflict` exists - // to surface stale-data candidates ("lives_in NYC" and - // "lives_in SF" can't both be current). + // for each pair within a group that disagrees on the object, + // flag both only if their valid-time intervals overlap. NULL on + // either bound treats that side as unbounded — a fact with no + // valid-time data still flags against any time period (worst case + // for legacy data; user adds dates to suppress). + fn intervals_overlap( + a: (Option<i64>, Option<i64>), + b: (Option<i64>, Option<i64>), + ) -> bool { + let a_lo = a.0.unwrap_or(i64::MIN); + let a_hi = a.1.unwrap_or(i64::MAX); + let b_lo = b.0.unwrap_or(i64::MIN); + let b_hi = b.1.unwrap_or(i64::MAX); + a_lo < b_hi && b_lo < a_hi + } { use std::collections::{HashMap, HashSet}; let mut by_predicate: HashMap<String, Vec<usize>> = HashMap::new(); @@ -417,15 +453,21 @@ if indices.len() < 2 { continue; } - // Distinct (object_entity_id, object_value) tuples across - // these active facts.
- let mut seen: HashSet<(Option<i32>, Option<String>)> = HashSet::new(); - for &i in indices { - seen.insert((facts[i].object_entity_id, facts[i].object_value.clone())); - } - if seen.len() > 1 { - for &i in indices { - to_flag.insert(i); + for (a_pos, &i) in indices.iter().enumerate() { + for &j in &indices[a_pos + 1..] { + let same_object = facts[i].object_entity_id + == facts[j].object_entity_id + && facts[i].object_value == facts[j].object_value; + if same_object { + continue; + } + if intervals_overlap( + (facts[i].valid_from, facts[i].valid_until), + (facts[j].valid_from, facts[j].valid_until), + ) { + to_flag.insert(i); + to_flag.insert(j); + } } } } @@ -708,6 +750,8 @@ async fn create_fact( created_at: now, persona_id, user_id, + valid_from: body.valid_from, + valid_until: body.valid_until, }; match dao.upsert_fact(&cx, insert) { @@ -739,6 +783,8 @@ async fn patch_fact( object_value: body.object_value.clone(), status: body.status.clone(), confidence: body.confidence, + valid_from: body.valid_from, + valid_until: body.valid_until, }; let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");