knowledge: valid-time on facts + interval-aware conflict detection

Adds bitemporal support to entity_facts. Existing `created_at` is transaction time (when we recorded the fact); the new `valid_from` / `valid_until` BIGINT columns are valid time (when the fact is/was true in the real world). NULL on either side = unbounded on that side, both NULL = "always-true / unknown" — matches the default state of every legacy row, no backfill needed. The split matters for time-bounded predicates like is_in_relationship_with / lives_in / works_at: recording the fact once doesn't mean the relationship is still ongoing. Same predicate across different windows ("lives_in NYC 2018-2020", "lives_in SF 2020-present") is no longer a conflict — the interval-aware check in get_entity only flags pairs that disagree on the object and whose windows overlap. Bounds are compared strictly, so a window ending in 2020 and one starting in 2020 share an endpoint but do not overlap. Facts with no valid-time data still flag against everything (worst case for legacy rows — user adds dates to suppress). API surface: - POST /knowledge/facts accepts optional valid_from / valid_until. - PATCH /knowledge/facts/{id} accepts both with tri-state semantics: field omitted = leave alone, JSON null = clear to NULL, number = set. Implemented via a small serde helper around Option<Option<i64>>. - GET /knowledge/entities/{id} surfaces both fields per fact and uses them in conflict detection. Agent path (insight_generator) writes NULL/NULL for now — deriving valid_from from the source photo's date_taken is slated for a follow-up agent tool alongside Phase 2's supersession. Test pins set + clear semantics via update_fact: setting both bounds, leaving them alone on a subsequent patch, then clearing valid_until back to NULL. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 19:25:55 -04:00
parent bcd5312953
commit 01f5ad7527
7 changed files with 224 additions and 22 deletions
--- a/src/knowledge.rs
+++ b/src/knowledge.rs
@@ -109,13 +109,20 @@ pub struct FactDetail {
    pub source_photo: Option<String>,
    pub source_insight_id: Option<i32>,
    pub created_at: i64,
-    /// Set when another active fact has the same subject+predicate but
-    /// a different object. Detected at read time (no schema change) by
-    /// the get_entity handler grouping facts by predicate. Some
-    /// predicates are legitimately multi-valued ("tagged_in",
-    /// "friend_of") so this is a *signal* for the curator, not a hard
-    /// invariant. Stale-data correction is the common case (Alice
-    /// lives_in NYC AND SF — one of these is wrong).
+    /// Real-world valid-time interval. NULL on either side means
+    /// unbounded; both NULL = "always true" / validity unknown.
+    /// Distinct from `created_at` (transaction time — when we
+    /// recorded it). See migration 2026-05-10-000100.
+    pub valid_from: Option<i64>,
+    pub valid_until: Option<i64>,
+    /// Set when another active fact has the same subject+predicate,
+    /// a different object, AND their valid-time intervals overlap.
+    /// Detected at read time by the get_entity handler grouping
+    /// facts by predicate. Some predicates are legitimately
+    /// multi-valued ("tagged_in", "friend_of") so this is a *signal*
+    /// for the curator, not a hard invariant. The interval check
+    /// keeps "lives_in NYC 2018-2020" + "lives_in SF 2020-present"
+    /// from false-positive flagging.
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    pub in_conflict: bool,
 }
@@ -197,12 +204,28 @@ pub struct EntityPatchRequest {
    pub confidence: Option<f32>,
 }

+/// `deserialize_with` helper for tri-state PATCH fields: wraps whatever value is present in an outer
+/// `Some`, so JSON null yields `Some(None)` ("set NULL") and a number yields `Some(Some(n))`. Pair it with
+/// `#[serde(default)]` so an omitted key stays `None` ("leave alone"); deserializer errors propagate via `?`.
+fn deserialize_optional_nullable_i64<'de, D>(d: D) -> Result<Option<Option<i64>>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    Ok(Some(Option::<i64>::deserialize(d)?)) // inner Option distinguishes null from a number; outer Some marks "key was sent"
+}
+
 #[derive(Deserialize)]
 pub struct FactPatchRequest { // deserialized request body whose fields patch_fact copies into the DAO update
    pub predicate: Option<String>,
    pub object_value: Option<String>,
    pub status: Option<String>,
    pub confidence: Option<f32>,
+    /// Tri-state lower bound of the valid-time window: `None` (key omitted) = leave alone,
+    /// `Some(None)` (JSON null) = clear to NULL, `Some(Some(n))` = set. See `deserialize_optional_nullable_i64`.
+    #[serde(default, deserialize_with = "deserialize_optional_nullable_i64")]
+    pub valid_from: Option<Option<i64>>,
+    #[serde(default, deserialize_with = "deserialize_optional_nullable_i64")] // same tri-state handling as valid_from
+    pub valid_until: Option<Option<i64>>, // upper bound of the valid-time window
 }

 #[derive(Deserialize)]
@@ -213,6 +236,8 @@ pub struct FactCreateRequest {
    pub object_value: Option<String>,
    pub source_photo: Option<String>,
    pub confidence: Option<f32>,
+    pub valid_from: Option<i64>,
+    pub valid_until: Option<i64>,
 }

 #[derive(Deserialize)]
@@ -390,17 +415,28 @@ async fn get_entity<D: KnowledgeDao + 'static>(
            source_photo: f.source_photo,
            source_insight_id: f.source_insight_id,
            created_at: f.created_at,
+            valid_from: f.valid_from,
+            valid_until: f.valid_until,
            in_conflict: false,
        });
    }

    // Conflict detection: within the active set, group by predicate;
-    // any predicate group with more than one distinct object (entity
-    // id or value) flags all its members. Some predicates are
-    // legitimately multi-valued (e.g. "tagged_in", "friend_of") so
-    // this is a curator hint, not a hard rule — `in_conflict` exists
-    // to surface stale-data candidates ("lives_in NYC" and
-    // "lives_in SF" can't both be current).
+    // for each pair within a group that disagrees on the object,
+    // flag both only if their valid-time intervals overlap. NULL on
+    // either bound treats that side as unbounded — a fact with no
+    // valid-time data still flags against any time period (worst case
+    // for legacy data; user adds dates to suppress).
+    fn intervals_overlap( // true when two (valid_from, valid_until) windows share any span of time
+        a: (Option<i64>, Option<i64>), // (lower, upper) bounds of the first window; None = unbounded on that side
+        b: (Option<i64>, Option<i64>), // (lower, upper) bounds of the second window
+    ) -> bool {
+        let a_lo = a.0.unwrap_or(i64::MIN); // missing lower bound -> smallest i64, i.e. open-ended into the past
+        let a_hi = a.1.unwrap_or(i64::MAX); // missing upper bound -> largest i64, i.e. open-ended into the future
+        let b_lo = b.0.unwrap_or(i64::MIN);
+        let b_hi = b.1.unwrap_or(i64::MAX);
+        a_lo < b_hi && b_lo < a_hi // strict `<`: windows that only touch at an endpoint do not overlap. NOTE(review): two identical zero-length windows (from == until) therefore never overlap, and inverted windows (from > until) are not validated here — confirm that is intended.
+    }
    {
        use std::collections::{HashMap, HashSet};
        let mut by_predicate: HashMap<String, Vec<usize>> = HashMap::new();
@@ -417,15 +453,21 @@ async fn get_entity<D: KnowledgeDao + 'static>(
            if indices.len() < 2 {
                continue;
            }
-            // Distinct (object_entity_id, object_value) tuples across
-            // these active facts.
-            let mut seen: HashSet<(Option<i32>, Option<String>)> = HashSet::new();
-            for &i in indices {
-                seen.insert((facts[i].object_entity_id, facts[i].object_value.clone()));
-            }
-            if seen.len() > 1 {
-                for &i in indices {
-                    to_flag.insert(i);
+            for (a_pos, &i) in indices.iter().enumerate() {
+                for &j in &indices[a_pos + 1..] {
+                    let same_object = facts[i].object_entity_id
+                        == facts[j].object_entity_id
+                        && facts[i].object_value == facts[j].object_value;
+                    if same_object {
+                        continue;
+                    }
+                    if intervals_overlap(
+                        (facts[i].valid_from, facts[i].valid_until),
+                        (facts[j].valid_from, facts[j].valid_until),
+                    ) {
+                        to_flag.insert(i);
+                        to_flag.insert(j);
+                    }
                }
            }
        }
@@ -708,6 +750,8 @@ async fn create_fact<D: KnowledgeDao + 'static>(
        created_at: now,
        persona_id,
        user_id,
+        valid_from: body.valid_from,
+        valid_until: body.valid_until,
    };

    match dao.upsert_fact(&cx, insert) {
@@ -739,6 +783,8 @@ async fn patch_fact<D: KnowledgeDao + 'static>(
        object_value: body.object_value.clone(),
        status: body.status.clone(),
        confidence: body.confidence,
+        valid_from: body.valid_from,
+        valid_until: body.valid_until,
    };

    let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");