diff --git a/src/knowledge.rs b/src/knowledge.rs index b8fc4ba..256c55d 100644 --- a/src/knowledge.rs +++ b/src/knowledge.rs @@ -109,6 +109,15 @@ pub struct FactDetail { pub source_photo: Option, pub source_insight_id: Option, pub created_at: i64, + /// Set when another active fact has the same subject+predicate but + /// a different object. Detected at read time (no schema change) by + /// the get_entity handler grouping facts by predicate. Some + /// predicates are legitimately multi-valued ("tagged_in", + /// "friend_of") so this is a *signal* for the curator, not a hard + /// invariant. Stale-data correction is the common case (Alice + /// lives_in NYC AND SF — one of these is wrong). + #[serde(skip_serializing_if = "std::ops::Not::not")] + pub in_conflict: bool, } #[derive(Serialize)] @@ -381,9 +390,50 @@ async fn get_entity( source_photo: f.source_photo, source_insight_id: f.source_insight_id, created_at: f.created_at, + in_conflict: false, }); } + // Conflict detection: within the active set, group by predicate; + // any predicate group with more than one distinct object (entity + // id or value) flags all its members. Some predicates are + // legitimately multi-valued (e.g. "tagged_in", "friend_of") so + // this is a curator hint, not a hard rule — `in_conflict` exists + // to surface stale-data candidates ("lives_in NYC" and + // "lives_in SF" can't both be current). + { + use std::collections::{HashMap, HashSet}; + let mut by_predicate: HashMap> = HashMap::new(); + for (idx, f) in facts.iter().enumerate() { + if f.status == "active" { + by_predicate + .entry(f.predicate.clone()) + .or_default() + .push(idx); + } + } + let mut to_flag: HashSet = HashSet::new(); + for indices in by_predicate.values() { + if indices.len() < 2 { + continue; + } + // Distinct (object_entity_id, object_value) tuples across + // these active facts. + let mut seen: HashSet<(Option, Option)> = HashSet::new(); + for &i in indices { + seen.insert((facts[i].object_entity_id, facts[i].object_value.clone())); + } + if seen.len() > 1 { + for &i in indices { + to_flag.insert(i); + } + } + } + for i in to_flag { + facts[i].in_conflict = true; + } + } + // Fetch photo links let photo_links: Vec = match dao.get_links_for_entity(&cx, entity_id) { Ok(links) => links.into_iter().map(PhotoLinkDetail::from).collect(),