knowledge: detect same-predicate object conflicts at read time

GET /knowledge/entities/{id} now flags facts as `in_conflict` when
another active fact shares the same predicate but disagrees on the
object (entity id or text value). The flag is computed purely at read
time in the handler: active facts are grouped by predicate, and any
group with more than one distinct object flags all of its members.
No schema change; same shape as `is_current` on photo insights.

The flag is intentionally a *signal*, not a hard constraint. Some
predicates are legitimately multi-valued (friend_of, tagged_in,
appears_in) — the curator UI surfaces the amber accent and lets the
user reject the stale fact, accept both, or supersede one later
once the supersession column lands.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-10 19:14:58 -04:00
parent 0b8478a5e4
commit bcd5312953

View File

@@ -109,6 +109,15 @@ pub struct FactDetail {
pub source_photo: Option<String>, pub source_photo: Option<String>,
pub source_insight_id: Option<i32>, pub source_insight_id: Option<i32>,
pub created_at: i64, pub created_at: i64,
/// Set when another active fact has the same subject+predicate but
/// a different object. Detected at read time (no schema change) by
/// the get_entity handler grouping facts by predicate. Some
/// predicates are legitimately multi-valued ("tagged_in",
/// "friend_of") so this is a *signal* for the curator, not a hard
/// invariant. Stale-data correction is the common case (Alice
/// lives_in NYC AND SF — one of these is wrong).
/// Omitted from the serialized output when `false`.
#[serde(skip_serializing_if = "std::ops::Not::not")]
pub in_conflict: bool,
} }
#[derive(Serialize)] #[derive(Serialize)]
@@ -381,9 +390,50 @@ async fn get_entity<D: KnowledgeDao + 'static>(
source_photo: f.source_photo, source_photo: f.source_photo,
source_insight_id: f.source_insight_id, source_insight_id: f.source_insight_id,
created_at: f.created_at, created_at: f.created_at,
in_conflict: false,
}); });
} }
// Conflict detection. Only facts whose status is "active" take part.
// They are bucketed by predicate, and every fact in a bucket is marked
// `in_conflict` when the bucket contains more than one distinct
// (object_entity_id, object_value) pair. Predicates such as
// "tagged_in" or "friend_of" are legitimately multi-valued, so the
// flag is a hint for the curator rather than an enforced rule; its
// purpose is to surface stale-data candidates ("lives_in NYC" next to
// "lives_in SF").
{
    use std::collections::{HashMap, HashSet};

    // Positions (into `facts`) of the active facts, keyed by predicate.
    let mut groups: HashMap<String, Vec<usize>> = HashMap::new();
    for (pos, fact) in facts
        .iter()
        .enumerate()
        .filter(|(_, fact)| fact.status == "active")
    {
        groups.entry(fact.predicate.clone()).or_default().push(pos);
    }

    // Collect the positions belonging to every group that disagrees on
    // its object. This is finished before any fact is mutated, so the
    // shared borrow of `facts` has ended by the time flags are written.
    let conflicting: HashSet<usize> = groups
        .values()
        // A single-member group cannot disagree with itself.
        .filter(|members| members.len() >= 2)
        .filter(|members| {
            let objects: HashSet<(Option<i32>, Option<String>)> = members
                .iter()
                .map(|&pos| (facts[pos].object_entity_id, facts[pos].object_value.clone()))
                .collect();
            objects.len() > 1
        })
        .flatten()
        .copied()
        .collect();

    for pos in conflicting {
        facts[pos].in_conflict = true;
    }
}
// Fetch photo links // Fetch photo links
let photo_links: Vec<PhotoLinkDetail> = match dao.get_links_for_entity(&cx, entity_id) { let photo_links: Vec<PhotoLinkDetail> = match dao.get_links_for_entity(&cx, entity_id) {
Ok(links) => links.into_iter().map(PhotoLinkDetail::from).collect(), Ok(links) => links.into_iter().map(PhotoLinkDetail::from).collect(),