knowledge: detect same-predicate object conflicts at read time
GET /knowledge/entities/{id} now flags facts as `in_conflict` when
another active fact shares the same predicate but disagrees on the
object (entity id or text value). This is a pure read-time computation in
the handler: active facts are grouped by predicate, and any group with
more than one distinct object flags all of its members. No schema change;
same shape as `is_current` on photo insights.
The flag is intentionally a *signal*, not a hard constraint. Some
predicates are legitimately multi-valued (friend_of, tagged_in,
appears_in) — the curator UI surfaces the amber accent and lets the
user reject the stale fact, accept both, or supersede one later
once the supersession column lands.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -109,6 +109,15 @@ pub struct FactDetail {
|
||||
pub source_photo: Option<String>,
|
||||
pub source_insight_id: Option<i32>,
|
||||
pub created_at: i64,
|
||||
/// Set when another active fact has the same subject+predicate but
|
||||
/// a different object. Detected at read time (no schema change) by
|
||||
/// the get_entity handler grouping facts by predicate. Some
|
||||
/// predicates are legitimately multi-valued ("tagged_in",
|
||||
/// "friend_of") so this is a *signal* for the curator, not a hard
|
||||
/// invariant. Stale-data correction is the common case (Alice
|
||||
/// lives_in NYC AND SF — one of these is wrong).
|
||||
#[serde(skip_serializing_if = "std::ops::Not::not")]
|
||||
pub in_conflict: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -381,9 +390,50 @@ async fn get_entity<D: KnowledgeDao + 'static>(
|
||||
source_photo: f.source_photo,
|
||||
source_insight_id: f.source_insight_id,
|
||||
created_at: f.created_at,
|
||||
in_conflict: false,
|
||||
});
|
||||
}
|
||||
|
||||
// Conflict detection: within the active set, group by predicate;
|
||||
// any predicate group with more than one distinct object (entity
|
||||
// id or value) flags all its members. Some predicates are
|
||||
// legitimately multi-valued (e.g. "tagged_in", "friend_of") so
|
||||
// this is a curator hint, not a hard rule — `in_conflict` exists
|
||||
// to surface stale-data candidates ("lives_in NYC" and
|
||||
// "lives_in SF" can't both be current).
|
||||
{
|
||||
use std::collections::{HashMap, HashSet};
|
||||
let mut by_predicate: HashMap<String, Vec<usize>> = HashMap::new();
|
||||
for (idx, f) in facts.iter().enumerate() {
|
||||
if f.status == "active" {
|
||||
by_predicate
|
||||
.entry(f.predicate.clone())
|
||||
.or_default()
|
||||
.push(idx);
|
||||
}
|
||||
}
|
||||
let mut to_flag: HashSet<usize> = HashSet::new();
|
||||
for indices in by_predicate.values() {
|
||||
if indices.len() < 2 {
|
||||
continue;
|
||||
}
|
||||
// Distinct (object_entity_id, object_value) tuples across
|
||||
// these active facts.
|
||||
let mut seen: HashSet<(Option<i32>, Option<String>)> = HashSet::new();
|
||||
for &i in indices {
|
||||
seen.insert((facts[i].object_entity_id, facts[i].object_value.clone()));
|
||||
}
|
||||
if seen.len() > 1 {
|
||||
for &i in indices {
|
||||
to_flag.insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
for i in to_flag {
|
||||
facts[i].in_conflict = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch photo links
|
||||
let photo_links: Vec<PhotoLinkDetail> = match dao.get_links_for_entity(&cx, entity_id) {
|
||||
Ok(links) => links.into_iter().map(PhotoLinkDetail::from).collect(),
|
||||
|
||||
Reference in New Issue
Block a user