knowledge: entity-graph endpoint for force-directed view
New GET /knowledge/graph?type=&limit= returns the data the
curation UI's graph tab needs:
- nodes = entities with at least one in-scope fact (rejected /
superseded excluded). Carries fact_count for visual sizing.
Top-N by count desc; default cap 200 (clamped 1..1000).
- edges = relational facts (object_entity_id set) grouped by
(subject, object, predicate) so 3 "is_friend_of" facts
between the same pair collapse into one edge with count=3.
Two raw SQL queries: an INNER JOIN onto a persona-scoped fact-
count subquery for nodes (skips 0-fact entities entirely so the
sim doesn't waste time on disconnected islands), then a follow-
up GROUP BY over the persona-scoped fact set restricted to the
node id set via IN clauses (ids are i32 so inlining is safe).
Pairs with the Apollo-side GraphPanel that runs d3-force over
the returned payload and renders SVG with click-to-open.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -128,6 +128,33 @@ pub struct ConsolidationGroup {
|
||||
pub max_cosine: f32,
|
||||
}
|
||||
|
||||
/// One node of the graph view payload.
///
/// Graph view payload overview: every entity that has at least one
/// fact becomes a node; every relational fact (`object_entity_id`
/// set) becomes an edge between subject and object. Multiple facts
/// with the same (subject, object, predicate) collapse into one edge
/// with a count so the UI can fan them out as one weighted line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GraphNode {
    /// Id of the entity this node represents; edges refer to nodes
    /// through this value.
    pub id: i32,
    /// Entity name.
    pub name: String,
    /// Entity type, the same value the graph's type filter matches on.
    pub entity_type: String,
    /// Number of facts attached to the entity, intended for sizing the
    /// node visually.
    pub fact_count: i64,
}
|
||||
|
||||
/// One edge of the graph view payload: all relational facts that share
/// the same (subject, object, predicate) triple, collapsed into a
/// single weighted connection.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GraphEdge {
    /// Id of the subject entity (the node the edge starts at).
    pub source: i32,
    /// Id of the object entity (the node the edge points to).
    pub target: i32,
    /// Predicate shared by every fact folded into this edge.
    pub predicate: String,
    /// How many facts were folded into this edge.
    pub count: i64,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EntityGraph {
|
||||
pub nodes: Vec<GraphNode>,
|
||||
pub edges: Vec<GraphEdge>,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Trait
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -178,6 +205,23 @@ pub trait KnowledgeDao: Sync + Send {
|
||||
persona: &PersonaFilter,
|
||||
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
|
||||
|
||||
/// Build a graph snapshot — entities as nodes (fact count from
|
||||
/// the active persona scope), relational facts as edges. Used
|
||||
/// by the curation UI's graph view. Filters:
|
||||
/// - entity_type: optional, restricts nodes to one type
|
||||
/// - node_limit: caps the number of nodes; lower-fact-count
|
||||
/// entities drop first
|
||||
/// Edges between dropped entities are pruned. Persona scoping
|
||||
/// affects fact_count + edge inclusion (rejected / superseded
|
||||
/// excluded; All vs Single mirrors the existing pattern).
|
||||
fn build_entity_graph(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
entity_type: Option<&str>,
|
||||
node_limit: usize,
|
||||
persona: &PersonaFilter,
|
||||
) -> Result<EntityGraph, DbError>;
|
||||
|
||||
/// Find groups of near-duplicate entities that the upsert-time
|
||||
/// cosine guard didn't catch (it runs at ~0.92; this scan runs
|
||||
/// at a lower threshold to surface the "probably same" tier that
|
||||
@@ -828,6 +872,184 @@ impl KnowledgeDao for SqliteKnowledgeDao {
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn build_entity_graph(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
entity_type_filter: Option<&str>,
|
||||
node_limit: usize,
|
||||
persona: &PersonaFilter,
|
||||
) -> Result<EntityGraph, DbError> {
|
||||
trace_db_call(cx, "query", "build_entity_graph", |_span| {
|
||||
use diesel::sql_query;
|
||||
use diesel::sql_types::{BigInt, Integer, Text};
|
||||
|
||||
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
|
||||
|
||||
// ── Nodes: entities with non-rejected facts under the
|
||||
// active scope, plus their fact count. Cap to node_limit
|
||||
// by count desc so the graph stays drawable; lower-count
|
||||
// entities drop. Excludes 'rejected' entity rows too.
|
||||
let (persona_filter_sql, persona_binds_count) = match persona {
|
||||
PersonaFilter::Single { .. } => (
|
||||
"AND ef.user_id = ? AND ef.persona_id = ? AND ef.status NOT IN ('rejected','superseded')",
|
||||
2,
|
||||
),
|
||||
PersonaFilter::All { .. } => (
|
||||
"AND ef.user_id = ? AND ef.status NOT IN ('rejected','superseded')",
|
||||
1,
|
||||
),
|
||||
};
|
||||
|
||||
let mut where_parts: Vec<&str> = vec!["e.status != 'rejected'"];
|
||||
if entity_type_filter.is_some() {
|
||||
where_parts.push("e.entity_type = ?");
|
||||
}
|
||||
let where_clause = format!("WHERE {}", where_parts.join(" AND "));
|
||||
|
||||
// SQL: join entities to their (persona-scoped) fact count,
|
||||
// sort by count desc, limit. Including entities with 0
|
||||
// facts would clutter the view — skip them via INNER JOIN
|
||||
// (subquery on entity_facts) so only entities with at
|
||||
// least one in-scope fact show up.
|
||||
let node_sql = format!(
|
||||
"SELECT e.id, e.name, e.entity_type, fc.fact_count \
|
||||
FROM entities e \
|
||||
INNER JOIN ( \
|
||||
SELECT subject_entity_id AS sid, COUNT(*) AS fact_count \
|
||||
FROM entity_facts ef \
|
||||
WHERE 1=1 {persona_filter_sql} \
|
||||
GROUP BY subject_entity_id \
|
||||
) fc ON fc.sid = e.id \
|
||||
{where_clause} \
|
||||
ORDER BY fc.fact_count DESC, e.id ASC \
|
||||
LIMIT ?",
|
||||
);
|
||||
|
||||
#[derive(diesel::QueryableByName)]
|
||||
struct NodeRow {
|
||||
#[diesel(sql_type = Integer)]
|
||||
id: i32,
|
||||
#[diesel(sql_type = Text)]
|
||||
name: String,
|
||||
#[diesel(sql_type = Text)]
|
||||
entity_type: String,
|
||||
#[diesel(sql_type = BigInt)]
|
||||
fact_count: i64,
|
||||
}
|
||||
|
||||
let mut nq = sql_query(node_sql).into_boxed();
|
||||
// Persona binds (inside the subquery — earlier in the SQL).
|
||||
match persona {
|
||||
PersonaFilter::Single { user_id, persona_id } => {
|
||||
nq = nq
|
||||
.bind::<Integer, _>(*user_id)
|
||||
.bind::<Text, _>(persona_id.clone());
|
||||
}
|
||||
PersonaFilter::All { user_id } => {
|
||||
nq = nq.bind::<Integer, _>(*user_id);
|
||||
}
|
||||
}
|
||||
// Entity-type filter bind, if any.
|
||||
if let Some(t) = entity_type_filter {
|
||||
nq = nq.bind::<Text, _>(t.to_string());
|
||||
}
|
||||
// LIMIT.
|
||||
nq = nq.bind::<BigInt, _>(node_limit as i64);
|
||||
|
||||
let node_rows: Vec<NodeRow> = nq
|
||||
.load(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Node query error: {}", e))?;
|
||||
|
||||
let _ = persona_binds_count; // documentary
|
||||
|
||||
let node_ids: std::collections::HashSet<i32> =
|
||||
node_rows.iter().map(|r| r.id).collect();
|
||||
let nodes: Vec<GraphNode> = node_rows
|
||||
.into_iter()
|
||||
.map(|r| GraphNode {
|
||||
id: r.id,
|
||||
name: r.name,
|
||||
entity_type: r.entity_type,
|
||||
fact_count: r.fact_count,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if nodes.is_empty() {
|
||||
return Ok(EntityGraph {
|
||||
nodes,
|
||||
edges: Vec::new(),
|
||||
});
|
||||
}
|
||||
|
||||
// ── Edges: relational facts where BOTH subject and
|
||||
// object survived the node cap. Grouped by (subject,
|
||||
// object, predicate) so 3 "is_friend_of Bob" facts
|
||||
// become one edge with count=3.
|
||||
let id_list: Vec<String> = node_ids.iter().map(|i| i.to_string()).collect();
|
||||
let in_clause = id_list.join(", ");
|
||||
// Note: ids are i32, inlined safely; predicates use binds.
|
||||
let (edge_persona_sql, _) = match persona {
|
||||
PersonaFilter::Single { .. } => (
|
||||
"user_id = ? AND persona_id = ? AND status NOT IN ('rejected','superseded')",
|
||||
2,
|
||||
),
|
||||
PersonaFilter::All { .. } => (
|
||||
"user_id = ? AND status NOT IN ('rejected','superseded')",
|
||||
1,
|
||||
),
|
||||
};
|
||||
let edge_sql = format!(
|
||||
"SELECT subject_entity_id, object_entity_id, predicate, COUNT(*) AS cnt \
|
||||
FROM entity_facts \
|
||||
WHERE {edge_persona_sql} \
|
||||
AND object_entity_id IS NOT NULL \
|
||||
AND subject_entity_id IN ({in_clause}) \
|
||||
AND object_entity_id IN ({in_clause}) \
|
||||
GROUP BY subject_entity_id, object_entity_id, predicate",
|
||||
);
|
||||
|
||||
#[derive(diesel::QueryableByName)]
|
||||
struct EdgeRow {
|
||||
#[diesel(sql_type = Integer)]
|
||||
subject_entity_id: i32,
|
||||
#[diesel(sql_type = Integer)]
|
||||
object_entity_id: i32,
|
||||
#[diesel(sql_type = Text)]
|
||||
predicate: String,
|
||||
#[diesel(sql_type = BigInt)]
|
||||
cnt: i64,
|
||||
}
|
||||
|
||||
let mut eq = sql_query(edge_sql).into_boxed();
|
||||
match persona {
|
||||
PersonaFilter::Single { user_id, persona_id } => {
|
||||
eq = eq
|
||||
.bind::<Integer, _>(*user_id)
|
||||
.bind::<Text, _>(persona_id.clone());
|
||||
}
|
||||
PersonaFilter::All { user_id } => {
|
||||
eq = eq.bind::<Integer, _>(*user_id);
|
||||
}
|
||||
}
|
||||
let edge_rows: Vec<EdgeRow> = eq
|
||||
.load(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Edge query error: {}", e))?;
|
||||
|
||||
let edges: Vec<GraphEdge> = edge_rows
|
||||
.into_iter()
|
||||
.map(|r| GraphEdge {
|
||||
source: r.subject_entity_id,
|
||||
target: r.object_entity_id,
|
||||
predicate: r.predicate,
|
||||
count: r.cnt,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(EntityGraph { nodes, edges })
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_consolidation_proposals(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
|
||||
Reference in New Issue
Block a user