knowledge: entity-graph endpoint for force-directed view

New GET /knowledge/graph?type=&limit= returns the data the
curation UI's graph tab needs:
  - nodes = entities that are the subject of at least one in-scope
    fact (rejected / superseded facts excluded; rejected entities
    are skipped as well). Carries fact_count for visual sizing.
    Top-N by count desc; default cap 200 (clamped 1..1000).
  - edges = relational facts (object_entity_id set) grouped by
    (subject, object, predicate) so 3 "is_friend_of" facts
    between the same pair collapse into one edge with count=3.

Two raw SQL queries: an INNER JOIN onto a persona-scoped fact-
count subquery for nodes (skips 0-fact entities entirely so the
sim doesn't waste time on disconnected islands), then a follow-
up GROUP BY over the persona-scoped fact set restricted to the
node id set via IN clauses (ids are i32 so inlining is safe).

Pairs with the Apollo-side GraphPanel that runs d3-force over
the returned payload and renders SVG with click-to-open.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-11 21:26:02 -04:00
parent 6dca0c027d
commit d123cde333
3 changed files with 296 additions and 5 deletions

View File

@@ -128,6 +128,33 @@ pub struct ConsolidationGroup {
pub max_cosine: f32,
}
/// Graph view payload: every entity that is the subject of at least
/// one in-scope fact becomes a node; every relational fact
/// (object_entity_id set) becomes an edge between subject and
/// object. Multiple facts with the same (subject, object, predicate)
/// collapse into one edge with a count so the UI can fan them out
/// as one weighted line.
///
/// `GraphNode` is one node of that payload: an entity row plus its
/// fact count.
#[derive(Debug, Clone)]
pub struct GraphNode {
/// Entity id (`entities.id`). Edge `source` / `target` values are
/// entity ids from the same id space.
pub id: i32,
/// Entity name, as selected from `entities.name`.
pub name: String,
/// Entity type, as selected from `entities.entity_type`.
pub entity_type: String,
/// Number of in-scope facts whose subject is this entity; the
/// commit message describes it as the input for visual sizing.
pub fact_count: i64,
}
/// One aggregated edge of the entity graph: all in-scope relational
/// facts that share the same (subject, object, predicate) triple.
#[derive(Debug, Clone)]
pub struct GraphEdge {
/// Subject entity id of the grouped facts.
pub source: i32,
/// Object entity id of the grouped facts.
pub target: i32,
/// Predicate shared by every fact collapsed into this edge.
pub predicate: String,
/// How many facts were collapsed into this edge.
pub count: i64,
}
/// Complete graph snapshot returned by `build_entity_graph`.
#[derive(Debug, Clone)]
pub struct EntityGraph {
/// Entities that made it under the node cap.
pub nodes: Vec<GraphNode>,
/// Aggregated relational facts between entities present in `nodes`.
pub edges: Vec<GraphEdge>,
}
// ---------------------------------------------------------------------------
// Trait
// ---------------------------------------------------------------------------
@@ -178,6 +205,23 @@ pub trait KnowledgeDao: Sync + Send {
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
/// Build a graph snapshot — entities as nodes (fact count from
/// the active persona scope), relational facts as edges. Used
/// by the curation UI's graph view. Filters:
/// - entity_type: optional, restricts nodes to one type
/// - node_limit: caps the number of nodes; lower-fact-count
///   entities drop first
///
/// An edge is kept only when both its subject and its object
/// survive the node cap; edges touching a dropped entity are
/// pruned. Persona scoping
/// affects fact_count + edge inclusion (rejected / superseded
/// excluded; All vs Single mirrors the existing pattern).
///
/// Returns the assembled [`EntityGraph`], or a `DbError` when the
/// underlying storage fails.
fn build_entity_graph(
&mut self,
cx: &opentelemetry::Context,
entity_type: Option<&str>,
node_limit: usize,
persona: &PersonaFilter,
) -> Result<EntityGraph, DbError>;
/// Find groups of near-duplicate entities that the upsert-time
/// cosine guard didn't catch (it runs at ~0.92; this scan runs
/// at a lower threshold to surface the "probably same" tier that
@@ -828,6 +872,184 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
/// Builds the entity graph for the curation UI with two raw SQL queries.
///
/// 1. **Nodes** — non-rejected entities inner-joined onto a persona-scoped
///    fact-count subquery (facts counted by `subject_entity_id`, with
///    `rejected` / `superseded` facts excluded). Entities without any
///    in-scope fact never appear. Rows are ordered by fact count
///    descending, then id ascending, and capped at `node_limit`.
/// 2. **Edges** — in-scope facts with `object_entity_id` set whose subject
///    AND object are both in the node set, grouped by
///    (subject, object, predicate) with a count per group.
///
/// * `entity_type_filter` — when `Some`, only entities of that type become
///   nodes.
/// * `node_limit` — maximum number of nodes returned.
/// * `persona` — `Single` scopes facts by user and persona, `All` by user
///   only.
///
/// # Errors
/// Any failure inside the traced closure is reported as
/// `DbErrorKind::QueryError`.
///
/// # Panics
/// Panics if `self.connection.lock()` fails.
fn build_entity_graph(
    &mut self,
    cx: &opentelemetry::Context,
    entity_type_filter: Option<&str>,
    node_limit: usize,
    persona: &PersonaFilter,
) -> Result<EntityGraph, DbError> {
    trace_db_call(cx, "query", "build_entity_graph", |_span| {
        use diesel::sql_query;
        use diesel::sql_types::{BigInt, Integer, Text};

        let mut conn = self.connection.lock().expect("KnowledgeDao lock");

        // ── Nodes: entities with non-rejected facts under the active
        // scope, plus their fact count. Capped to node_limit by count
        // desc so the graph stays drawable; lower-count entities drop.
        // Rejected entity rows are excluded too.
        //
        // Placeholder order in the final SQL is: persona binds (inside
        // the subquery, so earliest), optional entity type, then LIMIT.
        // The bind calls below must stay in that order.
        let persona_filter_sql = match persona {
            PersonaFilter::Single { .. } => {
                "AND ef.user_id = ? AND ef.persona_id = ? AND ef.status NOT IN ('rejected','superseded')"
            }
            PersonaFilter::All { .. } => {
                "AND ef.user_id = ? AND ef.status NOT IN ('rejected','superseded')"
            }
        };
        let where_clause = if entity_type_filter.is_some() {
            "WHERE e.status != 'rejected' AND e.entity_type = ?"
        } else {
            "WHERE e.status != 'rejected'"
        };

        // INNER JOIN against the fact-count subquery so entities with
        // zero in-scope facts are skipped instead of cluttering the view.
        let node_sql = format!(
            "SELECT e.id, e.name, e.entity_type, fc.fact_count \
             FROM entities e \
             INNER JOIN ( \
             SELECT subject_entity_id AS sid, COUNT(*) AS fact_count \
             FROM entity_facts ef \
             WHERE 1=1 {persona_filter_sql} \
             GROUP BY subject_entity_id \
             ) fc ON fc.sid = e.id \
             {where_clause} \
             ORDER BY fc.fact_count DESC, e.id ASC \
             LIMIT ?",
        );

        #[derive(diesel::QueryableByName)]
        struct NodeRow {
            #[diesel(sql_type = Integer)]
            id: i32,
            #[diesel(sql_type = Text)]
            name: String,
            #[diesel(sql_type = Text)]
            entity_type: String,
            #[diesel(sql_type = BigInt)]
            fact_count: i64,
        }

        let mut nq = sql_query(node_sql).into_boxed();
        match persona {
            PersonaFilter::Single { user_id, persona_id } => {
                nq = nq
                    .bind::<Integer, _>(*user_id)
                    .bind::<Text, _>(persona_id.clone());
            }
            PersonaFilter::All { user_id } => {
                nq = nq.bind::<Integer, _>(*user_id);
            }
        }
        if let Some(t) = entity_type_filter {
            nq = nq.bind::<Text, _>(t.to_string());
        }
        // Saturate instead of `as i64`: a wrapped (negative) value would
        // be read by SQLite as "no limit".
        nq = nq.bind::<BigInt, _>(i64::try_from(node_limit).unwrap_or(i64::MAX));

        let node_rows: Vec<NodeRow> = nq
            .load(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Node query error: {}", e))?;

        let nodes: Vec<GraphNode> = node_rows
            .into_iter()
            .map(|r| GraphNode {
                id: r.id,
                name: r.name,
                entity_type: r.entity_type,
                fact_count: r.fact_count,
            })
            .collect();

        // No nodes means no possible edges; this also avoids emitting an
        // empty `IN ()` list below.
        if nodes.is_empty() {
            return Ok(EntityGraph {
                nodes,
                edges: Vec::new(),
            });
        }

        // ── Edges: relational facts where BOTH subject and object
        // survived the node cap. Grouped by (subject, object, predicate)
        // so 3 "is_friend_of Bob" facts become one edge with count=3.
        //
        // The node query yields one row per entity (the subquery groups
        // by subject id), so the ids can be listed in result order with
        // no de-duplication. That keeps the generated SQL text stable
        // between calls. Ids are i32, so inlining them is safe; the
        // persona values still go through binds.
        let in_clause = nodes
            .iter()
            .map(|n| n.id.to_string())
            .collect::<Vec<_>>()
            .join(", ");

        let edge_persona_sql = match persona {
            PersonaFilter::Single { .. } => {
                "user_id = ? AND persona_id = ? AND status NOT IN ('rejected','superseded')"
            }
            PersonaFilter::All { .. } => {
                "user_id = ? AND status NOT IN ('rejected','superseded')"
            }
        };

        // ORDER BY makes the edge order of the payload deterministic.
        let edge_sql = format!(
            "SELECT subject_entity_id, object_entity_id, predicate, COUNT(*) AS cnt \
             FROM entity_facts \
             WHERE {edge_persona_sql} \
             AND object_entity_id IS NOT NULL \
             AND subject_entity_id IN ({in_clause}) \
             AND object_entity_id IN ({in_clause}) \
             GROUP BY subject_entity_id, object_entity_id, predicate \
             ORDER BY subject_entity_id, object_entity_id, predicate",
        );

        #[derive(diesel::QueryableByName)]
        struct EdgeRow {
            #[diesel(sql_type = Integer)]
            subject_entity_id: i32,
            #[diesel(sql_type = Integer)]
            object_entity_id: i32,
            #[diesel(sql_type = Text)]
            predicate: String,
            #[diesel(sql_type = BigInt)]
            cnt: i64,
        }

        let mut eq = sql_query(edge_sql).into_boxed();
        match persona {
            PersonaFilter::Single { user_id, persona_id } => {
                eq = eq
                    .bind::<Integer, _>(*user_id)
                    .bind::<Text, _>(persona_id.clone());
            }
            PersonaFilter::All { user_id } => {
                eq = eq.bind::<Integer, _>(*user_id);
            }
        }

        let edge_rows: Vec<EdgeRow> = eq
            .load(conn.deref_mut())
            .map_err(|e| anyhow::anyhow!("Edge query error: {}", e))?;

        let edges: Vec<GraphEdge> = edge_rows
            .into_iter()
            .map(|r| GraphEdge {
                source: r.subject_entity_id,
                target: r.object_entity_id,
                predicate: r.predicate,
                count: r.cnt,
            })
            .collect();

        Ok(EntityGraph { nodes, edges })
    })
    .map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn find_consolidation_proposals(
&mut self,
cx: &opentelemetry::Context,

View File

@@ -59,8 +59,8 @@ pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
pub use insights_dao::{InsightDao, SqliteInsightDao};
pub use knowledge_dao::{
ConsolidationGroup, EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao,
PersonaFilter, RecentActivity, SqliteKnowledgeDao,
ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch,
KnowledgeDao, PersonaFilter, RecentActivity, SqliteKnowledgeDao,
};
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};

View File

@@ -7,8 +7,8 @@ use std::sync::Mutex;
use crate::data::Claims;
use crate::database::models::{Entity, EntityFact, EntityPhotoLink, InsertEntityFact};
use crate::database::{
ConsolidationGroup, EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao,
PersonaFilter, RecentActivity,
ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch,
KnowledgeDao, PersonaFilter, RecentActivity,
};
use crate::personas::PersonaDaoData;
use crate::state::AppState;
@@ -330,6 +330,35 @@ pub struct RecentQuery {
pub limit: Option<i64>,
}
/// Query-string parameters accepted by the graph endpoint.
#[derive(Deserialize)]
pub struct GraphQuery {
/// Optional entity-type filter; arrives as the `type` query parameter.
#[serde(rename = "type")]
pub entity_type: Option<String>,
/// Optional node cap. `get_graph` substitutes 200 when it is absent
/// and clamps the value to 1..=1000.
pub limit: Option<i64>,
}
/// Serialized form of one graph node; `get_graph` fills it
/// field-for-field from a DAO `GraphNode`.
#[derive(Serialize)]
pub struct GraphNodeView {
/// Entity id.
pub id: i32,
/// Entity name.
pub name: String,
/// Entity type.
pub entity_type: String,
/// Fact count reported by the DAO for this entity.
pub fact_count: i64,
}
/// Serialized form of one graph edge; `get_graph` fills it
/// field-for-field from a DAO `GraphEdge`.
#[derive(Serialize)]
pub struct GraphEdgeView {
/// Entity id the edge starts from (the DAO edge's `source`).
pub source: i32,
/// Entity id the edge points to (the DAO edge's `target`).
pub target: i32,
/// Predicate label of the edge.
pub predicate: String,
/// Count carried over from the DAO edge.
pub count: i64,
}
/// JSON body returned by `get_graph` on success.
#[derive(Serialize)]
pub struct GraphResponse {
/// Nodes, in the order the DAO returned them.
pub nodes: Vec<GraphNodeView>,
/// Edges, in the order the DAO returned them.
pub edges: Vec<GraphEdgeView>,
}
#[derive(Deserialize)]
pub struct ConsolidationQuery {
/// Cosine threshold for clustering. Default 0.85 — looser than
@@ -391,7 +420,8 @@ where
.service(
web::resource("/consolidation-proposals")
.route(web::get().to(get_consolidation_proposals::<D>)),
),
)
.service(web::resource("/graph").route(web::get().to(get_graph::<D>))),
)
}
@@ -1162,6 +1192,45 @@ async fn get_recent<D: KnowledgeDao + 'static>(
}
}
async fn get_graph<D: KnowledgeDao + 'static>(
req: HttpRequest,
claims: Claims,
query: web::Query<GraphQuery>,
dao: web::Data<Mutex<D>>,
persona_dao: PersonaDaoData,
) -> impl Responder {
let limit = query.limit.unwrap_or(200).clamp(1, 1000) as usize;
let persona = resolve_persona_filter(&req, &claims, &persona_dao);
let cx = opentelemetry::Context::current();
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
match dao.build_entity_graph(&cx, query.entity_type.as_deref(), limit, &persona) {
Ok(EntityGraph { nodes, edges }) => HttpResponse::Ok().json(GraphResponse {
nodes: nodes
.into_iter()
.map(|n| GraphNodeView {
id: n.id,
name: n.name,
entity_type: n.entity_type,
fact_count: n.fact_count,
})
.collect(),
edges: edges
.into_iter()
.map(|e| GraphEdgeView {
source: e.source,
target: e.target,
predicate: e.predicate,
count: e.count,
})
.collect(),
}),
Err(e) => {
log::error!("build_entity_graph error: {:?}", e);
HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"}))
}
}
}
async fn get_consolidation_proposals<D: KnowledgeDao + 'static>(
req: HttpRequest,
claims: Claims,