knowledge: entity-graph endpoint for force-directed view
New GET /knowledge/graph?type=&limit= returns the data the
curation UI's graph tab needs:
- nodes = entities with at least one in-scope fact (rejected /
superseded excluded). Carries fact_count for visual sizing.
Top-N by count desc; default cap 200 (clamped 1..1000).
- edges = relational facts (object_entity_id set) grouped by
(subject, object, predicate) so 3 "is_friend_of" facts
between the same pair collapse into one edge with count=3.
Two raw SQL queries: an INNER JOIN onto a persona-scoped fact-
count subquery for nodes (skips 0-fact entities entirely so the
sim doesn't waste time on disconnected islands), then a follow-
up GROUP BY over the persona-scoped fact set restricted to the
node id set via IN clauses (ids are i32 so inlining is safe).
Pairs with the Apollo-side GraphPanel that runs d3-force over
the returned payload and renders SVG with click-to-open.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -128,6 +128,33 @@ pub struct ConsolidationGroup {
|
||||
pub max_cosine: f32,
|
||||
}
|
||||
|
||||
/// Graph view payload: every entity that has at least one fact
|
||||
/// becomes a node; every relational fact (object_entity_id set)
|
||||
/// becomes an edge between subject and object. Multiple facts with
|
||||
/// the same (subject, object, predicate) collapse into one edge
|
||||
/// with a count so the UI can fan them out as one weighted line.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct GraphNode {
|
||||
pub id: i32,
|
||||
pub name: String,
|
||||
pub entity_type: String,
|
||||
pub fact_count: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct GraphEdge {
|
||||
pub source: i32,
|
||||
pub target: i32,
|
||||
pub predicate: String,
|
||||
pub count: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EntityGraph {
|
||||
pub nodes: Vec<GraphNode>,
|
||||
pub edges: Vec<GraphEdge>,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Trait
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -178,6 +205,23 @@ pub trait KnowledgeDao: Sync + Send {
|
||||
persona: &PersonaFilter,
|
||||
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
|
||||
|
||||
/// Build a graph snapshot — entities as nodes (fact count from
|
||||
/// the active persona scope), relational facts as edges. Used
|
||||
/// by the curation UI's graph view. Filters:
|
||||
/// - entity_type: optional, restricts nodes to one type
|
||||
/// - node_limit: caps the number of nodes; lower-fact-count
|
||||
/// entities drop first
|
||||
/// Edges between dropped entities are pruned. Persona scoping
|
||||
/// affects fact_count + edge inclusion (rejected / superseded
|
||||
/// excluded; All vs Single mirrors the existing pattern).
|
||||
fn build_entity_graph(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
entity_type: Option<&str>,
|
||||
node_limit: usize,
|
||||
persona: &PersonaFilter,
|
||||
) -> Result<EntityGraph, DbError>;
|
||||
|
||||
/// Find groups of near-duplicate entities that the upsert-time
|
||||
/// cosine guard didn't catch (it runs at ~0.92; this scan runs
|
||||
/// at a lower threshold to surface the "probably same" tier that
|
||||
@@ -828,6 +872,184 @@ impl KnowledgeDao for SqliteKnowledgeDao {
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn build_entity_graph(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
entity_type_filter: Option<&str>,
|
||||
node_limit: usize,
|
||||
persona: &PersonaFilter,
|
||||
) -> Result<EntityGraph, DbError> {
|
||||
trace_db_call(cx, "query", "build_entity_graph", |_span| {
|
||||
use diesel::sql_query;
|
||||
use diesel::sql_types::{BigInt, Integer, Text};
|
||||
|
||||
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
|
||||
|
||||
// ── Nodes: entities with non-rejected facts under the
|
||||
// active scope, plus their fact count. Cap to node_limit
|
||||
// by count desc so the graph stays drawable; lower-count
|
||||
// entities drop. Excludes 'rejected' entity rows too.
|
||||
let (persona_filter_sql, persona_binds_count) = match persona {
|
||||
PersonaFilter::Single { .. } => (
|
||||
"AND ef.user_id = ? AND ef.persona_id = ? AND ef.status NOT IN ('rejected','superseded')",
|
||||
2,
|
||||
),
|
||||
PersonaFilter::All { .. } => (
|
||||
"AND ef.user_id = ? AND ef.status NOT IN ('rejected','superseded')",
|
||||
1,
|
||||
),
|
||||
};
|
||||
|
||||
let mut where_parts: Vec<&str> = vec!["e.status != 'rejected'"];
|
||||
if entity_type_filter.is_some() {
|
||||
where_parts.push("e.entity_type = ?");
|
||||
}
|
||||
let where_clause = format!("WHERE {}", where_parts.join(" AND "));
|
||||
|
||||
// SQL: join entities to their (persona-scoped) fact count,
|
||||
// sort by count desc, limit. Including entities with 0
|
||||
// facts would clutter the view — skip them via INNER JOIN
|
||||
// (subquery on entity_facts) so only entities with at
|
||||
// least one in-scope fact show up.
|
||||
let node_sql = format!(
|
||||
"SELECT e.id, e.name, e.entity_type, fc.fact_count \
|
||||
FROM entities e \
|
||||
INNER JOIN ( \
|
||||
SELECT subject_entity_id AS sid, COUNT(*) AS fact_count \
|
||||
FROM entity_facts ef \
|
||||
WHERE 1=1 {persona_filter_sql} \
|
||||
GROUP BY subject_entity_id \
|
||||
) fc ON fc.sid = e.id \
|
||||
{where_clause} \
|
||||
ORDER BY fc.fact_count DESC, e.id ASC \
|
||||
LIMIT ?",
|
||||
);
|
||||
|
||||
#[derive(diesel::QueryableByName)]
|
||||
struct NodeRow {
|
||||
#[diesel(sql_type = Integer)]
|
||||
id: i32,
|
||||
#[diesel(sql_type = Text)]
|
||||
name: String,
|
||||
#[diesel(sql_type = Text)]
|
||||
entity_type: String,
|
||||
#[diesel(sql_type = BigInt)]
|
||||
fact_count: i64,
|
||||
}
|
||||
|
||||
let mut nq = sql_query(node_sql).into_boxed();
|
||||
// Persona binds (inside the subquery — earlier in the SQL).
|
||||
match persona {
|
||||
PersonaFilter::Single { user_id, persona_id } => {
|
||||
nq = nq
|
||||
.bind::<Integer, _>(*user_id)
|
||||
.bind::<Text, _>(persona_id.clone());
|
||||
}
|
||||
PersonaFilter::All { user_id } => {
|
||||
nq = nq.bind::<Integer, _>(*user_id);
|
||||
}
|
||||
}
|
||||
// Entity-type filter bind, if any.
|
||||
if let Some(t) = entity_type_filter {
|
||||
nq = nq.bind::<Text, _>(t.to_string());
|
||||
}
|
||||
// LIMIT.
|
||||
nq = nq.bind::<BigInt, _>(node_limit as i64);
|
||||
|
||||
let node_rows: Vec<NodeRow> = nq
|
||||
.load(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Node query error: {}", e))?;
|
||||
|
||||
let _ = persona_binds_count; // documentary
|
||||
|
||||
let node_ids: std::collections::HashSet<i32> =
|
||||
node_rows.iter().map(|r| r.id).collect();
|
||||
let nodes: Vec<GraphNode> = node_rows
|
||||
.into_iter()
|
||||
.map(|r| GraphNode {
|
||||
id: r.id,
|
||||
name: r.name,
|
||||
entity_type: r.entity_type,
|
||||
fact_count: r.fact_count,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if nodes.is_empty() {
|
||||
return Ok(EntityGraph {
|
||||
nodes,
|
||||
edges: Vec::new(),
|
||||
});
|
||||
}
|
||||
|
||||
// ── Edges: relational facts where BOTH subject and
|
||||
// object survived the node cap. Grouped by (subject,
|
||||
// object, predicate) so 3 "is_friend_of Bob" facts
|
||||
// become one edge with count=3.
|
||||
let id_list: Vec<String> = node_ids.iter().map(|i| i.to_string()).collect();
|
||||
let in_clause = id_list.join(", ");
|
||||
// Note: ids are i32, inlined safely; predicates use binds.
|
||||
let (edge_persona_sql, _) = match persona {
|
||||
PersonaFilter::Single { .. } => (
|
||||
"user_id = ? AND persona_id = ? AND status NOT IN ('rejected','superseded')",
|
||||
2,
|
||||
),
|
||||
PersonaFilter::All { .. } => (
|
||||
"user_id = ? AND status NOT IN ('rejected','superseded')",
|
||||
1,
|
||||
),
|
||||
};
|
||||
let edge_sql = format!(
|
||||
"SELECT subject_entity_id, object_entity_id, predicate, COUNT(*) AS cnt \
|
||||
FROM entity_facts \
|
||||
WHERE {edge_persona_sql} \
|
||||
AND object_entity_id IS NOT NULL \
|
||||
AND subject_entity_id IN ({in_clause}) \
|
||||
AND object_entity_id IN ({in_clause}) \
|
||||
GROUP BY subject_entity_id, object_entity_id, predicate",
|
||||
);
|
||||
|
||||
#[derive(diesel::QueryableByName)]
|
||||
struct EdgeRow {
|
||||
#[diesel(sql_type = Integer)]
|
||||
subject_entity_id: i32,
|
||||
#[diesel(sql_type = Integer)]
|
||||
object_entity_id: i32,
|
||||
#[diesel(sql_type = Text)]
|
||||
predicate: String,
|
||||
#[diesel(sql_type = BigInt)]
|
||||
cnt: i64,
|
||||
}
|
||||
|
||||
let mut eq = sql_query(edge_sql).into_boxed();
|
||||
match persona {
|
||||
PersonaFilter::Single { user_id, persona_id } => {
|
||||
eq = eq
|
||||
.bind::<Integer, _>(*user_id)
|
||||
.bind::<Text, _>(persona_id.clone());
|
||||
}
|
||||
PersonaFilter::All { user_id } => {
|
||||
eq = eq.bind::<Integer, _>(*user_id);
|
||||
}
|
||||
}
|
||||
let edge_rows: Vec<EdgeRow> = eq
|
||||
.load(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Edge query error: {}", e))?;
|
||||
|
||||
let edges: Vec<GraphEdge> = edge_rows
|
||||
.into_iter()
|
||||
.map(|r| GraphEdge {
|
||||
source: r.subject_entity_id,
|
||||
target: r.object_entity_id,
|
||||
predicate: r.predicate,
|
||||
count: r.cnt,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(EntityGraph { nodes, edges })
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn find_consolidation_proposals(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
|
||||
@@ -59,8 +59,8 @@ pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
|
||||
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
|
||||
pub use insights_dao::{InsightDao, SqliteInsightDao};
|
||||
pub use knowledge_dao::{
|
||||
ConsolidationGroup, EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao,
|
||||
PersonaFilter, RecentActivity, SqliteKnowledgeDao,
|
||||
ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch,
|
||||
KnowledgeDao, PersonaFilter, RecentActivity, SqliteKnowledgeDao,
|
||||
};
|
||||
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
|
||||
pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};
|
||||
|
||||
@@ -7,8 +7,8 @@ use std::sync::Mutex;
|
||||
use crate::data::Claims;
|
||||
use crate::database::models::{Entity, EntityFact, EntityPhotoLink, InsertEntityFact};
|
||||
use crate::database::{
|
||||
ConsolidationGroup, EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao,
|
||||
PersonaFilter, RecentActivity,
|
||||
ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch,
|
||||
KnowledgeDao, PersonaFilter, RecentActivity,
|
||||
};
|
||||
use crate::personas::PersonaDaoData;
|
||||
use crate::state::AppState;
|
||||
@@ -330,6 +330,35 @@ pub struct RecentQuery {
|
||||
pub limit: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct GraphQuery {
|
||||
#[serde(rename = "type")]
|
||||
pub entity_type: Option<String>,
|
||||
pub limit: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct GraphNodeView {
|
||||
pub id: i32,
|
||||
pub name: String,
|
||||
pub entity_type: String,
|
||||
pub fact_count: i64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct GraphEdgeView {
|
||||
pub source: i32,
|
||||
pub target: i32,
|
||||
pub predicate: String,
|
||||
pub count: i64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct GraphResponse {
|
||||
pub nodes: Vec<GraphNodeView>,
|
||||
pub edges: Vec<GraphEdgeView>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ConsolidationQuery {
|
||||
/// Cosine threshold for clustering. Default 0.85 — looser than
|
||||
@@ -391,7 +420,8 @@ where
|
||||
.service(
|
||||
web::resource("/consolidation-proposals")
|
||||
.route(web::get().to(get_consolidation_proposals::<D>)),
|
||||
),
|
||||
)
|
||||
.service(web::resource("/graph").route(web::get().to(get_graph::<D>))),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1162,6 +1192,45 @@ async fn get_recent<D: KnowledgeDao + 'static>(
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_graph<D: KnowledgeDao + 'static>(
|
||||
req: HttpRequest,
|
||||
claims: Claims,
|
||||
query: web::Query<GraphQuery>,
|
||||
dao: web::Data<Mutex<D>>,
|
||||
persona_dao: PersonaDaoData,
|
||||
) -> impl Responder {
|
||||
let limit = query.limit.unwrap_or(200).clamp(1, 1000) as usize;
|
||||
let persona = resolve_persona_filter(&req, &claims, &persona_dao);
|
||||
let cx = opentelemetry::Context::current();
|
||||
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
|
||||
match dao.build_entity_graph(&cx, query.entity_type.as_deref(), limit, &persona) {
|
||||
Ok(EntityGraph { nodes, edges }) => HttpResponse::Ok().json(GraphResponse {
|
||||
nodes: nodes
|
||||
.into_iter()
|
||||
.map(|n| GraphNodeView {
|
||||
id: n.id,
|
||||
name: n.name,
|
||||
entity_type: n.entity_type,
|
||||
fact_count: n.fact_count,
|
||||
})
|
||||
.collect(),
|
||||
edges: edges
|
||||
.into_iter()
|
||||
.map(|e| GraphEdgeView {
|
||||
source: e.source,
|
||||
target: e.target,
|
||||
predicate: e.predicate,
|
||||
count: e.count,
|
||||
})
|
||||
.collect(),
|
||||
}),
|
||||
Err(e) => {
|
||||
log::error!("build_entity_graph error: {:?}", e);
|
||||
HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_consolidation_proposals<D: KnowledgeDao + 'static>(
|
||||
req: HttpRequest,
|
||||
claims: Claims,
|
||||
|
||||
Reference in New Issue
Block a user