knowledge: entity-graph endpoint for force-directed view
New GET /knowledge/graph?type=&limit= returns the data the
curation UI's graph tab needs:
- nodes = entities with at least one in-scope fact (rejected /
superseded excluded). Carries fact_count for visual sizing.
Top-N by count desc; default cap 200 (clamped 1..1000).
- edges = relational facts (object_entity_id set) grouped by
(subject, object, predicate) so 3 "is_friend_of" facts
between the same pair collapse into one edge with count=3.
Two raw SQL queries. The node query INNER JOINs entities onto a
persona-scoped fact-count subquery, which skips 0-fact entities
entirely so the force simulation doesn't waste time on disconnected
islands. The follow-up edge query is a GROUP BY over the same
persona-scoped fact set, restricted to the node id set via IN
clauses (ids are i32, so inlining them into the SQL is safe).
Pairs with the Apollo-side GraphPanel that runs d3-force over
the returned payload and renders SVG with click-to-open.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -128,6 +128,33 @@ pub struct ConsolidationGroup {
|
|||||||
pub max_cosine: f32,
|
pub max_cosine: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// One node of the entity graph view.
///
/// Graph view payload overview: every entity that has at least one
/// fact becomes a node; every relational fact (`object_entity_id` set)
/// becomes an edge between subject and object. Multiple facts with the
/// same (subject, object, predicate) collapse into one edge with a
/// count so the UI can fan them out as one weighted line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GraphNode {
    /// Entity id; edges refer to nodes by this value.
    pub id: i32,
    /// Entity name.
    pub name: String,
    /// Entity type, as stored on the entity row.
    pub entity_type: String,
    /// Number of in-scope facts that have this entity as their subject.
    pub fact_count: i64,
}
|
||||||
|
|
||||||
|
/// One edge of the entity graph view: all relational facts sharing the
/// same (subject, object, predicate) triple, collapsed into one entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GraphEdge {
    /// Id of the subject entity (edge origin).
    pub source: i32,
    /// Id of the object entity (edge destination).
    pub target: i32,
    /// Predicate shared by every fact folded into this edge.
    pub predicate: String,
    /// How many facts were folded into this edge.
    pub count: i64,
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct EntityGraph {
|
||||||
|
pub nodes: Vec<GraphNode>,
|
||||||
|
pub edges: Vec<GraphEdge>,
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Trait
|
// Trait
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -178,6 +205,23 @@ pub trait KnowledgeDao: Sync + Send {
|
|||||||
persona: &PersonaFilter,
|
persona: &PersonaFilter,
|
||||||
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
|
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
|
||||||
|
|
||||||
|
/// Build a graph snapshot — entities as nodes (fact count from
|
||||||
|
/// the active persona scope), relational facts as edges. Used
|
||||||
|
/// by the curation UI's graph view. Filters:
|
||||||
|
/// - entity_type: optional, restricts nodes to one type
|
||||||
|
/// - node_limit: caps the number of nodes; lower-fact-count
|
||||||
|
/// entities drop first
|
||||||
|
/// Edges between dropped entities are pruned. Persona scoping
|
||||||
|
/// affects fact_count + edge inclusion (rejected / superseded
|
||||||
|
/// excluded; All vs Single mirrors the existing pattern).
|
||||||
|
fn build_entity_graph(
|
||||||
|
&mut self,
|
||||||
|
cx: &opentelemetry::Context,
|
||||||
|
entity_type: Option<&str>,
|
||||||
|
node_limit: usize,
|
||||||
|
persona: &PersonaFilter,
|
||||||
|
) -> Result<EntityGraph, DbError>;
|
||||||
|
|
||||||
/// Find groups of near-duplicate entities that the upsert-time
|
/// Find groups of near-duplicate entities that the upsert-time
|
||||||
/// cosine guard didn't catch (it runs at ~0.92; this scan runs
|
/// cosine guard didn't catch (it runs at ~0.92; this scan runs
|
||||||
/// at a lower threshold to surface the "probably same" tier that
|
/// at a lower threshold to surface the "probably same" tier that
|
||||||
@@ -828,6 +872,184 @@ impl KnowledgeDao for SqliteKnowledgeDao {
|
|||||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_entity_graph(
|
||||||
|
&mut self,
|
||||||
|
cx: &opentelemetry::Context,
|
||||||
|
entity_type_filter: Option<&str>,
|
||||||
|
node_limit: usize,
|
||||||
|
persona: &PersonaFilter,
|
||||||
|
) -> Result<EntityGraph, DbError> {
|
||||||
|
trace_db_call(cx, "query", "build_entity_graph", |_span| {
|
||||||
|
use diesel::sql_query;
|
||||||
|
use diesel::sql_types::{BigInt, Integer, Text};
|
||||||
|
|
||||||
|
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
|
||||||
|
|
||||||
|
// ── Nodes: entities with non-rejected facts under the
|
||||||
|
// active scope, plus their fact count. Cap to node_limit
|
||||||
|
// by count desc so the graph stays drawable; lower-count
|
||||||
|
// entities drop. Excludes 'rejected' entity rows too.
|
||||||
|
let (persona_filter_sql, persona_binds_count) = match persona {
|
||||||
|
PersonaFilter::Single { .. } => (
|
||||||
|
"AND ef.user_id = ? AND ef.persona_id = ? AND ef.status NOT IN ('rejected','superseded')",
|
||||||
|
2,
|
||||||
|
),
|
||||||
|
PersonaFilter::All { .. } => (
|
||||||
|
"AND ef.user_id = ? AND ef.status NOT IN ('rejected','superseded')",
|
||||||
|
1,
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut where_parts: Vec<&str> = vec!["e.status != 'rejected'"];
|
||||||
|
if entity_type_filter.is_some() {
|
||||||
|
where_parts.push("e.entity_type = ?");
|
||||||
|
}
|
||||||
|
let where_clause = format!("WHERE {}", where_parts.join(" AND "));
|
||||||
|
|
||||||
|
// SQL: join entities to their (persona-scoped) fact count,
|
||||||
|
// sort by count desc, limit. Including entities with 0
|
||||||
|
// facts would clutter the view — skip them via INNER JOIN
|
||||||
|
// (subquery on entity_facts) so only entities with at
|
||||||
|
// least one in-scope fact show up.
|
||||||
|
let node_sql = format!(
|
||||||
|
"SELECT e.id, e.name, e.entity_type, fc.fact_count \
|
||||||
|
FROM entities e \
|
||||||
|
INNER JOIN ( \
|
||||||
|
SELECT subject_entity_id AS sid, COUNT(*) AS fact_count \
|
||||||
|
FROM entity_facts ef \
|
||||||
|
WHERE 1=1 {persona_filter_sql} \
|
||||||
|
GROUP BY subject_entity_id \
|
||||||
|
) fc ON fc.sid = e.id \
|
||||||
|
{where_clause} \
|
||||||
|
ORDER BY fc.fact_count DESC, e.id ASC \
|
||||||
|
LIMIT ?",
|
||||||
|
);
|
||||||
|
|
||||||
|
#[derive(diesel::QueryableByName)]
|
||||||
|
struct NodeRow {
|
||||||
|
#[diesel(sql_type = Integer)]
|
||||||
|
id: i32,
|
||||||
|
#[diesel(sql_type = Text)]
|
||||||
|
name: String,
|
||||||
|
#[diesel(sql_type = Text)]
|
||||||
|
entity_type: String,
|
||||||
|
#[diesel(sql_type = BigInt)]
|
||||||
|
fact_count: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut nq = sql_query(node_sql).into_boxed();
|
||||||
|
// Persona binds (inside the subquery — earlier in the SQL).
|
||||||
|
match persona {
|
||||||
|
PersonaFilter::Single { user_id, persona_id } => {
|
||||||
|
nq = nq
|
||||||
|
.bind::<Integer, _>(*user_id)
|
||||||
|
.bind::<Text, _>(persona_id.clone());
|
||||||
|
}
|
||||||
|
PersonaFilter::All { user_id } => {
|
||||||
|
nq = nq.bind::<Integer, _>(*user_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Entity-type filter bind, if any.
|
||||||
|
if let Some(t) = entity_type_filter {
|
||||||
|
nq = nq.bind::<Text, _>(t.to_string());
|
||||||
|
}
|
||||||
|
// LIMIT.
|
||||||
|
nq = nq.bind::<BigInt, _>(node_limit as i64);
|
||||||
|
|
||||||
|
let node_rows: Vec<NodeRow> = nq
|
||||||
|
.load(conn.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Node query error: {}", e))?;
|
||||||
|
|
||||||
|
let _ = persona_binds_count; // documentary
|
||||||
|
|
||||||
|
let node_ids: std::collections::HashSet<i32> =
|
||||||
|
node_rows.iter().map(|r| r.id).collect();
|
||||||
|
let nodes: Vec<GraphNode> = node_rows
|
||||||
|
.into_iter()
|
||||||
|
.map(|r| GraphNode {
|
||||||
|
id: r.id,
|
||||||
|
name: r.name,
|
||||||
|
entity_type: r.entity_type,
|
||||||
|
fact_count: r.fact_count,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if nodes.is_empty() {
|
||||||
|
return Ok(EntityGraph {
|
||||||
|
nodes,
|
||||||
|
edges: Vec::new(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Edges: relational facts where BOTH subject and
|
||||||
|
// object survived the node cap. Grouped by (subject,
|
||||||
|
// object, predicate) so 3 "is_friend_of Bob" facts
|
||||||
|
// become one edge with count=3.
|
||||||
|
let id_list: Vec<String> = node_ids.iter().map(|i| i.to_string()).collect();
|
||||||
|
let in_clause = id_list.join(", ");
|
||||||
|
// Note: ids are i32, inlined safely; predicates use binds.
|
||||||
|
let (edge_persona_sql, _) = match persona {
|
||||||
|
PersonaFilter::Single { .. } => (
|
||||||
|
"user_id = ? AND persona_id = ? AND status NOT IN ('rejected','superseded')",
|
||||||
|
2,
|
||||||
|
),
|
||||||
|
PersonaFilter::All { .. } => (
|
||||||
|
"user_id = ? AND status NOT IN ('rejected','superseded')",
|
||||||
|
1,
|
||||||
|
),
|
||||||
|
};
|
||||||
|
let edge_sql = format!(
|
||||||
|
"SELECT subject_entity_id, object_entity_id, predicate, COUNT(*) AS cnt \
|
||||||
|
FROM entity_facts \
|
||||||
|
WHERE {edge_persona_sql} \
|
||||||
|
AND object_entity_id IS NOT NULL \
|
||||||
|
AND subject_entity_id IN ({in_clause}) \
|
||||||
|
AND object_entity_id IN ({in_clause}) \
|
||||||
|
GROUP BY subject_entity_id, object_entity_id, predicate",
|
||||||
|
);
|
||||||
|
|
||||||
|
#[derive(diesel::QueryableByName)]
|
||||||
|
struct EdgeRow {
|
||||||
|
#[diesel(sql_type = Integer)]
|
||||||
|
subject_entity_id: i32,
|
||||||
|
#[diesel(sql_type = Integer)]
|
||||||
|
object_entity_id: i32,
|
||||||
|
#[diesel(sql_type = Text)]
|
||||||
|
predicate: String,
|
||||||
|
#[diesel(sql_type = BigInt)]
|
||||||
|
cnt: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut eq = sql_query(edge_sql).into_boxed();
|
||||||
|
match persona {
|
||||||
|
PersonaFilter::Single { user_id, persona_id } => {
|
||||||
|
eq = eq
|
||||||
|
.bind::<Integer, _>(*user_id)
|
||||||
|
.bind::<Text, _>(persona_id.clone());
|
||||||
|
}
|
||||||
|
PersonaFilter::All { user_id } => {
|
||||||
|
eq = eq.bind::<Integer, _>(*user_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let edge_rows: Vec<EdgeRow> = eq
|
||||||
|
.load(conn.deref_mut())
|
||||||
|
.map_err(|e| anyhow::anyhow!("Edge query error: {}", e))?;
|
||||||
|
|
||||||
|
let edges: Vec<GraphEdge> = edge_rows
|
||||||
|
.into_iter()
|
||||||
|
.map(|r| GraphEdge {
|
||||||
|
source: r.subject_entity_id,
|
||||||
|
target: r.object_entity_id,
|
||||||
|
predicate: r.predicate,
|
||||||
|
count: r.cnt,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(EntityGraph { nodes, edges })
|
||||||
|
})
|
||||||
|
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||||
|
}
|
||||||
|
|
||||||
fn find_consolidation_proposals(
|
fn find_consolidation_proposals(
|
||||||
&mut self,
|
&mut self,
|
||||||
cx: &opentelemetry::Context,
|
cx: &opentelemetry::Context,
|
||||||
|
|||||||
@@ -59,8 +59,8 @@ pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
|
|||||||
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
|
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
|
||||||
pub use insights_dao::{InsightDao, SqliteInsightDao};
|
pub use insights_dao::{InsightDao, SqliteInsightDao};
|
||||||
pub use knowledge_dao::{
|
pub use knowledge_dao::{
|
||||||
ConsolidationGroup, EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao,
|
ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch,
|
||||||
PersonaFilter, RecentActivity, SqliteKnowledgeDao,
|
KnowledgeDao, PersonaFilter, RecentActivity, SqliteKnowledgeDao,
|
||||||
};
|
};
|
||||||
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
|
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
|
||||||
pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};
|
pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ use std::sync::Mutex;
|
|||||||
use crate::data::Claims;
|
use crate::data::Claims;
|
||||||
use crate::database::models::{Entity, EntityFact, EntityPhotoLink, InsertEntityFact};
|
use crate::database::models::{Entity, EntityFact, EntityPhotoLink, InsertEntityFact};
|
||||||
use crate::database::{
|
use crate::database::{
|
||||||
ConsolidationGroup, EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao,
|
ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch,
|
||||||
PersonaFilter, RecentActivity,
|
KnowledgeDao, PersonaFilter, RecentActivity,
|
||||||
};
|
};
|
||||||
use crate::personas::PersonaDaoData;
|
use crate::personas::PersonaDaoData;
|
||||||
use crate::state::AppState;
|
use crate::state::AppState;
|
||||||
@@ -330,6 +330,35 @@ pub struct RecentQuery {
|
|||||||
pub limit: Option<i64>,
|
pub limit: Option<i64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct GraphQuery {
|
||||||
|
#[serde(rename = "type")]
|
||||||
|
pub entity_type: Option<String>,
|
||||||
|
pub limit: Option<i64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct GraphNodeView {
|
||||||
|
pub id: i32,
|
||||||
|
pub name: String,
|
||||||
|
pub entity_type: String,
|
||||||
|
pub fact_count: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct GraphEdgeView {
|
||||||
|
pub source: i32,
|
||||||
|
pub target: i32,
|
||||||
|
pub predicate: String,
|
||||||
|
pub count: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct GraphResponse {
|
||||||
|
pub nodes: Vec<GraphNodeView>,
|
||||||
|
pub edges: Vec<GraphEdgeView>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
pub struct ConsolidationQuery {
|
pub struct ConsolidationQuery {
|
||||||
/// Cosine threshold for clustering. Default 0.85 — looser than
|
/// Cosine threshold for clustering. Default 0.85 — looser than
|
||||||
@@ -391,7 +420,8 @@ where
|
|||||||
.service(
|
.service(
|
||||||
web::resource("/consolidation-proposals")
|
web::resource("/consolidation-proposals")
|
||||||
.route(web::get().to(get_consolidation_proposals::<D>)),
|
.route(web::get().to(get_consolidation_proposals::<D>)),
|
||||||
),
|
)
|
||||||
|
.service(web::resource("/graph").route(web::get().to(get_graph::<D>))),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1162,6 +1192,45 @@ async fn get_recent<D: KnowledgeDao + 'static>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn get_graph<D: KnowledgeDao + 'static>(
|
||||||
|
req: HttpRequest,
|
||||||
|
claims: Claims,
|
||||||
|
query: web::Query<GraphQuery>,
|
||||||
|
dao: web::Data<Mutex<D>>,
|
||||||
|
persona_dao: PersonaDaoData,
|
||||||
|
) -> impl Responder {
|
||||||
|
let limit = query.limit.unwrap_or(200).clamp(1, 1000) as usize;
|
||||||
|
let persona = resolve_persona_filter(&req, &claims, &persona_dao);
|
||||||
|
let cx = opentelemetry::Context::current();
|
||||||
|
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
|
||||||
|
match dao.build_entity_graph(&cx, query.entity_type.as_deref(), limit, &persona) {
|
||||||
|
Ok(EntityGraph { nodes, edges }) => HttpResponse::Ok().json(GraphResponse {
|
||||||
|
nodes: nodes
|
||||||
|
.into_iter()
|
||||||
|
.map(|n| GraphNodeView {
|
||||||
|
id: n.id,
|
||||||
|
name: n.name,
|
||||||
|
entity_type: n.entity_type,
|
||||||
|
fact_count: n.fact_count,
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
edges: edges
|
||||||
|
.into_iter()
|
||||||
|
.map(|e| GraphEdgeView {
|
||||||
|
source: e.source,
|
||||||
|
target: e.target,
|
||||||
|
predicate: e.predicate,
|
||||||
|
count: e.count,
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
}),
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("build_entity_graph error: {:?}", e);
|
||||||
|
HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn get_consolidation_proposals<D: KnowledgeDao + 'static>(
|
async fn get_consolidation_proposals<D: KnowledgeDao + 'static>(
|
||||||
req: HttpRequest,
|
req: HttpRequest,
|
||||||
claims: Claims,
|
claims: Claims,
|
||||||
|
|||||||
Reference in New Issue
Block a user