From d123cde3330fb2090b87051b4ce9ecce32522337 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Mon, 11 May 2026 21:26:02 -0400 Subject: [PATCH] knowledge: entity-graph endpoint for force-directed view New GET /knowledge/graph?type=&limit= returns the data the curation UI's graph tab needs: - nodes = entities with at least one in-scope fact (rejected / superseded excluded). Carries fact_count for visual sizing. Top-N by count desc; default cap 200 (clamped 1..1000). - edges = relational facts (object_entity_id set) grouped by (subject, object, predicate) so 3 "is_friend_of" facts between the same pair collapse into one edge with count=3. Two raw SQL queries: an INNER JOIN onto a persona-scoped fact- count subquery for nodes (skips 0-fact entities entirely so the sim doesn't waste time on disconnected islands), then a follow- up GROUP BY over the persona-scoped fact set restricted to the node id set via IN clauses (ids are i32 so inlining is safe). Pairs with the Apollo-side GraphPanel that runs d3-force over the returned payload and renders SVG with click-to-open. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/database/knowledge_dao.rs | 222 ++++++++++++++++++++++++++++++++++ src/database/mod.rs | 4 +- src/knowledge.rs | 75 +++++++++++- 3 files changed, 296 insertions(+), 5 deletions(-) diff --git a/src/database/knowledge_dao.rs b/src/database/knowledge_dao.rs index cc64b62..028454b 100644 --- a/src/database/knowledge_dao.rs +++ b/src/database/knowledge_dao.rs @@ -128,6 +128,33 @@ pub struct ConsolidationGroup { pub max_cosine: f32, } +/// Graph view payload: every entity that has at least one fact +/// becomes a node; every relational fact (object_entity_id set) +/// becomes an edge between subject and object. Multiple facts with +/// the same (subject, object, predicate) collapse into one edge +/// with a count so the UI can fan them out as one weighted line. +#[derive(Debug, Clone)] +pub struct GraphNode { + pub id: i32, + pub name: String, + pub entity_type: String, + pub fact_count: i64, +} + +#[derive(Debug, Clone)] +pub struct GraphEdge { + pub source: i32, + pub target: i32, + pub predicate: String, + pub count: i64, +} + +#[derive(Debug, Clone)] +pub struct EntityGraph { + pub nodes: Vec, + pub edges: Vec, +} + // --------------------------------------------------------------------------- // Trait // --------------------------------------------------------------------------- @@ -178,6 +205,23 @@ pub trait KnowledgeDao: Sync + Send { persona: &PersonaFilter, ) -> Result<(Vec<(Entity, i64)>, i64), DbError>; + /// Build a graph snapshot — entities as nodes (fact count from + /// the active persona scope), relational facts as edges. Used + /// by the curation UI's graph view. Filters: + /// - entity_type: optional, restricts nodes to one type + /// - node_limit: caps the number of nodes; lower-fact-count + /// entities drop first + /// Edges between dropped entities are pruned. Persona scoping + /// affects fact_count + edge inclusion (rejected / superseded + /// excluded; All vs Single mirrors the existing pattern). + fn build_entity_graph( + &mut self, + cx: &opentelemetry::Context, + entity_type: Option<&str>, + node_limit: usize, + persona: &PersonaFilter, + ) -> Result; + /// Find groups of near-duplicate entities that the upsert-time /// cosine guard didn't catch (it runs at ~0.92; this scan runs /// at a lower threshold to surface the "probably same" tier that @@ -828,6 +872,184 @@ impl KnowledgeDao for SqliteKnowledgeDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + fn build_entity_graph( + &mut self, + cx: &opentelemetry::Context, + entity_type_filter: Option<&str>, + node_limit: usize, + persona: &PersonaFilter, + ) -> Result { + trace_db_call(cx, "query", "build_entity_graph", |_span| { + use diesel::sql_query; + use diesel::sql_types::{BigInt, Integer, Text}; + + let mut conn = self.connection.lock().expect("KnowledgeDao lock"); + + // ── Nodes: entities with non-rejected facts under the + // active scope, plus their fact count. Cap to node_limit + // by count desc so the graph stays drawable; lower-count + // entities drop. Excludes 'rejected' entity rows too. + let (persona_filter_sql, persona_binds_count) = match persona { + PersonaFilter::Single { .. } => ( + "AND ef.user_id = ? AND ef.persona_id = ? AND ef.status NOT IN ('rejected','superseded')", + 2, + ), + PersonaFilter::All { .. } => ( + "AND ef.user_id = ? AND ef.status NOT IN ('rejected','superseded')", + 1, + ), + }; + + let mut where_parts: Vec<&str> = vec!["e.status != 'rejected'"]; + if entity_type_filter.is_some() { + where_parts.push("e.entity_type = ?"); + } + let where_clause = format!("WHERE {}", where_parts.join(" AND ")); + + // SQL: join entities to their (persona-scoped) fact count, + // sort by count desc, limit. Including entities with 0 + // facts would clutter the view — skip them via INNER JOIN + // (subquery on entity_facts) so only entities with at + // least one in-scope fact show up. + let node_sql = format!( + "SELECT e.id, e.name, e.entity_type, fc.fact_count \ + FROM entities e \ + INNER JOIN ( \ + SELECT subject_entity_id AS sid, COUNT(*) AS fact_count \ + FROM entity_facts ef \ + WHERE 1=1 {persona_filter_sql} \ + GROUP BY subject_entity_id \ + ) fc ON fc.sid = e.id \ + {where_clause} \ + ORDER BY fc.fact_count DESC, e.id ASC \ + LIMIT ?", + ); + + #[derive(diesel::QueryableByName)] + struct NodeRow { + #[diesel(sql_type = Integer)] + id: i32, + #[diesel(sql_type = Text)] + name: String, + #[diesel(sql_type = Text)] + entity_type: String, + #[diesel(sql_type = BigInt)] + fact_count: i64, + } + + let mut nq = sql_query(node_sql).into_boxed(); + // Persona binds (inside the subquery — earlier in the SQL). + match persona { + PersonaFilter::Single { user_id, persona_id } => { + nq = nq + .bind::(*user_id) + .bind::(persona_id.clone()); + } + PersonaFilter::All { user_id } => { + nq = nq.bind::(*user_id); + } + } + // Entity-type filter bind, if any. + if let Some(t) = entity_type_filter { + nq = nq.bind::(t.to_string()); + } + // LIMIT. + nq = nq.bind::(node_limit as i64); + + let node_rows: Vec = nq + .load(conn.deref_mut()) + .map_err(|e| anyhow::anyhow!("Node query error: {}", e))?; + + let _ = persona_binds_count; // documentary + + let node_ids: std::collections::HashSet = + node_rows.iter().map(|r| r.id).collect(); + let nodes: Vec = node_rows + .into_iter() + .map(|r| GraphNode { + id: r.id, + name: r.name, + entity_type: r.entity_type, + fact_count: r.fact_count, + }) + .collect(); + + if nodes.is_empty() { + return Ok(EntityGraph { + nodes, + edges: Vec::new(), + }); + } + + // ── Edges: relational facts where BOTH subject and + // object survived the node cap. Grouped by (subject, + // object, predicate) so 3 "is_friend_of Bob" facts + // become one edge with count=3. + let id_list: Vec = node_ids.iter().map(|i| i.to_string()).collect(); + let in_clause = id_list.join(", "); + // Note: ids are i32, inlined safely; predicates use binds. + let (edge_persona_sql, _) = match persona { + PersonaFilter::Single { .. } => ( + "user_id = ? AND persona_id = ? AND status NOT IN ('rejected','superseded')", + 2, + ), + PersonaFilter::All { .. } => ( + "user_id = ? AND status NOT IN ('rejected','superseded')", + 1, + ), + }; + let edge_sql = format!( + "SELECT subject_entity_id, object_entity_id, predicate, COUNT(*) AS cnt \ + FROM entity_facts \ + WHERE {edge_persona_sql} \ + AND object_entity_id IS NOT NULL \ + AND subject_entity_id IN ({in_clause}) \ + AND object_entity_id IN ({in_clause}) \ + GROUP BY subject_entity_id, object_entity_id, predicate", + ); + + #[derive(diesel::QueryableByName)] + struct EdgeRow { + #[diesel(sql_type = Integer)] + subject_entity_id: i32, + #[diesel(sql_type = Integer)] + object_entity_id: i32, + #[diesel(sql_type = Text)] + predicate: String, + #[diesel(sql_type = BigInt)] + cnt: i64, + } + + let mut eq = sql_query(edge_sql).into_boxed(); + match persona { + PersonaFilter::Single { user_id, persona_id } => { + eq = eq + .bind::(*user_id) + .bind::(persona_id.clone()); + } + PersonaFilter::All { user_id } => { + eq = eq.bind::(*user_id); + } + } + let edge_rows: Vec = eq + .load(conn.deref_mut()) + .map_err(|e| anyhow::anyhow!("Edge query error: {}", e))?; + + let edges: Vec = edge_rows + .into_iter() + .map(|r| GraphEdge { + source: r.subject_entity_id, + target: r.object_entity_id, + predicate: r.predicate, + count: r.cnt, + }) + .collect(); + + Ok(EntityGraph { nodes, edges }) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + fn find_consolidation_proposals( &mut self, cx: &opentelemetry::Context, diff --git a/src/database/mod.rs b/src/database/mod.rs index 5a26048..2e919f6 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -59,8 +59,8 @@ pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao}; pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao}; pub use insights_dao::{InsightDao, SqliteInsightDao}; pub use knowledge_dao::{ - ConsolidationGroup, EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao, - PersonaFilter, RecentActivity, SqliteKnowledgeDao, + ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch, + KnowledgeDao, PersonaFilter, RecentActivity, SqliteKnowledgeDao, }; pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao}; pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao}; diff --git a/src/knowledge.rs b/src/knowledge.rs index e64e4af..fa1142b 100644 --- a/src/knowledge.rs +++ b/src/knowledge.rs @@ -7,8 +7,8 @@ use std::sync::Mutex; use crate::data::Claims; use crate::database::models::{Entity, EntityFact, EntityPhotoLink, InsertEntityFact}; use crate::database::{ - ConsolidationGroup, EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao, - PersonaFilter, RecentActivity, + ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch, + KnowledgeDao, PersonaFilter, RecentActivity, }; use crate::personas::PersonaDaoData; use crate::state::AppState; @@ -330,6 +330,35 @@ pub struct RecentQuery { pub limit: Option, } +#[derive(Deserialize)] +pub struct GraphQuery { + #[serde(rename = "type")] + pub entity_type: Option, + pub limit: Option, +} + +#[derive(Serialize)] +pub struct GraphNodeView { + pub id: i32, + pub name: String, + pub entity_type: String, + pub fact_count: i64, +} + +#[derive(Serialize)] +pub struct GraphEdgeView { + pub source: i32, + pub target: i32, + pub predicate: String, + pub count: i64, +} + +#[derive(Serialize)] +pub struct GraphResponse { + pub nodes: Vec, + pub edges: Vec, +} + #[derive(Deserialize)] pub struct ConsolidationQuery { /// Cosine threshold for clustering. Default 0.85 — looser than @@ -391,7 +420,8 @@ where .service( web::resource("/consolidation-proposals") .route(web::get().to(get_consolidation_proposals::)), - ), + ) + .service(web::resource("/graph").route(web::get().to(get_graph::))), ) } @@ -1162,6 +1192,45 @@ async fn get_recent( } } +async fn get_graph( + req: HttpRequest, + claims: Claims, + query: web::Query, + dao: web::Data>, + persona_dao: PersonaDaoData, +) -> impl Responder { + let limit = query.limit.unwrap_or(200).clamp(1, 1000) as usize; + let persona = resolve_persona_filter(&req, &claims, &persona_dao); + let cx = opentelemetry::Context::current(); + let mut dao = dao.lock().expect("Unable to lock KnowledgeDao"); + match dao.build_entity_graph(&cx, query.entity_type.as_deref(), limit, &persona) { + Ok(EntityGraph { nodes, edges }) => HttpResponse::Ok().json(GraphResponse { + nodes: nodes + .into_iter() + .map(|n| GraphNodeView { + id: n.id, + name: n.name, + entity_type: n.entity_type, + fact_count: n.fact_count, + }) + .collect(), + edges: edges + .into_iter() + .map(|e| GraphEdgeView { + source: e.source, + target: e.target, + predicate: e.predicate, + count: e.count, + }) + .collect(), + }), + Err(e) => { + log::error!("build_entity_graph error: {:?}", e); + HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"})) + } + } +} + async fn get_consolidation_proposals( req: HttpRequest, claims: Claims,