knowledge: per-entity persona breakdown for list + detail

Entities are global; facts are persona-scoped. Under the active persona an entity can read as "0 facts" while having plenty under other personas the user owns — the curation UI had no way to surface that gap. Adds a batched DAO method `get_persona_breakdowns_for_entities` that returns {entity_id → [(persona_id, count)]} in one query (group by subject + persona, user-scoped, status != rejected), and wires it into both /knowledge/entities list rows and GET /knowledge/entities/{id}. EntitySummary grows an optional `persona_breakdown` field (skipped on serialization when None — keeps PATCH responses unchanged). EntityDetailResponse carries the breakdown as a non-optional Vec since the detail endpoint always populates it. One extra query per list page (50 entities → 50 subject ids batched in one IN clause); single-entity GET adds one round trip. No new index is added: the (subject_entity_id, persona_id) grouping relies implicitly on the existing user-persona indexes on entity_facts. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 18:29:20 -04:00
parent f200466508
commit 89d0a6527c
2 changed files with 133 additions and 1 deletions
--- a/src/database/knowledge_dao.rs
+++ b/src/database/knowledge_dao.rs
@@ -167,6 +167,20 @@ pub trait KnowledgeDao: Sync + Send {
        persona: &PersonaFilter,
    ) -> Result<(Vec<(Entity, i64)>, i64), DbError>;

+    /// Batch-fetch per-persona fact counts for a set of entities,
+    /// scoped to one user. Returns entity_id → [(persona_id, count)].
+    /// The SQLite impl omits entities that have no matching facts.
+    /// Lets the curation UI show "this entity has 0 facts in your
+    /// active persona but 12 in journal" so the curator knows where
+    /// to find the existing knowledge. Rejected facts are excluded;
+    /// superseded ones are included (they're history, not noise).
+    fn get_persona_breakdowns_for_entities(
+        &mut self,
+        cx: &opentelemetry::Context,
+        entity_ids: &[i32],
+        user_id: i32,
+    ) -> Result<std::collections::HashMap<i32, Vec<(String, i64)>>, DbError>;
+
    fn update_entity_status(
        &mut self,
        cx: &opentelemetry::Context,
@@ -786,6 +800,68 @@ impl KnowledgeDao for SqliteKnowledgeDao {
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

+    fn get_persona_breakdowns_for_entities(
+        &mut self,
+        cx: &opentelemetry::Context,
+        entity_ids: &[i32],
+        user_id: i32,
+    ) -> Result<std::collections::HashMap<i32, Vec<(String, i64)>>, DbError> {
+        trace_db_call(cx, "query", "get_persona_breakdowns", |_span| {
+            use diesel::sql_query;
+            use diesel::sql_types::{BigInt, Integer, Text};
+
+            if entity_ids.is_empty() {
+                return Ok(std::collections::HashMap::new());
+            }
+
+            // Build the `IN (?, ?, ?…)` placeholder list. Only the `?`
+            // markers go through format!(); every value is bound, in
+            // SQL order: user_id first, then one bind per entity id.
+            // NOTE(review): len + 1 binds; confirm SQLite's variable cap.
+            let placeholders = vec!["?"; entity_ids.len()].join(", ");
+            let sql = format!(
+                "SELECT subject_entity_id, persona_id, COUNT(*) AS cnt \
+                 FROM entity_facts \
+                 WHERE user_id = ? \
+                   AND status != 'rejected' \
+                   AND subject_entity_id IN ({}) \
+                 GROUP BY subject_entity_id, persona_id \
+                 ORDER BY subject_entity_id, persona_id",
+                placeholders
+            );
+
+            #[derive(diesel::QueryableByName)]
+            struct Row {
+                #[diesel(sql_type = Integer)]
+                subject_entity_id: i32,
+                #[diesel(sql_type = Text)]
+                persona_id: String,
+                #[diesel(sql_type = BigInt)]
+                cnt: i64,
+            }
+
+            let mut conn = self.connection.lock().expect("KnowledgeDao lock");
+            let mut q = sql_query(sql).into_boxed();
+            q = q.bind::<Integer, _>(user_id);
+            for id in entity_ids {
+                q = q.bind::<Integer, _>(*id);
+            }
+            let rows: Vec<Row> = q
+                .load(conn.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
+
+            let mut out: std::collections::HashMap<i32, Vec<(String, i64)>> =
+                std::collections::HashMap::with_capacity(entity_ids.len());
+            for r in rows {
+                out.entry(r.subject_entity_id)
+                    .or_default()
+                    .push((r.persona_id, r.cnt));
+            }
+            Ok(out)
+        })
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
    fn update_entity_status(
        &mut self,
        cx: &opentelemetry::Context,