knowledge: per-entity persona breakdown for list + detail

Entities are global; facts are persona-scoped. Under the active
persona an entity can read as "0 facts" while having plenty under
other personas the user owns — the curation UI had no way to
surface that gap. Adds a batched DAO method
`get_persona_breakdowns_for_entities` that returns
{entity_id → [(persona_id, count)]} in one query (group by
subject + persona, user-scoped, status != rejected), and wires it
into both /knowledge/entities list rows and
GET /knowledge/entities/{id}.

EntitySummary grows an optional `persona_breakdown` field
(skipped on serialization when None — keeps PATCH responses
unchanged). EntityDetailResponse carries the breakdown as a
non-optional Vec since the detail endpoint always populates it.

Cost: one extra query per list page (50 entities → 50 subject ids
batched in a single IN clause); the single-entity GET adds one round
trip. The query relies on the existing user-persona indexes on
entity_facts, which implicitly cover (subject_entity_id, persona_id).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-11 18:29:20 -04:00
parent f200466508
commit 89d0a6527c
2 changed files with 133 additions and 1 deletions

View File

@@ -167,6 +167,20 @@ pub trait KnowledgeDao: Sync + Send {
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
/// Batch-fetches per-persona fact counts for a set of entities,
/// scoped to one user.
///
/// Returns a map of entity_id → list of (persona_id, count). Used by
/// the curation UI to show "this entity has 0 facts in your active
/// persona but 12 in journal" so the curator knows where to find the
/// existing knowledge. Rejected facts are excluded; superseded facts
/// are included (they're history, not noise).
///
/// The SQLite implementation only inserts keys for entities that have
/// at least one matching fact, and callers treat a missing key as
/// "no breakdown" rather than as an error.
///
/// * `cx` — OpenTelemetry context for the call.
/// * `entity_ids` — ids of the entities to count facts for; an empty
///   slice yields an empty map in the SQLite implementation.
/// * `user_id` — only facts belonging to this user are counted.
///
/// # Errors
///
/// Returns a `DbError` when the underlying query fails.
fn get_persona_breakdowns_for_entities(
&mut self,
cx: &opentelemetry::Context,
entity_ids: &[i32],
user_id: i32,
) -> Result<std::collections::HashMap<i32, Vec<(String, i64)>>, DbError>;
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,
@@ -786,6 +800,68 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
/// Counts non-rejected facts per (entity, persona) for `user_id`.
///
/// Returns a map of entity_id → list of (persona_id, count), with each
/// list ordered by persona_id. Entities that have no matching facts do
/// not appear in the map. An empty `entity_ids` slice short-circuits to
/// an empty map without touching the connection.
///
/// The ids are de-duplicated and queried in chunks so that the number
/// of bound parameters per statement stays below SQLite's
/// `SQLITE_MAX_VARIABLE_NUMBER` limit regardless of how many ids the
/// caller passes.
///
/// # Errors
///
/// Returns `DbErrorKind::QueryError` if any of the chunked queries fails.
fn get_persona_breakdowns_for_entities(
    &mut self,
    cx: &opentelemetry::Context,
    entity_ids: &[i32],
    user_id: i32,
) -> Result<std::collections::HashMap<i32, Vec<(String, i64)>>, DbError> {
    trace_db_call(cx, "query", "get_persona_breakdowns", |_span| {
        use diesel::sql_query;
        use diesel::sql_types::{BigInt, Integer, Text};

        // Older SQLite builds cap bound parameters at 999 per statement.
        // Each query binds one user_id plus one parameter per entity id,
        // so 900 ids per chunk leaves comfortable headroom.
        const MAX_IDS_PER_QUERY: usize = 900;

        #[derive(diesel::QueryableByName)]
        struct Row {
            #[diesel(sql_type = Integer)]
            subject_entity_id: i32,
            #[diesel(sql_type = Text)]
            persona_id: String,
            #[diesel(sql_type = BigInt)]
            cnt: i64,
        }

        if entity_ids.is_empty() {
            return Ok(std::collections::HashMap::new());
        }

        // De-duplicate so that an id can never land in two different
        // chunks, which would otherwise push its rows into the result
        // twice.
        let mut unique_ids = entity_ids.to_vec();
        unique_ids.sort_unstable();
        unique_ids.dedup();

        let mut conn = self.connection.lock().expect("KnowledgeDao lock");
        let mut out: std::collections::HashMap<i32, Vec<(String, i64)>> =
            std::collections::HashMap::with_capacity(unique_ids.len());

        for chunk in unique_ids.chunks(MAX_IDS_PER_QUERY) {
            // Build the `IN (?, ?, ?…)` placeholder list. user_id is
            // bound first, then the entity ids, matching the order of
            // the `?` markers in the statement.
            let placeholders = vec!["?"; chunk.len()].join(", ");
            let sql = format!(
                "SELECT subject_entity_id, persona_id, COUNT(*) AS cnt \
                 FROM entity_facts \
                 WHERE user_id = ? \
                 AND status != 'rejected' \
                 AND subject_entity_id IN ({}) \
                 GROUP BY subject_entity_id, persona_id \
                 ORDER BY subject_entity_id, persona_id",
                placeholders
            );

            let mut q = sql_query(sql).into_boxed();
            q = q.bind::<Integer, _>(user_id);
            for id in chunk {
                q = q.bind::<Integer, _>(*id);
            }

            let rows: Vec<Row> = q
                .load(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;

            for r in rows {
                out.entry(r.subject_entity_id)
                    .or_default()
                    .push((r.persona_id, r.cnt));
            }
        }

        Ok(out)
    })
    .map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,

View File

@@ -64,6 +64,19 @@ pub struct EntitySummary {
/// PATCH responses return the bare entity without scoping context.
#[serde(skip_serializing_if = "Option::is_none")]
pub fact_count: Option<i64>,
/// Per-persona breakdown of fact counts for this entity, scoped
/// to the active user. Lets the curation UI surface "this entity
/// is empty under your active persona but has 12 facts in
/// journal" so you know which persona owns the existing
/// knowledge. Skipped on serialization when None.
#[serde(skip_serializing_if = "Option::is_none")]
pub persona_breakdown: Option<Vec<PersonaCount>>,
}
/// Number of facts a single persona holds about one entity.
///
/// Serialized as one element of the `persona_breakdown` array on the
/// entity list and detail responses.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct PersonaCount {
    /// Identifier of the persona the facts are scoped to.
    pub persona_id: String,
    /// How many facts about the entity exist under that persona.
    pub count: i64,
}
impl From<Entity> for EntitySummary {
@@ -78,6 +91,7 @@ impl From<Entity> for EntitySummary {
created_at: e.created_at,
updated_at: e.updated_at,
fact_count: None,
persona_breakdown: None,
}
}
}
@@ -88,6 +102,16 @@ impl EntitySummary {
s.fact_count = Some(fact_count);
s
}
/// Consumes the summary and returns it with `persona_breakdown` set.
///
/// Each `(persona_id, count)` pair is converted into a `PersonaCount`,
/// preserving the order of `breakdown`. The field is always set to
/// `Some`, even when `breakdown` is empty.
fn with_persona_breakdown(mut self, breakdown: Vec<(String, i64)>) -> Self {
    let mut counts = Vec::with_capacity(breakdown.len());
    for (persona_id, count) in breakdown {
        counts.push(PersonaCount { persona_id, count });
    }
    self.persona_breakdown = Some(counts);
    self
}
}
#[derive(Serialize)]
@@ -171,6 +195,11 @@ pub struct EntityDetailResponse {
pub updated_at: i64,
pub facts: Vec<FactDetail>,
pub photo_links: Vec<PhotoLinkDetail>,
/// Per-persona fact counts for the active user. Mirrors the
/// same field on EntitySummary; the detail panel surfaces a
/// clickable list so the curator can switch to the persona
/// that owns existing facts about this entity.
pub persona_breakdown: Vec<PersonaCount>,
}
#[derive(Serialize)]
@@ -386,9 +415,23 @@ async fn list_entities<D: KnowledgeDao + 'static>(
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
match dao.list_entities_with_fact_counts(&cx, filter, sort, &persona) {
Ok((pairs, total)) => {
// Batch fetch persona breakdowns so the list-row tooltip
// and detail panel can show "0 here · 12 in journal".
// One extra query for the visible page.
let entity_ids: Vec<i32> = pairs.iter().map(|(e, _)| e.id).collect();
let breakdowns = dao
.get_persona_breakdowns_for_entities(&cx, &entity_ids, persona.user_id())
.unwrap_or_default();
let summaries: Vec<EntitySummary> = pairs
.into_iter()
.map(|(e, c)| EntitySummary::from_entity_with_count(e, c))
.map(|(e, c)| {
let entity_id = e.id;
let summary = EntitySummary::from_entity_with_count(e, c);
match breakdowns.get(&entity_id) {
Some(bd) => summary.with_persona_breakdown(bd.clone()),
None => summary,
}
})
.collect();
HttpResponse::Ok().json(EntityListResponse {
entities: summaries,
@@ -537,6 +580,18 @@ async fn get_entity<D: KnowledgeDao + 'static>(
}
};
// Per-persona breakdown for the detail panel's "facts live in
// {persona}" block — same data the list-row tooltip reads. One
// query, single entity in scope.
let persona_breakdown: Vec<PersonaCount> = dao
.get_persona_breakdowns_for_entities(&cx, &[entity_id], persona.user_id())
.ok()
.and_then(|mut map| map.remove(&entity_id))
.unwrap_or_default()
.into_iter()
.map(|(persona_id, count)| PersonaCount { persona_id, count })
.collect();
HttpResponse::Ok().json(EntityDetailResponse {
id: entity.id,
name: entity.name,
@@ -548,6 +603,7 @@ async fn get_entity<D: KnowledgeDao + 'static>(
updated_at: entity.updated_at,
facts,
photo_links,
persona_breakdown,
})
}