knowledge: per-entity persona breakdown for list + detail

Entities are global; facts are persona-scoped. Under the active
persona an entity can read as "0 facts" while having plenty under
other personas the user owns — the curation UI had no way to
surface that gap. Adds a batched DAO method
`get_persona_breakdowns_for_entities` that returns
{entity_id → [(persona_id, count)]} in one query (group by
subject + persona, user-scoped, status != rejected), and wires it
into both /knowledge/entities list rows and
GET /knowledge/entities/{id}.

EntitySummary grows an optional `persona_breakdown` field
(skipped on serialization when None — keeps PATCH responses
unchanged). EntityDetailResponse carries the breakdown as a
non-optional Vec since the detail endpoint always populates it.

One extra query per list page (50 entities → 50 subject ids
batched in one IN clause); single-entity GET adds one round trip.
The (subject_entity_id, persona_id) lookup is covered implicitly by
the existing user-persona indexes on entity_facts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-11 18:29:20 -04:00
parent f200466508
commit 89d0a6527c
2 changed files with 133 additions and 1 deletions

View File

@@ -167,6 +167,20 @@ pub trait KnowledgeDao: Sync + Send {
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
/// Batch-fetches per-persona fact counts for a set of entities,
/// scoped to one user.
///
/// Returns a map of entity_id → list of (persona_id, count). Used by
/// the curation UI to show "this entity has 0 facts in your active
/// persona but 12 in journal" so the curator knows where to find the
/// existing knowledge.
///
/// Rejected facts are excluded; superseded facts are included
/// (they're history, not noise). In the SQLite implementation an
/// entity with no qualifying facts has no entry in the map, and an
/// empty `entity_ids` slice yields an empty map without querying.
///
/// # Errors
///
/// Returns a `DbError` if the underlying query fails.
fn get_persona_breakdowns_for_entities(
&mut self,
cx: &opentelemetry::Context,
entity_ids: &[i32],
user_id: i32,
) -> Result<std::collections::HashMap<i32, Vec<(String, i64)>>, DbError>;
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,
@@ -786,6 +800,68 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_persona_breakdowns_for_entities(
&mut self,
cx: &opentelemetry::Context,
entity_ids: &[i32],
user_id: i32,
) -> Result<std::collections::HashMap<i32, Vec<(String, i64)>>, DbError> {
trace_db_call(cx, "query", "get_persona_breakdowns", |_span| {
use diesel::sql_query;
use diesel::sql_types::{BigInt, Integer, Text};
if entity_ids.is_empty() {
return Ok(std::collections::HashMap::new());
}
// Build the `IN (?, ?, ?…)` placeholder list. We bind
// user_id first, then the entity ids. No real escape risk
// since the values are typed ints, but bound parameters
// are cleaner than format!() either way.
let placeholders = vec!["?"; entity_ids.len()].join(", ");
let sql = format!(
"SELECT subject_entity_id, persona_id, COUNT(*) AS cnt \
FROM entity_facts \
WHERE user_id = ? \
AND status != 'rejected' \
AND subject_entity_id IN ({}) \
GROUP BY subject_entity_id, persona_id \
ORDER BY subject_entity_id, persona_id",
placeholders
);
#[derive(diesel::QueryableByName)]
struct Row {
#[diesel(sql_type = Integer)]
subject_entity_id: i32,
#[diesel(sql_type = Text)]
persona_id: String,
#[diesel(sql_type = BigInt)]
cnt: i64,
}
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let mut q = sql_query(sql).into_boxed();
q = q.bind::<Integer, _>(user_id);
for id in entity_ids {
q = q.bind::<Integer, _>(*id);
}
let rows: Vec<Row> = q
.load(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
let mut out: std::collections::HashMap<i32, Vec<(String, i64)>> =
std::collections::HashMap::with_capacity(entity_ids.len());
for r in rows {
out.entry(r.subject_entity_id)
.or_default()
.push((r.persona_id, r.cnt));
}
Ok(out)
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,