knowledge: list sort + persona-scoped fact_count per entity

Two related additions to /knowledge/entities:

- New EntitySort enum (UpdatedDesc default, NameAsc, FactCountDesc)
  surfaced via `?sort=updated|name|count`. NameAsc clusters near-
  duplicate names so dupes stand out at a glance; FactCountDesc
  surfaces heavily-used entities and demotes 0-fact noise to the
  bottom.

- New `list_entities_with_fact_counts` DAO method that returns each
  entity alongside a persona-scoped count of its non-rejected facts
  (subject side). Persona scope follows X-Persona-Id via the
  existing resolve_persona_filter chain — Single filters on
  (user_id, persona_id), All unions across the user's personas.
  Implemented as one raw SQL query with a LEFT JOIN to a fact-count
  subquery and ORDER BY tied to the chosen sort, so count-sort needs
  no second round trip.

The agent's existing list_entities call site is unchanged — it
doesn't need persona-scoped counts and the trait method stays cheap.
EntitySummary grows an Option<i64> fact_count (skip_serializing_if
none) so PATCH responses stay shaped as before.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-10 16:04:13 -04:00
parent 0e2b18224f
commit 0b8478a5e4
3 changed files with 249 additions and 8 deletions

View File

@@ -45,6 +45,17 @@ pub struct EntityFilter {
pub offset: i64,
}
/// Sort key for the curation list.
///
/// The default is [`EntitySort::UpdatedDesc`], so callers that receive no
/// explicit sort keep the legacy ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum EntitySort {
    /// Legacy "newest activity first" ordering.
    #[default]
    UpdatedDesc,
    /// Alphabetical clustering (good for spotting near-duplicates like
    /// Sara / Sarah / Sarah J.).
    NameAsc,
    /// Surface heavily-used entities first, demote 0-fact noise to the
    /// bottom.
    FactCountDesc,
}
pub struct FactFilter {
pub entity_id: Option<i32>,
/// "active" | "reviewed" | "rejected" | "all"
@@ -134,6 +145,19 @@ pub trait KnowledgeDao: Sync + Send {
filter: EntityFilter,
) -> Result<(Vec<Entity>, i64), DbError>;
/// List entities alongside a persona-scoped fact count for each.
/// Powers the curation surface — sorting by fact count surfaces
/// the heavily-used entities and demotes 0-fact noise. Counting
/// is restricted to non-rejected facts under the active persona
/// scope so a switch in the persona picker re-orders the list.
///
/// * `cx` — tracing context for the call.
/// * `filter` — entity filter (type / status / search) plus the
///   `limit` / `offset` page window.
/// * `sort` — ordering applied to the returned page.
/// * `persona` — scope used when counting facts.
///
/// Returns `(rows, total)`: `rows` pairs each entity on the page with
/// its fact count, and `total` is the number of entities matching
/// `filter` irrespective of the page window.
///
/// # Errors
///
/// Returns a [`DbError`] when the underlying query fails.
fn list_entities_with_fact_counts(
&mut self,
cx: &opentelemetry::Context,
filter: EntityFilter,
sort: EntitySort,
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,
@@ -529,6 +553,192 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn list_entities_with_fact_counts(
&mut self,
cx: &opentelemetry::Context,
filter: EntityFilter,
sort: EntitySort,
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError> {
trace_db_call(cx, "query", "list_entities_with_fact_counts", |_span| {
use diesel::sql_query;
use diesel::sql_types::{BigInt, Integer, Text};
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// Build WHERE fragments. Inline-safe values are bound; status
// / sort keywords are validated against fixed sets.
let mut where_parts: Vec<String> = Vec::new();
let mut bind_types: Vec<&'static str> = Vec::new();
let mut bind_strs: Vec<String> = Vec::new();
if filter.entity_type.is_some() {
where_parts.push("e.entity_type = ?".to_string());
bind_types.push("text");
bind_strs.push(filter.entity_type.clone().unwrap());
}
let status_val = filter.status.as_deref().unwrap_or("active");
if status_val != "all" {
where_parts.push("e.status = ?".to_string());
bind_types.push("text");
bind_strs.push(status_val.to_string());
}
if let Some(ref s) = filter.search {
where_parts.push("(e.name LIKE ? OR e.description LIKE ?)".to_string());
bind_types.push("text");
bind_types.push("text");
let pat = format!("%{}%", s);
bind_strs.push(pat.clone());
bind_strs.push(pat);
}
let where_clause = if where_parts.is_empty() {
String::new()
} else {
format!("WHERE {}", where_parts.join(" AND "))
};
// Persona-scoped fact-count subquery. Single = filter on
// (user_id, persona_id); All = union across the user's
// personas (mirror PersonaFilter::All read semantics).
let fact_count_join = match persona {
PersonaFilter::Single { user_id: _, persona_id: _ } => {
"LEFT JOIN (\
SELECT subject_entity_id, COUNT(*) AS fact_count \
FROM entity_facts \
WHERE user_id = ? AND persona_id = ? AND status != 'rejected' \
GROUP BY subject_entity_id\
) fc ON fc.subject_entity_id = e.id"
}
PersonaFilter::All { user_id: _ } => {
"LEFT JOIN (\
SELECT subject_entity_id, COUNT(*) AS fact_count \
FROM entity_facts \
WHERE user_id = ? AND status != 'rejected' \
GROUP BY subject_entity_id\
) fc ON fc.subject_entity_id = e.id"
}
};
let order_by = match sort {
EntitySort::UpdatedDesc => "e.updated_at DESC",
EntitySort::NameAsc => "lower(e.name) ASC",
EntitySort::FactCountDesc => {
"COALESCE(fc.fact_count, 0) DESC, lower(e.name) ASC"
}
};
let select_sql = format!(
"SELECT e.id, e.name, e.entity_type, e.description, e.embedding, \
e.confidence, e.status, e.created_at, e.updated_at, \
COALESCE(fc.fact_count, 0) AS fact_count \
FROM entities e \
{fact_count_join} \
{where_clause} \
ORDER BY {order_by} \
LIMIT ? OFFSET ?"
);
let count_sql = format!(
"SELECT COUNT(*) AS total FROM entities e {where_clause}"
);
// ── Total count ─────────────────────────────────────────
#[derive(diesel::QueryableByName)]
struct TotalRow {
#[diesel(sql_type = BigInt)]
total: i64,
}
let mut count_q = sql_query(count_sql).into_boxed();
for s in &bind_strs {
count_q = count_q.bind::<Text, _>(s.clone());
}
let total: i64 = count_q
.get_result::<TotalRow>(conn.deref_mut())
.map(|r| r.total)
.unwrap_or(0);
// ── Page query ──────────────────────────────────────────
#[derive(diesel::QueryableByName)]
struct EntityWithCountRow {
#[diesel(sql_type = Integer)]
id: i32,
#[diesel(sql_type = Text)]
name: String,
#[diesel(sql_type = Text)]
entity_type: String,
#[diesel(sql_type = Text)]
description: String,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Binary>)]
embedding: Option<Vec<u8>>,
#[diesel(sql_type = diesel::sql_types::Float)]
confidence: f32,
#[diesel(sql_type = Text)]
status: String,
#[diesel(sql_type = BigInt)]
created_at: i64,
#[diesel(sql_type = BigInt)]
updated_at: i64,
#[diesel(sql_type = BigInt)]
fact_count: i64,
}
let mut q = sql_query(select_sql).into_boxed();
// Persona binds first (they're earlier in the SQL — inside
// the subquery LEFT JOIN).
match persona {
PersonaFilter::Single { user_id, persona_id } => {
q = q
.bind::<Integer, _>(*user_id)
.bind::<Text, _>(persona_id.clone());
}
PersonaFilter::All { user_id } => {
q = q.bind::<Integer, _>(*user_id);
}
}
// Then WHERE binds in order.
for s in &bind_strs {
q = q.bind::<Text, _>(s.clone());
}
// Then LIMIT / OFFSET.
q = q
.bind::<BigInt, _>(filter.limit)
.bind::<BigInt, _>(filter.offset);
let rows: Vec<EntityWithCountRow> = q
.load(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
let pairs: Vec<(Entity, i64)> = rows
.into_iter()
.map(|r| {
(
Entity {
id: r.id,
name: r.name,
entity_type: r.entity_type,
description: r.description,
embedding: r.embedding,
confidence: r.confidence,
status: r.status,
created_at: r.created_at,
updated_at: r.updated_at,
},
r.fact_count,
)
})
.collect();
// Sink unused `_bind_types`; keeping it as documentation.
let _ = bind_types;
Ok((pairs, total))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,