knowledge: list sort + persona-scoped fact_count per entity

Two related additions to /knowledge/entities:

- New EntitySort enum (UpdatedDesc default, NameAsc, FactCountDesc)
  surfaced via `?sort=updated|name|count`. NameAsc clusters near-
  duplicate names so dupes stand out at a glance; FactCountDesc
  surfaces heavily-used entities and demotes 0-fact noise to the
  bottom.

- New `list_entities_with_fact_counts` DAO method that returns each
  entity alongside a persona-scoped count of its non-rejected facts
  (subject side). Persona scope follows X-Persona-Id via the
  existing resolve_persona_filter chain — Single filters on
  (user_id, persona_id), All unions across the user's personas.
  Implemented as one raw SQL query with a LEFT JOIN to a fact-count
  subquery and ORDER BY tied to the chosen sort, so count-sort needs
  no second round trip.

The agent's existing list_entities call site is unchanged — it
doesn't need persona-scoped counts and the trait method stays cheap.
EntitySummary grows an Option<i64> fact_count (skip_serializing_if
none) so PATCH responses stay shaped as before.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-10 16:04:13 -04:00
parent 0e2b18224f
commit 0b8478a5e4
3 changed files with 249 additions and 8 deletions

View File

@@ -45,6 +45,17 @@ pub struct EntityFilter {
pub offset: i64,
}
/// Sort key for the entity curation list.
///
/// The default is [`EntitySort::UpdatedDesc`], which is also what the
/// HTTP layer falls back to when `?sort=` is missing or unrecognised.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum EntitySort {
    /// Legacy ordering: most recently updated entities first.
    #[default]
    UpdatedDesc,
    /// Case-insensitive alphabetical order by name. Clusters
    /// near-duplicates (Sara / Sarah / Sarah J.) so they are easy to spot.
    NameAsc,
    /// Highest fact count first, ties broken by name. Surfaces
    /// heavily-used entities and demotes 0-fact noise to the bottom.
    FactCountDesc,
}
pub struct FactFilter {
pub entity_id: Option<i32>,
/// "active" | "reviewed" | "rejected" | "all"
@@ -134,6 +145,19 @@ pub trait KnowledgeDao: Sync + Send {
filter: EntityFilter,
) -> Result<(Vec<Entity>, i64), DbError>;
/// List entities alongside a persona-scoped fact count for each.
/// Powers the curation surface — sorting by fact count surfaces
/// the heavily-used entities and demotes 0-fact noise. Counting
/// is restricted to non-rejected facts under the active persona
/// scope so a switch in the persona picker re-orders the list.
///
/// * `filter` — entity type / status / search constraints plus the
///   `limit` / `offset` page window.
/// * `sort` — ordering applied to the page (see [`EntitySort`]).
/// * `persona` — which facts are counted: a single persona of a user,
///   or all of that user's personas.
///
/// Returns `(pairs, total)`. Each pair is an entity and its fact count
/// (facts where the entity is the subject). In the SQLite
/// implementation `total` is the number of entities matching `filter`
/// before `limit` / `offset` are applied, and entities without any
/// counted facts are still listed with a count of 0.
fn list_entities_with_fact_counts(
&mut self,
cx: &opentelemetry::Context,
filter: EntityFilter,
sort: EntitySort,
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,
@@ -529,6 +553,192 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn list_entities_with_fact_counts(
&mut self,
cx: &opentelemetry::Context,
filter: EntityFilter,
sort: EntitySort,
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError> {
trace_db_call(cx, "query", "list_entities_with_fact_counts", |_span| {
use diesel::sql_query;
use diesel::sql_types::{BigInt, Integer, Text};
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
// Build WHERE fragments. Inline-safe values are bound; status
// / sort keywords are validated against fixed sets.
let mut where_parts: Vec<String> = Vec::new();
let mut bind_types: Vec<&'static str> = Vec::new();
let mut bind_strs: Vec<String> = Vec::new();
if filter.entity_type.is_some() {
where_parts.push("e.entity_type = ?".to_string());
bind_types.push("text");
bind_strs.push(filter.entity_type.clone().unwrap());
}
let status_val = filter.status.as_deref().unwrap_or("active");
if status_val != "all" {
where_parts.push("e.status = ?".to_string());
bind_types.push("text");
bind_strs.push(status_val.to_string());
}
if let Some(ref s) = filter.search {
where_parts.push("(e.name LIKE ? OR e.description LIKE ?)".to_string());
bind_types.push("text");
bind_types.push("text");
let pat = format!("%{}%", s);
bind_strs.push(pat.clone());
bind_strs.push(pat);
}
let where_clause = if where_parts.is_empty() {
String::new()
} else {
format!("WHERE {}", where_parts.join(" AND "))
};
// Persona-scoped fact-count subquery. Single = filter on
// (user_id, persona_id); All = union across the user's
// personas (mirror PersonaFilter::All read semantics).
let fact_count_join = match persona {
PersonaFilter::Single { user_id: _, persona_id: _ } => {
"LEFT JOIN (\
SELECT subject_entity_id, COUNT(*) AS fact_count \
FROM entity_facts \
WHERE user_id = ? AND persona_id = ? AND status != 'rejected' \
GROUP BY subject_entity_id\
) fc ON fc.subject_entity_id = e.id"
}
PersonaFilter::All { user_id: _ } => {
"LEFT JOIN (\
SELECT subject_entity_id, COUNT(*) AS fact_count \
FROM entity_facts \
WHERE user_id = ? AND status != 'rejected' \
GROUP BY subject_entity_id\
) fc ON fc.subject_entity_id = e.id"
}
};
let order_by = match sort {
EntitySort::UpdatedDesc => "e.updated_at DESC",
EntitySort::NameAsc => "lower(e.name) ASC",
EntitySort::FactCountDesc => {
"COALESCE(fc.fact_count, 0) DESC, lower(e.name) ASC"
}
};
let select_sql = format!(
"SELECT e.id, e.name, e.entity_type, e.description, e.embedding, \
e.confidence, e.status, e.created_at, e.updated_at, \
COALESCE(fc.fact_count, 0) AS fact_count \
FROM entities e \
{fact_count_join} \
{where_clause} \
ORDER BY {order_by} \
LIMIT ? OFFSET ?"
);
let count_sql = format!(
"SELECT COUNT(*) AS total FROM entities e {where_clause}"
);
// ── Total count ─────────────────────────────────────────
#[derive(diesel::QueryableByName)]
struct TotalRow {
#[diesel(sql_type = BigInt)]
total: i64,
}
let mut count_q = sql_query(count_sql).into_boxed();
for s in &bind_strs {
count_q = count_q.bind::<Text, _>(s.clone());
}
let total: i64 = count_q
.get_result::<TotalRow>(conn.deref_mut())
.map(|r| r.total)
.unwrap_or(0);
// ── Page query ──────────────────────────────────────────
#[derive(diesel::QueryableByName)]
struct EntityWithCountRow {
#[diesel(sql_type = Integer)]
id: i32,
#[diesel(sql_type = Text)]
name: String,
#[diesel(sql_type = Text)]
entity_type: String,
#[diesel(sql_type = Text)]
description: String,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Binary>)]
embedding: Option<Vec<u8>>,
#[diesel(sql_type = diesel::sql_types::Float)]
confidence: f32,
#[diesel(sql_type = Text)]
status: String,
#[diesel(sql_type = BigInt)]
created_at: i64,
#[diesel(sql_type = BigInt)]
updated_at: i64,
#[diesel(sql_type = BigInt)]
fact_count: i64,
}
let mut q = sql_query(select_sql).into_boxed();
// Persona binds first (they're earlier in the SQL — inside
// the subquery LEFT JOIN).
match persona {
PersonaFilter::Single { user_id, persona_id } => {
q = q
.bind::<Integer, _>(*user_id)
.bind::<Text, _>(persona_id.clone());
}
PersonaFilter::All { user_id } => {
q = q.bind::<Integer, _>(*user_id);
}
}
// Then WHERE binds in order.
for s in &bind_strs {
q = q.bind::<Text, _>(s.clone());
}
// Then LIMIT / OFFSET.
q = q
.bind::<BigInt, _>(filter.limit)
.bind::<BigInt, _>(filter.offset);
let rows: Vec<EntityWithCountRow> = q
.load(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
let pairs: Vec<(Entity, i64)> = rows
.into_iter()
.map(|r| {
(
Entity {
id: r.id,
name: r.name,
entity_type: r.entity_type,
description: r.description,
embedding: r.embedding,
confidence: r.confidence,
status: r.status,
created_at: r.created_at,
updated_at: r.updated_at,
},
r.fact_count,
)
})
.collect();
// Sink unused `_bind_types`; keeping it as documentation.
let _ = bind_types;
Ok((pairs, total))
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn update_entity_status(
&mut self,
cx: &opentelemetry::Context,

View File

@@ -59,8 +59,8 @@ pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
pub use insights_dao::{InsightDao, SqliteInsightDao};
pub use knowledge_dao::{
EntityFilter, EntityPatch, FactFilter, FactPatch, KnowledgeDao, PersonaFilter, RecentActivity,
SqliteKnowledgeDao,
EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao, PersonaFilter,
RecentActivity, SqliteKnowledgeDao,
};
pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};

View File

@@ -7,7 +7,8 @@ use std::sync::Mutex;
use crate::data::Claims;
use crate::database::models::{Entity, EntityFact, EntityPhotoLink, InsertEntityFact};
use crate::database::{
EntityFilter, EntityPatch, FactFilter, FactPatch, KnowledgeDao, PersonaFilter, RecentActivity,
EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao, PersonaFilter,
RecentActivity,
};
use crate::personas::PersonaDaoData;
@@ -57,6 +58,11 @@ pub struct EntitySummary {
pub status: String,
pub created_at: i64,
pub updated_at: i64,
/// Persona-scoped count of non-rejected facts about this entity
/// (subject side). `None` (and omitted from the serialized output)
/// when the call site has no scoping context, e.g. PATCH responses
/// return the bare entity.
#[serde(skip_serializing_if = "Option::is_none")]
pub fact_count: Option<i64>,
}
impl From<Entity> for EntitySummary {
@@ -70,10 +76,19 @@ impl From<Entity> for EntitySummary {
status: e.status,
created_at: e.created_at,
updated_at: e.updated_at,
fact_count: None,
}
}
}
impl EntitySummary {
fn from_entity_with_count(e: Entity, fact_count: i64) -> Self {
let mut s = EntitySummary::from(e);
s.fact_count = Some(fact_count);
s
}
}
#[derive(Serialize)]
pub struct EntityListResponse {
pub entities: Vec<EntitySummary>,
@@ -197,6 +212,9 @@ pub struct EntityListQuery {
pub entity_type: Option<String>,
pub status: Option<String>,
pub search: Option<String>,
/// "updated" (default) | "name" | "count". `count` is persona-scoped
/// via the X-Persona-Id header.
pub sort: Option<String>,
pub limit: Option<i64>,
pub offset: Option<i64>,
}
@@ -253,9 +271,11 @@ where
// ---------------------------------------------------------------------------
async fn list_entities<D: KnowledgeDao + 'static>(
_claims: Claims,
req: HttpRequest,
claims: Claims,
query: web::Query<EntityListQuery>,
dao: web::Data<Mutex<D>>,
persona_dao: PersonaDaoData,
) -> impl Responder {
let limit = query.limit.unwrap_or(50).min(200);
let offset = query.offset.unwrap_or(0);
@@ -266,6 +286,15 @@ async fn list_entities<D: KnowledgeDao + 'static>(
Some(s) => Some(s.to_string()),
};
let sort = match query.sort.as_deref() {
Some("name") => EntitySort::NameAsc,
Some("count") => EntitySort::FactCountDesc,
// "updated" or anything else falls through to the default.
_ => EntitySort::UpdatedDesc,
};
let persona = resolve_persona_filter(&req, &claims, &persona_dao);
let filter = EntityFilter {
entity_type: query.entity_type.clone(),
status: status_filter,
@@ -276,10 +305,12 @@ async fn list_entities<D: KnowledgeDao + 'static>(
let cx = opentelemetry::Context::current();
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
match dao.list_entities(&cx, filter) {
Ok((entities, total)) => {
let summaries: Vec<EntitySummary> =
entities.into_iter().map(EntitySummary::from).collect();
match dao.list_entities_with_fact_counts(&cx, filter, sort, &persona) {
Ok((pairs, total)) => {
let summaries: Vec<EntitySummary> = pairs
.into_iter()
.map(|(e, c)| EntitySummary::from_entity_with_count(e, c))
.collect();
HttpResponse::Ok().json(EntityListResponse {
entities: summaries,
total,