diff --git a/src/database/knowledge_dao.rs b/src/database/knowledge_dao.rs
index 091cd9a..70ff711 100644
--- a/src/database/knowledge_dao.rs
+++ b/src/database/knowledge_dao.rs
@@ -45,6 +45,17 @@ pub struct EntityFilter {
     pub offset: i64,
 }
 
+/// Sort key for the curation list. `NameAsc` clusters names alphabetically
+/// (handy for spotting near-duplicates like Sara / Sarah / Sarah J.).
+/// `FactCountDesc` lists heavily-used entities first and pushes 0-fact
+/// noise to the bottom. `UpdatedDesc` is the legacy "newest activity first".
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum EntitySort {
+    UpdatedDesc,
+    NameAsc,
+    FactCountDesc,
+}
+
 pub struct FactFilter {
     pub entity_id: Option,
     /// "active" | "reviewed" | "rejected" | "all"
@@ -134,6 +145,19 @@ pub trait KnowledgeDao: Sync + Send {
         filter: EntityFilter,
     ) -> Result<(Vec, i64), DbError>;
 
+    /// List one page of entities, each paired with its persona-scoped
+    /// fact count, plus the total number of entities matching `filter`.
+    /// Powers the curation surface: sorting by fact count surfaces the
+    /// heavily-used entities and demotes 0-fact noise. Only non-rejected
+    /// facts in the given persona scope count, so switching persona re-orders.
+    fn list_entities_with_fact_counts(
+        &mut self,
+        cx: &opentelemetry::Context,
+        filter: EntityFilter,
+        sort: EntitySort,
+        persona: &PersonaFilter,
+    ) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
+
     fn update_entity_status(
         &mut self,
         cx: &opentelemetry::Context,
@@ -529,6 +553,201 @@ impl KnowledgeDao for SqliteKnowledgeDao {
             .map_err(|_| DbError::new(DbErrorKind::QueryError))
     }
 
+    /// Page through entities with a persona-scoped fact count attached.
+    ///
+    /// Returns `(rows, total)`: `rows` is the requested page of
+    /// `(Entity, fact_count)` pairs ordered by `sort`; `total` is the
+    /// number of entities matching `filter`, ignoring LIMIT / OFFSET.
+    /// Only non-rejected facts with the entity as subject are counted.
+    ///
+    /// # Errors
+    /// A failure of either the count or the page query is reported as
+    /// `DbErrorKind::QueryError`.
+    fn list_entities_with_fact_counts(
+        &mut self,
+        cx: &opentelemetry::Context,
+        filter: EntityFilter,
+        sort: EntitySort,
+        persona: &PersonaFilter,
+    ) -> Result<(Vec<(Entity, i64)>, i64), DbError> {
+        trace_db_call(cx, "query", "list_entities_with_fact_counts", |_span| {
+            use diesel::sql_query;
+            use diesel::sql_types::{BigInt, Integer, Text};
+
+            let mut conn = self.connection.lock().expect("KnowledgeDao lock");
+
+            // WHERE fragments are fixed SQL text; every caller-supplied
+            // value travels as a bound parameter, in fragment order.
+            let mut where_parts: Vec<&'static str> = Vec::new();
+            let mut bind_strs: Vec<String> = Vec::new();
+
+            if let Some(ref entity_type) = filter.entity_type {
+                where_parts.push("e.entity_type = ?");
+                bind_strs.push(entity_type.clone());
+            }
+
+            // A missing status means "active"; "all" disables the filter.
+            let status_val = filter.status.as_deref().unwrap_or("active");
+            if status_val != "all" {
+                where_parts.push("e.status = ?");
+                bind_strs.push(status_val.to_string());
+            }
+
+            if let Some(ref s) = filter.search {
+                where_parts.push("(e.name LIKE ? OR e.description LIKE ?)");
+                let pat = format!("%{}%", s);
+                bind_strs.push(pat.clone());
+                bind_strs.push(pat);
+            }
+
+            let where_clause = if where_parts.is_empty() {
+                String::new()
+            } else {
+                format!("WHERE {}", where_parts.join(" AND "))
+            };
+
+            // Persona-scoped fact-count subquery. Single = filter on
+            // (user_id, persona_id); All = union across the user's
+            // personas (mirror PersonaFilter::All read semantics).
+            let fact_count_join = match persona {
+                PersonaFilter::Single { .. } => {
+                    "LEFT JOIN (\
+                        SELECT subject_entity_id, COUNT(*) AS fact_count \
+                        FROM entity_facts \
+                        WHERE user_id = ? AND persona_id = ? AND status != 'rejected' \
+                        GROUP BY subject_entity_id\
+                    ) fc ON fc.subject_entity_id = e.id"
+                }
+                PersonaFilter::All { .. } => {
+                    "LEFT JOIN (\
+                        SELECT subject_entity_id, COUNT(*) AS fact_count \
+                        FROM entity_facts \
+                        WHERE user_id = ? AND status != 'rejected' \
+                        GROUP BY subject_entity_id\
+                    ) fc ON fc.subject_entity_id = e.id"
+                }
+            };
+
+            // `e.id` is the last key in every ordering so rows that tie on
+            // the leading keys keep a stable position across LIMIT/OFFSET
+            // pages (SQLite leaves the order of ties unspecified).
+            let order_by = match sort {
+                EntitySort::UpdatedDesc => "e.updated_at DESC, e.id ASC",
+                EntitySort::NameAsc => "lower(e.name) ASC, e.id ASC",
+                EntitySort::FactCountDesc => {
+                    "COALESCE(fc.fact_count, 0) DESC, lower(e.name) ASC, e.id ASC"
+                }
+            };
+
+            let select_sql = format!(
+                "SELECT e.id, e.name, e.entity_type, e.description, e.embedding, \
+                 e.confidence, e.status, e.created_at, e.updated_at, \
+                 COALESCE(fc.fact_count, 0) AS fact_count \
+                 FROM entities e \
+                 {fact_count_join} \
+                 {where_clause} \
+                 ORDER BY {order_by} \
+                 LIMIT ? OFFSET ?"
+            );
+
+            let count_sql = format!(
+                "SELECT COUNT(*) AS total FROM entities e {where_clause}"
+            );
+
+            // ── Total count ─────────────────────────────────────────
+            #[derive(diesel::QueryableByName)]
+            struct TotalRow {
+                #[diesel(sql_type = BigInt)]
+                total: i64,
+            }
+            let mut count_q = sql_query(count_sql).into_boxed();
+            for s in &bind_strs {
+                count_q = count_q.bind::<Text, _>(s.clone());
+            }
+            // Propagate failures: a swallowed error would look like an
+            // empty result set (total = 0) to the caller.
+            let total: i64 = count_q
+                .get_result::<TotalRow>(conn.deref_mut())
+                .map(|r| r.total)
+                .map_err(|e| anyhow::anyhow!("Count query error: {}", e))?;
+
+            // ── Page query ──────────────────────────────────────────
+            #[derive(diesel::QueryableByName)]
+            struct EntityWithCountRow {
+                #[diesel(sql_type = Integer)]
+                id: i32,
+                #[diesel(sql_type = Text)]
+                name: String,
+                #[diesel(sql_type = Text)]
+                entity_type: String,
+                #[diesel(sql_type = Text)]
+                description: String,
+                #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Binary>)]
+                embedding: Option<Vec<u8>>,
+                #[diesel(sql_type = diesel::sql_types::Float)]
+                confidence: f32,
+                #[diesel(sql_type = Text)]
+                status: String,
+                #[diesel(sql_type = BigInt)]
+                created_at: i64,
+                #[diesel(sql_type = BigInt)]
+                updated_at: i64,
+                #[diesel(sql_type = BigInt)]
+                fact_count: i64,
+            }
+
+            let mut q = sql_query(select_sql).into_boxed();
+            // Persona binds first (they're earlier in the SQL — inside
+            // the subquery LEFT JOIN).
+            match persona {
+                PersonaFilter::Single { user_id, persona_id } => {
+                    q = q
+                        .bind::<Integer, _>(*user_id)
+                        .bind::<Text, _>(persona_id.clone());
+                }
+                PersonaFilter::All { user_id } => {
+                    q = q.bind::<Integer, _>(*user_id);
+                }
+            }
+            // Then WHERE binds in order; the strings are not needed
+            // afterwards, so move them instead of cloning.
+            for s in bind_strs {
+                q = q.bind::<Text, _>(s);
+            }
+            // Then LIMIT / OFFSET.
+            q = q
+                .bind::<BigInt, _>(filter.limit)
+                .bind::<BigInt, _>(filter.offset);
+
+            let rows: Vec<EntityWithCountRow> = q
+                .load(conn.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
+
+            let pairs: Vec<(Entity, i64)> = rows
+                .into_iter()
+                .map(|r| {
+                    (
+                        Entity {
+                            id: r.id,
+                            name: r.name,
+                            entity_type: r.entity_type,
+                            description: r.description,
+                            embedding: r.embedding,
+                            confidence: r.confidence,
+                            status: r.status,
+                            created_at: r.created_at,
+                            updated_at: r.updated_at,
+                        },
+                        r.fact_count,
+                    )
+                })
+                .collect();
+
+            Ok((pairs, total))
+        })
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
     fn update_entity_status(
         &mut self,
         cx: &opentelemetry::Context,
diff --git a/src/database/mod.rs b/src/database/mod.rs
index d5dd9cb..3c2fb5d 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -59,8 +59,8 @@ pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
 pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
 pub use insights_dao::{InsightDao, SqliteInsightDao};
 pub use knowledge_dao::{
-    EntityFilter, EntityPatch, FactFilter, FactPatch, KnowledgeDao, PersonaFilter, RecentActivity,
-    SqliteKnowledgeDao,
+    EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch, KnowledgeDao, PersonaFilter,
+    RecentActivity, SqliteKnowledgeDao,
 };
 pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
 pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};
diff --git a/src/knowledge.rs b/src/knowledge.rs
index 49ff307..b8fc4ba 100644
--- a/src/knowledge.rs
+++ b/src/knowledge.rs
@@ -7,7 +7,8 @@ use std::sync::Mutex;
 use crate::data::Claims;
 use crate::database::models::{Entity, EntityFact, EntityPhotoLink, InsertEntityFact};
 use crate::database::{
-    EntityFilter, EntityPatch, FactFilter, FactPatch, KnowledgeDao, PersonaFilter, RecentActivity,
+    EntityFilter, EntityPatch, EntitySort, FactFilter, FactPatch,
KnowledgeDao, PersonaFilter,
+    RecentActivity,
 };
 use crate::personas::PersonaDaoData;
 
@@ -57,6 +58,11 @@ pub struct EntitySummary {
     pub status: String,
     pub created_at: i64,
     pub updated_at: i64,
+    /// Persona-scoped count of non-rejected facts about this entity (subject
+    /// side). `None` — and skipped during serialization — when built through
+    /// `From<Entity>`, which has no scoping context (e.g. PATCH responses).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub fact_count: Option,
 }
 
 impl From for EntitySummary {
@@ -70,10 +76,19 @@ impl From for EntitySummary {
             status: e.status,
             created_at: e.created_at,
             updated_at: e.updated_at,
+            fact_count: None,
         }
     }
 }
 
+impl EntitySummary {
+    fn from_entity_with_count(e: Entity, fact_count: i64) -> Self {
+        let mut s = EntitySummary::from(e);
+        s.fact_count = Some(fact_count);
+        s
+    }
+}
+
 #[derive(Serialize)]
 pub struct EntityListResponse {
     pub entities: Vec,
@@ -197,6 +212,9 @@ pub struct EntityListQuery {
     pub entity_type: Option,
     pub status: Option,
     pub search: Option,
+    /// "updated" (default) | "name" | "count"; any other value is treated
+    /// as "updated". `count` is persona-scoped via the X-Persona-Id header.
+    pub sort: Option,
     pub limit: Option,
     pub offset: Option,
 }
@@ -253,9 +271,11 @@ where
 // ---------------------------------------------------------------------------
 
 async fn list_entities(
-    _claims: Claims,
+    req: HttpRequest,
+    claims: Claims,
     query: web::Query,
     dao: web::Data>,
+    persona_dao: PersonaDaoData,
 ) -> impl Responder {
     let limit = query.limit.unwrap_or(50).min(200);
     let offset = query.offset.unwrap_or(0);
@@ -266,6 +286,15 @@ async fn list_entities(
         Some(s) => Some(s.to_string()),
     };
 
+    let sort = match query.sort.as_deref() {
+        Some("name") => EntitySort::NameAsc,
+        Some("count") => EntitySort::FactCountDesc,
+        // "updated", a missing value, or any unrecognized value: use the default.
+        _ => EntitySort::UpdatedDesc,
+    };
+
+    let persona = resolve_persona_filter(&req, &claims, &persona_dao);
+
     let filter = EntityFilter {
         entity_type: query.entity_type.clone(),
         status: status_filter,
@@ -276,10 +305,12 @@ async fn list_entities(
     let cx = opentelemetry::Context::current();
     let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
 
-    match dao.list_entities(&cx, filter) {
-        Ok((entities, total)) => {
-            let summaries: Vec =
-                entities.into_iter().map(EntitySummary::from).collect();
+    match dao.list_entities_with_fact_counts(&cx, filter, sort, &persona) {
+        Ok((pairs, total)) => {
+            let summaries: Vec = pairs
+                .into_iter()
+                .map(|(e, c)| EntitySummary::from_entity_with_count(e, c))
+                .collect();
             HttpResponse::Ok().json(EntityListResponse {
                 entities: summaries,
                 total,