knowledge: predicate-quality nudge + bulk-reject endpoint

Two coupled changes to fight the speech-act-predicate problem
(facts like (Cameron, expressed, "I'm tempted to...")):

1. System prompt grows an explicit predicate-quality rule. The
   agent is told to use relationship-shaped verbs (lives_in,
   works_at, attended, is_friend_of, interested_in), and is
   given an explicit DON'T list (expressed, said, mentioned,
   stated, quoted, noted, discussed, thought, wondered). Plus a
   concrete Bad / Good example contrasting the noise pattern
   with the structured paraphrase the agent should be writing.
   Stops the bleed for new insights.

2. Cleanup tools for the legacy noise that's already in the
   table:
   - get_predicate_stats(persona, limit) returns
     [(predicate, count)] sorted desc — feeds the curation UI's
     PREDICATES tab.
   - bulk_reject_facts_by_predicate(persona, predicate, audit)
     flips every ACTIVE fact under that predicate to 'rejected'
     in one transaction, stamping last_modified_* so the action
     is attributable + reversible per-fact through the entity
     detail panel. REVIEWED facts under the same predicate are
     left alone — the curator may have hand-approved an
     exception ("interested_in" might be largely noise but a
     reviewed entry is intentional).

New HTTP endpoints:
   GET  /knowledge/predicate-stats?limit=
   POST /knowledge/predicates/{predicate}/bulk-reject

Persona-scoped via the existing X-Persona-Id header.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-11 21:50:26 -04:00
parent fb078b4906
commit e67e00ef8a
3 changed files with 225 additions and 1 deletions

View File

@@ -205,6 +205,30 @@ pub trait KnowledgeDao: Sync + Send {
persona: &PersonaFilter,
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
/// Aggregate the user's active + reviewed facts by predicate so
/// the curation UI can flag noisy verbs ("expressed", "said") and
/// offer them for bulk rejection. Persona-scoped via the existing
/// PersonaFilter pattern.
///
/// Returns `(predicate, count)` pairs sorted by count descending,
/// capped at `limit` entries.
fn get_predicate_stats(
&mut self,
cx: &opentelemetry::Context,
persona: &PersonaFilter,
limit: usize,
) -> Result<Vec<(String, i64)>, DbError>;
/// Bulk-reject every active fact under a given predicate
/// (persona-scoped). Returns the number of rows touched. Used by
/// the predicate-cleanup UI to nuke noise verbs in one click.
///
/// `audit` is an optional `(model, backend)` pair that is stamped
/// into last_modified_* so the action shows up in the recent-edits
/// feed. The SQLite implementation only flips rows whose status is
/// `active`; `reviewed` rows under the same predicate are left as
/// they are.
fn bulk_reject_facts_by_predicate(
&mut self,
cx: &opentelemetry::Context,
persona: &PersonaFilter,
predicate: &str,
audit: Option<(&str, &str)>,
) -> Result<usize, DbError>;
/// Build a graph snapshot — entities as nodes (fact count from
/// the active persona scope), relational facts as edges. Used
/// by the curation UI's graph view. Filters:
@@ -872,6 +896,122 @@ impl KnowledgeDao for SqliteKnowledgeDao {
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn get_predicate_stats(
&mut self,
cx: &opentelemetry::Context,
persona: &PersonaFilter,
limit: usize,
) -> Result<Vec<(String, i64)>, DbError> {
trace_db_call(cx, "query", "get_predicate_stats", |_span| {
use diesel::sql_query;
use diesel::sql_types::{BigInt, Integer, Text};
// Active + reviewed only — rejected / superseded are
// already off the agent's read path so they shouldn't
// count toward "what predicates are noisy in production".
let where_sql = match persona {
PersonaFilter::Single { .. } => {
"WHERE user_id = ? AND persona_id = ? \
AND status IN ('active','reviewed')"
}
PersonaFilter::All { .. } => {
"WHERE user_id = ? AND status IN ('active','reviewed')"
}
};
let sql = format!(
"SELECT predicate, COUNT(*) AS cnt FROM entity_facts \
{where_sql} \
GROUP BY predicate \
ORDER BY cnt DESC \
LIMIT ?",
);
#[derive(diesel::QueryableByName)]
struct Row {
#[diesel(sql_type = Text)]
predicate: String,
#[diesel(sql_type = BigInt)]
cnt: i64,
}
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let mut q = sql_query(sql).into_boxed();
match persona {
PersonaFilter::Single { user_id, persona_id } => {
q = q
.bind::<Integer, _>(*user_id)
.bind::<Text, _>(persona_id.clone());
}
PersonaFilter::All { user_id } => {
q = q.bind::<Integer, _>(*user_id);
}
}
q = q.bind::<BigInt, _>(limit as i64);
let rows: Vec<Row> = q
.load(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
Ok(rows.into_iter().map(|r| (r.predicate, r.cnt)).collect())
})
.map_err(|_| DbError::new(DbErrorKind::QueryError))
}
fn bulk_reject_facts_by_predicate(
&mut self,
cx: &opentelemetry::Context,
persona: &PersonaFilter,
target_predicate: &str,
audit: Option<(&str, &str)>,
) -> Result<usize, DbError> {
trace_db_call(cx, "update", "bulk_reject_facts_by_predicate", |_span| {
use schema::entity_facts::dsl::*;
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
let now = chrono::Utc::now().timestamp();
let (audit_model, audit_backend) = match audit {
Some((m, b)) => (Some(m.to_string()), Some(b.to_string())),
None => (None, None),
};
// Persona scoping mirrors get_predicate_stats. Only ACTIVE
// rows flip — REVIEWED survives so the curator can preserve
// a hand-approved exception under the same predicate.
let touched = match persona {
PersonaFilter::Single { user_id: uid, persona_id: pid } => diesel::update(
entity_facts
.filter(predicate.eq(target_predicate))
.filter(user_id.eq(*uid))
.filter(persona_id.eq(pid))
.filter(status.eq("active")),
)
.set((
status.eq("rejected"),
last_modified_by_model.eq(audit_model.clone()),
last_modified_by_backend.eq(audit_backend.clone()),
last_modified_at.eq(Some(now)),
))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Bulk reject error: {}", e))?,
PersonaFilter::All { user_id: uid } => diesel::update(
entity_facts
.filter(predicate.eq(target_predicate))
.filter(user_id.eq(*uid))
.filter(status.eq("active")),
)
.set((
status.eq("rejected"),
last_modified_by_model.eq(audit_model.clone()),
last_modified_by_backend.eq(audit_backend.clone()),
last_modified_at.eq(Some(now)),
))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Bulk reject error: {}", e))?,
};
Ok(touched)
})
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
}
fn build_entity_graph(
&mut self,
cx: &opentelemetry::Context,