knowledge: predicate-quality nudge + bulk-reject endpoint
Two coupled changes to fight the speech-act-predicate problem
(facts like (Cameron, expressed, "I'm tempted to...")):
1. System prompt grows an explicit predicate-quality rule. The
agent is told to use relationship-shaped verbs (lives_in,
works_at, attended, is_friend_of, interested_in), and is
given an explicit DON'T list (expressed, said, mentioned,
stated, quoted, noted, discussed, thought, wondered). Plus a
concrete Bad / Good example contrasting the noise pattern
with the structured paraphrase the agent should be writing.
Stops the bleed for new insights.
2. Cleanup tools for the legacy noise that's already in the
table:
- get_predicate_stats(persona, limit) counts active + reviewed
facts per predicate and returns [(predicate, count)] sorted by
count, descending — feeds the curation UI's PREDICATES tab.
- bulk_reject_facts_by_predicate(persona, predicate, audit)
flips every ACTIVE fact under that predicate to 'rejected'
in one transaction, stamping last_modified_* so the action
is attributable + reversible per-fact through the entity
detail panel. REVIEWED facts under the same predicate are
left alone — the curator may have hand-approved an
exception ("interested_in" might be largely noise but a
reviewed entry is intentional).
New HTTP endpoints:
GET /knowledge/predicate-stats?limit=
POST /knowledge/predicates/{predicate}/bulk-reject
Persona-scoped via the existing X-Persona-Id header.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -205,6 +205,30 @@ pub trait KnowledgeDao: Sync + Send {
|
||||
persona: &PersonaFilter,
|
||||
) -> Result<(Vec<(Entity, i64)>, i64), DbError>;
|
||||
|
||||
/// Aggregate the user's active+reviewed facts by predicate so
|
||||
/// the curation UI can flag noisy verbs ("expressed", "said") and
|
||||
/// bulk-reject. Persona-scoped via the existing PersonaFilter
|
||||
/// pattern. Sorted by count desc.
|
||||
fn get_predicate_stats(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
persona: &PersonaFilter,
|
||||
limit: usize,
|
||||
) -> Result<Vec<(String, i64)>, DbError>;
|
||||
|
||||
/// Bulk reject every active fact under a given predicate
|
||||
/// (persona-scoped). Returns the number of rows touched. Used by
|
||||
/// the predicate-cleanup UI to nuke noise verbs in one click.
|
||||
/// Stamps last_modified_* with the caller-supplied audit so the
|
||||
/// action shows up in the recent-edits feed.
|
||||
fn bulk_reject_facts_by_predicate(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
persona: &PersonaFilter,
|
||||
predicate: &str,
|
||||
audit: Option<(&str, &str)>,
|
||||
) -> Result<usize, DbError>;
|
||||
|
||||
/// Build a graph snapshot — entities as nodes (fact count from
|
||||
/// the active persona scope), relational facts as edges. Used
|
||||
/// by the curation UI's graph view. Filters:
|
||||
@@ -872,6 +896,122 @@ impl KnowledgeDao for SqliteKnowledgeDao {
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn get_predicate_stats(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
persona: &PersonaFilter,
|
||||
limit: usize,
|
||||
) -> Result<Vec<(String, i64)>, DbError> {
|
||||
trace_db_call(cx, "query", "get_predicate_stats", |_span| {
|
||||
use diesel::sql_query;
|
||||
use diesel::sql_types::{BigInt, Integer, Text};
|
||||
|
||||
// Active + reviewed only — rejected / superseded are
|
||||
// already off the agent's read path so they shouldn't
|
||||
// count toward "what predicates are noisy in production".
|
||||
let where_sql = match persona {
|
||||
PersonaFilter::Single { .. } => {
|
||||
"WHERE user_id = ? AND persona_id = ? \
|
||||
AND status IN ('active','reviewed')"
|
||||
}
|
||||
PersonaFilter::All { .. } => {
|
||||
"WHERE user_id = ? AND status IN ('active','reviewed')"
|
||||
}
|
||||
};
|
||||
let sql = format!(
|
||||
"SELECT predicate, COUNT(*) AS cnt FROM entity_facts \
|
||||
{where_sql} \
|
||||
GROUP BY predicate \
|
||||
ORDER BY cnt DESC \
|
||||
LIMIT ?",
|
||||
);
|
||||
|
||||
#[derive(diesel::QueryableByName)]
|
||||
struct Row {
|
||||
#[diesel(sql_type = Text)]
|
||||
predicate: String,
|
||||
#[diesel(sql_type = BigInt)]
|
||||
cnt: i64,
|
||||
}
|
||||
|
||||
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
|
||||
let mut q = sql_query(sql).into_boxed();
|
||||
match persona {
|
||||
PersonaFilter::Single { user_id, persona_id } => {
|
||||
q = q
|
||||
.bind::<Integer, _>(*user_id)
|
||||
.bind::<Text, _>(persona_id.clone());
|
||||
}
|
||||
PersonaFilter::All { user_id } => {
|
||||
q = q.bind::<Integer, _>(*user_id);
|
||||
}
|
||||
}
|
||||
q = q.bind::<BigInt, _>(limit as i64);
|
||||
|
||||
let rows: Vec<Row> = q
|
||||
.load(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
|
||||
Ok(rows.into_iter().map(|r| (r.predicate, r.cnt)).collect())
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::QueryError))
|
||||
}
|
||||
|
||||
fn bulk_reject_facts_by_predicate(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
persona: &PersonaFilter,
|
||||
target_predicate: &str,
|
||||
audit: Option<(&str, &str)>,
|
||||
) -> Result<usize, DbError> {
|
||||
trace_db_call(cx, "update", "bulk_reject_facts_by_predicate", |_span| {
|
||||
use schema::entity_facts::dsl::*;
|
||||
let mut conn = self.connection.lock().expect("KnowledgeDao lock");
|
||||
|
||||
let now = chrono::Utc::now().timestamp();
|
||||
let (audit_model, audit_backend) = match audit {
|
||||
Some((m, b)) => (Some(m.to_string()), Some(b.to_string())),
|
||||
None => (None, None),
|
||||
};
|
||||
|
||||
// Persona scoping mirrors get_predicate_stats. Only ACTIVE
|
||||
// rows flip — REVIEWED survives so the curator can preserve
|
||||
// a hand-approved exception under the same predicate.
|
||||
let touched = match persona {
|
||||
PersonaFilter::Single { user_id: uid, persona_id: pid } => diesel::update(
|
||||
entity_facts
|
||||
.filter(predicate.eq(target_predicate))
|
||||
.filter(user_id.eq(*uid))
|
||||
.filter(persona_id.eq(pid))
|
||||
.filter(status.eq("active")),
|
||||
)
|
||||
.set((
|
||||
status.eq("rejected"),
|
||||
last_modified_by_model.eq(audit_model.clone()),
|
||||
last_modified_by_backend.eq(audit_backend.clone()),
|
||||
last_modified_at.eq(Some(now)),
|
||||
))
|
||||
.execute(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Bulk reject error: {}", e))?,
|
||||
PersonaFilter::All { user_id: uid } => diesel::update(
|
||||
entity_facts
|
||||
.filter(predicate.eq(target_predicate))
|
||||
.filter(user_id.eq(*uid))
|
||||
.filter(status.eq("active")),
|
||||
)
|
||||
.set((
|
||||
status.eq("rejected"),
|
||||
last_modified_by_model.eq(audit_model.clone()),
|
||||
last_modified_by_backend.eq(audit_backend.clone()),
|
||||
last_modified_at.eq(Some(now)),
|
||||
))
|
||||
.execute(conn.deref_mut())
|
||||
.map_err(|e| anyhow::anyhow!("Bulk reject error: {}", e))?,
|
||||
};
|
||||
Ok(touched)
|
||||
})
|
||||
.map_err(|_| DbError::new(DbErrorKind::UpdateError))
|
||||
}
|
||||
|
||||
fn build_entity_graph(
|
||||
&mut self,
|
||||
cx: &opentelemetry::Context,
|
||||
|
||||
Reference in New Issue
Block a user