knowledge: predicate-quality nudge + bulk-reject endpoint
Two coupled changes to fight the speech-act-predicate problem
(facts like (Cameron, expressed, "I'm tempted to...")):
1. System prompt grows an explicit predicate-quality rule. The
agent is told to use relationship-shaped verbs (lives_in,
works_at, attended, is_friend_of, interested_in), and is
given an explicit DON'T list (expressed, said, mentioned,
stated, quoted, noted, discussed, thought, wondered). Plus a
concrete Bad / Good example contrasting the noise pattern
with the structured paraphrase the agent should be writing.
Stops the bleed for new insights.
2. Cleanup tools for the legacy noise that's already in the
table:
- get_predicate_stats(persona, limit) returns
[(predicate, count)] sorted desc — feeds the curation UI's
PREDICATES tab.
- bulk_reject_facts_by_predicate(persona, predicate, audit)
flips every ACTIVE fact under that predicate to 'rejected'
in one transaction, stamping last_modified_* so the action
is attributable + reversible per-fact through the entity
detail panel. REVIEWED facts under the same predicate are
left alone — the curator may have hand-approved an
exception ("interested_in" might be largely noise but a
reviewed entry is intentional).
New HTTP endpoints:
GET /knowledge/predicate-stats?limit=
POST /knowledge/predicates/{predicate}/bulk-reject
Persona-scoped via the existing X-Persona-Id header.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -359,6 +359,27 @@ pub struct GraphResponse {
|
||||
pub edges: Vec<GraphEdgeView>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct PredicateStatsQuery {
|
||||
pub limit: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct PredicateStat {
|
||||
pub predicate: String,
|
||||
pub count: i64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct PredicateStatsResponse {
|
||||
pub predicates: Vec<PredicateStat>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct BulkRejectResponse {
|
||||
pub rejected: usize,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ConsolidationQuery {
|
||||
/// Cosine threshold for clustering. Default 0.85 — looser than
|
||||
@@ -421,7 +442,15 @@ where
|
||||
web::resource("/consolidation-proposals")
|
||||
.route(web::get().to(get_consolidation_proposals::<D>)),
|
||||
)
|
||||
.service(web::resource("/graph").route(web::get().to(get_graph::<D>))),
|
||||
.service(web::resource("/graph").route(web::get().to(get_graph::<D>)))
|
||||
.service(
|
||||
web::resource("/predicate-stats")
|
||||
.route(web::get().to(get_predicate_stats::<D>)),
|
||||
)
|
||||
.service(
|
||||
web::resource("/predicates/{predicate}/bulk-reject")
|
||||
.route(web::post().to(bulk_reject_predicate::<D>)),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1192,6 +1221,60 @@ async fn get_recent<D: KnowledgeDao + 'static>(
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_predicate_stats<D: KnowledgeDao + 'static>(
|
||||
req: HttpRequest,
|
||||
claims: Claims,
|
||||
query: web::Query<PredicateStatsQuery>,
|
||||
dao: web::Data<Mutex<D>>,
|
||||
persona_dao: PersonaDaoData,
|
||||
) -> impl Responder {
|
||||
let limit = query.limit.unwrap_or(100).clamp(1, 500) as usize;
|
||||
let persona = resolve_persona_filter(&req, &claims, &persona_dao);
|
||||
let cx = opentelemetry::Context::current();
|
||||
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
|
||||
match dao.get_predicate_stats(&cx, &persona, limit) {
|
||||
Ok(rows) => HttpResponse::Ok().json(PredicateStatsResponse {
|
||||
predicates: rows
|
||||
.into_iter()
|
||||
.map(|(predicate, count)| PredicateStat { predicate, count })
|
||||
.collect(),
|
||||
}),
|
||||
Err(e) => {
|
||||
log::error!("get_predicate_stats error: {:?}", e);
|
||||
HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn bulk_reject_predicate<D: KnowledgeDao + 'static>(
|
||||
req: HttpRequest,
|
||||
claims: Claims,
|
||||
predicate: web::Path<String>,
|
||||
dao: web::Data<Mutex<D>>,
|
||||
persona_dao: PersonaDaoData,
|
||||
) -> impl Responder {
|
||||
let predicate = predicate.into_inner();
|
||||
if predicate.trim().is_empty() {
|
||||
return HttpResponse::BadRequest()
|
||||
.json(serde_json::json!({"error": "predicate must not be empty"}));
|
||||
}
|
||||
let persona = resolve_persona_filter(&req, &claims, &persona_dao);
|
||||
let cx = opentelemetry::Context::current();
|
||||
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
|
||||
match dao.bulk_reject_facts_by_predicate(
|
||||
&cx,
|
||||
&persona,
|
||||
&predicate,
|
||||
Some(("manual", "manual")),
|
||||
) {
|
||||
Ok(rejected) => HttpResponse::Ok().json(BulkRejectResponse { rejected }),
|
||||
Err(e) => {
|
||||
log::error!("bulk_reject_predicate error: {:?}", e);
|
||||
HttpResponse::InternalServerError().json(serde_json::json!({"error": "Database error"}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_graph<D: KnowledgeDao + 'static>(
|
||||
req: HttpRequest,
|
||||
claims: Claims,
|
||||
|
||||
Reference in New Issue
Block a user