feature/knowledge-curation #91

Merged
cameron merged 19 commits from feature/knowledge-curation into master 2026-05-12 15:40:57 +00:00
Showing only changes of commit afac02cade - Show all commits

View File

@@ -11,6 +11,7 @@ use crate::database::{
RecentActivity,
};
use crate::personas::PersonaDaoData;
use crate::state::AppState;
/// Resolve the `X-Persona-Id` header into a `PersonaFilter`. Missing
/// header → `'default'`. If the persona has `include_all_memories=true`,
@@ -248,6 +249,18 @@ pub struct SupersedeRequest {
pub by_fact_id: i32,
}
#[derive(Deserialize)]
pub struct SynthesizeMergeRequest {
pub source_id: i32,
pub target_id: i32,
}
#[derive(serde::Serialize)]
pub struct SynthesizeMergeResponse {
pub proposed_description: String,
pub model_used: String,
}
#[derive(Deserialize)]
pub struct FactCreateRequest {
pub subject_entity_id: i32,
@@ -300,6 +313,10 @@ where
web::scope("/knowledge")
.service(web::resource("/entities").route(web::get().to(list_entities::<D>)))
.service(web::resource("/entities/merge").route(web::post().to(merge_entities::<D>)))
.service(
web::resource("/entities/synthesize-merge")
.route(web::post().to(synthesize_merge::<D>)),
)
.service(
web::resource("/entities/{id}")
.route(web::get().to(get_entity::<D>))
@@ -634,6 +651,110 @@ async fn merge_entities<D: KnowledgeDao + 'static>(
}
}
/// Preview a merged-description before the actual merge fires. Calls
/// the local Ollama with both entities' names + descriptions and
/// returns a synthesized rewrite that combines them. The curator
/// previews, edits, and either accepts (PATCH target's description
/// then POST /merge) or skips (just /merge as-is).
///
/// Deliberately doesn't touch the database — read-only on entities,
/// no LLM call gets to write anything. If the model is unavailable
/// the handler returns 503 so the UI can degrade gracefully (skip
/// the preview, fall back to the existing merge action).
async fn synthesize_merge<D: KnowledgeDao + 'static>(
_claims: Claims,
body: web::Json<SynthesizeMergeRequest>,
dao: web::Data<Mutex<D>>,
app_state: web::Data<AppState>,
) -> impl Responder {
if body.source_id == body.target_id {
return HttpResponse::BadRequest()
.json(serde_json::json!({"error": "source_id and target_id must differ"}));
}
let cx = opentelemetry::Context::current();
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
let source = match dao.get_entity_by_id(&cx, body.source_id) {
Ok(Some(e)) => e,
Ok(None) => {
return HttpResponse::BadRequest()
.json(serde_json::json!({"error": "source entity not found"}));
}
Err(e) => {
log::error!("synthesize_merge source lookup: {:?}", e);
return HttpResponse::InternalServerError()
.json(serde_json::json!({"error": "Database error"}));
}
};
let target = match dao.get_entity_by_id(&cx, body.target_id) {
Ok(Some(e)) => e,
Ok(None) => {
return HttpResponse::BadRequest()
.json(serde_json::json!({"error": "target entity not found"}));
}
Err(e) => {
log::error!("synthesize_merge target lookup: {:?}", e);
return HttpResponse::InternalServerError()
.json(serde_json::json!({"error": "Database error"}));
}
};
// Drop the DAO lock before the LLM call — the generate request
// is the slow part (seconds) and we don't want to block other
// knowledge reads while it runs.
drop(dao);
let source_desc = if source.description.trim().is_empty() {
"(none)".to_string()
} else {
source.description.clone()
};
let target_desc = if target.description.trim().is_empty() {
"(none)".to_string()
} else {
target.description.clone()
};
let system = "You are condensing two stored entity descriptions into one. The two \
entities refer to the same real-world thing and are about to be merged. Write a \
single neutral third-person description (1-2 sentences, max 300 chars) that \
preserves any concrete facts in either source. Do not invent details. Do not \
editorialize. Return ONLY the merged description — no preamble, no quotes.";
let prompt = format!(
"Entity A: {} [{}]\nDescription: {}\n\nEntity B: {} [{}]\nDescription: {}\n\nMerged description:",
source.name, source.entity_type, source_desc,
target.name, target.entity_type, target_desc,
);
let ollama = app_state.ollama.clone();
let model_used = ollama.primary_model.clone();
let proposed = match ollama.generate(&prompt, Some(system)).await {
Ok(out) => {
// Some models open with their own framing — strip a
// leading quote or "Merged:" lead-in defensively.
let cleaned = out
.trim()
.trim_start_matches("Merged description:")
.trim()
.trim_matches(|c| c == '"' || c == '\'')
.to_string();
cleaned
}
Err(e) => {
log::warn!("synthesize_merge generate failed: {:?}", e);
return HttpResponse::ServiceUnavailable().json(serde_json::json!({
"error": "LLM unavailable; the merge picker should fall back to skip-synthesis."
}));
}
};
HttpResponse::Ok().json(SynthesizeMergeResponse {
proposed_description: proposed,
model_used,
})
}
async fn list_facts<D: KnowledgeDao + 'static>(
req: HttpRequest,
claims: Claims,