feature/knowledge-curation #91
121
src/knowledge.rs
121
src/knowledge.rs
@@ -11,6 +11,7 @@ use crate::database::{
|
||||
RecentActivity,
|
||||
};
|
||||
use crate::personas::PersonaDaoData;
|
||||
use crate::state::AppState;
|
||||
|
||||
/// Resolve the `X-Persona-Id` header into a `PersonaFilter`. Missing
|
||||
/// header → `'default'`. If the persona has `include_all_memories=true`,
|
||||
@@ -248,6 +249,18 @@ pub struct SupersedeRequest {
|
||||
pub by_fact_id: i32,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct SynthesizeMergeRequest {
|
||||
pub source_id: i32,
|
||||
pub target_id: i32,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
pub struct SynthesizeMergeResponse {
|
||||
pub proposed_description: String,
|
||||
pub model_used: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct FactCreateRequest {
|
||||
pub subject_entity_id: i32,
|
||||
@@ -300,6 +313,10 @@ where
|
||||
web::scope("/knowledge")
|
||||
.service(web::resource("/entities").route(web::get().to(list_entities::<D>)))
|
||||
.service(web::resource("/entities/merge").route(web::post().to(merge_entities::<D>)))
|
||||
.service(
|
||||
web::resource("/entities/synthesize-merge")
|
||||
.route(web::post().to(synthesize_merge::<D>)),
|
||||
)
|
||||
.service(
|
||||
web::resource("/entities/{id}")
|
||||
.route(web::get().to(get_entity::<D>))
|
||||
@@ -634,6 +651,110 @@ async fn merge_entities<D: KnowledgeDao + 'static>(
|
||||
}
|
||||
}
|
||||
|
||||
/// Preview a merged-description before the actual merge fires. Calls
|
||||
/// the local Ollama with both entities' names + descriptions and
|
||||
/// returns a synthesized rewrite that combines them. The curator
|
||||
/// previews, edits, and either accepts (PATCH target's description
|
||||
/// then POST /merge) or skips (just /merge as-is).
|
||||
///
|
||||
/// Deliberately doesn't touch the database — read-only on entities,
|
||||
/// no LLM call gets to write anything. If the model is unavailable
|
||||
/// the handler returns 503 so the UI can degrade gracefully (skip
|
||||
/// the preview, fall back to the existing merge action).
|
||||
async fn synthesize_merge<D: KnowledgeDao + 'static>(
|
||||
_claims: Claims,
|
||||
body: web::Json<SynthesizeMergeRequest>,
|
||||
dao: web::Data<Mutex<D>>,
|
||||
app_state: web::Data<AppState>,
|
||||
) -> impl Responder {
|
||||
if body.source_id == body.target_id {
|
||||
return HttpResponse::BadRequest()
|
||||
.json(serde_json::json!({"error": "source_id and target_id must differ"}));
|
||||
}
|
||||
|
||||
let cx = opentelemetry::Context::current();
|
||||
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
|
||||
|
||||
let source = match dao.get_entity_by_id(&cx, body.source_id) {
|
||||
Ok(Some(e)) => e,
|
||||
Ok(None) => {
|
||||
return HttpResponse::BadRequest()
|
||||
.json(serde_json::json!({"error": "source entity not found"}));
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("synthesize_merge source lookup: {:?}", e);
|
||||
return HttpResponse::InternalServerError()
|
||||
.json(serde_json::json!({"error": "Database error"}));
|
||||
}
|
||||
};
|
||||
let target = match dao.get_entity_by_id(&cx, body.target_id) {
|
||||
Ok(Some(e)) => e,
|
||||
Ok(None) => {
|
||||
return HttpResponse::BadRequest()
|
||||
.json(serde_json::json!({"error": "target entity not found"}));
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("synthesize_merge target lookup: {:?}", e);
|
||||
return HttpResponse::InternalServerError()
|
||||
.json(serde_json::json!({"error": "Database error"}));
|
||||
}
|
||||
};
|
||||
|
||||
// Drop the DAO lock before the LLM call — the generate request
|
||||
// is the slow part (seconds) and we don't want to block other
|
||||
// knowledge reads while it runs.
|
||||
drop(dao);
|
||||
|
||||
let source_desc = if source.description.trim().is_empty() {
|
||||
"(none)".to_string()
|
||||
} else {
|
||||
source.description.clone()
|
||||
};
|
||||
let target_desc = if target.description.trim().is_empty() {
|
||||
"(none)".to_string()
|
||||
} else {
|
||||
target.description.clone()
|
||||
};
|
||||
|
||||
let system = "You are condensing two stored entity descriptions into one. The two \
|
||||
entities refer to the same real-world thing and are about to be merged. Write a \
|
||||
single neutral third-person description (1-2 sentences, max 300 chars) that \
|
||||
preserves any concrete facts in either source. Do not invent details. Do not \
|
||||
editorialize. Return ONLY the merged description — no preamble, no quotes.";
|
||||
let prompt = format!(
|
||||
"Entity A: {} [{}]\nDescription: {}\n\nEntity B: {} [{}]\nDescription: {}\n\nMerged description:",
|
||||
source.name, source.entity_type, source_desc,
|
||||
target.name, target.entity_type, target_desc,
|
||||
);
|
||||
|
||||
let ollama = app_state.ollama.clone();
|
||||
let model_used = ollama.primary_model.clone();
|
||||
let proposed = match ollama.generate(&prompt, Some(system)).await {
|
||||
Ok(out) => {
|
||||
// Some models open with their own framing — strip a
|
||||
// leading quote or "Merged:" lead-in defensively.
|
||||
let cleaned = out
|
||||
.trim()
|
||||
.trim_start_matches("Merged description:")
|
||||
.trim()
|
||||
.trim_matches(|c| c == '"' || c == '\'')
|
||||
.to_string();
|
||||
cleaned
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("synthesize_merge generate failed: {:?}", e);
|
||||
return HttpResponse::ServiceUnavailable().json(serde_json::json!({
|
||||
"error": "LLM unavailable; the merge picker should fall back to skip-synthesis."
|
||||
}));
|
||||
}
|
||||
};
|
||||
|
||||
HttpResponse::Ok().json(SynthesizeMergeResponse {
|
||||
proposed_description: proposed,
|
||||
model_used,
|
||||
})
|
||||
}
|
||||
|
||||
async fn list_facts<D: KnowledgeDao + 'static>(
|
||||
req: HttpRequest,
|
||||
claims: Claims,
|
||||
|
||||
Reference in New Issue
Block a user