feature/knowledge-curation #91
121
src/knowledge.rs
121
src/knowledge.rs
@@ -11,6 +11,7 @@ use crate::database::{
|
|||||||
RecentActivity,
|
RecentActivity,
|
||||||
};
|
};
|
||||||
use crate::personas::PersonaDaoData;
|
use crate::personas::PersonaDaoData;
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
/// Resolve the `X-Persona-Id` header into a `PersonaFilter`. Missing
|
/// Resolve the `X-Persona-Id` header into a `PersonaFilter`. Missing
|
||||||
/// header → `'default'`. If the persona has `include_all_memories=true`,
|
/// header → `'default'`. If the persona has `include_all_memories=true`,
|
||||||
@@ -248,6 +249,18 @@ pub struct SupersedeRequest {
|
|||||||
pub by_fact_id: i32,
|
pub by_fact_id: i32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct SynthesizeMergeRequest {
|
||||||
|
pub source_id: i32,
|
||||||
|
pub target_id: i32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Serialize)]
|
||||||
|
pub struct SynthesizeMergeResponse {
|
||||||
|
pub proposed_description: String,
|
||||||
|
pub model_used: String,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
pub struct FactCreateRequest {
|
pub struct FactCreateRequest {
|
||||||
pub subject_entity_id: i32,
|
pub subject_entity_id: i32,
|
||||||
@@ -300,6 +313,10 @@ where
|
|||||||
web::scope("/knowledge")
|
web::scope("/knowledge")
|
||||||
.service(web::resource("/entities").route(web::get().to(list_entities::<D>)))
|
.service(web::resource("/entities").route(web::get().to(list_entities::<D>)))
|
||||||
.service(web::resource("/entities/merge").route(web::post().to(merge_entities::<D>)))
|
.service(web::resource("/entities/merge").route(web::post().to(merge_entities::<D>)))
|
||||||
|
.service(
|
||||||
|
web::resource("/entities/synthesize-merge")
|
||||||
|
.route(web::post().to(synthesize_merge::<D>)),
|
||||||
|
)
|
||||||
.service(
|
.service(
|
||||||
web::resource("/entities/{id}")
|
web::resource("/entities/{id}")
|
||||||
.route(web::get().to(get_entity::<D>))
|
.route(web::get().to(get_entity::<D>))
|
||||||
@@ -634,6 +651,110 @@ async fn merge_entities<D: KnowledgeDao + 'static>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Preview a merged-description before the actual merge fires. Calls
|
||||||
|
/// the local Ollama with both entities' names + descriptions and
|
||||||
|
/// returns a synthesized rewrite that combines them. The curator
|
||||||
|
/// previews, edits, and either accepts (PATCH target's description
|
||||||
|
/// then POST /merge) or skips (just /merge as-is).
|
||||||
|
///
|
||||||
|
/// Deliberately doesn't touch the database — read-only on entities,
|
||||||
|
/// no LLM call gets to write anything. If the model is unavailable
|
||||||
|
/// the handler returns 503 so the UI can degrade gracefully (skip
|
||||||
|
/// the preview, fall back to the existing merge action).
|
||||||
|
async fn synthesize_merge<D: KnowledgeDao + 'static>(
|
||||||
|
_claims: Claims,
|
||||||
|
body: web::Json<SynthesizeMergeRequest>,
|
||||||
|
dao: web::Data<Mutex<D>>,
|
||||||
|
app_state: web::Data<AppState>,
|
||||||
|
) -> impl Responder {
|
||||||
|
if body.source_id == body.target_id {
|
||||||
|
return HttpResponse::BadRequest()
|
||||||
|
.json(serde_json::json!({"error": "source_id and target_id must differ"}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let cx = opentelemetry::Context::current();
|
||||||
|
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
|
||||||
|
|
||||||
|
let source = match dao.get_entity_by_id(&cx, body.source_id) {
|
||||||
|
Ok(Some(e)) => e,
|
||||||
|
Ok(None) => {
|
||||||
|
return HttpResponse::BadRequest()
|
||||||
|
.json(serde_json::json!({"error": "source entity not found"}));
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("synthesize_merge source lookup: {:?}", e);
|
||||||
|
return HttpResponse::InternalServerError()
|
||||||
|
.json(serde_json::json!({"error": "Database error"}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let target = match dao.get_entity_by_id(&cx, body.target_id) {
|
||||||
|
Ok(Some(e)) => e,
|
||||||
|
Ok(None) => {
|
||||||
|
return HttpResponse::BadRequest()
|
||||||
|
.json(serde_json::json!({"error": "target entity not found"}));
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::error!("synthesize_merge target lookup: {:?}", e);
|
||||||
|
return HttpResponse::InternalServerError()
|
||||||
|
.json(serde_json::json!({"error": "Database error"}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Drop the DAO lock before the LLM call — the generate request
|
||||||
|
// is the slow part (seconds) and we don't want to block other
|
||||||
|
// knowledge reads while it runs.
|
||||||
|
drop(dao);
|
||||||
|
|
||||||
|
let source_desc = if source.description.trim().is_empty() {
|
||||||
|
"(none)".to_string()
|
||||||
|
} else {
|
||||||
|
source.description.clone()
|
||||||
|
};
|
||||||
|
let target_desc = if target.description.trim().is_empty() {
|
||||||
|
"(none)".to_string()
|
||||||
|
} else {
|
||||||
|
target.description.clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
let system = "You are condensing two stored entity descriptions into one. The two \
|
||||||
|
entities refer to the same real-world thing and are about to be merged. Write a \
|
||||||
|
single neutral third-person description (1-2 sentences, max 300 chars) that \
|
||||||
|
preserves any concrete facts in either source. Do not invent details. Do not \
|
||||||
|
editorialize. Return ONLY the merged description — no preamble, no quotes.";
|
||||||
|
let prompt = format!(
|
||||||
|
"Entity A: {} [{}]\nDescription: {}\n\nEntity B: {} [{}]\nDescription: {}\n\nMerged description:",
|
||||||
|
source.name, source.entity_type, source_desc,
|
||||||
|
target.name, target.entity_type, target_desc,
|
||||||
|
);
|
||||||
|
|
||||||
|
let ollama = app_state.ollama.clone();
|
||||||
|
let model_used = ollama.primary_model.clone();
|
||||||
|
let proposed = match ollama.generate(&prompt, Some(system)).await {
|
||||||
|
Ok(out) => {
|
||||||
|
// Some models open with their own framing — strip a
|
||||||
|
// leading quote or "Merged:" lead-in defensively.
|
||||||
|
let cleaned = out
|
||||||
|
.trim()
|
||||||
|
.trim_start_matches("Merged description:")
|
||||||
|
.trim()
|
||||||
|
.trim_matches(|c| c == '"' || c == '\'')
|
||||||
|
.to_string();
|
||||||
|
cleaned
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
log::warn!("synthesize_merge generate failed: {:?}", e);
|
||||||
|
return HttpResponse::ServiceUnavailable().json(serde_json::json!({
|
||||||
|
"error": "LLM unavailable; the merge picker should fall back to skip-synthesis."
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
HttpResponse::Ok().json(SynthesizeMergeResponse {
|
||||||
|
proposed_description: proposed,
|
||||||
|
model_used,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
async fn list_facts<D: KnowledgeDao + 'static>(
|
async fn list_facts<D: KnowledgeDao + 'static>(
|
||||||
req: HttpRequest,
|
req: HttpRequest,
|
||||||
claims: Claims,
|
claims: Claims,
|
||||||
|
|||||||
Reference in New Issue
Block a user