knowledge: synthesize-merge endpoint for LLM-curated descriptions

New POST /knowledge/entities/synthesize-merge { source_id,
target_id } that calls the local Ollama with both entities' names
+ descriptions and returns a synthesized merged-description draft.
Read-only on the database — the curation UI uses the response as
the editable seed in the merge picker; the actual merge still
requires a follow-up PATCH-target-description + POST /merge.

The handler drops the KnowledgeDao lock before the LLM call so
other knowledge reads aren't blocked while generation runs
(typically seconds). Failure mode is 503 with an explicit hint
that the UI should fall back to skip-synthesis — keeps the merge
action working when the model is offline.

Output is lightly cleaned (leading "Merged description:" /
surrounding quotes stripped) since small models reach for those
patterns even with explicit "no preamble" guidance. Heavier
parsing isn't worth it — the curator edits anyway.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-11 16:37:26 -04:00
parent fd4dd89bbb
commit afac02cade

View File

@@ -11,6 +11,7 @@ use crate::database::{
RecentActivity,
};
use crate::personas::PersonaDaoData;
use crate::state::AppState;
/// Resolve the `X-Persona-Id` header into a `PersonaFilter`. Missing
/// header → `'default'`. If the persona has `include_all_memories=true`,
@@ -248,6 +249,18 @@ pub struct SupersedeRequest {
pub by_fact_id: i32,
}
#[derive(Deserialize)]
pub struct SynthesizeMergeRequest {
pub source_id: i32,
pub target_id: i32,
}
#[derive(serde::Serialize)]
pub struct SynthesizeMergeResponse {
pub proposed_description: String,
pub model_used: String,
}
#[derive(Deserialize)]
pub struct FactCreateRequest {
pub subject_entity_id: i32,
@@ -300,6 +313,10 @@ where
web::scope("/knowledge")
.service(web::resource("/entities").route(web::get().to(list_entities::<D>)))
.service(web::resource("/entities/merge").route(web::post().to(merge_entities::<D>)))
.service(
web::resource("/entities/synthesize-merge")
.route(web::post().to(synthesize_merge::<D>)),
)
.service(
web::resource("/entities/{id}")
.route(web::get().to(get_entity::<D>))
@@ -634,6 +651,110 @@ async fn merge_entities<D: KnowledgeDao + 'static>(
}
}
/// Preview a merged-description before the actual merge fires. Calls
/// the local Ollama with both entities' names + descriptions and
/// returns a synthesized rewrite that combines them. The curator
/// previews, edits, and either accepts (PATCH target's description
/// then POST /merge) or skips (just /merge as-is).
///
/// Deliberately doesn't touch the database — read-only on entities,
/// no LLM call gets to write anything. If the model is unavailable
/// the handler returns 503 so the UI can degrade gracefully (skip
/// the preview, fall back to the existing merge action).
async fn synthesize_merge<D: KnowledgeDao + 'static>(
_claims: Claims,
body: web::Json<SynthesizeMergeRequest>,
dao: web::Data<Mutex<D>>,
app_state: web::Data<AppState>,
) -> impl Responder {
if body.source_id == body.target_id {
return HttpResponse::BadRequest()
.json(serde_json::json!({"error": "source_id and target_id must differ"}));
}
let cx = opentelemetry::Context::current();
let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
let source = match dao.get_entity_by_id(&cx, body.source_id) {
Ok(Some(e)) => e,
Ok(None) => {
return HttpResponse::BadRequest()
.json(serde_json::json!({"error": "source entity not found"}));
}
Err(e) => {
log::error!("synthesize_merge source lookup: {:?}", e);
return HttpResponse::InternalServerError()
.json(serde_json::json!({"error": "Database error"}));
}
};
let target = match dao.get_entity_by_id(&cx, body.target_id) {
Ok(Some(e)) => e,
Ok(None) => {
return HttpResponse::BadRequest()
.json(serde_json::json!({"error": "target entity not found"}));
}
Err(e) => {
log::error!("synthesize_merge target lookup: {:?}", e);
return HttpResponse::InternalServerError()
.json(serde_json::json!({"error": "Database error"}));
}
};
// Drop the DAO lock before the LLM call — the generate request
// is the slow part (seconds) and we don't want to block other
// knowledge reads while it runs.
drop(dao);
let source_desc = if source.description.trim().is_empty() {
"(none)".to_string()
} else {
source.description.clone()
};
let target_desc = if target.description.trim().is_empty() {
"(none)".to_string()
} else {
target.description.clone()
};
let system = "You are condensing two stored entity descriptions into one. The two \
entities refer to the same real-world thing and are about to be merged. Write a \
single neutral third-person description (1-2 sentences, max 300 chars) that \
preserves any concrete facts in either source. Do not invent details. Do not \
editorialize. Return ONLY the merged description — no preamble, no quotes.";
let prompt = format!(
"Entity A: {} [{}]\nDescription: {}\n\nEntity B: {} [{}]\nDescription: {}\n\nMerged description:",
source.name, source.entity_type, source_desc,
target.name, target.entity_type, target_desc,
);
let ollama = app_state.ollama.clone();
let model_used = ollama.primary_model.clone();
let proposed = match ollama.generate(&prompt, Some(system)).await {
Ok(out) => {
// Some models open with their own framing — strip a
// leading quote or "Merged:" lead-in defensively.
let cleaned = out
.trim()
.trim_start_matches("Merged description:")
.trim()
.trim_matches(|c| c == '"' || c == '\'')
.to_string();
cleaned
}
Err(e) => {
log::warn!("synthesize_merge generate failed: {:?}", e);
return HttpResponse::ServiceUnavailable().json(serde_json::json!({
"error": "LLM unavailable; the merge picker should fall back to skip-synthesis."
}));
}
};
HttpResponse::Ok().json(SynthesizeMergeResponse {
proposed_description: proposed,
model_used,
})
}
async fn list_facts<D: KnowledgeDao + 'static>(
req: HttpRequest,
claims: Claims,