knowledge: forbid markdown in synthesized merge descriptions
System prompt now explicitly enumerates the markdown forms the model shouldn't emit (bold, italics, headings, bullets, lists, code fences) on top of the existing "no preamble, no quotes" constraints. Some local models default to markdown-shaped output for descriptions and the curation UI is plain-text, which would render the asterisks and hashes literally. The output cleaning step picks up a parallel sweep: strip code fences, leading bullets / headings, wrapping quotes, and naive inline emphasis markers (** and __). Rare enough that the plain-replace is fine; not trying to parse markdown. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -720,7 +720,9 @@ async fn synthesize_merge<D: KnowledgeDao + 'static>(
|
||||
entities refer to the same real-world thing and are about to be merged. Write a \
|
||||
single neutral third-person description (1-2 sentences, max 300 chars) that \
|
||||
preserves any concrete facts in either source. Do not invent details. Do not \
|
||||
editorialize. Return ONLY the merged description — no preamble, no quotes.";
|
||||
editorialize. Plain prose only — no markdown, no bold, no italics, no headings, \
|
||||
no bullets, no lists, no code fences. Return ONLY the merged description — no \
|
||||
preamble, no labels, no quotes.";
|
||||
let prompt = format!(
|
||||
"Entity A: {} [{}]\nDescription: {}\n\nEntity B: {} [{}]\nDescription: {}\n\nMerged description:",
|
||||
source.name, source.entity_type, source_desc,
|
||||
@@ -731,15 +733,46 @@ async fn synthesize_merge<D: KnowledgeDao + 'static>(
|
||||
let model_used = ollama.primary_model.clone();
|
||||
let proposed = match ollama.generate(&prompt, Some(system)).await {
|
||||
Ok(out) => {
|
||||
// Some models open with their own framing — strip a
|
||||
// leading quote or "Merged:" lead-in defensively.
|
||||
let cleaned = out
|
||||
.trim()
|
||||
// Strip the framing models reach for even with explicit
|
||||
// "no preamble" guidance: leading "Merged description:"
|
||||
// labels, wrapping quotes, ``` code fences, leading
|
||||
// bullets / hash headings. Belt-and-braces against the
|
||||
// system prompt's plain-text directive.
|
||||
let mut s = out.trim().to_string();
|
||||
s = s
|
||||
.trim_start_matches("Merged description:")
|
||||
.trim_start_matches("Merged Description:")
|
||||
.trim()
|
||||
.to_string();
|
||||
// Code fences (```, ```text, or ```markdown)
|
||||
s = s
|
||||
.trim_start_matches("```text")
|
||||
.trim_start_matches("```markdown")
|
||||
.trim_start_matches("```")
|
||||
.trim_end_matches("```")
|
||||
.trim()
|
||||
.to_string();
|
||||
// Markdown headings / bullets / blockquote markers at the very start
|
||||
while let Some(stripped) = s
|
||||
.strip_prefix('#')
|
||||
.or_else(|| s.strip_prefix('*'))
|
||||
.or_else(|| s.strip_prefix('-'))
|
||||
.or_else(|| s.strip_prefix('>'))
|
||||
{
|
||||
s = stripped.trim_start().to_string();
|
||||
}
|
||||
// Wrapping quotes
|
||||
s = s
|
||||
.trim_matches(|c| c == '"' || c == '\'')
|
||||
.to_string();
|
||||
cleaned
|
||||
// Inline emphasis: drop `**` / `__` markers (single `*` / `_`
|
||||
// are left alone) without trying to parse markdown — just
|
||||
// remove the punctuation. Rare enough that this naive
|
||||
// replace is fine.
|
||||
s = s
|
||||
.replace("**", "")
|
||||
.replace("__", "");
|
||||
s
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("synthesize_merge generate failed: {:?}", e);
|
||||
|
||||
Reference in New Issue
Block a user