From f20046650884333d9b8a04ecc54bcec6211b199c Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Mon, 11 May 2026 16:49:02 -0400 Subject: [PATCH] knowledge: forbid markdown in synthesized merge descriptions System prompt now explicitly enumerates the markdown forms the model shouldn't emit (bold, italics, headings, bullets, lists, code fences) on top of the existing "no preamble, no quotes" constraints. Some local models default to markdown-shaped output for descriptions and the curation UI is plain-text, which would render the asterisks and hashes literally. The output cleaning step picks up a parallel sweep: strip code fences, leading bullets / headings, wrapping quotes, and naive inline emphasis markers (** and __). Rare enough that the plain-replace is fine; not trying to parse markdown. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/knowledge.rs | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/src/knowledge.rs b/src/knowledge.rs index 3baa900..05c50a0 100644 --- a/src/knowledge.rs +++ b/src/knowledge.rs @@ -720,7 +720,9 @@ async fn synthesize_merge( entities refer to the same real-world thing and are about to be merged. Write a \ single neutral third-person description (1-2 sentences, max 300 chars) that \ preserves any concrete facts in either source. Do not invent details. Do not \ - editorialize. Return ONLY the merged description — no preamble, no quotes."; + editorialize. Plain prose only — no markdown, no bold, no italics, no headings, \ + no bullets, no lists, no code fences. 
Return ONLY the merged description — no \ + preamble, no labels, no quotes."; let prompt = format!( "Entity A: {} [{}]\nDescription: {}\n\nEntity B: {} [{}]\nDescription: {}\n\nMerged description:", source.name, source.entity_type, source_desc, @@ -731,15 +733,46 @@ async fn synthesize_merge( let model_used = ollama.primary_model.clone(); let proposed = match ollama.generate(&prompt, Some(system)).await { Ok(out) => { - // Some models open with their own framing — strip a - // leading quote or "Merged:" lead-in defensively. - let cleaned = out - .trim() + // Strip the framing models reach for even with explicit + // "no preamble" guidance: leading "Merged description:" + // labels, wrapping quotes, ``` code fences, leading + // bullets / hash headings. Belt-and-braces against the + // system prompt's plain-text directive. + let mut s = out.trim().to_string(); + s = s .trim_start_matches("Merged description:") + .trim_start_matches("Merged Description:") .trim() + .to_string(); + // Code fences (``` or ```text) + s = s + .trim_start_matches("```text") + .trim_start_matches("```markdown") + .trim_start_matches("```") + .trim_end_matches("```") + .trim() + .to_string(); + // Markdown headings / bullets at the very start + while let Some(stripped) = s + .strip_prefix('#') + .or_else(|| s.strip_prefix('*')) + .or_else(|| s.strip_prefix('-')) + .or_else(|| s.strip_prefix('>')) + { + s = stripped.trim_start().to_string(); + } + // Wrapping quotes + s = s .trim_matches(|c| c == '"' || c == '\'') .to_string(); - cleaned + // Inline emphasis: drop paired `**` / `__` markers (single + // `*` / `_` are left untouched) without trying to parse markdown — just + // remove the punctuation. Rare enough that this naive + // replace is fine. + s = s + .replace("**", "") + .replace("__", ""); + s } Err(e) => { log::warn!("synthesize_merge generate failed: {:?}", e);