Collapse blank lines to a single break in TTS text cleaning

Chatterbox inserts a long pause — sometimes ~20s of silence — for each
blank line it sees, and insight text is markdown full of paragraph
breaks. clean_for_tts previously preserved paragraph structure
(\n{3,} -> \n\n), so every paragraph boundary still reached the model
as a double newline. Now any run of 2+ newlines, including
whitespace-only blank lines, collapses to a single newline so the
worst pause a break can cause is a normal line-break pause.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-06-04 09:12:43 -04:00
parent dec6f21af9
commit 412da2ce8e
+18 -3
View File
@@ -85,7 +85,10 @@ static MD_LIST: LazyLock<Regex> =
static MD_EMPHASIS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[*_`~]+").unwrap()); static MD_EMPHASIS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[*_`~]+").unwrap());
static URL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"https?://\S+").unwrap()); static URL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"https?://\S+").unwrap());
static MULTISPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[ \t]{2,}").unwrap()); static MULTISPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[ \t]{2,}").unwrap());
static MULTINEWLINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\n{3,}").unwrap()); // Any run of 2+ newlines (incl. whitespace-only blank lines) collapses to ONE
// newline: Chatterbox inserts a long pause (sometimes ~20s of silence) per
// blank line, so paragraph breaks must reach it as a single line break at most.
// Matches a newline followed by one or more blank lines. A "blank" line may
// contain only spaces, tabs, or a carriage return, so blank lines in CRLF text
// (`\r\n\r\n`) are matched the same way as LF-only ones.
static MULTINEWLINE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\n(?:[ \t\r]*\n)+").unwrap());
/// True for emoji / pictographic symbols, which most TTS models either skip or /// True for emoji / pictographic symbols, which most TTS models either skip or
/// mispronounce. Covers the main emoji blocks plus dingbats, misc-technical, /// mispronounce. Covers the main emoji blocks plus dingbats, misc-technical,
@@ -118,7 +121,7 @@ fn clean_for_tts(input: &str) -> String {
let s = URL_RE.replace_all(&s, " "); let s = URL_RE.replace_all(&s, " ");
let s: String = s.chars().filter(|c| !is_emoji_like(*c)).collect(); let s: String = s.chars().filter(|c| !is_emoji_like(*c)).collect();
let s = MULTISPACE.replace_all(&s, " "); let s = MULTISPACE.replace_all(&s, " ");
let s = MULTINEWLINE.replace_all(&s, "\n\n"); let s = MULTINEWLINE.replace_all(&s, "\n");
s.trim().to_string() s.trim().to_string()
} }
@@ -537,7 +540,7 @@ mod tests {
clean_for_tts("**Bold** and _italic_ and `code`"), clean_for_tts("**Bold** and _italic_ and `code`"),
"Bold and italic and code" "Bold and italic and code"
); );
assert_eq!(clean_for_tts("# Title\n\nbody"), "Title\n\nbody"); assert_eq!(clean_for_tts("# Title\n\nbody"), "Title\nbody");
assert_eq!( assert_eq!(
clean_for_tts("See [docs](http://x.com) now"), clean_for_tts("See [docs](http://x.com) now"),
"See docs now" "See docs now"
@@ -556,6 +559,18 @@ mod tests {
assert_eq!(clean_for_tts("family 👨‍👩‍👧 photo"), "family photo"); assert_eq!(clean_for_tts("family 👨‍👩‍👧 photo"), "family photo");
} }
#[test]
fn clean_for_tts_collapses_blank_lines_to_single_break() {
    // Each blank line can cost a long Chatterbox pause (sometimes ~20s), so
    // every paragraph break has to come out as exactly one newline.
    let cases = [
        // Ordinary paragraph break.
        ("para one\n\npara two", "para one\npara two"),
        // Longer run of consecutive newlines.
        ("a\n\n\n\nb", "a\nb"),
        // "Blank" line that holds only spaces and tabs.
        ("a\n \t \nb", "a\nb"),
        // A lone newline passes through unchanged.
        ("a\nb", "a\nb"),
    ];

    for (input, expected) in cases {
        assert_eq!(clean_for_tts(input), expected);
    }
}
#[test] #[test]
fn clean_for_tts_preserves_bracket_tags() { fn clean_for_tts_preserves_bracket_tags() {
// Non-turbo Chatterbox ignores these; a future Turbo uses them as // Non-turbo Chatterbox ignores these; a future Turbo uses them as