Collapse blank lines to a single break in TTS text cleaning
Chatterbox inserts a long pause — sometimes ~20s of silence — for each
blank line it sees, and insight text is markdown full of paragraph
breaks. clean_for_tts previously preserved paragraph structure
(\n{3,} -> \n\n), so every paragraph boundary still reached the model
as a double newline. Now any run of 2+ newlines, including
whitespace-only blank lines, collapses to a single newline so the
worst pause a break can cause is a normal line-break pause.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+18
-3
@@ -85,7 +85,10 @@ static MD_LIST: LazyLock<Regex> =
|
||||
// Runs of one or more markdown emphasis/code marker characters: `*`, `_`, backtick, `~`.
static MD_EMPHASIS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[*_`~]+").unwrap());
|
||||
// An `http://` or `https://` URL, extending up to the next whitespace character.
static URL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"https?://\S+").unwrap());
|
||||
// Two or more consecutive spaces/tabs; newlines are not part of the class.
static MULTISPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[ \t]{2,}").unwrap());
|
||||
static MULTINEWLINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\n{3,}").unwrap());
|
||||
// Any run of 2+ newlines (incl. whitespace-only blank lines) collapses to ONE
|
||||
// newline: Chatterbox inserts a long pause (sometimes ~20s of silence) per
|
||||
// blank line, so paragraph breaks must reach it as a single line break at most.
|
||||
static MULTINEWLINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\n(?:[ \t]*\n)+").unwrap());
|
||||
|
||||
/// True for emoji / pictographic symbols, which most TTS models either skip or
|
||||
/// mispronounce. Covers the main emoji blocks plus dingbats, misc-technical,
|
||||
@@ -118,7 +121,7 @@ fn clean_for_tts(input: &str) -> String {
|
||||
let s = URL_RE.replace_all(&s, " ");
|
||||
let s: String = s.chars().filter(|c| !is_emoji_like(*c)).collect();
|
||||
let s = MULTISPACE.replace_all(&s, " ");
|
||||
let s = MULTINEWLINE.replace_all(&s, "\n\n");
|
||||
let s = MULTINEWLINE.replace_all(&s, "\n");
|
||||
s.trim().to_string()
|
||||
}
|
||||
|
||||
@@ -537,7 +540,7 @@ mod tests {
|
||||
clean_for_tts("**Bold** and _italic_ and `code`"),
|
||||
"Bold and italic and code"
|
||||
);
|
||||
assert_eq!(clean_for_tts("# Title\n\nbody"), "Title\n\nbody");
|
||||
assert_eq!(clean_for_tts("# Title\n\nbody"), "Title\nbody");
|
||||
assert_eq!(
|
||||
clean_for_tts("See [docs](http://x.com) now"),
|
||||
"See docs now"
|
||||
@@ -556,6 +559,18 @@ mod tests {
|
||||
assert_eq!(clean_for_tts("family 👨👩👧 photo"), "family photo");
|
||||
}
|
||||
|
||||
#[test]
fn clean_for_tts_collapses_blank_lines_to_single_break() {
    // Each entry is (raw input, expected cleaned output). Chatterbox pauses
    // (sometimes ~20s) per blank line, so every paragraph break must come
    // out as exactly one newline.
    let cases = [
        // A plain paragraph break.
        ("para one\n\npara two", "para one\npara two"),
        // A longer run of consecutive newlines.
        ("a\n\n\n\nb", "a\nb"),
        // Whitespace-only "blank" lines collapse too.
        ("a\n \t \nb", "a\nb"),
        // A single newline is left alone.
        ("a\nb", "a\nb"),
    ];

    for (raw, expected) in cases {
        assert_eq!(clean_for_tts(raw), expected);
    }
}
|
||||
|
||||
#[test]
|
||||
fn clean_for_tts_preserves_bracket_tags() {
|
||||
// Non-turbo Chatterbox ignores these; a future Turbo uses them as
|
||||
|
||||
Reference in New Issue
Block a user