Feature/tts integration #103
@@ -88,6 +88,7 @@ AGENTIC_CHAT_MAX_ITERATIONS=6
|
|||||||
# LLAMA_SWAP_TTS_MODEL=chatterbox # TTS model id in config.yaml
|
# LLAMA_SWAP_TTS_MODEL=chatterbox # TTS model id in config.yaml
|
||||||
# LLAMA_SWAP_TTS_VOICE=m # default voice when a request omits one
|
# LLAMA_SWAP_TTS_VOICE=m # default voice when a request omits one
|
||||||
# LLAMA_SWAP_TTS_REF_SECONDS=30 # max voice-clone reference clip length (s)
|
# LLAMA_SWAP_TTS_REF_SECONDS=30 # max voice-clone reference clip length (s)
|
||||||
|
# LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600 # per-request synthesis timeout for long chunked text (s)
|
||||||
|
|
||||||
# ── AI Insights — sibling services (optional) ───────────────────────────
|
# ── AI Insights — sibling services (optional) ───────────────────────────
|
||||||
# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
|
# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
|
||||||
|
|||||||
@@ -666,6 +666,8 @@ LLAMA_SWAP_TTS_MODEL=chatterbox # TTS model id in config.yaml (de
|
|||||||
LLAMA_SWAP_TTS_VOICE=m # Default voice when /tts/speech omits one (optional)
|
LLAMA_SWAP_TTS_VOICE=m # Default voice when /tts/speech omits one (optional)
|
||||||
LLAMA_SWAP_TTS_REF_SECONDS=30 # Max voice-clone reference clip length, seconds
|
LLAMA_SWAP_TTS_REF_SECONDS=30 # Max voice-clone reference clip length, seconds
|
||||||
# (Chatterbox is zero-shot; ~10-20s clean ref is ideal)
|
# (Chatterbox is zero-shot; ~10-20s clean ref is ideal)
|
||||||
|
LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600 # Per-request synthesis timeout, seconds (long chunked insights take
|
||||||
|
# minutes); overrides the shared client timeout for /tts/speech
|
||||||
|
|
||||||
# Insight Chat Continuation
|
# Insight Chat Continuation
|
||||||
AGENTIC_CHAT_MAX_ITERATIONS=6 # Cap on tool-calling iterations per chat turn (default 6)
|
AGENTIC_CHAT_MAX_ITERATIONS=6 # Cap on tool-calling iterations per chat turn (default 6)
|
||||||
|
|||||||
@@ -169,6 +169,10 @@ Env:
|
|||||||
[default: `30`]. Reference audio is ffmpeg-normalized to mono 24 kHz WAV (so any
|
[default: `30`]. Reference audio is ffmpeg-normalized to mono 24 kHz WAV (so any
|
||||||
source format works); Chatterbox is zero-shot, so a clean ~10–20s sample is the
|
source format works); Chatterbox is zero-shot, so a clean ~10–20s sample is the
|
||||||
sweet spot — more rarely helps.
|
sweet spot — more rarely helps.
|
||||||
|
- `LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS` - per-request synthesis timeout in
|
||||||
|
seconds [default: `600`]. Long insights are chunked + synthesized server-side
|
||||||
|
and can take minutes; this is a separate setting that, for `/tts/speech`, overrides
|
||||||
|
the shared `LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS`.
|
||||||
|
|
||||||
#### Fallback Behavior
|
#### Fallback Behavior
|
||||||
- Primary server is tried first with 5-second connection timeout
|
- Primary server is tried first with 5-second connection timeout
|
||||||
|
|||||||
@@ -170,9 +170,19 @@ impl LlamaCppClient {
|
|||||||
body["temperature"] = json!(x);
|
body["temperature"] = json!(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TTS gets its own (longer) timeout: synthesizing a long, internally
|
||||||
|
// chunked insight can take minutes, well past the shared chat/embedding
|
||||||
|
// client timeout. Per-request `.timeout()` overrides the client default.
|
||||||
|
let tts_timeout = std::env::var("LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS")
|
||||||
|
.ok()
|
||||||
|
.and_then(|v| v.parse::<u64>().ok())
|
||||||
|
.filter(|n| *n > 0)
|
||||||
|
.unwrap_or(600);
|
||||||
|
|
||||||
let resp = self
|
let resp = self
|
||||||
.client
|
.client
|
||||||
.post(&url)
|
.post(&url)
|
||||||
|
.timeout(Duration::from_secs(tts_timeout))
|
||||||
.json(&body)
|
.json(&body)
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
|
|||||||
Reference in New Issue
Block a user