diff --git a/.env.example b/.env.example
index 2b6cff0..a45fdd5 100644
--- a/.env.example
+++ b/.env.example
@@ -88,6 +88,7 @@ AGENTIC_CHAT_MAX_ITERATIONS=6
 # LLAMA_SWAP_TTS_MODEL=chatterbox        # TTS model id in config.yaml
 # LLAMA_SWAP_TTS_VOICE=m                 # default voice when a request omits one
 # LLAMA_SWAP_TTS_REF_SECONDS=30          # max voice-clone reference clip length (s)
+# LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600   # per-request synth timeout (s); long chunked text
 
 # ── AI Insights — sibling services (optional) ───────────────────────────
 # Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
diff --git a/CLAUDE.md b/CLAUDE.md
index b5e1ee2..fba33e0 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -666,6 +666,8 @@ LLAMA_SWAP_TTS_MODEL=chatterbox                # TTS model id in config.yaml (de
 LLAMA_SWAP_TTS_VOICE=m                         # Default voice when /tts/speech omits one (optional)
 LLAMA_SWAP_TTS_REF_SECONDS=30                  # Max voice-clone reference clip length, seconds
                                                # (Chatterbox is zero-shot; ~10-20s clean ref is ideal)
+LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600     # Per-request synth timeout (long chunked insights take
+                                               # minutes); overrides the shared client timeout for /tts/speech
 
 # Insight Chat Continuation
 AGENTIC_CHAT_MAX_ITERATIONS=6                  # Cap on tool-calling iterations per chat turn (default 6)
diff --git a/README.md b/README.md
index 0b678df..58ddc81 100644
--- a/README.md
+++ b/README.md
@@ -169,6 +169,10 @@ Env:
   [default: `30`]. Reference audio is ffmpeg-normalized to mono 24 kHz WAV (so any
   source format works); Chatterbox is zero-shot, so a clean ~10–20s sample is the
   sweet spot — more rarely helps.
+- `LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS` - per-request synthesis timeout in
+  seconds [default: `600`]. Long insights are chunked and synthesized server-side
+  and can take minutes, so `/tts/speech` uses this value instead of the shared
+  `LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS`. Invalid or `0` values use the default.
 
 #### Fallback Behavior
 - Primary server is tried first with 5-second connection timeout
diff --git a/src/ai/llamacpp.rs b/src/ai/llamacpp.rs
index 2946688..d56b645 100644
--- a/src/ai/llamacpp.rs
+++ b/src/ai/llamacpp.rs
@@ -170,9 +170,19 @@ impl LlamaCppClient {
             body["temperature"] = json!(x);
         }
 
+        // TTS gets its own (longer) timeout: synthesizing a long, internally
+        // chunked insight can take minutes, well past the shared chat/embedding
+        // client timeout. Per-request `.timeout()` overrides the client default.
+        let tts_timeout = std::env::var("LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS")
+            .ok()
+            .and_then(|v| v.parse::<u64>().ok())
+            .filter(|n| *n > 0)
+            .unwrap_or(600);
+
         let resp = self
             .client
             .post(&url)
+            .timeout(Duration::from_secs(tts_timeout))
             .json(&body)
             .send()
             .await