From b1493f5aca475f1ab47b67ca474d30c061596478 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Thu, 11 Jun 2026 19:15:38 -0400
Subject: [PATCH] Wait out TTS GPU hold before the insight job timeout starts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GPU lease keeps per-request reqwest budgets from burning behind a
cross-model swap, but the job-level INSIGHT_GENERATION_TIMEOUT_SECS
wall-clock started at spawn — an insight queued behind a running TTS
synthesis parked its first chat call on the lease and timed out
("timeout after 180s") before chatterbox even finished loading.

Acquire and immediately drop an LLM read lease before starting the job
clock in both insight handlers, so the wait for the GPU happens before
the timeout begins, mirroring the per-request lease semantics. The
lease is dropped right away because holding it across the generation
would deadlock the chat calls' own lease acquisitions.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 src/ai/handlers.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs
index b3beeda..cb21b14 100644
--- a/src/ai/handlers.rs
+++ b/src/ai/handlers.rs
@@ -468,6 +468,13 @@ pub async fn generate_insight_handler(
         let path_for_task = path.clone();
         let generator_for_task = generator.clone();
         let result = tokio::task::spawn(async move {
+            // Cross-model barrier: if a TTS synthesis holds the GPU, wait it
+            // out BEFORE the generation wall-clock starts. The per-request
+            // lease keeps reqwest budgets honest, but this job-level timeout
+            // would otherwise burn while the first chat call queues behind a
+            // multi-minute synthesis. Dropped immediately — holding it across
+            // the generation would deadlock the chat calls' own leases.
+            drop(crate::ai::gpu::llm_lease().await);
             tokio::time::timeout(
                 std::time::Duration::from_secs(timeout_secs),
                 generator_for_task.generate_insight_for_photo_with_config(
@@ -846,6 +853,9 @@ pub async fn generate_agentic_insight_handler(
         let path_for_task = path.clone();
         let generator_for_task = generator.clone();
         let result = tokio::task::spawn(async move {
+            // Cross-model barrier — see generate_insight_handler: wait out any
+            // running TTS synthesis before the generation wall-clock starts.
+            drop(crate::ai::gpu::llm_lease().await);
             tokio::time::timeout(
                 std::time::Duration::from_secs(timeout_secs),
                 generator_for_task.generate_agentic_insight_for_photo(