From b1493f5aca475f1ab47b67ca474d30c061596478 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Thu, 11 Jun 2026 19:15:38 -0400 Subject: [PATCH] Wait out TTS GPU hold before the insight job timeout starts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GPU lease keeps per-request reqwest budgets from burning behind a cross-model swap, but the job-level INSIGHT_GENERATION_TIMEOUT_SECS wall-clock started at spawn — an insight queued behind a running TTS synthesis parked its first chat call on the lease and timed out ("timeout after 180s") before chatterbox even finished loading. Acquire-and-drop an LLM read lease before starting the job clock in both insight handlers: the wait for the GPU happens before the timeout begins, mirroring the per-request lease semantics. Dropped immediately — holding it across the generation would deadlock the chat calls' own lease acquisitions. Co-Authored-By: Claude Fable 5 --- src/ai/handlers.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index b3beeda..cb21b14 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -468,6 +468,13 @@ pub async fn generate_insight_handler( let path_for_task = path.clone(); let generator_for_task = generator.clone(); let result = tokio::task::spawn(async move { + // Cross-model barrier: if a TTS synthesis holds the GPU, wait it + // out BEFORE the generation wall-clock starts. The per-request + // lease keeps reqwest budgets honest, but this job-level timeout + // would otherwise burn while the first chat call queues behind a + // multi-minute synthesis. Dropped immediately — holding it across + // the generation would deadlock the chat calls' own leases. + drop(crate::ai::gpu::llm_lease().await); tokio::time::timeout( std::time::Duration::from_secs(timeout_secs), generator_for_task.generate_insight_for_photo_with_config( @@ -846,6 +853,9 @@ pub async fn generate_agentic_insight_handler( let path_for_task = path.clone(); let generator_for_task = generator.clone(); let result = tokio::task::spawn(async move { + // Cross-model barrier — see generate_insight_handler: wait out any + // running TTS synthesis before the generation wall-clock starts. + drop(crate::ai::gpu::llm_lease().await); tokio::time::timeout( std::time::Duration::from_secs(timeout_secs), generator_for_task.generate_agentic_insight_for_photo(