diff --git a/src/ai/handlers.rs b/src/ai/handlers.rs index b3beeda..cb21b14 100644 --- a/src/ai/handlers.rs +++ b/src/ai/handlers.rs @@ -468,6 +468,13 @@ pub async fn generate_insight_handler( let path_for_task = path.clone(); let generator_for_task = generator.clone(); let result = tokio::task::spawn(async move { + // Cross-model barrier: if a TTS synthesis holds the GPU, wait it + // out BEFORE the generation wall-clock starts. The per-request + // lease keeps reqwest budgets honest, but this job-level timeout + // would otherwise burn while the first chat call queues behind a + // multi-minute synthesis. Dropped immediately — holding it across + // the generation would deadlock the chat calls' own leases. + drop(crate::ai::gpu::llm_lease().await); tokio::time::timeout( std::time::Duration::from_secs(timeout_secs), generator_for_task.generate_insight_for_photo_with_config( @@ -846,6 +853,9 @@ pub async fn generate_agentic_insight_handler( let path_for_task = path.clone(); let generator_for_task = generator.clone(); let result = tokio::task::spawn(async move { + // Cross-model barrier — see generate_insight_handler: wait out any + // running TTS synthesis before the generation wall-clock starts. + drop(crate::ai::gpu::llm_lease().await); tokio::time::timeout( std::time::Duration::from_secs(timeout_secs), generator_for_task.generate_agentic_insight_for_photo(