2026-06-13 02:01:37 +00:00
1 changed files with 10 additions and 0 deletions
@@ -468,6 +468,13 @@ pub async fn generate_insight_handler(
        let path_for_task = path.clone();
        let generator_for_task = generator.clone();
        let result = tokio::task::spawn(async move {
+            // Cross-model barrier: if a TTS synthesis holds the GPU, wait for
+            // it to finish BEFORE the generation wall-clock starts. The per-request
+            // lease keeps reqwest budgets honest, but this job-level timeout would
+            // otherwise keep running while the first chat call queues behind a
+            // multi-minute synthesis. The lease is dropped immediately: holding it
+            // across the generation would deadlock the chat calls' own leases.
+            drop(crate::ai::gpu::llm_lease().await);
            tokio::time::timeout(
                std::time::Duration::from_secs(timeout_secs),
                generator_for_task.generate_insight_for_photo_with_config(
@@ -846,6 +853,9 @@ pub async fn generate_agentic_insight_handler(
        let path_for_task = path.clone();
        let generator_for_task = generator.clone();
        let result = tokio::task::spawn(async move {
+            // Cross-model barrier — see generate_insight_handler: wait out any
+            // running TTS synthesis before the generation wall-clock starts.
+            drop(crate::ai::gpu::llm_lease().await);
            tokio::time::timeout(
                std::time::Duration::from_secs(timeout_secs),
                generator_for_task.generate_agentic_insight_for_photo(