feat(ai): rerank timing + think:false + OpenRouter error detail

- search_rag reranker now logs wall-clock time around the ollama.generate call, the candidate count / top-N going in, and the final reordering. The "final indices" + swap-count line is info level so it's always visible; detailed before/after previews stay at debug for when you want to inspect reranker quality.
- New OllamaClient::generate_no_think convenience that sets Ollama's top-level think:false on the request, plumbed through try_generate via a new internal generate_with_options. Used only by the reranker today; avoids the chain-of-thought tax on reasoning models (Qwen3/VL, DeepSeek-R1 distills, GPT-OSS) when the task has nothing to reason about. Server-side no-op on non-reasoning models.
- OpenRouter chat_with_tools "missing choices[0]" error now includes the actual response body — extracts structured {error: {code, message}} when OpenRouter surfaces it (common for upstream-provider issues like rate limits and content moderation), otherwise falls back to a truncated raw-JSON view.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 16:19:45 -04:00
parent e5781325c6
commit 0ebc2e9003
3 changed files with 121 additions and 6 deletions
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -1547,6 +1547,14 @@ Return ONLY the summary, nothing else."#,
        limit: usize,
        ollama: &OllamaClient,
    ) -> Result<Vec<String>> {
+        let query_preview: String = query.chars().take(60).collect();
+        log::info!(
+            "rerank: {} candidates -> top {} (query=\"{}\")",
+            candidates.len(),
+            limit,
+            query_preview
+        );
+
        // Build numbered list (1-based for readability). Cap each passage
        // at ~1000 chars so very long summaries don't eat the prompt.
        let numbered: String = candidates
@@ -1574,14 +1582,20 @@ Return ONLY the summary, nothing else."#,
            limit, query, numbered, limit
        );

+        let started = std::time::Instant::now();
        let response = ollama
-            .generate(
+            .generate_no_think(
                &prompt,
                Some(
                    "You are a terse relevance ranker. You output only numbers separated by commas.",
                ),
            )
            .await?;
+        log::info!(
+            "rerank: finished in {} ms (prompt={} chars)",
+            started.elapsed().as_millis(),
+            prompt.len()
+        );

        // Extract indices from the response. Accept "3, 1, 7" and also
        // tolerate "[3, 1, 7]" or "3,1,7,..." with trailing junk.
@@ -1600,9 +1614,11 @@ Return ONLY the summary, nothing else."#,

        let mut seen = std::collections::HashSet::new();
        let mut reordered: Vec<String> = Vec::with_capacity(limit);
+        let mut final_indices: Vec<usize> = Vec::with_capacity(limit);
        for n in picks {
            if seen.insert(n) {
                reordered.push(candidates[n - 1].clone());
+                final_indices.push(n);
                if reordered.len() >= limit {
                    break;
                }
@@ -1614,12 +1630,40 @@ Return ONLY the summary, nothing else."#,
            for (i, c) in candidates.iter().enumerate() {
                if !seen.contains(&(i + 1)) {
                    reordered.push(c.clone());
+                    final_indices.push(i + 1);
                    if reordered.len() >= limit {
                        break;
                    }
                }
            }
        }
+
+        // Debug snapshot: show what the reranker changed. Position p holds
+        // the 1-based index of the candidate that now sits at position p.
+        // A value that equals its position means "no change at that slot".
+        let swapped = final_indices
+            .iter()
+            .enumerate()
+            .filter(|(pos, idx)| **idx != pos + 1)
+            .count();
+        log::info!(
+            "rerank: final indices (1-based): {:?} — {} of top {} swapped from vector order",
+            final_indices,
+            swapped,
+            final_indices.len()
+        );
+        let show = final_indices.len().min(5);
+        log::debug!("rerank: vector-order top {}:", show);
+        for (i, c) in candidates.iter().enumerate().take(show) {
+            let preview: String = c.chars().take(100).collect();
+            log::debug!("rerank:   [{}] {}", i + 1, preview);
+        }
+        log::debug!("rerank: reranked top {}:", show);
+        for (pos, idx) in final_indices.iter().enumerate().take(show) {
+            let preview: String = candidates[*idx - 1].chars().take(100).collect();
+            log::debug!("rerank:   [{}] (orig #{}) {}", pos + 1, idx, preview);
+        }
+
        Ok(reordered)
    }