feat(ai): rerank timing + think:false + OpenRouter error detail
- search_rag reranker now logs wall-clock time around the Ollama generation
call (now made via generate_no_think), the candidate count / top-N going in,
and the final reordering.
The "final indices" + swap-count line is info level so it's always
visible; detailed before/after previews stay at debug for when you want
to inspect reranker quality.
- New OllamaClient::generate_no_think convenience method that sets Ollama's
top-level think:false on the request, plumbed through try_generate via
a new internal generate_with_options. Used only by the reranker today;
avoids the chain-of-thought tax on reasoning models (Qwen3/VL,
DeepSeek-R1 distills, GPT-OSS) when the task has nothing to reason
about. Server-side no-op on non-reasoning models.
- OpenRouter chat_with_tools "missing choices[0]" error now includes the
actual response body — extracts structured {error: {code, message}}
when OpenRouter surfaces it (common for upstream-provider issues like
rate limits and content moderation), otherwise falls back to a
truncated raw-JSON view.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1547,6 +1547,14 @@ Return ONLY the summary, nothing else."#,
|
||||
limit: usize,
|
||||
ollama: &OllamaClient,
|
||||
) -> Result<Vec<String>> {
|
||||
let query_preview: String = query.chars().take(60).collect();
|
||||
log::info!(
|
||||
"rerank: {} candidates -> top {} (query=\"{}\")",
|
||||
candidates.len(),
|
||||
limit,
|
||||
query_preview
|
||||
);
|
||||
|
||||
// Build numbered list (1-based for readability). Cap each passage
|
||||
// at ~1000 chars so very long summaries don't eat the prompt.
|
||||
let numbered: String = candidates
|
||||
@@ -1574,14 +1582,20 @@ Return ONLY the summary, nothing else."#,
|
||||
limit, query, numbered, limit
|
||||
);
|
||||
|
||||
let started = std::time::Instant::now();
|
||||
let response = ollama
|
||||
.generate(
|
||||
.generate_no_think(
|
||||
&prompt,
|
||||
Some(
|
||||
"You are a terse relevance ranker. You output only numbers separated by commas.",
|
||||
),
|
||||
)
|
||||
.await?;
|
||||
log::info!(
|
||||
"rerank: finished in {} ms (prompt={} chars)",
|
||||
started.elapsed().as_millis(),
|
||||
prompt.len()
|
||||
);
|
||||
|
||||
// Extract indices from the response. Accept "3, 1, 7" and also
|
||||
// tolerate "[3, 1, 7]" or "3,1,7,..." with trailing junk.
|
||||
@@ -1600,9 +1614,11 @@ Return ONLY the summary, nothing else."#,
|
||||
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
let mut reordered: Vec<String> = Vec::with_capacity(limit);
|
||||
let mut final_indices: Vec<usize> = Vec::with_capacity(limit);
|
||||
for n in picks {
|
||||
if seen.insert(n) {
|
||||
reordered.push(candidates[n - 1].clone());
|
||||
final_indices.push(n);
|
||||
if reordered.len() >= limit {
|
||||
break;
|
||||
}
|
||||
@@ -1614,12 +1630,40 @@ Return ONLY the summary, nothing else."#,
|
||||
for (i, c) in candidates.iter().enumerate() {
|
||||
if !seen.contains(&(i + 1)) {
|
||||
reordered.push(c.clone());
|
||||
final_indices.push(i + 1);
|
||||
if reordered.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Debug snapshot: show what the reranker changed. Position p holds
|
||||
// the 1-based index of the candidate that now sits at position p.
|
||||
// A value that equals its position means "no change at that slot".
|
||||
let swapped = final_indices
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(pos, idx)| **idx != pos + 1)
|
||||
.count();
|
||||
log::info!(
|
||||
"rerank: final indices (1-based): {:?} — {} of top {} swapped from vector order",
|
||||
final_indices,
|
||||
swapped,
|
||||
final_indices.len()
|
||||
);
|
||||
let show = final_indices.len().min(5);
|
||||
log::debug!("rerank: vector-order top {}:", show);
|
||||
for (i, c) in candidates.iter().enumerate().take(show) {
|
||||
let preview: String = c.chars().take(100).collect();
|
||||
log::debug!("rerank: [{}] {}", i + 1, preview);
|
||||
}
|
||||
log::debug!("rerank: reranked top {}:", show);
|
||||
for (pos, idx) in final_indices.iter().enumerate().take(show) {
|
||||
let preview: String = candidates[*idx - 1].chars().take(100).collect();
|
||||
log::debug!("rerank: [{}] (orig #{}) {}", pos + 1, idx, preview);
|
||||
}
|
||||
|
||||
Ok(reordered)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user