feat(ai): rerank timing + think:false + OpenRouter error detail
- search_rag reranker now logs wall-clock time around the ollama.generate
call, the candidate count / top-N going in, and the final reordering.
The "final indices" + swap-count line is info level so it's always
visible; detailed before/after previews stay at debug for when you want
to inspect reranker quality.
- New OllamaClient::generate_no_think convenience that sets Ollama's
top-level think:false on the request, plumbed through try_generate via
a new internal generate_with_options. Used only by the reranker today;
avoids the chain-of-thought tax on reasoning models (Qwen3/VL,
DeepSeek-R1 distills, GPT-OSS) when the task has nothing to reason
about. Server-side no-op on non-reasoning models.
- OpenRouter chat_with_tools "missing choices[0]" error now includes the
actual response body — extracts structured {error: {code, message}}
when OpenRouter surfaces it (common for upstream-provider issues like
rate limits and content moderation), otherwise falls back to a
truncated raw-JSON view.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -360,10 +360,18 @@ impl LlmClient for OpenRouterClient {
|
||||
.get("choices")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|a| a.first())
|
||||
.ok_or_else(|| anyhow!("response missing choices[0]"))?;
|
||||
let msg = choice
|
||||
.get("message")
|
||||
.ok_or_else(|| anyhow!("choices[0] missing message"))?;
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"response missing choices[0]: {}",
|
||||
extract_openrouter_error_detail(&parsed)
|
||||
)
|
||||
})?;
|
||||
let msg = choice.get("message").ok_or_else(|| {
|
||||
anyhow!(
|
||||
"choices[0] missing message: {}",
|
||||
extract_openrouter_error_detail(&parsed)
|
||||
)
|
||||
})?;
|
||||
let chat_msg = Self::openai_message_to_chat(msg)?;
|
||||
|
||||
let usage = parsed.get("usage");
|
||||
@@ -687,6 +695,33 @@ impl LlmClient for OpenRouterClient {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract a diagnostic fragment from an OpenRouter response body that
|
||||
/// doesn't match the expected `{choices: [...]}` shape. OpenRouter will
|
||||
/// sometimes return 200 OK with `{"error": {"message": "...", "code": ...}}`
|
||||
/// when the upstream provider (Anthropic/OpenAI/Google/etc) errored out
|
||||
/// — rate limits, content moderation, model overload, provider timeout.
|
||||
/// Surface the structured error if present; otherwise fall back to a
|
||||
/// truncated raw-JSON view so the log line is actionable.
|
||||
fn extract_openrouter_error_detail(parsed: &Value) -> String {
|
||||
if let Some(err) = parsed.get("error") {
|
||||
let message = err
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("(no message)");
|
||||
let code = err
|
||||
.get("code")
|
||||
.map(|v| match v {
|
||||
Value::String(s) => s.clone(),
|
||||
other => other.to_string(),
|
||||
})
|
||||
.unwrap_or_else(|| "?".to_string());
|
||||
let short_message: String = message.chars().take(240).collect();
|
||||
return format!("error code={} message=\"{}\"", code, short_message);
|
||||
}
|
||||
let raw = parsed.to_string();
|
||||
raw.chars().take(300).collect()
|
||||
}
|
||||
|
||||
/// Find the byte offset of the first `\n\n` (end of an SSE frame) in `buf`.
|
||||
/// Returns the index of the first `\n` of the pair, so the full separator is
|
||||
/// `buf[idx..=idx+1]`. Also handles `\r\n\r\n` since some servers emit it.
|
||||
|
||||
Reference in New Issue
Block a user