AI: add enable_thinking reasoning toggle plumbed to llama.cpp
Adds a new optional `enable_thinking` field to `SamplingOverrides`, forwarded to llama-server as `chat_template_kwargs.enable_thinking` (gates Qwen3-style reasoning blocks). `None` leaves the template default in place; other backends ignore the field. Wired through the agentic-insight and chat-turn request bodies/handlers. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -41,6 +41,10 @@ pub struct SamplingOverrides {
|
||||
pub top_p: Option<f32>,
|
||||
pub top_k: Option<i32>,
|
||||
pub min_p: Option<f32>,
|
||||
/// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as
|
||||
/// `chat_template_kwargs.enable_thinking`); other backends ignore it.
|
||||
/// `None` leaves the model/template default in place.
|
||||
pub enable_thinking: Option<bool>,
|
||||
}
|
||||
|
||||
impl SamplingOverrides {
|
||||
@@ -124,6 +128,7 @@ mod tests {
|
||||
top_p: None,
|
||||
top_k: None,
|
||||
min_p: None,
|
||||
enable_thinking: None,
|
||||
};
|
||||
assert!(!empty.has_sampling());
|
||||
|
||||
@@ -134,6 +139,7 @@ mod tests {
|
||||
top_p: None,
|
||||
top_k: None,
|
||||
min_p: None,
|
||||
enable_thinking: None,
|
||||
};
|
||||
assert!(with_temp.has_sampling());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user