-
Notifications
You must be signed in to change notification settings - Fork 69
Open
Description
I'm using vLLM with a Qwen3 model that has reasoning enabled. When it is used with the EchoKit client, the thinking block is included in the output as well. To disable this behavior, I changed the mod.rs file as follows:
/// Request payload that is serialized to JSON and posted to the LLM endpoint.
///
/// Fields holding an empty string, an empty tool list, or `None` are left out
/// of the serialized output entirely; `stream` and `messages` are always
/// written.
#[derive(Debug, Clone, serde::Serialize)]
pub struct StableLlmRequest {
    /// Streaming flag; always serialized.
    stream: bool,
    /// Chat identifier, written under the JSON key `chatId`; omitted when empty.
    #[serde(rename = "chatId", skip_serializing_if = "String::is_empty")]
    chat_id: String,
    /// Message list; always serialized.
    messages: Vec<llm::Content>,
    /// Model name; omitted when empty.
    #[serde(skip_serializing_if = "String::is_empty")]
    model: String,
    /// Tool definitions; omitted when the list is empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    tools: Vec<llm::Tool>,
    /// Tool-choice value; omitted when it is the empty string.
    #[serde(skip_serializing_if = "str::is_empty")]
    tool_choice: &'static str,
    /// Arbitrary JSON forwarded as `chat_template_kwargs`; omitted when `None`.
    /// For example `{"enable_thinking": false}` — presumably interpreted by the
    /// serving backend's chat template; verify against the backend in use.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chat_template_kwargs: Option<serde_json::Value>,
}
pub async fn llm_stable<'p, I: IntoIterator<Item = C>, C: AsRef<llm::Content>>(
llm_url: &str,
token: &str,
model: &str,
chat_id: Option<String>,
prompts: I,
tools: Vec<llm::Tool>,
) -> anyhow::Result<StableLlmResponse> {
let messages = prompts
.into_iter()
.map(|c| c.as_ref().clone())
.collect::<Vec<_>>();
let mut response_builder = reqwest::Client::new().post(llm_url);
if !token.is_empty() {
response_builder = response_builder.bearer_auth(token);
};
let tool_choice = if tools.is_empty() { "" } else { "auto" };
let request = StableLlmRequest {
stream: true,
chat_id: chat_id.unwrap_or_default(),
messages,
model: model.to_string(),
tools,
tool_choice,
// --- ADD THIS ---
chat_template_kwargs: Some(serde_json::json!({
"enable_thinking": false
})),
// ----------------
};
log::debug!(
"#### send to llm:\n{}\n#####",
serde_json::to_string_pretty(&request)?
);
However, it would be nice to have this available as a flag in config.toml.
Metadata
Metadata
Assignees
Labels
No labels