Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2949,11 +2949,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
"- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)\n"
"(default: auto)",
[](common_params & params, const std::string & value) {
/**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
else if (value == "deepseek-legacy") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; }
else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
else if (value == "auto") { params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; }
else { throw std::invalid_argument("invalid value"); }
params.reasoning_format = common_reasoning_format_from_name(value);
}
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
add_opt(common_arg(
Expand Down
13 changes: 13 additions & 0 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,19 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
}
}

common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
if (format == "none") {
return COMMON_REASONING_FORMAT_NONE;
} else if (format == "auto") {
return COMMON_REASONING_FORMAT_AUTO;
} else if (format == "deepseek") {
return COMMON_REASONING_FORMAT_DEEPSEEK;
} else if (format == "deepseek-legacy") {
return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
}
throw std::runtime_error("Unknown reasoning format: " + format);
}

static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
std::string arguments;
if (builder.is_partial()) {
Expand Down
1 change: 1 addition & 0 deletions common/chat.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ std::string common_chat_format_example(

const char* common_chat_format_name(common_chat_format format);
const char* common_reasoning_format_name(common_reasoning_format format);
common_reasoning_format common_reasoning_format_from_name(const std::string & format);
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);

common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
Expand Down
6 changes: 6 additions & 0 deletions tools/server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,12 @@ The `response_format` parameter supports both plain JSON output (e.g. `{"type":

`chat_template_kwargs`: Allows sending additional parameters to the json templating system. For example: `{"enable_thinking": false}`

`reasoning_format`: The reasoning format to be parsed. If set to `none`, it will output the raw generated text.

`thinking_forced_open`: Force a reasoning model to always output the reasoning. Only works on certain models.

`parse_tool_calls`: Whether to parse the generated tool call.

*Examples:*

You can use either Python `openai` library with appropriate checkpoints:
Expand Down
Binary file modified tools/server/public/index.html.gz
Binary file not shown.
8 changes: 6 additions & 2 deletions tools/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,12 @@ struct server_task {
} else {
params.oaicompat_chat_syntax.format = defaults.oaicompat_chat_syntax.format;
}
params.oaicompat_chat_syntax.reasoning_format = params_base.reasoning_format;
params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (params_base.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
common_reasoning_format reasoning_format = params_base.reasoning_format;
if (data.contains("reasoning_format")) {
reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get<std::string>());
}
params.oaicompat_chat_syntax.reasoning_format = reasoning_format;
params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
params.oaicompat_chat_syntax.parse_tool_calls = json_value(data, "parse_tool_calls", false);
}
Expand Down
1 change: 1 addition & 0 deletions tools/server/webui/src/utils/app.context.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ export const AppContextProvider = ({
messages,
stream: true,
cache_prompt: true,
reasoning_format: 'none',
samplers: config.samplers,
temperature: config.temperature,
dynatemp_range: config.dynatemp_range,
Expand Down
Loading