Skip to content

Commit 53d0a12

Browse files
authored
server : allow specifying reasoning_format in HTTP request (#15238)
1 parent 27093af commit 53d0a12

File tree

7 files changed

+28
-7
lines changed

7 files changed

+28
-7
lines changed

common/arg.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2949,11 +2949,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
29492949
"- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)\n"
29502950
"(default: auto)",
29512951
[](common_params & params, const std::string & value) {
2952-
/**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
2953-
else if (value == "deepseek-legacy") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; }
2954-
else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
2955-
else if (value == "auto") { params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; }
2956-
else { throw std::invalid_argument("invalid value"); }
2952+
params.reasoning_format = common_reasoning_format_from_name(value);
29572953
}
29582954
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
29592955
add_opt(common_arg(

common/chat.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,19 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
625625
}
626626
}
627627

628+
// Parse a reasoning-format name (as accepted by the CLI `--reasoning-format`
// flag and the server's `reasoning_format` request field) into its enum value.
// Throws std::runtime_error when the name is not one of the known formats.
common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
    // table of accepted names and their corresponding enum values
    static const std::pair<const char *, common_reasoning_format> k_formats[] = {
        { "none",            COMMON_REASONING_FORMAT_NONE            },
        { "auto",            COMMON_REASONING_FORMAT_AUTO            },
        { "deepseek",        COMMON_REASONING_FORMAT_DEEPSEEK        },
        { "deepseek-legacy", COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY },
    };
    for (const auto & [name, value] : k_formats) {
        if (format == name) {
            return value;
        }
    }
    throw std::runtime_error("Unknown reasoning format: " + format);
}
640+
628641
static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
629642
std::string arguments;
630643
if (builder.is_partial()) {

common/chat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ std::string common_chat_format_example(
191191

192192
const char* common_chat_format_name(common_chat_format format);
193193
const char* common_reasoning_format_name(common_reasoning_format format);
194+
common_reasoning_format common_reasoning_format_from_name(const std::string & format);
194195
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
195196

196197
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);

tools/server/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,6 +1132,12 @@ The `response_format` parameter supports both plain JSON output (e.g. `{"type":
11321132

11331133
`chat_template_kwargs`: Allows sending additional parameters to the json templating system. For example: `{"enable_thinking": false}`
11341134

1135+
`reasoning_format`: The reasoning format to use when parsing the model output. If set to `none`, the raw generated text is returned unparsed.
1136+
1137+
`thinking_forced_open`: Force a reasoning model to always output the reasoning. Only works on certain models.
1138+
1139+
`parse_tool_calls`: Whether to parse tool calls from the generated output.
1140+
11351141
*Examples:*
11361142

11371143
You can use either Python `openai` library with appropriate checkpoints:

tools/server/public/index.html.gz

19 Bytes
Binary file not shown.

tools/server/server.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,8 +383,12 @@ struct server_task {
383383
} else {
384384
params.oaicompat_chat_syntax.format = defaults.oaicompat_chat_syntax.format;
385385
}
386-
params.oaicompat_chat_syntax.reasoning_format = params_base.reasoning_format;
387-
params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (params_base.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
386+
common_reasoning_format reasoning_format = params_base.reasoning_format;
387+
if (data.contains("reasoning_format")) {
388+
reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get<std::string>());
389+
}
390+
params.oaicompat_chat_syntax.reasoning_format = reasoning_format;
391+
params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
388392
params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
389393
params.oaicompat_chat_syntax.parse_tool_calls = json_value(data, "parse_tool_calls", false);
390394
}

tools/server/webui/src/utils/app.context.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ export const AppContextProvider = ({
209209
messages,
210210
stream: true,
211211
cache_prompt: true,
212+
reasoning_format: 'none',
212213
samplers: config.samplers,
213214
temperature: config.temperature,
214215
dynatemp_range: config.dynatemp_range,

0 commit comments

Comments (0)