Skip to content

Commit 4cb47f3

Browse files
authored
Merge pull request #7 from createthis/deepseek_3_1_thinking_mode_danger
Tool calls now work in both thinking and non-thinking modes. However, this change introduces a regression in streaming mode: reasoning content initially comes through as regular content. A long-term fix for this still needs to be worked out.
2 parents a839be7 + 0e36761 commit 4cb47f3

File tree

3 files changed

+168
-22
lines changed

3 files changed

+168
-22
lines changed

common/chat.cpp

Lines changed: 130 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,93 @@ static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, cons
666666
return arguments;
667667
}
668668

669+
/**
670+
* Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
671+
* Aggregates the prefix, suffix and in-between text into the content.
*
* DeepSeek V3.1 variant of the JSON tool-call parser. Flow, as implemented below:
*  - If block_open is given and is not found in the remaining input, the rest of the input is added as content and no tool calls are parsed.
*  - Otherwise every matched function header must be followed by JSON arguments and close_regex; a missing, partial or rejected call throws common_chat_msg_partial_exception.
*  - With allow_raw_python, a call named "python" whose payload does not start with '{' takes the whole remaining input as its code.
*  - After the last call, block_close (if given) is consumed, spaces are skipped and whatever is left is added as content.
* get_function_name may return an empty string to make the parser skip a match.
* NOTE(review): the "Aggregates ..." sentence above is inherited from the generic parser; the only add_content calls visible in this function are for trailing text - confirm whether try_find_regex also adds skipped text to the content.
672+
*/
673+
static void parse_json_tool_calls_deepseek_v3_1(
674+
common_chat_msg_parser & builder,
675+
const std::optional<common_regex> & block_open,
676+
const std::optional<common_regex> & function_regex_start_only,
677+
const std::optional<common_regex> & function_regex,
678+
const common_regex & close_regex,
679+
const std::optional<common_regex> & block_close,
680+
bool allow_raw_python = false,
681+
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) {
682+
683+
auto parse_tool_calls = [&]() {
684+
// Position from which the next header search starts; stays npos until a header has been matched.
size_t from = std::string::npos;
685+
auto first = true;
686+
while (true) {
687+
// Until a header has matched, the start-only regex (if given) is tried with try_consume_regex;
// otherwise function_regex is searched from `from`. With neither regex there is no match.
auto res = function_regex_start_only && first
688+
? builder.try_consume_regex(*function_regex_start_only)
689+
: function_regex
690+
? builder.try_find_regex(*function_regex, from)
691+
: std::nullopt;
692+
693+
if (res) {
694+
std::string name;
695+
if (get_function_name) {
696+
name = get_function_name(*res);
697+
} else {
698+
// Without a custom extractor the match must have exactly two groups: the whole match and the name.
GGML_ASSERT(res->groups.size() == 2);
699+
name = builder.str(res->groups[1]);
700+
}
701+
first = false;
702+
if (name.empty()) {
703+
// get_function_name signalled us that we should skip this match and treat it as content.
704+
// Resume the search one character past the start of the skipped match.
from = res->groups[0].begin + 1;
705+
continue;
706+
}
707+
builder.move_to(res->groups[0].end);
708+
from = builder.pos();
709+
710+
auto maybe_raw_python = name == "python" && allow_raw_python;
711+
// Arguments are parsed as JSON unless this is a raw-python call whose payload does not start with '{'.
if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
712+
if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
713+
// A rejected or partial tool call is reported as an incomplete parse.
if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
714+
throw common_chat_msg_partial_exception("incomplete tool call");
715+
}
716+
builder.consume_regex(close_regex);
717+
from = builder.pos(); // continue after this call
718+
continue;
719+
}
720+
// No JSON could be consumed after the header.
throw common_chat_msg_partial_exception("incomplete tool call");
721+
}
722+
if (maybe_raw_python) {
723+
// Raw python: the entire remaining input is wrapped via wrap_code_as_arguments and parsing stops here.
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
724+
if (!builder.add_tool_call(name, "", arguments)) {
725+
throw common_chat_msg_partial_exception("incomplete tool call");
726+
}
727+
return;
728+
}
729+
// NOTE(review): appears unreachable - both branches above always end in continue, return or throw.
throw common_chat_msg_partial_exception("incomplete tool call");
730+
}
731+
// No (further) function header matched.
break;
732+
}
733+
if (block_close) {
734+
// ensure we’re right after the last call header/close
735+
if (from != std::string::npos) builder.move_to(from);
736+
builder.consume_regex(*block_close);
737+
}
738+
builder.consume_spaces();
739+
builder.add_content(builder.consume_rest());
740+
};
741+
if (block_open) {
742+
if (auto res = builder.try_find_regex(*block_open)) {
743+
builder.move_to(res->groups[0].end); // consume opener
744+
parse_tool_calls();
745+
return;
746+
} else {
747+
// Without the opening marker no tool calls are parsed: the rest of the input is plain content.
builder.add_content(builder.consume_rest());
748+
return;
749+
}
750+
} else {
751+
parse_tool_calls();
752+
return;
753+
}
754+
}
755+
669756
/**
670757
* Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
671758
* Aggregates the prefix, suffix and in-between text into the content.
@@ -1395,41 +1482,63 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
13951482
tool_calls_end);
13961483
}
13971484

1485+
// Parses the part of a DeepSeek V3.1 response that is not reasoning: plain content, optionally with a tool-call block.
// When syntax().parse_tool_calls is false, the rest of the input is added as content and nothing else is parsed.
// Otherwise the work is delegated to parse_json_tool_calls_deepseek_v3_1 with the regexes defined below.
static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
1486+
// Function header: capture group 1 is the function name (one or more characters other than newline and '<').
// NOTE(review): the '|' characters inside the token literals below appear as ASCII pipes here; if they are ASCII in the
// real file (rather than the fullwidth bar used by the model's special tokens) they act as regex alternation - verify.
static const common_regex function_regex("(?:<|tool▁call▁begin|>)?(?:function<|tool▁sep|>)?([^\\n<]+)(?:\\n```json\\n|<|tool▁sep|>)");
1487+
1488+
// Closing suffix of a single call, with an optional closing code fence before the end token.
static const common_regex close_regex("(?:[\\n]*```[\\s\\r\\n]*)?<|tool▁call▁end|>");
1489+
// Several spellings of the block opener are accepted.
static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
1490+
static const common_regex tool_calls_end("<|tool▁calls▁end|>");
1491+
1492+
if (!builder.syntax().parse_tool_calls) {
1493+
LOG_DBG("%s: not parse_tool_calls\n", __func__);
1494+
builder.add_content(builder.consume_rest());
1495+
return;
1496+
}
1497+
1498+
LOG_DBG("%s: parse_tool_calls\n", __func__);
1499+
1500+
parse_json_tool_calls_deepseek_v3_1(
1501+
builder,
1502+
/* block_open= */ tool_calls_begin,
1503+
/* function_regex_start_only= */ std::nullopt,
1504+
function_regex,
1505+
close_regex,
1506+
tool_calls_end);
1507+
}
1508+
13981509
static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
13991510
// DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
14001511
// First try to parse using the standard reasoning parsing method
1512+
LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
1513+
1514+
// Probe the remaining input for either think tag; the parser position is restored afterwards
// so the actual parsing below starts from the same place.
bool has_reasoning = false;
1515+
auto header_start_pos = builder.pos();
1516+
if (auto res = builder.try_find_literal("<think>")) {
1517+
has_reasoning = true;
1518+
}
1519+
if (auto res = builder.try_find_literal("</think>")) {
1520+
has_reasoning = true;
1521+
}
1522+
builder.move_to(header_start_pos);
1523+
// Thinking was forced open but neither tag is present: parse the output as content
// (including possible tool calls) instead of treating it as reasoning.
if (!has_reasoning && builder.syntax().thinking_forced_open) {
1524+
LOG_DBG("%s: edge case no reasoning, adding content\n", __func__);
1525+
common_chat_parse_deepseek_v3_1_content(builder);
1526+
return;
1527+
}
14011528
if (builder.try_parse_reasoning("<think>", "</think>")) {
14021529
// If reasoning was parsed successfully, the remaining content is regular content
14031530
LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
1404-
builder.add_content(builder.consume_rest());
1531+
// </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
1532+
common_chat_parse_deepseek_v3_1_content(builder);
14051533
} else {
14061534
// If no reasoning tags found, check if we should treat everything as reasoning
14071535
if (builder.syntax().thinking_forced_open) {
14081536
// If thinking is forced open but no tags found, treat everything as reasoning
14091537
LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
14101538
builder.add_reasoning_content(builder.consume_rest());
14111539
} else {
1412-
// Tool calls are support in non-thinking mode
1413-
if (!builder.syntax().parse_tool_calls) {
1414-
LOG_DBG("%s: not parse_tool_calls\n", __func__);
1415-
builder.add_content(builder.consume_rest());
1416-
return;
1417-
}
1418-
14191540
// <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
1420-
static const common_regex function_regex("<|tool▁call▁begin|>([^\\n<]+)<|tool▁sep|>");
1421-
static const common_regex close_regex("<|tool▁call▁end|>");
1422-
static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
1423-
static const common_regex tool_calls_end("<|tool▁calls▁end|>");
1424-
LOG_DBG("%s: parse_tool_calls\n", __func__);
1425-
1426-
parse_json_tool_calls(
1427-
builder,
1428-
/* block_open= */ tool_calls_begin,
1429-
/* function_regex_start_only= */ std::nullopt,
1430-
function_regex,
1431-
close_regex,
1432-
tool_calls_end);
1541+
// Non-thinking mode: content and tool calls are handled by the shared content parser.
common_chat_parse_deepseek_v3_1_content(builder);
14331542
}
14341543
}
14351544
}

tests/test-chat-parser.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ static void assert_equals(const char* label, const T& expected, const T& actual)
213213
}
214214

215215
static void test_deepseek_v3_1_tool_calls() {
216+
//common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
216217
// variant: happy path for when it works as the model card says it should
217218
const char* variant = "simple";
218219
common_chat_syntax syntax = {
@@ -242,6 +243,42 @@ static void test_deepseek_v3_1_tool_calls() {
242243
assert_equals(variant, std::string(""), m.content);
243244
assert_equals(variant, std::string(""), m.reasoning_content);
244245
}
246+
247+
// variant: function + fenced JSON + thinking open
248+
{
249+
common_chat_syntax syntax = {
250+
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
251+
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
252+
/* .reasoning_in_content = */ false,
253+
/* .thinking_forced_open = */ true,
254+
/* .parse_tool_calls = */ true,
255+
};
256+
const char* variant = "fenced_thinking";
257+
const std::string in = "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_time\n```json\n{\"city\": \"Tokyo\"}\n```<|tool▁call▁end|><|tool▁calls▁end|>";
258+
auto m = common_chat_parse(in, false, syntax);
259+
assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
260+
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
261+
assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
262+
assert_equals(variant, std::string(""), m.content);
263+
assert_equals(variant, std::string("REASONING"), m.reasoning_content);
264+
}
265+
266+
// variant: thinking forced open + missing reasoning + no tool calls
267+
{
268+
common_chat_syntax syntax = {
269+
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
270+
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
271+
/* .reasoning_in_content = */ false,
272+
/* .thinking_forced_open = */ true,
273+
/* .parse_tool_calls = */ true,
274+
};
275+
const char* variant = "thinking_forced_open_missing_reasoning_no_tool_calls";
276+
const std::string in = "CONTENT";
277+
auto m = common_chat_parse(in, false, syntax);
278+
assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
279+
assert_equals(variant, std::string("CONTENT"), m.content);
280+
assert_equals(variant, std::string(""), m.reasoning_content);
281+
}
245282
}
246283

247284
static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {

tools/server/utils.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -782,7 +782,7 @@ static json oaicompat_chat_params_parse(
782782
/* TODO: test this properly */
783783
inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE;
784784

785-
if (inputs.enable_thinking) {
785+
if ( (!inputs.enable_thinking) || inputs.chat_template_kwargs.find("enable_thinking") != inputs.chat_template_kwargs.end()) {
786786
throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking.");
787787
}
788788

0 commit comments

Comments
 (0)