Skip to content

Commit c46d4da

Browse files
author
ochafik
committed
rename: common_chat_reasoning_syntax -> common_chat_syntax (now also contains the chat format)
1 parent 64ea080 commit c46d4da

File tree

6 files changed

+298
-199
lines changed

6 files changed

+298
-199
lines changed

common/chat-parser.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
using json = nlohmann::ordered_json;
1212

13-
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_reasoning_syntax & reasoning_syntax)
14-
: input_(input), is_partial_(is_partial), reasoning_syntax_(reasoning_syntax)
13+
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
14+
: input_(input), is_partial_(is_partial), syntax_(syntax)
1515
{
1616
result_.role = "assistant";
1717

@@ -127,14 +127,14 @@ void common_chat_msg_parser::consume_literal(const std::string & literal) {
127127
}
128128

129129
void common_chat_msg_parser::try_consume_think_tags(const common_regex & start_think_regex, const common_regex & end_think_regex) {
130-
if (reasoning_syntax_.format != COMMON_REASONING_FORMAT_NONE) {
131-
if (reasoning_syntax_.thinking_forced_open || try_consume_regex(start_think_regex)) {
130+
if (syntax_.reasoning_format != COMMON_REASONING_FORMAT_NONE) {
131+
if (syntax_.thinking_forced_open || try_consume_regex(start_think_regex)) {
132132
if (auto res = try_find_regex(end_think_regex)) {
133133
result_.reasoning_content = res->prelude;
134134
consume_spaces();
135135
} else {
136136
result_.reasoning_content = consume_rest();
137-
if (!reasoning_syntax_.thinking_forced_open) {
137+
if (!syntax_.thinking_forced_open) {
138138
incomplete("Failed to find end of reasoning tag " + end_think_regex.str());
139139
}
140140
return;
@@ -218,7 +218,7 @@ std::optional<common_json> common_chat_msg_parser::try_consume_json(
218218
// No healing marker, just return the parsed json
219219
return result;
220220
}
221-
if (!is_partial_) {
221+
if (!is_partial()) {
222222
incomplete("JSON is incomplete");
223223
return std::nullopt; // Actually unreachable
224224
}

common/chat-parser.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ class common_chat_msg_partial_exception : public std::runtime_error {
1616
class common_chat_msg_parser {
1717
std::string input_;
1818
bool is_partial_;
19-
common_chat_reasoning_syntax reasoning_syntax_;
19+
common_chat_syntax syntax_;
2020

2121
size_t pos_ = 0;
2222
common_chat_msg result_;
2323
std::string healing_marker_;
2424

2525
public:
26-
common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_reasoning_syntax & reasoning_syntax);
26+
common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
2727
const std::string & input() const { return input_; }
2828
size_t pos() const { return pos_; }
2929
const std::string & healing_marker() const { return healing_marker_; }

common/chat.cpp

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -578,17 +578,22 @@ static void parse_json_tool_calls(
578578
// get_function_name signalled us that we should skip this match and treat it as content.
579579
from = res->groups[0].begin + 1;
580580
continue;
581-
} else {
582-
from = std::string::npos;
583581
}
582+
from = std::string::npos;
583+
584584
builder.add_content(res->prelude);
585-
if (auto partial = builder.try_consume_json({{}})) {
586-
std::string arguments = partial->json.dump();
587-
if (!builder.add_tool_call(name, "", arguments, partial->healing_marker)) {
588-
builder.incomplete("incomplete tool call");
585+
auto maybe_raw_python = name == "python" && allow_raw_python;
586+
if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
587+
if (auto partial = builder.try_consume_json({{}})) {
588+
std::string arguments = partial->json.dump();
589+
if (!builder.add_tool_call(name, "", arguments, partial->healing_marker)) {
590+
builder.incomplete("incomplete tool call");
591+
}
592+
builder.consume_regex(close_regex);
589593
}
590-
builder.consume_regex(close_regex);
591-
} else if (name == "python" && allow_raw_python) {
594+
continue;
595+
}
596+
if (maybe_raw_python) {
592597
auto code = builder.consume_rest();
593598
std::string arguments;
594599
common_healing_marker healing_marker;
@@ -602,13 +607,11 @@ static void parse_json_tool_calls(
602607
builder.incomplete("incomplete tool call");
603608
}
604609
return;
605-
} else {
606-
builder.incomplete("incomplete tool call");
607-
return;
608610
}
609-
} else {
610-
break;
611+
builder.incomplete("incomplete tool call");
612+
return;
611613
}
614+
break;
612615
}
613616
if (block_close) {
614617
builder.consume_regex(*block_close);
@@ -1238,14 +1241,18 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
12381241
std::string args_pattern = "[\\s\\S]*";
12391242
auto args_rule = builder.add_schema(name + "-args", parameters);
12401243
if (name == "python") {
1241-
args_pattern = "\\{" + args_pattern;
12421244
args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
1245+
} else {
1246+
args_pattern = "\\{" + args_pattern;
1247+
}
1248+
auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
1249+
first_tool_rules.push_back(call_rule);
1250+
if (inputs.parallel_tool_calls) {
1251+
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
12431252
}
1244-
first_tool_rules.push_back(builder.add_rule(name + "-call", "( \"assistant<|end_header_id|>\\n\" )? \"" + name + "\\n\" " + args_rule));
1245-
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
12461253
data.grammar_triggers.push_back({
12471254
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1248-
"((?:[\\s\\S]*?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
1255+
"((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
12491256
});
12501257
});
12511258
data.preserved_tokens = {
@@ -1771,20 +1778,20 @@ static void common_chat_parse(common_chat_msg_parser & builder, common_chat_form
17711778
builder.finish();
17721779
}
17731780

1774-
common_chat_msg common_chat_parse(const std::string & input, common_chat_format format, bool is_partial, const common_chat_reasoning_syntax & reasoning_syntax) {
1775-
common_chat_msg_parser builder(input, is_partial, reasoning_syntax);
1781+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
1782+
common_chat_msg_parser builder(input, is_partial, syntax);
17761783
try {
1777-
common_chat_parse(builder, format);
1784+
common_chat_parse(builder, syntax.format);
17781785
} catch (const common_chat_msg_partial_exception & ex) {
17791786
LOG_DBG("Partial parse: %s\n", ex.what());
17801787
if (!is_partial) {
17811788
throw std::runtime_error(ex.what());
17821789
}
17831790
}
17841791
auto msg = builder.result();
1785-
switch (reasoning_syntax.format) {
1792+
switch (syntax.reasoning_format) {
17861793
case COMMON_REASONING_FORMAT_DEEPSEEK:
1787-
if (!msg.reasoning_content.empty() && reasoning_syntax.inlined_in_content) {
1794+
if (!msg.reasoning_content.empty() && syntax.reasoning_in_content) {
17881795
std::string content = "<think>" + msg.reasoning_content;
17891796
if (!is_partial || !msg.content.empty()) {
17901797
content += "</think>";

common/chat.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,12 @@ struct common_chat_params {
123123
std::vector<std::string> additional_stops;
124124
};
125125

126-
struct common_chat_reasoning_syntax {
127-
common_reasoning_format format = COMMON_REASONING_FORMAT_NONE;
128-
bool inlined_in_content = false;
129-
bool thinking_forced_open = false;
126+
struct common_chat_syntax {
127+
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
128+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
129+
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
130+
bool reasoning_in_content = false;
131+
bool thinking_forced_open = false;
130132
};
131133

132134
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
@@ -166,7 +168,7 @@ std::string common_chat_format_example(
166168
bool use_jinja);
167169

168170
std::string common_chat_format_name(common_chat_format format);
169-
common_chat_msg common_chat_parse(const std::string & input, common_chat_format format, bool is_partial = false, const common_chat_reasoning_syntax & reasoning_syntax = {});
171+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
170172

171173
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
172174

examples/server/server.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include "chat.h"
12
#include "utils.hpp"
23

34
#include "arg.h"
@@ -117,8 +118,7 @@ struct slot_params {
117118
oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
118119
std::string oaicompat_model;
119120
std::string oaicompat_cmpl_id;
120-
common_chat_format oaicompat_chat_format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
121-
common_chat_reasoning_syntax oaicompat_reasoning_syntax;
121+
common_chat_syntax oaicompat_chat_syntax;
122122

123123
json to_json() const {
124124
std::vector<std::string> samplers;
@@ -174,7 +174,10 @@ struct slot_params {
174174
{"grammar_lazy", sampling.grammar_lazy},
175175
{"grammar_triggers", grammar_triggers},
176176
{"preserved_tokens", sampling.preserved_tokens},
177-
{"chat_format", common_chat_format_name(oaicompat_chat_format)},
177+
{"chat_format", common_chat_format_name(oaicompat_chat_syntax.format)},
178+
{"reasoning_format", (oaicompat_chat_syntax.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "deepseek" : "none")},
179+
{"reasoning_in_content", oaicompat_chat_syntax.reasoning_in_content},
180+
{"thinking_forced_open", oaicompat_chat_syntax.thinking_forced_open},
178181
{"samplers", samplers},
179182
{"speculative.n_max", speculative.n_max},
180183
{"speculative.n_min", speculative.n_min},
@@ -349,14 +352,14 @@ struct server_task {
349352
{
350353
auto it = data.find("chat_format");
351354
if (it != data.end()) {
352-
params.oaicompat_chat_format = static_cast<common_chat_format>(it->get<int>());
353-
SRV_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str());
355+
params.oaicompat_chat_syntax.format = static_cast<common_chat_format>(it->get<int>());
356+
SRV_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_syntax.format).c_str());
354357
} else {
355-
params.oaicompat_chat_format = defaults.oaicompat_chat_format;
358+
params.oaicompat_chat_syntax.format = defaults.oaicompat_chat_syntax.format;
356359
}
357-
params.oaicompat_reasoning_syntax.format = params_base.reasoning_format;
358-
params.oaicompat_reasoning_syntax.inlined_in_content = params.stream;
359-
params.oaicompat_reasoning_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
360+
params.oaicompat_chat_syntax.reasoning_format = params_base.reasoning_format;
361+
params.oaicompat_chat_syntax.reasoning_in_content = params.stream;
362+
params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
360363
}
361364

362365
{
@@ -632,7 +635,7 @@ struct server_task_result_cmpl_final : server_task_result {
632635
oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
633636
std::string oaicompat_model;
634637
std::string oaicompat_cmpl_id;
635-
common_chat_format oaicompat_chat_format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
638+
common_chat_syntax oaicompat_chat_syntax;
636639
common_chat_msg oaicompat_msg;
637640

638641
virtual int get_index() override {
@@ -2335,9 +2338,8 @@ struct server_context {
23352338
SRV_DBG("Parsing chat message: %s\n", slot.generated_text.c_str());
23362339
auto new_msg = common_chat_parse(
23372340
slot.generated_text,
2338-
slot.params.oaicompat_chat_format,
23392341
/* is_partial= */ true,
2340-
slot.params.oaicompat_reasoning_syntax);
2342+
slot.params.oaicompat_chat_syntax);
23412343
if (!new_msg.empty()) {
23422344
slot.generated_msg = new_msg;
23432345
}
@@ -2347,7 +2349,6 @@ struct server_context {
23472349
// res->previous_content = slot.generated_text.substr(0, slot.generated_text.size() - tkn.text_to_send.size());
23482350
// res->oaicompat_chat_format = slot.params.oaicompat_chat_format;
23492351

2350-
23512352
// populate res.probs_output
23522353
if (slot.params.sampling.n_probs > 0) {
23532354
res->prob_output = tkn; // copy the token probs
@@ -2391,10 +2392,9 @@ struct server_context {
23912392
SRV_DBG("Parsing chat message: %s\n", res->content.c_str());
23922393
res->oaicompat_msg = slot.generated_msg = common_chat_parse(
23932394
res->content,
2394-
slot.params.oaicompat_chat_format,
23952395
/* is_partial= */ slot.stop == STOP_TYPE_LIMIT,
2396-
slot.params.oaicompat_reasoning_syntax);
2397-
res->oaicompat_chat_format = slot.params.oaicompat_chat_format;
2396+
slot.params.oaicompat_chat_syntax);
2397+
res->oaicompat_chat_syntax = slot.params.oaicompat_chat_syntax;
23982398

23992399
// populate res.probs_output
24002400
if (slot.params.sampling.n_probs > 0) {

0 commit comments

Comments
 (0)