Skip to content

Commit 1caacd5

Browse files
author
ochafik
committed
Remove the any_spaces grammar option; allow one extra newline in the JSON space rule to accommodate airy Llama JSON outputs
1 parent fe6968f commit 1caacd5

File tree

3 files changed

+13
-20
lines changed

3 files changed

+13
-20
lines changed

common/chat.cpp

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -449,11 +449,6 @@ std::string common_chat_format_name(common_chat_format format) {
449449
}
450450
}
451451

452-
const common_grammar_options grammar_options {
453-
/* .dotall = */ false,
454-
/* .any_spaces = */ true,
455-
};
456-
457452
static std::optional<json> parse_json(std::string::const_iterator & it, const std::string::const_iterator & end) {
458453
// // https://json.nlohmann.me/features/parsing/sax_interface/
459454
struct json_error_locator : public nlohmann::json_sax<json> {
@@ -732,7 +727,7 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
732727
data.grammar_lazy = false;
733728
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
734729
builder.add_schema("root", schema);
735-
}, grammar_options);
730+
});
736731

737732
auto tweaked_messages = common_chat_template::add_system(
738733
inputs.messages,
@@ -802,7 +797,7 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
802797
schema["maxItems"] = 1;
803798
}
804799
builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
805-
}, grammar_options);
800+
});
806801
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
807802
data.preserved_tokens = {
808803
"[TOOL_CALLS]",
@@ -848,7 +843,7 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
848843
schema["maxItems"] = 1;
849844
}
850845
builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
851-
}, grammar_options);
846+
});
852847
data.grammar_triggers.push_back({
853848
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
854849
"<|START_ACTION|>",
@@ -1000,7 +995,7 @@ static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const com
1000995
}
1001996
// Allow a few empty lines on top of the usual constrained json schema space rule.
1002997
builder.add_rule("root", string_join(tool_rules, " | "));
1003-
}, grammar_options);
998+
});
1004999
data.additional_stops.push_back("<|eom_id|>");
10051000
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
10061001
{"tools_in_user_message", false},
@@ -1081,7 +1076,7 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
10811076
"<|tool▁call▁end|>",
10821077
"<|tool▁calls▁end|",
10831078
};
1084-
}, grammar_options);
1079+
});
10851080
}
10861081
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
10871082

@@ -1170,7 +1165,7 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
11701165
schema["maxItems"] = 1;
11711166
}
11721167
builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
1173-
}, grammar_options);
1168+
});
11741169
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["});
11751170
data.preserved_tokens = {
11761171
" functools[",
@@ -1232,7 +1227,7 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
12321227
builder.add_rule("root", first_rule);
12331228
}
12341229

1235-
}, grammar_options);
1230+
});
12361231
}
12371232
return data;
12381233
}
@@ -1319,7 +1314,7 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
13191314
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
13201315
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
13211316
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
1322-
}, grammar_options);
1317+
});
13231318

13241319
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
13251320
// TODO: if (has_raw_python)
@@ -1427,7 +1422,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
14271422
"```json",
14281423
"```xml",
14291424
};
1430-
}, grammar_options);
1425+
});
14311426

14321427
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
14331428
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;

common/json-schema-to-grammar.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
264264
throw std::runtime_error("At least one of min_value or max_value must be set");
265265
}
266266

267-
const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";
267+
const std::string SPACE_RULE = "| \" \" | \"\\n\"{1,2} [ \\t]{0,20}";
268268

269269
struct BuiltinRule {
270270
std::string content;
@@ -764,11 +764,10 @@ class SchemaConverter {
764764
public:
765765
SchemaConverter(
766766
const std::function<json(const std::string &)> & fetch_json,
767-
bool dotall,
768-
bool any_spaces)
767+
bool dotall)
769768
: _fetch_json(fetch_json), _dotall(dotall)
770769
{
771-
_rules["space"] = any_spaces ? "[ \\t\\n]*" : SPACE_RULE;
770+
_rules["space"] = SPACE_RULE;
772771
}
773772

774773
void resolve_refs(json & schema, const std::string & url) {
@@ -1007,7 +1006,7 @@ std::string json_schema_to_grammar(const json & schema, bool force_gbnf) {
10071006
}
10081007

10091008
std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options) {
1010-
SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall, options.any_spaces);
1009+
SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall);
10111010
common_grammar_builder builder {
10121011
/* .add_rule = */ [&](const std::string & name, const std::string & rule) {
10131012
return converter._add_rule(name, rule);

common/json-schema-to-grammar.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ struct common_grammar_builder {
1616

1717
struct common_grammar_options {
1818
bool dotall = false;
19-
bool any_spaces = false;
2019
};
2120

2221
std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options = {});

0 commit comments

Comments
 (0)