77#include " log.h"
88#include " nlohmann/json.hpp"
99
10+ #include < algorithm>
1011#include < stdexcept>
1112#include < string>
1213
1314using json = nlohmann::ordered_json;
1415
16+ namespace {
17+
18+ // Gemma4-specific PEG builder extending the standard chat builder.
19+ // Adds value type parsers that use <|\"|> as string delimiters
20+ // instead of JSON's double quotes, and disables json-to-schema
21+ // conversion for these types.
22+ class common_peg_gemma4_builder {
23+ common_chat_peg_builder & p_;
24+ static constexpr const char * QUOTE = " <|\" |>" ;
25+
26+ public:
27+ explicit common_peg_gemma4_builder (common_chat_peg_builder & p) : p_(p) {}
28+
29+ common_peg_parser gemma4_string () {
30+ return p_.rule (" gemma4-string" , [&]() {
31+ return p_.literal (QUOTE) + p_.until (QUOTE) + p_.literal (QUOTE);
32+ });
33+ }
34+
35+ common_peg_parser gemma4_number () {
36+ return p_.rule (" gemma4-number" , [&]() {
37+ auto digit1_9 = p_.chars (" [1-9]" , 1 , 1 );
38+ auto digits = p_.chars (" [0-9]" );
39+ auto int_part = p_.choice ({p_.literal (" 0" ), p_.sequence ({digit1_9, p_.chars (" [0-9]" , 0 , -1 )})});
40+ auto frac = p_.sequence ({p_.literal (" ." ), digits});
41+ auto exp = p_.sequence ({p_.choice ({p_.literal (" e" ), p_.literal (" E" )}),
42+ p_.optional (p_.chars (" [+-]" , 1 , 1 )), digits});
43+ auto not_number_continuation = p_.negate (p_.chars (" [0-9.eE+-]" , 1 , 1 ));
44+ return p_.sequence ({p_.optional (p_.literal (" -" )), int_part, p_.optional (frac),
45+ p_.optional (exp), not_number_continuation});
46+ });
47+ }
48+
49+ common_peg_parser gemma4_bool () {
50+ return p_.rule (" gemma4-bool" , [&]() {
51+ return p_.choice ({p_.literal (" true" ), p_.literal (" false" )});
52+ });
53+ }
54+
55+ common_peg_parser gemma4_null () {
56+ return p_.rule (" gemma4-null" , [&]() {
57+ return p_.literal (" null" );
58+ });
59+ }
60+
61+ common_peg_parser gemma4_dict () {
62+ return p_.rule (" gemma4-dict" , [&]() {
63+ auto ws = p_.space ();
64+ auto key = p_.until (" :" );
65+ auto member = p_.sequence ({key, p_.literal (" :" ), ws, gemma4_value ()});
66+ auto members = p_.sequence ({member, p_.zero_or_more (p_.sequence ({p_.literal (" ," ), ws, member}))});
67+ return p_.sequence ({
68+ p_.literal (" {" ), ws,
69+ p_.choice ({p_.literal (" }" ), p_.sequence ({members, ws, p_.literal (" }" )})})
70+ });
71+ });
72+ }
73+
74+ common_peg_parser gemma4_array () {
75+ return p_.rule (" gemma4-array" , [&]() {
76+ auto ws = p_.space ();
77+ auto elements = p_.sequence ({gemma4_value (), p_.zero_or_more (p_.sequence ({p_.literal (" ," ), ws, gemma4_value ()}))});
78+ return p_.sequence ({
79+ p_.literal (" [" ), ws,
80+ p_.choice ({p_.literal (" ]" ), p_.sequence ({elements, ws, p_.literal (" ]" )})})
81+ });
82+ });
83+ }
84+
85+ common_peg_parser gemma4_value () {
86+ return p_.rule (" gemma4-value" , [&]() {
87+ return p_.choice ({gemma4_string (), gemma4_dict (), gemma4_array (),
88+ gemma4_number (), gemma4_bool (), gemma4_null ()});
89+ });
90+ }
91+
92+ // Select the appropriate value parser based on JSON schema type.
93+ // Does NOT use schema() - the gemma4 types are pure PEG without
94+ // JSON schema metadata, so GBNF is generated directly from the
95+ // PEG structure.
96+ common_peg_parser gemma4_value_for_type (const json & schema) {
97+ if (!schema.contains (" type" ) || !schema.at (" type" ).is_string ()) {
98+ return gemma4_value ();
99+ }
100+ std::string type = schema.at (" type" ).get <std::string>();
101+ if (type == " string" ) { return gemma4_string (); }
102+ if (type == " number" ) { return gemma4_number (); }
103+ if (type == " integer" ) { return gemma4_number (); }
104+ if (type == " boolean" ) { return gemma4_bool (); }
105+ if (type == " object" ) { return gemma4_dict (); }
106+ if (type == " array" ) { return gemma4_array (); }
107+ return gemma4_value ();
108+ }
109+ };
110+
111+ } // anonymous namespace
112+
15113// Helper to iterate over tools/functions
16114static void foreach_function (const json & tools, const std::function<void (const json &)> & fn) {
17115 for (const auto & tool : tools) {
@@ -43,7 +141,9 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
43141 // Create the result structure
44142 common_chat_params data;
45143 data.prompt = common_chat_template_direct_apply (tmpl, inputs);
46- data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
144+ data.format = (autoparser.tools .format .mode == tool_format::TAG_WITH_GEMMA4_DICT)
145+ ? COMMON_CHAT_FORMAT_PEG_GEMMA4
146+ : COMMON_CHAT_FORMAT_PEG_NATIVE;
47147 data.preserved_tokens = autoparser.preserved_tokens ;
48148
49149 auto parser = autoparser.build_parser (inputs);
@@ -92,6 +192,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs) cons
92192
93193 ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
94194 ctx.content = &content;
195+ ctx.reasoning = &reasoning;
95196
96197 // Build reasoning parser
97198 ctx.reasoning_parser = reasoning.build_parser (ctx);
@@ -440,7 +541,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
440541 const auto & inputs = ctx.inputs ;
441542 bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
442543
443- // The Gemma4 string quote token used in place of JSON "
544+ common_peg_gemma4_builder g4 (p);
444545 static const std::string QUOTE = " <|\" |>" ;
445546
446547 common_peg_parser tool_choice = p.choice ();
@@ -451,7 +552,6 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
451552 const auto & params = func.at (" parameters" );
452553
453554 if (!params.contains (" properties" ) || !params.at (" properties" ).is_object ()) {
454- // No arguments - just match the function name with empty braces
455555 auto func_parser = p.atomic (
456556 p.tool_open (p.literal (function.name_prefix ) + p.tool_name (p.literal (name)) + p.literal (" {" )) +
457557 p.tool_args (p.eps ()) +
@@ -486,9 +586,18 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
486586 p.tool_arg_string_value (p.schema (p.until (QUOTE),
487587 " tool-" + name + " -arg-" + param_name + " -schema" , param_schema, true )) +
488588 p.literal (QUOTE);
589+ } else if (type == " number" || type == " integer" ) {
590+ value_parser = p.tool_arg_value (g4.gemma4_number ());
591+ } else if (type == " boolean" ) {
592+ value_parser = p.tool_arg_value (g4.gemma4_bool ());
593+ } else if (type == " null" ) {
594+ value_parser = p.tool_arg_value (g4.gemma4_null ());
595+ } else if (type == " object" ) {
596+ value_parser = p.tool_arg_value (g4.gemma4_dict ());
597+ } else if (type == " array" ) {
598+ value_parser = p.tool_arg_value (g4.gemma4_array ());
489599 } else {
490- // Numbers, booleans: raw text up to the next comma or closing brace
491- value_parser = p.tool_arg_value (p.until_one_of ({" ," , " }" }));
600+ value_parser = p.tool_arg_value (g4.gemma4_value ());
492601 }
493602
494603 auto arg = p.tool_arg (
@@ -538,9 +647,9 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
538647 tool_calls = p.optional (tool_calls);
539648 }
540649
541- auto content_before_tools = p.until ( format.per_call_start );
650+ auto content_before_tools = p.until_one_of ({ format.per_call_start , ctx. reasoning -> start } );
542651 return ctx.reasoning_parser +
543- (force_tools ? p.eps () : p.optional (p.content (content_before_tools))) +
652+ (force_tools ? p.eps () : p.optional (p.content (content_before_tools) + p. optional (ctx. reasoning_parser ) )) +
544653 tool_calls + p.end ();
545654}
546655
0 commit comments