11// Tests common_regex (esp. its partial final matches support).
22
3+ #include " common.h"
34#include " regex-partial.h"
45
56#include < sstream>
@@ -24,6 +25,18 @@ struct test_case {
2425 std::vector<input_output> inputs_outputs;
2526};
2627
28+ static std::string common_regex_match_type_name (common_regex_match_type type) {
29+ switch (type) {
30+ case COMMON_REGEX_MATCH_TYPE_NONE:
31+ return " COMMON_REGEX_MATCH_TYPE_NONE" ;
32+ case COMMON_REGEX_MATCH_TYPE_PARTIAL:
33+ return " COMMON_REGEX_MATCH_TYPE_PARTIAL" ;
34+ case COMMON_REGEX_MATCH_TYPE_FULL:
35+ return " COMMON_REGEX_MATCH_TYPE_FULL" ;
36+ }
37+ return " ?" ;
38+ }
39+
2740static void test_regex () {
2841 printf (" [%s]\n " , __func__);
2942 auto test = [](const test_case & test_case) {
@@ -40,7 +53,11 @@ static void test_regex() {
4053 ss << " <no match>" ;
4154 } else {
4255 GGML_ASSERT (!input_output.output .groups .empty ());
43- ss << " begin = " << input_output.output .groups [0 ].begin << " , end =" << input_output.output .groups [0 ].end << " , type = " << (m->type == COMMON_REGEX_MATCH_TYPE_PARTIAL ? " partial" : m->type == COMMON_REGEX_MATCH_TYPE_FULL ? " full" : " none" ) << " , groups.length = " << m->groups .size ();
56+ std::vector<std::string> parts;
57+ for (const auto & g : m->groups ) {
58+ parts.push_back (" {" + std::to_string (g.begin ) + " , " + std::to_string (g.end ) + " }" );
59+ }
60+ ss << " {" << common_regex_match_type_name (m->type ) << " , {" << string_join (parts, " , " ) << " }}" ;
4461 }
4562 return ss.str ();
4663 };
@@ -149,6 +166,65 @@ static void test_regex() {
149166 {" " , {}},
150167 }
151168 });
169+
170+ test ({
171+ " (?:abc)?\\ s*def" ,
172+ {
173+ {" ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
174+ {" abc" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 3 }}}},
175+ {" abc " , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 4 }}}},
176+ {" abc d" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 5 }}}},
177+ {" abc de" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 6 }}}},
178+ {" abc def" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 7 }}}},
179+ {" abc defg" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 7 }}}},
180+ {" abc defgh" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 7 }}}},
181+ {" abcde" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 5 }}}},
182+ {" abcdefgh" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 6 }}}},
183+ {" d" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
184+ {" def" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 3 }}}},
185+ }
186+ });
187+
188+ test ({
189+ " a+b" ,
190+ {
191+ {" aaab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 4 }}}},
192+ {" aaa" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 3 }}}},
193+ {" ab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 2 }}}},
194+ }
195+ });
196+
197+ test ({
198+ " (?:"
199+ " (```(?:xml|json)?\\ n\\ s*)?" // match 1 (block_start)
200+ " (" // match 2 (open_tag)
201+ " <tool_call>"
202+ " |<function_call>"
203+ " |<tool>"
204+ " |<tools>"
205+ " |<response>"
206+ " |<json>"
207+ " |<xml>"
208+ " |<JSON>"
209+ " )?"
210+ " (\\ s*\\ {\\ s*\" name\"\\ s*:)" // match 3 (named tool call)
211+ " )"
212+ " |<function=([^>]+)>" // match 4 (function name)
213+ " |<function name=\" ([^\" ]+)\" >" , // match 5 (function name again)
214+ {
215+ {" {\" name\" : \" special_function\" , \" arguments\" : {\" arg1\" : 1}}" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 8 }, {54 , 54 }, {54 , 54 }, {0 , 8 }, {54 , 54 }, {54 , 54 }}}},
216+ {" <tool_call> {\" name" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 18 }}}},
217+ {" <tool_call>{\" name" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 17 }}}},
218+ {" Ok then<tool_call>{\" name" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7 , 24 }}}},
219+ {" {\" name" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 6 }}}},
220+ {" Ok then{\" name" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7 , 13 }}}},
221+ {" <tool_call> {\" name\" : \" special_function\" , \" arguments\" : {\" arg1\" : 1}}" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 20 }, {66 , 66 }, {0 , 11 }, {11 , 20 }, {66 , 66 }, {66 , 66 }}}},
222+ {" <function_call> {\" name\" : \" special_function\" , \" arguments\" : {\" arg1\" : 1}}" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 24 }, {70 , 70 }, {0 , 15 }, {15 , 24 }, {70 , 70 }, {70 , 70 }}}},
223+ {" <function name=\" special_function\" > {\" name\" : \" special_function\" , \" arguments\" : {\" arg1\" : 1}}" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 34 }, {89 , 89 }, {89 , 89 }, {89 , 89 }, {89 , 89 }, {16 , 32 }}}},
224+ {" <function=all>" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 14 }, {14 , 14 }, {14 , 14 }, {14 , 14 }, {10 , 13 }, {14 , 14 }}}},
225+
226+ }
227+ });
152228}
153229
154230static void test_regex_to_reversed_partial_regex () {
@@ -158,7 +234,7 @@ static void test_regex_to_reversed_partial_regex() {
158234 regex_to_reversed_partial_regex (" a+" ));
159235
160236 assert_equals<std::string>(
161- " (a*? ).*" ,
237+ " (a*).*" ,
162238 regex_to_reversed_partial_regex (" a*" ));
163239
164240 assert_equals<std::string>(
@@ -180,13 +256,13 @@ static void test_regex_to_reversed_partial_regex() {
180256 " ((?:(?:(?:d)?c)?b)?a).*" ,
181257 regex_to_reversed_partial_regex (" abcd" ));
182258 assert_equals<std::string>(
183- " ((?:b)?a*? ).*" , // TODO: ((?:b)?a*+).* ??
259+ " ((?:b)?a*).*" , // TODO: ((?:b)?a*+).* ??
184260 regex_to_reversed_partial_regex (" a*b" ));
185261 assert_equals<std::string>(
186262 " ((?:(?:b)?a)?.*).*" ,
187263 regex_to_reversed_partial_regex (" .*?ab" ));
188264 assert_equals<std::string>(
189- " ((?:(?:b)?.*? )?a).*" ,
265+ " ((?:(?:b)?.*)?a).*" ,
190266 regex_to_reversed_partial_regex (" a.*?b" ));
191267 assert_equals<std::string>(
192268 " ((?:(?:d)?(?:(?:c)?b))?a).*" ,
0 commit comments