Skip to content

Commit 21cd34c

Browse files
author
ochafik
committed
fix regex-partial (drop reluctant repetitions conversions)
1 parent e63e542 commit 21cd34c

File tree

2 files changed

+80
-11
lines changed

2 files changed

+80
-11
lines changed

common/regex-partial.cpp

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -104,13 +104,6 @@ std::string regex_to_reversed_partial_regex(const std::string &pattern) {
104104
if (is_star) {
105105
if (*it == '?') {
106106
++it;
107-
// Convert initial reluctant quantifier to greedy to match as early as possible
108-
if (sequence->size() > 1) {
109-
sequence->back() += '?';
110-
}
111-
} else {
112-
// Convert greedy quantifiers to reluctant to not miss any matches
113-
sequence->back() += '?';
114107
}
115108
}
116109
} else if (*it == '{') {

tests/test-regex-partial.cpp

Lines changed: 80 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
// Tests common_regex (esp. its partial final matches support).
22

3+
#include "common.h"
34
#include "regex-partial.h"
45

56
#include <sstream>
@@ -24,6 +25,18 @@ struct test_case {
2425
std::vector<input_output> inputs_outputs;
2526
};
2627

28+
static std::string common_regex_match_type_name(common_regex_match_type type) { // Returns the enumerator's identifier as text; used below when formatting a match for test output.
29+
switch (type) {
30+
case COMMON_REGEX_MATCH_TYPE_NONE:
31+
return "COMMON_REGEX_MATCH_TYPE_NONE";
32+
case COMMON_REGEX_MATCH_TYPE_PARTIAL:
33+
return "COMMON_REGEX_MATCH_TYPE_PARTIAL";
34+
case COMMON_REGEX_MATCH_TYPE_FULL:
35+
return "COMMON_REGEX_MATCH_TYPE_FULL";
36+
}
37+
return "?"; // Fallback reached only when `type` matches none of the three cases above.
38+
}
39+
2740
static void test_regex() {
2841
printf("[%s]\n", __func__);
2942
auto test = [](const test_case & test_case) {
@@ -40,7 +53,11 @@ static void test_regex() {
4053
ss << "<no match>";
4154
} else {
4255
GGML_ASSERT(!input_output.output.groups.empty());
43-
ss << "begin = " << input_output.output.groups[0].begin << ", end =" << input_output.output.groups[0].end << ", type = " << (m->type == COMMON_REGEX_MATCH_TYPE_PARTIAL ? "partial" : m->type == COMMON_REGEX_MATCH_TYPE_FULL ? "full" : "none") << ", groups.length = " << m->groups.size();
56+
std::vector<std::string> parts;
57+
for (const auto & g : m->groups) {
58+
parts.push_back("{" + std::to_string(g.begin) + ", " + std::to_string(g.end) + "}");
59+
}
60+
ss << "{" << common_regex_match_type_name(m->type) << ", {" << string_join(parts, ", ") << "}}";
4461
}
4562
return ss.str();
4663
};
@@ -149,6 +166,65 @@ static void test_regex() {
149166
{"", {}},
150167
}
151168
});
169+
170+
test({
171+
"(?:abc)?\\s*def",
172+
{
173+
{"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
174+
{"abc", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
175+
{"abc ", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
176+
{"abc d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 5}}}},
177+
{"abc de", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
178+
{"abc def", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
179+
{"abc defg", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
180+
{"abc defgh", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
181+
{"abcde", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 5}}}},
182+
{"abcdefgh", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 6}}}},
183+
{" d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
184+
{"def", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
185+
}
186+
});
187+
188+
test({
189+
"a+b",
190+
{
191+
{"aaab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
192+
{"aaa", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
193+
{"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
194+
}
195+
});
196+
197+
test({
198+
"(?:"
199+
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
200+
"(" // match 2 (open_tag)
201+
"<tool_call>"
202+
"|<function_call>"
203+
"|<tool>"
204+
"|<tools>"
205+
"|<response>"
206+
"|<json>"
207+
"|<xml>"
208+
"|<JSON>"
209+
")?"
210+
"(\\s*\\{\\s*\"name\"\\s*:)" // match 3 (named tool call)
211+
")"
212+
"|<function=([^>]+)>" // match 4 (function name)
213+
"|<function name=\"([^\"]+)\">", // match 5 (function name again)
214+
{
215+
{"{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}, {54, 54}, {54, 54}, {0, 8}, {54, 54}, {54, 54}}}},
216+
{"<tool_call> {\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 18}}}},
217+
{"<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 17}}}},
218+
{"Ok then<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 24}}}},
219+
{"{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
220+
{"Ok then{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 13}}}},
221+
{"<tool_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 20}, {66, 66}, {0, 11}, {11, 20}, {66, 66}, {66, 66}}}},
222+
{"<function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 24}, {70, 70}, {0, 15}, {15, 24}, {70, 70}, {70, 70}}}},
223+
{"<function name=\"special_function\"> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 34}, {89, 89}, {89, 89}, {89, 89}, {89, 89}, {16, 32}}}},
224+
{"<function=all>", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 14}, {14, 14}, {14, 14}, {14, 14}, {10, 13}, {14, 14}}}},
225+
226+
}
227+
});
152228
}
153229

154230
static void test_regex_to_reversed_partial_regex() {
@@ -158,7 +234,7 @@ static void test_regex_to_reversed_partial_regex() {
158234
regex_to_reversed_partial_regex("a+"));
159235

160236
assert_equals<std::string>(
161-
"(a*?).*",
237+
"(a*).*",
162238
regex_to_reversed_partial_regex("a*"));
163239

164240
assert_equals<std::string>(
@@ -180,13 +256,13 @@ static void test_regex_to_reversed_partial_regex() {
180256
"((?:(?:(?:d)?c)?b)?a).*",
181257
regex_to_reversed_partial_regex("abcd"));
182258
assert_equals<std::string>(
183-
"((?:b)?a*?).*", // TODO: ((?:b)?a*+).* ??
259+
"((?:b)?a*).*", // TODO: ((?:b)?a*+).* ??
184260
regex_to_reversed_partial_regex("a*b"));
185261
assert_equals<std::string>(
186262
"((?:(?:b)?a)?.*).*",
187263
regex_to_reversed_partial_regex(".*?ab"));
188264
assert_equals<std::string>(
189-
"((?:(?:b)?.*?)?a).*",
265+
"((?:(?:b)?.*)?a).*",
190266
regex_to_reversed_partial_regex("a.*?b"));
191267
assert_equals<std::string>(
192268
"((?:(?:d)?(?:(?:c)?b))?a).*",

0 commit comments

Comments
 (0)