Skip to content

Commit 5f0450d

Browse files
author
ochafik
committed
partial regex: allow newlines in prefixes
1 parent 21cd34c commit 5f0450d

File tree

2 files changed

+15
-15
lines changed

2 files changed

+15
-15
lines changed

common/regex-partial.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ common_regex_match common_regex::search(const std::string & input, size_t pos, b
6060
- /a*b/ -> ((?:b)?a*+).* (final repetitions become eager)
6161
- /.*?ab/ -> ((?:b)?a).* (merge .*)
6262
- /a.*?b/ -> ((?:b)?.*?a).* (keep reluctant matches)
63-
- /a.*b/ -> ((?:b)?.*?a).* (in fact any repetition becomes a reluctant match!)
6463
- /a(bc)d/ -> ((?:(?:d)?(?:(?:c)?b))?a).*
6564
- /a(bc|de)/ -> ((?:(?:(?:e)?d)?|(?:(?:c)?b)?)?a).*
6665
- /ab{2,4}c/ -> abbb?b?c -> ((?:(?:(?:(?:(?:c)?b)?b)?b?)?b?)?a).*
@@ -200,5 +199,5 @@ std::string regex_to_reversed_partial_regex(const std::string &pattern) {
200199
throw std::runtime_error("Unmatched '(' in pattern");
201200
}
202201

203-
return "(" + res + ").*";
202+
return "(" + res + ")[\\s\\S]*";
204203
}

tests/test-regex-partial.cpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ static void test_regex() {
215215
{"{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}, {54, 54}, {54, 54}, {0, 8}, {54, 54}, {54, 54}}}},
216216
{"<tool_call> {\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 18}}}},
217217
{"<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 17}}}},
218+
{"Let's call something\n<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{21, 38}}}},
218219
{"Ok then<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 24}}}},
219220
{"{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
220221
{"Ok then{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 13}}}},
@@ -230,48 +231,48 @@ static void test_regex() {
230231
static void test_regex_to_reversed_partial_regex() {
231232
printf("[%s]\n", __func__);
232233
assert_equals<std::string>(
233-
"(a+).*",
234+
"(a+)[\\s\\S]*",
234235
regex_to_reversed_partial_regex("a+"));
235236

236237
assert_equals<std::string>(
237-
"(a*).*",
238+
"(a*)[\\s\\S]*",
238239
regex_to_reversed_partial_regex("a*"));
239240

240241
assert_equals<std::string>(
241-
"(a?).*",
242+
"(a?)[\\s\\S]*",
242243
regex_to_reversed_partial_regex("a?"));
243244

244245
assert_equals<std::string>(
245-
"([a-z]).*",
246+
"([a-z])[\\s\\S]*",
246247
regex_to_reversed_partial_regex("[a-z]"));
247248

248249
assert_equals<std::string>(
249-
"((?:\\w+)?[a-z]).*",
250+
"((?:\\w+)?[a-z])[\\s\\S]*",
250251
regex_to_reversed_partial_regex("[a-z]\\w+"));
251252

252253
assert_equals<std::string>(
253-
"((?:a|b)).*",
254+
"((?:a|b))[\\s\\S]*",
254255
regex_to_reversed_partial_regex("(?:a|b)"));
255256
assert_equals<std::string>(
256-
"((?:(?:(?:d)?c)?b)?a).*",
257+
"((?:(?:(?:d)?c)?b)?a)[\\s\\S]*",
257258
regex_to_reversed_partial_regex("abcd"));
258259
assert_equals<std::string>(
259-
"((?:b)?a*).*", // TODO: ((?:b)?a*+).* ??
260+
"((?:b)?a*)[\\s\\S]*", // TODO: ((?:b)?a*+)[\\s\\S]* ??
260261
regex_to_reversed_partial_regex("a*b"));
261262
assert_equals<std::string>(
262-
"((?:(?:b)?a)?.*).*",
263+
"((?:(?:b)?a)?.*)[\\s\\S]*",
263264
regex_to_reversed_partial_regex(".*?ab"));
264265
assert_equals<std::string>(
265-
"((?:(?:b)?.*)?a).*",
266+
"((?:(?:b)?.*)?a)[\\s\\S]*",
266267
regex_to_reversed_partial_regex("a.*?b"));
267268
assert_equals<std::string>(
268-
"((?:(?:d)?(?:(?:c)?b))?a).*",
269+
"((?:(?:d)?(?:(?:c)?b))?a)[\\s\\S]*",
269270
regex_to_reversed_partial_regex("a(bc)d"));
270271
assert_equals<std::string>(
271-
"((?:(?:(?:c)?b|(?:e)?d))?a).*",
272+
"((?:(?:(?:c)?b|(?:e)?d))?a)[\\s\\S]*",
272273
regex_to_reversed_partial_regex("a(bc|de)"));
273274
assert_equals<std::string>(
274-
"((?:(?:(?:(?:(?:c)?b?)?b?)?b)?b)?a).*",
275+
"((?:(?:(?:(?:(?:c)?b?)?b?)?b)?b)?a)[\\s\\S]*",
275276
regex_to_reversed_partial_regex("ab{2,4}c"));
276277
}
277278

0 commit comments

Comments
 (0)