@@ -25,103 +25,8 @@ struct test_case {
2525};
2626
2727static void test_regex () {
28- std::vector<test_case> test_cases {
29- test_case {
30- " a" ,
31- {
32- {" a" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 1 }}}},
33- {" b" , {COMMON_REGEX_MATCH_TYPE_NONE, {}}},
34- {" ab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 1 }}}},
35- {" ba" , {COMMON_REGEX_MATCH_TYPE_FULL, {{1 , 2 }}}},
36- }
37- },
38- test_case {
39- " abcd" ,
40- {
41- {" abcd" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 4 }}}},
42- {" abcde" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 4 }}}},
43- {" abc" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 3 }}}},
44- {" ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
45- {" a" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 1 }}}},
46- {" d" , {}},
47- {" bcd" , {}},
48- {" cde" , {}},
49- {" cd" , {}},
50- {" yeah ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{5 , 7 }}}},
51- {" abbie" , {}},
52- {" " , {}},
53- }
54- },
55- test_case {
56- " .*?ab" ,
57- {
58- {" ab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 2 }}}},
59- {" abc" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 2 }}}},
60- {" dab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 3 }}}},
61- {" dabc" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 3 }}}},
62- {" da" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
63- {" d" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 1 }}}},
64- }
65- },
66- test_case {
67- " a.*?b" ,
68- {
69- {" ab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 2 }}}},
70- {" abc" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 2 }}}},
71- {" a b" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 3 }}}},
72- {" a" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 1 }}}},
73- {" argh" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 4 }}}},
74- {" d" , {}},
75- {" b" , {}},
76- }
77- },
78- test_case {
79- " ab(?:cd){2,4}ef" ,
80- {
81- // {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, 0, {}}},
82- {" ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
83- {" abcd" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 4 }}}},
84- {" abcde" , {}},
85- {" abcdef" , {}},
86- {" abcdcd" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 6 }}}},
87- {" abcdcde" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 7 }}}},
88- {" abcdcdef" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 8 }}}},
89- {" abcdcdcdcdef" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 12 }}}},
90- {" abcdcdcdcdcdef" , {}},
91- {" abcde" , {}},
92- {" yea" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{2 , 3 }}}},
93- }
94- },
95- test_case {
96- " a(?:rte| pure )fact" ,
97- {
98- {" a" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 1 }}}},
99- {" art" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 3 }}}},
100- {" artefa" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 6 }}}},
101- {" fact" , {}},
102- {" an arte" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{3 , 7 }}}},
103- {" artefact" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 8 }}}},
104- {" an artefact" , {COMMON_REGEX_MATCH_TYPE_FULL, {{3 , 11 }}}},
105- {" a pure" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 6 }}}},
106- {" a pure fact" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 11 }}}},
107- {" it's a pure fact" , {COMMON_REGEX_MATCH_TYPE_FULL, {{5 , 16 }}}},
108- {" " , {}},
109- {" pure" , {}},
110- {" pure fact" , {}},
111- }
112- },
113- test_case {
114- " abc" ,
115- {
116- {" abcc" , {}},
117- {" ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
118- {" abc" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 3 }}}},
119- {" ab" , {}},
120- }
121- },
122- };
12328
124- for (const auto & test_case : test_cases ) {
29+ auto test = [] (const test_case & test_case) {
12530 common_regex cr (test_case.pattern );
12631 std::cout << " Testing pattern: /" << test_case.pattern << " /\n " ;
12732 // std::cout << " partial rev: " << cr.reversed_partial_pattern.str() << '\n';
@@ -134,6 +39,7 @@ static void test_regex() {
13439 if (m->type == COMMON_REGEX_MATCH_TYPE_NONE) {
13540 ss << " <no match>" ;
13641 } else {
42+ GGML_ASSERT (!input_output.output .groups .empty ());
13743 ss << " begin = " << input_output.output .groups [0 ].begin << " , end =" << input_output.output .groups [0 ].end << " , type = " << (m->type == COMMON_REGEX_MATCH_TYPE_PARTIAL ? " partial" : m->type == COMMON_REGEX_MATCH_TYPE_FULL ? " full" : " none" ) << " , groups.length = " << m->groups .size ();
13844 }
13945 return ss.str ();
@@ -145,7 +51,104 @@ static void test_regex() {
14551 throw std::runtime_error (" Test failed" );
14652 }
14753 }
148- }
54+ };
55+ test ({
56+ " a" ,
57+ {
58+ {" a" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 1 }}}},
59+ {" b" , {COMMON_REGEX_MATCH_TYPE_NONE, {}}},
60+ {" ab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 1 }}}},
61+ {" ba" , {COMMON_REGEX_MATCH_TYPE_FULL, {{1 , 2 }}}},
62+ }
63+ });
64+ test ({
65+ " abcd" ,
66+ {
67+ {" abcd" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 4 }}}},
68+ {" abcde" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 4 }}}},
69+ {" abc" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 3 }}}},
70+ {" ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
71+ {" a" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 1 }}}},
72+ {" d" , {}},
73+ {" bcd" , {}},
74+ {" cde" , {}},
75+ {" cd" , {}},
76+ {" yeah ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{5 , 7 }}}},
77+ {" abbie" , {}},
78+ {" " , {}},
79+ }
80+ });
81+ test ({
82+ " .*?ab" ,
83+ {
84+ {" ab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 2 }}}},
85+ {" abc" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 2 }}}},
86+ {" dab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 3 }}}},
87+ {" dabc" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 3 }}}},
88+ {" da" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
89+ {" d" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 1 }}}},
90+ }
91+ });
92+ test ({
93+ " a.*?b" ,
94+ {
95+ {" ab" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 2 }}}},
96+ {" abc" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 2 }}}},
97+ {" a b" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 3 }}}},
98+ {" a" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 1 }}}},
99+ {" argh" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 4 }}}},
100+ {" d" , {}},
101+ {" b" , {}},
102+ }
103+ });
104+ test ({
105+ " ab(?:cd){2,4}ef" ,
106+ {
107+ // {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, 0, {}}},
108+ {" ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
109+ {" abcd" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 4 }}}},
110+ {" abcde" , {}},
111+ {" abcdef" , {}},
112+ {" abcdcd" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 6 }}}},
113+ {" abcdcde" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 7 }}}},
114+ {" abcdcdef" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 8 }}}},
115+ {" abcdcdcdcdef" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 12 }}}},
116+ {" abcdcdcdcdcdef" , {}},
117+ {" abcde" , {}},
118+ {" yea" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{2 , 3 }}}},
119+ }
120+ });
121+ test ({
122+ " a(?:rte| pure )fact" ,
123+ {
124+ {" a" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 1 }}}},
125+ {" art" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 3 }}}},
126+ {" artefa" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 6 }}}},
127+ {" fact" , {}},
128+ {" an arte" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{3 , 7 }}}},
129+ {" artefact" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 8 }}}},
130+ {" an artefact" , {COMMON_REGEX_MATCH_TYPE_FULL, {{3 , 11 }}}},
131+ {" a pure" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 6 }}}},
132+ {" a pure fact" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 11 }}}},
133+ {" it's a pure fact" , {COMMON_REGEX_MATCH_TYPE_FULL, {{5 , 16 }}}},
134+ {" " , {}},
135+ {" pure" , {}},
136+ {" pure fact" , {}},
137+ }
138+ });
139+ test ({
140+ " abc" ,
141+ {
142+ {" abcc" , {COMMON_REGEX_MATCH_TYPE_FULL, {{1 , 4 }}}},
143+ {" ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 2 }}}},
144+ {" abc" , {COMMON_REGEX_MATCH_TYPE_FULL, {{0 , 3 }}}},
145+ {" ab" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{1 , 3 }}}},
146+ {" a" , {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0 , 1 }}}},
147+ {" b" , {}},
148+ {" c" , {}},
149+ {" " , {}},
150+ }
151+ });
149152}
150153
151154static void test_regex_to_reversed_partial_regex () {
0 commit comments