@@ -73,14 +73,88 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was
73
73
*/
74
74
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type
{
    $groupList = $this->parseGroups($regex);
    if ($groupList === null) {
        // regex could not be parsed by Hoa/Regex
        return null;
    }

    // Count the optional groups at the tail of the list, stopping at the
    // first non-optional one. This is computed before the optional
    // qualification of any group is removed further down.
    $trailingOptionals = 0;
    foreach (array_reverse($groupList) as $tailGroup) {
        if ($tailGroup->isOptional()) {
            $trailingOptionals++;
            continue;
        }

        break;
    }

    $valueType = $this->getValueType($flags ?? 0);
    $soleOptionalGroup = $this->getOnlyOptionalTopLevelGroup($groupList);

    // Common path: a single array shape built from all groups.
    if (!$wasMatched->yes() || $soleOptionalGroup === null) {
        return $this->buildArrayType(
            $groupList,
            $valueType,
            $wasMatched,
            $trailingOptionals,
        );
    }

    // The pattern definitely matched and exactly one top-level capturing
    // group exists, which is optional: build a more precise constant union
    // of an empty-match and a match that includes the group.
    $soleOptionalGroup->removeOptionalQualification();

    $matchWithGroupType = $this->buildArrayType(
        $groupList,
        $valueType,
        $wasMatched,
        $trailingOptionals,
    );

    // Shape of a match where the group did not take part: only offset 0.
    $emptyMatchType = new ConstantArrayType(
        [new ConstantIntegerType(0)],
        [new StringType()],
    );

    return TypeCombinator::union($emptyMatchType, $matchWithGroupType);
}
121
+
122
/**
 * Finds the single top-level capturing group, provided it is optional.
 *
 * Groups for which isTopLevel() is false are ignored. The result is null
 * when there is no top-level group, when any top-level group is not
 * optional, or when more than one top-level group exists.
 *
 * @param list<RegexCapturingGroup> $captureGroups
 */
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
{
    $candidate = null;

    foreach ($captureGroups as $current) {
        if ($current->isTopLevel()) {
            // a mandatory top-level group, or a second top-level group,
            // rules out the "only optional group" case
            if (!$current->isOptional() || $candidate !== null) {
                return null;
            }

            $candidate = $current;
        }
    }

    return $candidate;
}
146
+
147
+ /**
148
+ * @param list<RegexCapturingGroup> $captureGroups
149
+ */
150
+ private function buildArrayType (
151
+ array $ captureGroups ,
152
+ Type $ valueType ,
153
+ TrinaryLogic $ wasMatched ,
154
+ int $ trailingOptionals ,
155
+ ): Type
156
+ {
157
+ $ builder = ConstantArrayTypeBuilder::createEmpty ();
84
158
85
159
// first item in matches contains the overall match.
86
160
$ builder ->setOffsetValueType (
@@ -89,21 +163,14 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
89
163
!$ wasMatched ->yes (),
90
164
);
91
165
92
- $ trailingOptionals = 0 ;
93
- foreach (array_reverse ($ captureGroups ) as $ captureGroup ) {
94
- if (!$ captureGroup ->isOptional ()) {
95
- break ;
96
- }
97
- $ trailingOptionals ++;
98
- }
99
-
100
- for ($ i = 0 ; $ i < count ($ captureGroups ); $ i ++) {
166
+ $ countGroups = count ($ captureGroups );
167
+ for ($ i = 0 ; $ i < $ countGroups ; $ i ++) {
101
168
$ captureGroup = $ captureGroups [$ i ];
102
169
103
170
if (!$ wasMatched ->yes ()) {
104
171
$ optional = true ;
105
172
} else {
106
- if ($ i < count ( $ captureGroups ) - $ trailingOptionals ) {
173
+ if ($ i < $ countGroups - $ trailingOptionals ) {
107
174
$ optional = false ;
108
175
} else {
109
176
$ optional = $ captureGroup ->isOptional ();
@@ -181,46 +248,84 @@ private function parseGroups(string $regex): ?array
181
248
return null ;
182
249
}
183
250
184
- $ capturings = [];
185
- $ this ->walkRegexAst ($ ast , 0 , 0 , $ capturings );
251
+ $ capturingGroups = [];
252
+ $ this ->walkRegexAst (
253
+ $ ast ,
254
+ false ,
255
+ false ,
256
+ null ,
257
+ $ capturingGroups ,
258
+ );
186
259
187
- return $ capturings ;
260
+ return $ capturingGroups ;
188
261
}
189
262
190
263
/**
 * Recursively walks the regex AST and appends every capturing group it
 * encounters to $capturingGroups, in traversal (pre-order) order.
 *
 * Capturing and non-capturing group nodes become the parent group for
 * everything below them. The optional-quantification flag handed to the
 * children is recomputed at every node: it is true only when the current
 * node is a quantification whose quantifier is `?`, `*` or an `{0,...}`
 * range. The alternation flag, once set, stays set for all descendants.
 *
 * @param list<RegexCapturingGroup> $capturingGroups
 */
private function walkRegexAst(
    TreeNode $ast,
    bool $inAlternation,
    bool $inOptionalQuantification,
    RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup,
    array &$capturingGroups,
): void
{
    $nodeId = $ast->getId();

    // Create the group object (if any) that this node represents.
    $group = match ($nodeId) {
        '#capturing' => RegexCapturingGroup::unnamed(
            $inAlternation,
            $inOptionalQuantification,
            $parentGroup,
        ),
        '#namedcapturing' => RegexCapturingGroup::named(
            $ast->getChild(0)->getValue()['value'],
            $inAlternation,
            $inOptionalQuantification,
            $parentGroup,
        ),
        '#noncapturing' => RegexNonCapturingGroup::create(
            $inOptionalQuantification,
            $parentGroup,
        ),
        default => null,
    };

    if ($group !== null) {
        // descendants are nested inside the group just created
        $parentGroup = $group;
    }

    // only capturing groups are reported back to the caller
    if ($group instanceof RegexCapturingGroup) {
        $capturingGroups[] = $group;
    }

    // The incoming optional flag applied to this node only; children get
    // a fresh value derived from this node's own quantifier, if it has one.
    $childrenOptional = false;
    if ($nodeId === '#quantification') {
        // the quantifier token is the last child of a quantification node
        $quantifier = $ast->getChild($ast->getChildrenNumber() - 1)->getValue();
        $token = $quantifier['token'];

        $childrenOptional = ($token === 'n_to_m' && str_contains($quantifier['value'], '{0,'))
            || $token === 'zero_or_one'
            || $token === 'zero_or_more';
    }

    $childrenInAlternation = $inAlternation || $nodeId === '#alternation';

    foreach ($ast->getChildren() as $childNode) {
        $this->walkRegexAst(
            $childNode,
            $childrenInAlternation,
            $childrenOptional,
            $parentGroup,
            $capturingGroups,
        );
    }
}
226
331
0 commit comments