16
16
use PHPStan \Type \StringType ;
17
17
use PHPStan \Type \Type ;
18
18
use PHPStan \Type \TypeCombinator ;
19
+ use function array_key_exists ;
19
20
use function array_reverse ;
20
21
use function count ;
21
22
use function in_array ;
@@ -65,6 +66,10 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was
65
66
$ matchedTypes [] = $ matched ;
66
67
}
67
68
69
+ if (count ($ matchedTypes ) === 1 ) {
70
+ return $ matchedTypes [0 ];
71
+ }
72
+
68
73
return TypeCombinator::union (...$ matchedTypes );
69
74
}
70
75
@@ -73,11 +78,12 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was
73
78
*/
74
79
private function matchRegex (string $ regex , ?int $ flags , TrinaryLogic $ wasMatched ): ?Type
75
80
{
76
- $ groupList = $ this ->parseGroups ($ regex );
77
- if ($ groupList === null ) {
81
+ $ parseResult = $ this ->parseGroups ($ regex );
82
+ if ($ parseResult === null ) {
78
83
// regex could not be parsed by Hoa/Regex
79
84
return null ;
80
85
}
86
+ [$ groupList , $ groupCombinations ] = $ parseResult ;
81
87
82
88
$ trailingOptionals = 0 ;
83
89
foreach (array_reverse ($ groupList ) as $ captureGroup ) {
@@ -89,14 +95,16 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
89
95
90
96
$ valueType = $ this ->getValueType ($ flags ?? 0 );
91
97
$ onlyOptionalTopLevelGroup = $ this ->getOnlyOptionalTopLevelGroup ($ groupList );
98
+ $ onlyTopLevelAlternationId = $ this ->getOnlyTopLevelAlternationId ($ groupList );
99
+
92
100
if (
93
101
$ wasMatched ->yes ()
94
102
&& $ onlyOptionalTopLevelGroup !== null
95
103
) {
96
104
// if only one top level capturing optional group exists
97
105
// we build a more precise constant union of an empty-match and a match with the group
98
106
99
- $ onlyOptionalTopLevelGroup ->removeOptionalQualification ();
107
+ $ onlyOptionalTopLevelGroup ->forceNonOptional ();
100
108
101
109
$ combiType = $ this ->buildArrayType (
102
110
$ groupList ,
@@ -109,6 +117,49 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
109
117
new ConstantArrayType ([new ConstantIntegerType (0 )], [new StringType ()]),
110
118
$ combiType ,
111
119
);
120
+ } elseif (
121
+ $ wasMatched ->yes ()
122
+ && $ onlyTopLevelAlternationId !== null
123
+ && array_key_exists ($ onlyTopLevelAlternationId , $ groupCombinations )
124
+ ) {
125
+ $ combiTypes = [];
126
+ $ isOptionalAlternation = false ;
127
+ foreach ($ groupCombinations [$ onlyTopLevelAlternationId ] as $ groupCombo ) {
128
+ $ comboList = $ groupList ;
129
+
130
+ $ beforeCurrentCombo = true ;
131
+ foreach ($ comboList as $ groupId => $ group ) {
132
+ if (in_array ($ groupId , $ groupCombo , true )) {
133
+ $ isOptionalAlternation = $ group ->inOptionalAlternation ();
134
+ $ group ->forceNonOptional ();
135
+ $ beforeCurrentCombo = false ;
136
+ } elseif ($ beforeCurrentCombo ) {
137
+ $ group ->forceNonOptional ();
138
+ } elseif ($ group ->getAlternationId () === $ onlyTopLevelAlternationId ) {
139
+ unset($ comboList [$ groupId ]);
140
+ }
141
+ }
142
+
143
+ $ combiType = $ this ->buildArrayType (
144
+ $ comboList ,
145
+ $ valueType ,
146
+ $ wasMatched ,
147
+ $ trailingOptionals ,
148
+ );
149
+
150
+ $ combiTypes [] = $ combiType ;
151
+
152
+ foreach ($ groupCombo as $ groupId ) {
153
+ $ group = $ comboList [$ groupId ];
154
+ $ group ->restoreNonOptional ();
155
+ }
156
+ }
157
+
158
+ if ($ isOptionalAlternation ) {
159
+ $ combiTypes [] = new ConstantArrayType ([new ConstantIntegerType (0 )], [new StringType ()]);
160
+ }
161
+
162
+ return TypeCombinator::union (...$ combiTypes );
112
163
}
113
164
114
165
return $ this ->buildArrayType (
@@ -120,7 +171,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
120
171
}
121
172
122
173
/**
123
- * @param list< RegexCapturingGroup> $captureGroups
174
+ * @param array<int, RegexCapturingGroup> $captureGroups
124
175
*/
125
176
private function getOnlyOptionalTopLevelGroup (array $ captureGroups ): ?RegexCapturingGroup
126
177
{
@@ -145,7 +196,32 @@ private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCaptu
145
196
}
146
197
147
198
/**
148
- * @param list<RegexCapturingGroup> $captureGroups
199
+ * @param array<int, RegexCapturingGroup> $captureGroups
200
+ */
201
private function getOnlyTopLevelAlternationId(array $captureGroups): ?int
{
    // Alternation id shared by every top-level group inspected so far;
    // stays null until the first top-level group has been seen.
    $sharedId = null;

    foreach ($captureGroups as $candidate) {
        if ($candidate->isTopLevel()) {
            // A top-level group outside of any alternation rules out the
            // "single top-level alternation" case entirely.
            if (!$candidate->inAlternation()) {
                return null;
            }

            $currentId = $candidate->getAlternationId();

            if ($sharedId === null) {
                $sharedId = $currentId;
                continue;
            }

            // Two top-level groups reporting different alternation ids:
            // there is no single alternation id to return.
            if ($sharedId !== $currentId) {
                return null;
            }
        }
    }

    // Null when no top-level group was found at all.
    return $sharedId;
}
222
+
223
+ /**
224
+ * @param array<RegexCapturingGroup> $captureGroups
149
225
*/
150
226
private function buildArrayType (
151
227
array $ captureGroups ,
@@ -164,9 +240,8 @@ private function buildArrayType(
164
240
);
165
241
166
242
$ countGroups = count ($ captureGroups );
167
- for ($ i = 0 ; $ i < $ countGroups ; $ i ++) {
168
- $ captureGroup = $ captureGroups [$ i ];
169
-
243
+ $ i = 0 ;
244
+ foreach ($ captureGroups as $ captureGroup ) {
170
245
if (!$ wasMatched ->yes ()) {
171
246
$ optional = true ;
172
247
} else {
@@ -190,6 +265,8 @@ private function buildArrayType(
190
265
$ valueType ,
191
266
$ optional ,
192
267
);
268
+
269
+ $ i ++;
193
270
}
194
271
195
272
return $ builder ->getArray ();
@@ -233,7 +310,7 @@ private function getValueType(int $flags): Type
233
310
}
234
311
235
312
/**
236
- * @return list< RegexCapturingGroup>|null
313
+ * @return array{array<int, RegexCapturingGroup>, array<int, array<int, int[]>>} |null
237
314
*/
238
315
private function parseGroups (string $ regex ): ?array
239
316
{
@@ -249,47 +326,63 @@ private function parseGroups(string $regex): ?array
249
326
}
250
327
251
328
$ capturingGroups = [];
329
+ $ groupCombinations = [];
330
+ $ alternationId = -1 ;
331
+ $ captureGroupId = 100 ;
252
332
$ this ->walkRegexAst (
253
333
$ ast ,
254
334
false ,
335
+ $ alternationId ,
336
+ 0 ,
255
337
false ,
256
338
null ,
339
+ $ captureGroupId ,
257
340
$ capturingGroups ,
341
+ $ groupCombinations ,
258
342
);
259
343
260
- return $ capturingGroups ;
344
+ return [ $ capturingGroups, $ groupCombinations ] ;
261
345
}
262
346
263
347
/**
264
- * @param list<RegexCapturingGroup> $capturingGroups
348
+ * @param array<int, RegexCapturingGroup> $capturingGroups
349
+ * @param array<int, array<int, int[]>> $groupCombinations
265
350
*/
266
351
private function walkRegexAst (
267
352
TreeNode $ ast ,
268
353
bool $ inAlternation ,
354
+ int &$ alternationId ,
355
+ int $ combinationIndex ,
269
356
bool $ inOptionalQuantification ,
270
357
RegexCapturingGroup |RegexNonCapturingGroup |null $ parentGroup ,
358
+ int &$ captureGroupId ,
271
359
array &$ capturingGroups ,
360
+ array &$ groupCombinations ,
272
361
): void
273
362
{
274
363
$ group = null ;
275
364
if ($ ast ->getId () === '#capturing ' ) {
276
- $ group = RegexCapturingGroup::unnamed (
277
- $ inAlternation ,
365
+ $ group = new RegexCapturingGroup (
366
+ $ captureGroupId ++,
367
+ null ,
368
+ $ inAlternation ? $ alternationId : null ,
278
369
$ inOptionalQuantification ,
279
370
$ parentGroup ,
280
371
);
281
372
$ parentGroup = $ group ;
282
373
} elseif ($ ast ->getId () === '#namedcapturing ' ) {
283
374
$ name = $ ast ->getChild (0 )->getValue ()['value ' ];
284
- $ group = RegexCapturingGroup::named (
375
+ $ group = new RegexCapturingGroup (
376
+ $ captureGroupId ++,
285
377
$ name ,
286
- $ inAlternation ,
378
+ $ inAlternation ? $ alternationId : null ,
287
379
$ inOptionalQuantification ,
288
380
$ parentGroup ,
289
381
);
290
382
$ parentGroup = $ group ;
291
383
} elseif ($ ast ->getId () === '#noncapturing ' ) {
292
- $ group = RegexNonCapturingGroup::create (
384
+ $ group = new RegexNonCapturingGroup (
385
+ $ inAlternation ? $ alternationId : null ,
293
386
$ inOptionalQuantification ,
294
387
$ parentGroup ,
295
388
);
@@ -311,21 +404,40 @@ private function walkRegexAst(
311
404
}
312
405
313
406
if ($ ast ->getId () === '#alternation ' ) {
407
+ $ alternationId ++;
314
408
$ inAlternation = true ;
315
409
}
316
410
317
411
if ($ group instanceof RegexCapturingGroup) {
318
- $ capturingGroups [] = $ group ;
412
+ $ capturingGroups [$ group ->getId ()] = $ group ;
413
+
414
+ if (!array_key_exists ($ alternationId , $ groupCombinations )) {
415
+ $ groupCombinations [$ alternationId ] = [];
416
+ }
417
+ if (!array_key_exists ($ combinationIndex , $ groupCombinations [$ alternationId ])) {
418
+ $ groupCombinations [$ alternationId ][$ combinationIndex ] = [];
419
+ }
420
+ $ groupCombinations [$ alternationId ][$ combinationIndex ][] = $ group ->getId ();
319
421
}
320
422
321
423
foreach ($ ast ->getChildren () as $ child ) {
322
424
$ this ->walkRegexAst (
323
425
$ child ,
324
426
$ inAlternation ,
427
+ $ alternationId ,
428
+ $ combinationIndex ,
325
429
$ inOptionalQuantification ,
326
430
$ parentGroup ,
431
+ $ captureGroupId ,
327
432
$ capturingGroups ,
433
+ $ groupCombinations ,
328
434
);
435
+
436
+ if ($ ast ->getId () !== '#alternation ' ) {
437
+ continue ;
438
+ }
439
+
440
+ $ combinationIndex ++;
329
441
}
330
442
}
331
443
0 commit comments