Skip to content

Commit 173862d

Browse files
committed
Add phpstan
1 parent ae12332 commit 173862d

File tree

6 files changed

+165
-20
lines changed

6 files changed

+165
-20
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
tests/vendor/
2+
tests/composer.lock
13

24
ground_truth/apps/
35

tests/CheckLocalFolder.php

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,13 @@
1111
}
1212

1313
$RealPath = realpath( $argv[ 1 ] );
14+
15+
if( $RealPath === false )
16+
{
17+
echo 'Invalid path provided.' . PHP_EOL;
18+
exit( 1 );
19+
}
20+
1421
$RealPathLength = strlen( $RealPath ) + 1;
1522
$Files = [];
1623
$Iterator = new RecursiveIteratorIterator( new RecursiveDirectoryIterator( $RealPath ) );

tests/GenerateTestStrings.php

Lines changed: 83 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,15 @@
33

44
echo "Generating comprehensive test strings from regex patterns...\n";
55

6+
/** @var array<string, array<string, string|string[]>>|false */
67
$Rulesets = parse_ini_file( __DIR__ . '/../rules.ini', true, INI_SCANNER_RAW );
78

9+
if( !is_array( $Rulesets ) )
10+
{
11+
echo "Failed to parse rules.ini file\n";
12+
exit( 1 );
13+
}
14+
815
foreach( $Rulesets as $Type => $Rules )
916
{
1017
foreach( $Rules as $Name => $RuleRegexes )
@@ -20,12 +27,16 @@
2027
if( file_exists( $File ) )
2128
{
2229
$Tests = file( $File, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES );
30+
31+
if( $Tests === false )
32+
{
33+
$Tests = [];
34+
}
2335
}
2436

2537
$Output = [];
2638
$Added = false;
2739

28-
// Skip generating certain regexes
2940
foreach( $RuleRegexes as $Regex )
3041
{
3142
$Generated = generateVariations( $Regex );
@@ -67,21 +78,20 @@
6778
* Native PHP regex pattern generator
6879
* Generates ALL possible variations from regex patterns with smart bounds for infinite cases
6980
* Handles anchors, alternation, quantifiers, character classes, groups, and escapes
81+
* @return string[]
7082
*/
7183
function generateVariations( string $regex ) : array
7284
{
7385
// Parse the regex pattern directly
7486
$parsedPattern = parseRegex( $regex );
7587

76-
if( $parsedPattern === null )
77-
{
78-
throw new InvalidArgumentException( "Invalid regex pattern: {$regex}" );
79-
}
80-
8188
return generateFromParsedPattern( $parsedPattern );
8289
}
8390

84-
function parseRegex( string $pattern ) : ?array
91+
/**
92+
* @return array<array<string,mixed>>
93+
*/
94+
function parseRegex( string $pattern ) : array
8595
{
8696
$tokens = [];
8797
$i = 0;
@@ -143,7 +153,7 @@ function parseRegex( string $pattern ) : ?array
143153
{
144154
// Non-capturing group
145155
$groupEnd = findMatchingParen( $pattern, $i );
146-
if( $groupEnd !== false )
156+
if( $groupEnd !== null )
147157
{
148158
$groupContent = substr( $pattern, $i + 3, $groupEnd - $i - 3 );
149159
$tokens[] = [ 'type' => 'group', 'capturing' => false, 'content' => parseRegex( $groupContent ) ];
@@ -159,7 +169,7 @@ function parseRegex( string $pattern ) : ?array
159169
{
160170
// Capturing group
161171
$groupEnd = findMatchingParen( $pattern, $i );
162-
if( $groupEnd !== false )
172+
if( $groupEnd !== null )
163173
{
164174
$groupContent = substr( $pattern, $i + 1, $groupEnd - $i - 1 );
165175
$tokens[] = [ 'type' => 'group', 'capturing' => true, 'content' => parseRegex( $groupContent ) ];
@@ -199,10 +209,10 @@ function parseRegex( string $pattern ) : ?array
199209
if( $endPos !== false )
200210
{
201211
$quantifier = substr( $pattern, $i + 1, $endPos - $i - 1 );
202-
if( preg_match( '/^(\d+)(?:,(\d+)?)?$/', $quantifier, $matches ) )
212+
if( preg_match( '/^(\d+)(?:,(\d+)?)?$/', $quantifier, $matches ) === 1 )
203213
{
204214
$min = (int)$matches[1];
205-
$max = isset( $matches[2] ) && $matches[2] !== '' ? (int)$matches[2] : ( isset( $matches[2] ) ? null : $min );
215+
$max = !empty( $matches[2] ) ? (int)$matches[2] : ( isset( $matches[2] ) ? null : $min );
206216
$tokens[] = [ 'type' => 'quantifier', 'min' => $min, 'max' => $max ];
207217
$i = $endPos + 1;
208218
}
@@ -256,6 +266,10 @@ function findMatchingParen( string $pattern, int $start ) : ?int
256266
return $depth === 0 ? $i - 1 : null;
257267
}
258268

269+
/**
270+
* @param array<array<string,mixed>> $tokens
271+
* @return string[]
272+
*/
259273
function generateFromParsedPattern( array $tokens, bool $isSubPattern = false ) : array
260274
{
261275
$hasStartAnchor = detectStartAnchor( $tokens );
@@ -288,6 +302,8 @@ function generateFromParsedPattern( array $tokens, bool $isSubPattern = false )
288302
$min = $quantifier['min'];
289303
$max = $quantifier['max'];
290304

305+
assert( is_int( $min ) );
306+
291307
// Bound infinite quantifiers
292308
if( $max === null )
293309
{
@@ -376,30 +392,43 @@ function generateFromParsedPattern( array $tokens, bool $isSubPattern = false )
376392
return array_unique( $results );
377393
}
378394

395+
/**
396+
* @param array<string,mixed> $token
397+
* @return string[]
398+
*/
379399
function generateFromToken( array $token ) : array
380400
{
381401
switch( $token['type'] )
382402
{
383403
case 'literal':
404+
assert( is_string( $token[ 'value' ] ) );
384405
return [ $token['value'] ];
385406

386407
case 'escape':
408+
assert( is_string( $token[ 'value' ] ) );
387409
return generateFromEscape( $token['value'] );
388410

389411
case 'any':
390412
return [ 'a', 'Z', '1', '_', '-' ]; // Sample representative chars
391413

392414
case 'charclass':
415+
assert( is_string( $token[ 'value' ] ) );
393416
return processCharacterClass( $token['value'] );
394417

395418
case 'group':
396-
return generateFromGroupContent( $token['content'] );
419+
assert( is_array( $token[ 'content' ] ) );
420+
/** @var array<array<string,mixed>> $content */
421+
$content = $token['content'];
422+
return generateFromGroupContent( $content );
397423

398424
default:
399425
return [ '' ];
400426
}
401427
}
402428

429+
/**
430+
* @return string[]
431+
*/
403432
function generateFromEscape( string $char ) : array
404433
{
405434
switch( $char )
@@ -432,23 +461,38 @@ function generateFromEscape( string $char ) : array
432461
}
433462
}
434463

464+
/**
465+
* @param array<string,mixed> $token
466+
* @return string[]
467+
*/
435468
function getSampleCharsForToken( array $token ) : array
436469
{
470+
if( !isset( $token['type'] ) )
471+
{
472+
return [ 'a' ];
473+
}
474+
437475
switch( $token['type'] )
438476
{
439477
case 'any':
440478
return [ 'a', 'Z', '1' ];
441479
case 'escape':
480+
assert( is_string( $token['value'] ) );
442481
if( $token['value'] === 'd' ) return [ '0', '1', '9' ];
443482
if( $token['value'] === 'w' ) return [ 'a', 'B', '3' ];
444483
return [ $token['value'] ];
445484
case 'charclass':
485+
assert( is_string( $token['value'] ) );
446486
return array_slice( processCharacterClass( $token['value'] ), 0, 3 );
447487
default:
448488
return [ 'a' ];
449489
}
450490
}
451491

492+
/**
493+
* @param array<array<string,mixed>> $tokens
494+
* @return string[]
495+
*/
452496
function generateFromGroupContent( array $tokens ) : array
453497
{
454498
if( empty( $tokens ) )
@@ -497,6 +541,9 @@ function generateFromGroupContent( array $tokens ) : array
497541
return array_unique( $allResults );
498542
}
499543

544+
/**
545+
* @return string[]
546+
*/
500547
function processCharacterClass( string $charClass ) : array
501548
{
502549
// Handle negated classes
@@ -522,7 +569,7 @@ function processCharacterClass( string $charClass ) : array
522569
}
523570

524571
// Handle ranges like a-z, 0-9
525-
if( preg_match_all( '/(\w)-(\w)/', $charClass, $matches, PREG_SET_ORDER ) )
572+
if( preg_match_all( '/(\w)-(\w)/', $charClass, $matches, PREG_SET_ORDER ) > 0 )
526573
{
527574
foreach( $matches as $match )
528575
{
@@ -567,16 +614,25 @@ function processCharacterClass( string $charClass ) : array
567614
return array_slice( $chars, 0, 6 ); // Limit to reasonable number
568615
}
569616

617+
/**
618+
* @param array<array<string,mixed>> $tokens
619+
*/
570620
function hasStartAnchorInAlternation( array $tokens ) : bool
571621
{
572622
return hasAnchorInAlternation( $tokens, 'start', true );
573623
}
574624

625+
/**
626+
* @param array<array<string,mixed>> $tokens
627+
*/
575628
function hasEndAnchorInAlternation( array $tokens ) : bool
576629
{
577630
return hasAnchorInAlternation( $tokens, 'end', false );
578631
}
579632

633+
/**
634+
* @param array<array<string,mixed>> $tokens
635+
*/
580636
function hasAnchorInAlternation( array $tokens, string $anchorType, bool $checkFirst ) : bool
581637
{
582638
// Split tokens into alternatives
@@ -607,8 +663,6 @@ function hasAnchorInAlternation( array $tokens, string $anchorType, bool $checkF
607663
// Check each alternative for the anchor
608664
foreach( $alternatives as $alternative )
609665
{
610-
if( empty( $alternative ) ) continue;
611-
612666
if( $checkFirst )
613667
{
614668
// Check if alternative starts with the anchor
@@ -640,6 +694,9 @@ function hasAnchorInAlternation( array $tokens, string $anchorType, bool $checkF
640694
return false;
641695
}
642696

697+
/**
698+
* @param array<array<string,mixed>> $tokens
699+
*/
643700
function detectStartAnchor( array $tokens ) : bool
644701
{
645702
if( empty( $tokens ) ) return false;
@@ -653,12 +710,18 @@ function detectStartAnchor( array $tokens ) : bool
653710
// Start anchor in first group (like (?:^|/))
654711
if( $tokens[0]['type'] === 'group' )
655712
{
656-
return hasStartAnchorInAlternation( $tokens[0]['content'] );
713+
assert( is_array( $tokens[0]['content'] ) );
714+
/** @var array<array<string,mixed>> $content */
715+
$content = $tokens[0]['content'];
716+
return hasStartAnchorInAlternation( $content );
657717
}
658718

659719
return false;
660720
}
661721

722+
/**
723+
* @param array<array<string,mixed>> $tokens
724+
*/
662725
function detectEndAnchor( array $tokens ) : bool
663726
{
664727
if( empty( $tokens ) ) return false;
@@ -674,7 +737,10 @@ function detectEndAnchor( array $tokens ) : bool
674737
// End anchor in last group (like (?:$|/))
675738
if( $tokens[$lastIndex]['type'] === 'group' )
676739
{
677-
return hasEndAnchorInAlternation( $tokens[$lastIndex]['content'] );
740+
assert( is_array( $tokens[$lastIndex]['content'] ) );
741+
/** @var array<array<string,mixed>> $content */
742+
$content = $tokens[$lastIndex]['content'];
743+
return hasEndAnchorInAlternation( $content );
678744
}
679745

680746
return false;

0 commit comments

Comments
 (0)