Skip to content

Commit 8353436

Browse files
committed
Tokenizer/PHP: retokenize the match double arrow to T_MATCH_ARROW
The double arrow in PHP is used in a number of contexts:
* Long/short arrays with keys;
* Long/short lists with keys;
* In the `as` part of `foreach()` statements with keys;
* For `yield` statements with keys;
* For arrow functions as the scope opener;
* And now for `match` expressions to separate the cases from the return value (body).

As most of these constructs can be nested in each other (an arrow function in an array value, a match expression in a list key), every sniff handling any of these constructs has to take a lot of care when searching for the double arrow for the construct they are handling, to prevent matching a double arrow belonging to another type of construct nested in the target construct.

This type of detection and special handling has to be done in each individual sniff which in one way or another has to deal with the `T_DOUBLE_ARROW` token, and it can cause considerable processing overhead.

With that in mind, the double arrow as a scope opener for arrow functions has previously already been retokenized to `T_FN_ARROW`. Following the same reasoning, I'm proposing to retokenize the double arrow which separates `match` case expressions from the body expression to `T_MATCH_ARROW`.

This should make life easier for any sniff dealing with any of the above constructs and will prevent potential false positives being introduced for sniffs which currently handle any of these constructs, but have not yet been updated to allow for match expressions.

Includes a set of dedicated unit tests verifying the tokenization of the double arrow operator in all currently supported contexts, including in combined (nested) contexts.
1 parent 0fa0e01 commit 8353436

File tree

5 files changed

+502
-0
lines changed

5 files changed

+502
-0
lines changed

package.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
204204
<file baseinstalldir="" name="BitwiseOrTest.php" role="test" />
205205
<file baseinstalldir="" name="DefaultKeywordTest.inc" role="test" />
206206
<file baseinstalldir="" name="DefaultKeywordTest.php" role="test" />
207+
<file baseinstalldir="" name="DoubleArrowTest.inc" role="test" />
208+
<file baseinstalldir="" name="DoubleArrowTest.php" role="test" />
207209
<file baseinstalldir="" name="GotoLabelTest.inc" role="test" />
208210
<file baseinstalldir="" name="GotoLabelTest.php" role="test" />
209211
<file baseinstalldir="" name="NamedFunctionCallArgumentsTest.inc" role="test" />
@@ -2112,6 +2114,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
21122114
<install as="CodeSniffer/Core/Tokenizer/BitwiseOrTest.inc" name="tests/Core/Tokenizer/BitwiseOrTest.inc" />
21132115
<install as="CodeSniffer/Core/Tokenizer/DefaultKeywordTest.php" name="tests/Core/Tokenizer/DefaultKeywordTest.php" />
21142116
<install as="CodeSniffer/Core/Tokenizer/DefaultKeywordTest.inc" name="tests/Core/Tokenizer/DefaultKeywordTest.inc" />
2117+
<install as="CodeSniffer/Core/Tokenizer/DoubleArrowTest.php" name="tests/Core/Tokenizer/DoubleArrowTest.php" />
2118+
<install as="CodeSniffer/Core/Tokenizer/DoubleArrowTest.inc" name="tests/Core/Tokenizer/DoubleArrowTest.inc" />
21152119
<install as="CodeSniffer/Core/Tokenizer/GotoLabelTest.php" name="tests/Core/Tokenizer/GotoLabelTest.php" />
21162120
<install as="CodeSniffer/Core/Tokenizer/GotoLabelTest.inc" name="tests/Core/Tokenizer/GotoLabelTest.inc" />
21172121
<install as="CodeSniffer/Core/Tokenizer/NamedFunctionCallArgumentsTest.php" name="tests/Core/Tokenizer/NamedFunctionCallArgumentsTest.php" />
@@ -2192,6 +2196,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
21922196
<install as="CodeSniffer/Core/Tokenizer/BitwiseOrTest.inc" name="tests/Core/Tokenizer/BitwiseOrTest.inc" />
21932197
<install as="CodeSniffer/Core/Tokenizer/DefaultKeywordTest.php" name="tests/Core/Tokenizer/DefaultKeywordTest.php" />
21942198
<install as="CodeSniffer/Core/Tokenizer/DefaultKeywordTest.inc" name="tests/Core/Tokenizer/DefaultKeywordTest.inc" />
2199+
<install as="CodeSniffer/Core/Tokenizer/DoubleArrowTest.php" name="tests/Core/Tokenizer/DoubleArrowTest.php" />
2200+
<install as="CodeSniffer/Core/Tokenizer/DoubleArrowTest.inc" name="tests/Core/Tokenizer/DoubleArrowTest.inc" />
21952201
<install as="CodeSniffer/Core/Tokenizer/GotoLabelTest.php" name="tests/Core/Tokenizer/GotoLabelTest.php" />
21962202
<install as="CodeSniffer/Core/Tokenizer/GotoLabelTest.inc" name="tests/Core/Tokenizer/GotoLabelTest.inc" />
21972203
<install as="CodeSniffer/Core/Tokenizer/NamedFunctionCallArgumentsTest.php" name="tests/Core/Tokenizer/NamedFunctionCallArgumentsTest.php" />

src/Tokenizers/PHP.php

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ class PHP extends Tokenizer
373373
T_LOGICAL_OR => 2,
374374
T_LOGICAL_XOR => 3,
375375
T_MATCH => 5,
376+
T_MATCH_ARROW => 2,
376377
T_MATCH_DEFAULT => 7,
377378
T_METHOD_C => 10,
378379
T_MINUS_EQUAL => 2,
@@ -1371,6 +1372,15 @@ protected function tokenize($string)
13711372
&& is_array($tokens[$x]) === true
13721373
&& $tokens[$x][0] === T_DOUBLE_ARROW
13731374
) {
1375+
// Modify the original token stack for the double arrow so that
1376+
// future checks can disregard the double arrow token more easily.
1377+
// For match expression "case" statements, this is handled
1378+
// in PHP::processAdditional().
1379+
$tokens[$x][0] = T_MATCH_ARROW;
1380+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
1381+
echo "\t\t* token $x changed from T_DOUBLE_ARROW to T_MATCH_ARROW".PHP_EOL;
1382+
}
1383+
13741384
$newToken = [];
13751385
$newToken['code'] = T_MATCH_DEFAULT;
13761386
$newToken['type'] = 'T_MATCH_DEFAULT';
@@ -2450,6 +2460,47 @@ protected function processAdditional()
24502460
echo "\t\t* cleaned parenthesis of token $i *".PHP_EOL;
24512461
}
24522462
}
2463+
} else {
2464+
// Retokenize the double arrows for match expression cases to `T_MATCH_ARROW`.
2465+
$searchFor = [
2466+
T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
2467+
T_OPEN_SQUARE_BRACKET => T_OPEN_SQUARE_BRACKET,
2468+
T_OPEN_PARENTHESIS => T_OPEN_PARENTHESIS,
2469+
T_OPEN_SHORT_ARRAY => T_OPEN_SHORT_ARRAY,
2470+
T_DOUBLE_ARROW => T_DOUBLE_ARROW,
2471+
];
2472+
$searchFor += Util\Tokens::$scopeOpeners;
2473+
2474+
for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) {
2475+
if (isset($searchFor[$this->tokens[$x]['code']]) === false) {
2476+
continue;
2477+
}
2478+
2479+
if (isset($this->tokens[$x]['scope_closer']) === true) {
2480+
$x = $this->tokens[$x]['scope_closer'];
2481+
continue;
2482+
}
2483+
2484+
if (isset($this->tokens[$x]['parenthesis_closer']) === true) {
2485+
$x = $this->tokens[$x]['parenthesis_closer'];
2486+
continue;
2487+
}
2488+
2489+
if (isset($this->tokens[$x]['bracket_closer']) === true) {
2490+
$x = $this->tokens[$x]['bracket_closer'];
2491+
continue;
2492+
}
2493+
2494+
// This must be a double arrow, but make sure anyhow.
2495+
if ($this->tokens[$x]['code'] === T_DOUBLE_ARROW) {
2496+
$this->tokens[$x]['code'] = T_MATCH_ARROW;
2497+
$this->tokens[$x]['type'] = 'T_MATCH_ARROW';
2498+
2499+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
2500+
echo "\t\t* token $x changed from T_DOUBLE_ARROW to T_MATCH_ARROW".PHP_EOL;
2501+
}
2502+
}
2503+
}//end for
24532504
}//end if
24542505

24552506
continue;

src/Util/Tokens.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
define('T_FN_ARROW', 'PHPCS_T_FN_ARROW');
7878
define('T_TYPE_UNION', 'PHPCS_T_TYPE_UNION');
7979
define('T_PARAM_NAME', 'PHPCS_T_PARAM_NAME');
80+
define('T_MATCH_ARROW', 'PHPCS_T_MATCH_ARROW');
8081
define('T_MATCH_DEFAULT', 'PHPCS_T_MATCH_DEFAULT');
8182

8283
// Some PHP 5.5 tokens, replicated for lower versions.
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
<?php
2+
3+
function simpleLongArray($x) {
4+
return array(
5+
/* testLongArrayArrowSimple */
6+
0 => 'Zero',
7+
);
8+
}
9+
10+
function simpleShortArray($x) {
11+
return [
12+
/* testShortArrayArrowSimple */
13+
0 => 'Zero',
14+
];
15+
}
16+
17+
function simpleLongList($x) {
18+
list(
19+
/* testLongListArrowSimple */
20+
0 => $a,
21+
) = $x;
22+
}
23+
24+
function simpleShortList($x) {
25+
[
26+
/* testShortListArrowSimple */
27+
0 => $a,
28+
] = $x;
29+
}
30+
31+
function simpleYield($x) {
32+
$i = 0;
33+
foreach (explode("\n", $x) as $line) {
34+
/* testYieldArrowSimple */
35+
yield ++$i => $line;
36+
}
37+
}
38+
39+
function simpleForeach($x) {
40+
/* testForeachArrowSimple */
41+
foreach ($x as $k => $value) {}
42+
}
43+
44+
function simpleMatch($x) {
45+
return match ($x) {
46+
/* testMatchArrowSimpleSingleCase */
47+
0 => 'Zero',
48+
/* testMatchArrowSimpleMultiCase */
49+
2, 4, 6 => 'Zero',
50+
/* testMatchArrowSimpleSingleCaseWithTrailingComma */
51+
1, => 'Zero',
52+
/* testMatchArrowSimpleMultiCaseWithTrailingComma */
53+
3, 5, => 'Zero',
54+
};
55+
}
56+
57+
function simpleArrowFunction($y) {
58+
/* testFnArrowSimple */
59+
return fn ($y) => callMe($y);
60+
}
61+
62+
function matchNestedInMatch() {
63+
$x = match ($y) {
64+
/* testMatchArrowNestedMatchOuter */
65+
default, => match ($z) {
66+
/* testMatchArrowNestedMatchInner */
67+
1 => 1
68+
},
69+
};
70+
}
71+
72+
function matchNestedInLongArrayValue() {
73+
$array = array(
74+
/* testLongArrayArrowWithNestedMatchValue1 */
75+
'a' => match ($test) {
76+
/* testMatchArrowInLongArrayValue1 */
77+
1 => 'a',
78+
/* testMatchArrowInLongArrayValue2 */
79+
2 => 'b'
80+
},
81+
/* testLongArrayArrowWithNestedMatchValue2 */
82+
$i => match ($test) {
83+
/* testMatchArrowInLongArrayValue3 */
84+
1 => 'a',
85+
},
86+
);
87+
}
88+
89+
function matchNestedInShortArrayValue() {
90+
$array = [
91+
/* testShortArrayArrowWithNestedMatchValue1 */
92+
'a' => match ($test) {
93+
/* testMatchArrowInShortArrayValue1 */
94+
1 => 'a',
95+
/* testMatchArrowInShortArrayValue2 */
96+
2 => 'b'
97+
},
98+
/* testShortArrayArrowWithNestedMatchValue2 */
99+
$i => match ($test) {
100+
/* testMatchArrowInShortArrayValue3 */
101+
1 => 'a',
102+
},
103+
];
104+
}
105+
106+
function matchNestedInLongArrayKey() {
107+
$array = array(
108+
match ($test) { /* testMatchArrowInLongArrayKey1 */ 1 => 'a', /* testMatchArrowInLongArrayKey2 */ 2 => 'b' }
109+
/* testLongArrayArrowWithMatchKey */
110+
=> 'dynamic keys, woho!',
111+
);
112+
}
113+
114+
function matchNestedInShortArrayKey() {
115+
$array = [
116+
match ($test) { /* testMatchArrowInShortArrayKey1 */ 1 => 'a', /* testMatchArrowInShortArrayKey2 */ 2 => 'b' }
117+
/* testShortArrayArrowWithMatchKey */
118+
=> 'dynamic keys, woho!',
119+
];
120+
}
121+
122+
function arraysNestedInMatch() {
123+
$matcher = match ($x) {
124+
/* testMatchArrowWithLongArrayBodyWithKeys */
125+
0 => array(
126+
/* testLongArrayArrowInMatchBody1 */
127+
0 => 1,
128+
/* testLongArrayArrowInMatchBody2 */
129+
'a' => 2,
130+
/* testLongArrayArrowInMatchBody3 */
131+
'b' => 3
132+
),
133+
/* testMatchArrowWithShortArrayBodyWithoutKeys */
134+
1 => [1, 2, 3],
135+
/* testMatchArrowWithLongArrayBodyWithoutKeys */
136+
2 => array( 1, [1, 2, 3], 2, 3),
137+
/* testMatchArrowWithShortArrayBodyWithKeys */
138+
3 => [
139+
/* testShortArrayArrowInMatchBody1 */
140+
0 => 1,
141+
/* testShortArrayArrowInMatchBody2 */
142+
'a' => array(1, 2, 3),
143+
/* testShortArrayArrowInMatchBody3 */
144+
'b' => 2,
145+
3
146+
],
147+
/* testShortArrayArrowinMatchCase1 */
148+
[4 => 'a', /* testShortArrayArrowinMatchCase2 */ 5 => 6]
149+
/* testMatchArrowWithShortArrayWithKeysAsCase */
150+
=> 'match with array as case value',
151+
/* testShortArrayArrowinMatchCase3 */
152+
[4 => 'a'], /* testLongArrayArrowinMatchCase4 */ array(5 => 6),
153+
/* testMatchArrowWithMultipleArraysWithKeysAsCase */
154+
=> 'match with multiple arrays as case value',
155+
};
156+
}
157+
158+
function matchNestedInArrowFunction($x) {
159+
/* testFnArrowWithMatchInValue */
160+
$fn = fn($x) => match(true) {
161+
/* testMatchArrowInFnBody1 */
162+
1, 2, 3, 4, 5 => 'foo',
163+
/* testMatchArrowInFnBody2 */
164+
default => 'bar',
165+
};
166+
}
167+
168+
function arrowFunctionsNestedInMatch($x) {
169+
return match ($x) {
170+
/* testMatchArrowWithFnBody1 */
171+
1 => /* testFnArrowInMatchBody1 */ fn($y) => callMe($y),
172+
/* testMatchArrowWithFnBody2 */
173+
default => /* testFnArrowInMatchBody2 */ fn($y) => callThem($y)
174+
};
175+
}
176+
177+
function matchShortArrayMismash() {
178+
$array = [
179+
match ($test) {
180+
/* testMatchArrowInComplexShortArrayKey1 */
181+
1 => [ /* testShortArrayArrowInComplexMatchValueinShortArrayKey */ 1 => 'a'],
182+
/* testMatchArrowInComplexShortArrayKey2 */
183+
2 => 'b'
184+
/* testShortArrayArrowInComplexMatchArrayMismash */
185+
} => match ($test) {
186+
/* testMatchArrowInComplexShortArrayValue1 */
187+
1 => [ /* testShortArrayArrowInComplexMatchValueinShortArrayValue */ 1 => 'a'],
188+
/* testMatchArrowInComplexShortArrayValue2 */
189+
2 => /* testFnArrowInComplexMatchValueInShortArrayValue */ fn($y) => callMe($y)
190+
},
191+
];
192+
}
193+
194+
195+
function longListInMatch($x, $y) {
196+
return match($x) {
197+
/* testMatchArrowWithLongListBody */
198+
1 => list('a' => $a, /* testLongListArrowInMatchBody */ 'b' => $b, 'c' => list('d' => $c)) = $y,
199+
/* testLongListArrowInMatchCase */
200+
list('a' => $a, 'b' => $b) = $y /* testMatchArrowWithLongListInCase */ => 'something'
201+
};
202+
}
203+
204+
function shortListInMatch($x, $y) {
205+
return match($x) {
206+
/* testMatchArrowWithShortListBody */
207+
1 => ['a' => $a, 'b' => $b, 'c' => /* testShortListArrowInMatchBody */ ['d' => $c]] = $y,
208+
/* testShortListArrowInMatchCase */
209+
['a' => $a, 'b' => $b] = $y /* testMatchArrowWithShortListInCase */ => 'something'
210+
};
211+
}
212+
213+
function matchInLongList() {
214+
/* testMatchArrowInLongListKey */
215+
list(match($x) {1 => 1, 2 => 2} /* testLongListArrowWithMatchInKey */ => $a) = $array;
216+
}
217+
218+
function matchInShortList() {
219+
/* testMatchArrowInShortListKey */
220+
[match($x) {1 => 1, 2 => 2} /* testShortListArrowWithMatchInKey */ => $a] = $array;
221+
}

0 commit comments

Comments
 (0)