Skip to content

Commit 95265c3

Browse files
jensjoha authored and Commit Queue committed
[scanner] Optimize scanning of identifiers, comments, spaces etc.
In bytes per microsecond, statistics on 25 runs each of an AOT-compiled scanner_benchmarker run with `pkg/_fe_analyzer_shared/lib/src/parser/parser_impl.dart --bytes`:

```
    N           Min           Max        Median           Avg        Stddev
x  25     116.01859     118.24483     117.63414     117.41239     0.6246963
+  25      136.5563     139.73077     138.82117     138.72373    0.75137577
Difference at 95.0% confidence
    21.3113 +/- 0.393007
    18.1508% +/- 0.334724%
    (Student's t, pooled s = 0.690945)
```

With `--string`:

```
    N           Min           Max        Median           Avg        Stddev
x  25     104.77452     118.00444     115.83707     115.04005     2.8653416
+  25      120.1637     125.80937     123.84278     123.88533     1.2858698
Difference at 95.0% confidence
    8.84528 +/- 1.26317
    7.68887% +/- 1.09802%
    (Student's t, pooled s = 2.22077)
```

And running it through the benchmarker (`pkg/front_end/tool/benchmarker.dart --cache --silent --iterations=25`):

`--bytes`:

```
msec task-clock:u: -15.4177% +/- 0.3838% (-515.18 +/- 12.83)
cycles:u: -15.5303% +/- 0.3716% (-2263042219.68 +/- 54143984.97)
instructions:u: -11.8786% +/- 0.0000% (-3721971162.16 +/- 292.14)
branch-misses:u: -13.0375% +/- 0.8048% (-10550278.80 +/- 651299.13)
seconds time elapsed: -15.4170% +/- 0.3820% (-0.52 +/- 0.01)
seconds user: -15.4936% +/- 0.3859% (-0.51 +/- 0.01)

msec task-clock:u: -15.1724% +/- 0.3173% (-505.72 +/- 10.58)
L1-icache-load-misses: 42.3794% +/- 6.4906% (1901929.52 +/- 291287.30)
LLC-loads: 3.2837% +/- 0.7535% (40264.52 +/- 9239.86)
LLC-load-misses: -2.2808% +/- 1.3789% (-3647.96 +/- 2205.39)
seconds time elapsed: -15.1722% +/- 0.3201% (-0.51 +/- 0.01)
seconds user: -15.1811% +/- 0.3196% (-0.50 +/- 0.01)
```

`--string`:

```
msec task-clock:u: -6.2018% +/- 0.3384% (-207.18 +/- 11.31)
cycles:u: -6.2315% +/- 0.3257% (-907517140.20 +/- 47432247.03)
instructions:u: -7.2693% +/- 0.0000% (-2325765423.72 +/- 491.22)
branch-misses:u: -2.6467% +/- 0.6289% (-2198552.96 +/- 522409.28)
seconds time elapsed: -6.1995% +/- 0.3378% (-0.21 +/- 0.01)
seconds user: -6.2612% +/- 0.3705% (-0.21 +/- 0.01)

msec task-clock:u: -6.1645% +/- 0.4224% (-206.18 +/- 14.13)
L1-icache-load-misses: 40.5703% +/- 6.3952% (1945020.52 +/- 306599.42)
LLC-loads: 1.5464% +/- 0.8925% (20130.04 +/- 11618.51)
seconds time elapsed: -6.1656% +/- 0.4197% (-0.21 +/- 0.01)
seconds user: -6.1980% +/- 0.4240% (-0.21 +/- 0.01)
```

And compiling the CFE from December with the CFE, statistics on 50 runs each, again run with `--cache --silent` (so 2 x 50 runs each):

```
msec task-clock:u: -0.7401% +/- 0.1900% (-45.01 +/- 11.56)
cycles:u: -0.7971% +/- 0.1917% (-202488820.10 +/- 48684812.34)
instructions:u: -0.5975% +/- 0.0004% (-184367556.38 +/- 137673.23)
branch-misses:u: -2.9490% +/- 0.8146% (-2788427.60 +/- 770259.65)
seconds time elapsed: -0.7428% +/- 0.1901% (-0.05 +/- 0.01)
seconds user: -0.7148% +/- 0.2895% (-0.04 +/- 0.02)
L1-icache-load-misses: 0.1974% +/- 0.1457% (954253.92 +/- 704297.81)
LLC-loads: 0.1988% +/- 0.1244% (97594.22 +/- 61084.80)
```

Change-Id: I0550596f5320a1ff00d85765c121d012f55bec61
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/407400
Reviewed-by: Johnni Winther <[email protected]>
Commit-Queue: Jens Johansen <[email protected]>
1 parent f534c13 commit 95265c3

File tree

6 files changed

+402
-52
lines changed

6 files changed

+402
-52
lines changed

pkg/_fe_analyzer_shared/lib/src/scanner/abstract_scanner.dart

Lines changed: 66 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import 'dart:collection' show ListMixin;
1111

1212
import 'dart:typed_data' show Uint16List, Uint32List;
1313

14+
import 'internal_utils.dart' show isIdentifierChar;
15+
1416
import 'token.dart'
1517
show
1618
BeginToken,
@@ -346,19 +348,6 @@ abstract class AbstractScanner implements Scanner {
346348
appendToken(new Token.eof(tokenStart, comments));
347349
}
348350

349-
/**
350-
* Notifies scanning a whitespace character. Note that [appendWhiteSpace] is
351-
* not always invoked for [$SPACE] characters.
352-
*
353-
* This method is used by the scanners to track line breaks and create the
354-
* [lineStarts] map.
355-
*/
356-
void appendWhiteSpace(int next) {
357-
if (next == $LF) {
358-
lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.
359-
}
360-
}
361-
362351
/**
363352
* Notifies on [$LF] characters in multi-line comments or strings.
364353
*
@@ -814,18 +803,18 @@ abstract class AbstractScanner implements Scanner {
814803
return tokenizeLanguageVersionOrSingleLineComment(next);
815804
}
816805

806+
/// Skip past spaces. Returns the latest character not consumed
807+
/// (i.e. the latest character that is not a space).
808+
int skipSpaces();
809+
817810
int bigSwitch(int next) {
818811
beginToken();
819-
if (next == $SPACE || next == $TAB || next == $LF || next == $CR) {
820-
appendWhiteSpace(next);
821-
next = advance();
822-
// Sequences of spaces are common, so advance through them fast.
823-
while (next == $SPACE) {
824-
// We don't invoke [:appendWhiteSpace(next):] here for efficiency,
825-
// assuming that it does not do anything for space characters.
826-
next = advance();
827-
}
828-
return next;
812+
if (next == $SPACE || next == $TAB || next == $CR) {
813+
return skipSpaces();
814+
}
815+
if (next == $LF) {
816+
lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.
817+
return skipSpaces();
829818
}
830819

831820
int nextLower = next | 0x20;
@@ -1264,6 +1253,8 @@ abstract class AbstractScanner implements Scanner {
12641253
}
12651254
int nextnext = peek();
12661255
if ($0 <= nextnext && nextnext <= $9) {
1256+
// Use the peeked character.
1257+
advance();
12671258
return tokenizeFractionPart(nextnext, start, hasSeparators);
12681259
} else {
12691260
TokenType tokenType =
@@ -1479,8 +1470,10 @@ abstract class AbstractScanner implements Scanner {
14791470
}
14801471

14811472
int tokenizeLanguageVersionOrSingleLineComment(int next) {
1473+
assert(next == $SLASH);
14821474
int start = scanOffset;
14831475
next = advance();
1476+
assert(next == $SLASH);
14841477

14851478
// Dart doc
14861479
if ($SLASH == peek()) {
@@ -1572,25 +1565,38 @@ abstract class AbstractScanner implements Scanner {
15721565
}
15731566

15741567
int tokenizeSingleLineComment(int next, int start) {
1575-
bool dartdoc = $SLASH == peek();
15761568
next = advance();
1569+
bool dartdoc = $SLASH == next;
15771570
return tokenizeSingleLineCommentRest(next, start, dartdoc);
15781571
}
15791572

1573+
/// Scan until line end (or eof). Returns true if the skipped data is ascii
1574+
/// only and false otherwise. To get the end-of-line (or eof) character call
1575+
/// [current].
1576+
bool scanUntilLineEnd();
1577+
1578+
/// Get the current character, i.e. the latest response from [advance].
1579+
int current();
1580+
15801581
int tokenizeSingleLineCommentRest(int next, int start, bool dartdoc) {
15811582
bool asciiOnly = true;
1582-
while (true) {
1583-
if (next > 127) asciiOnly = false;
1584-
if ($LF == next || $CR == next || $EOF == next) {
1585-
if (!asciiOnly) handleUnicode(start);
1586-
if (dartdoc) {
1587-
appendDartDoc(start, TokenType.SINGLE_LINE_COMMENT, asciiOnly);
1588-
} else {
1589-
appendComment(start, TokenType.SINGLE_LINE_COMMENT, asciiOnly);
1590-
}
1591-
return next;
1592-
}
1593-
next = advance();
1583+
if (next > 127) asciiOnly = false;
1584+
if ($LF == next || $CR == next || $EOF == next) {
1585+
_tokenizeSingleLineCommentAppend(asciiOnly, start, dartdoc);
1586+
return next;
1587+
}
1588+
asciiOnly &= scanUntilLineEnd();
1589+
_tokenizeSingleLineCommentAppend(asciiOnly, start, dartdoc);
1590+
return current();
1591+
}
1592+
1593+
void _tokenizeSingleLineCommentAppend(
1594+
bool asciiOnly, int start, bool dartdoc) {
1595+
if (!asciiOnly) handleUnicode(start);
1596+
if (dartdoc) {
1597+
appendDartDoc(start, TokenType.SINGLE_LINE_COMMENT, asciiOnly);
1598+
} else {
1599+
appendComment(start, TokenType.SINGLE_LINE_COMMENT, asciiOnly);
15941600
}
15951601
}
15961602

@@ -1742,14 +1748,19 @@ abstract class AbstractScanner implements Scanner {
17421748
}
17431749
}
17441750

1751+
int passIdentifierCharAllowDollar();
1752+
17451753
/**
17461754
* [allowDollar] can exclude '$', which is not allowed as part of a string
17471755
* interpolation identifier.
17481756
*/
17491757
int tokenizeIdentifier(int next, int start, bool allowDollar) {
1750-
while (true) {
1751-
if (_isIdentifierChar(next, allowDollar)) {
1752-
next = advance();
1758+
if (allowDollar) {
1759+
// Normal case is to allow dollar.
1760+
if (isIdentifierChar(next, /* allowDollar = */ true)) {
1761+
next = passIdentifierCharAllowDollar();
1762+
appendSubstringToken(
1763+
TokenType.IDENTIFIER, start, /* asciiOnly = */ true);
17531764
} else {
17541765
// Identifier ends here.
17551766
if (start == scanOffset) {
@@ -1758,7 +1769,21 @@ abstract class AbstractScanner implements Scanner {
17581769
appendSubstringToken(
17591770
TokenType.IDENTIFIER, start, /* asciiOnly = */ true);
17601771
}
1761-
break;
1772+
}
1773+
} else {
1774+
while (true) {
1775+
if (isIdentifierChar(next, /* allowDollar = */ false)) {
1776+
next = advance();
1777+
} else {
1778+
// Identifier ends here.
1779+
if (start == scanOffset) {
1780+
return unexpected(next);
1781+
} else {
1782+
appendSubstringToken(
1783+
TokenType.IDENTIFIER, start, /* asciiOnly = */ true);
1784+
}
1785+
break;
1786+
}
17621787
}
17631788
}
17641789
return next;
@@ -2010,7 +2035,7 @@ abstract class AbstractScanner implements Scanner {
20102035
codeUnits.add(errorToken.character);
20112036
prependErrorToken(errorToken);
20122037
int next = advanceAfterError();
2013-
while (_isIdentifierChar(next, /* allowDollar = */ true)) {
2038+
while (isIdentifierChar(next, /* allowDollar = */ true)) {
20142039
codeUnits.add(next);
20152040
next = advance();
20162041
}
@@ -2159,11 +2184,3 @@ class ScannerConfiguration {
21592184
this.forAugmentationLibrary = false,
21602185
});
21612186
}
2162-
2163-
bool _isIdentifierChar(int next, bool allowDollar) {
2164-
return ($a <= next && next <= $z) ||
2165-
($A <= next && next <= $Z) ||
2166-
($0 <= next && next <= $9) ||
2167-
next == $_ ||
2168-
(next == $$ && allowDollar);
2169-
}
pkg/_fe_analyzer_shared/lib/src/scanner/internal_utils.dart

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import 'characters.dart' show $$, $0, $9, $A, $Z, $_, $a, $z;
2+
3+
@pragma("vm:prefer-inline")
4+
bool isIdentifierChar(int next, bool allowDollar) {
5+
return ($a <= next && next <= $z) ||
6+
($A <= next && next <= $Z) ||
7+
($0 <= next && next <= $9) ||
8+
next == $_ ||
9+
(next == $$ && allowDollar);
10+
}
11+
12+
/// Checks if the character [next] is an identifier character (allowing the
13+
/// dollar sign) using a table lookup, utilizing the fact that the input is from
14+
/// a Uint8List and therefore between 0 and 255.
15+
/// It is the caller's responsibility to ensure that this is the case.
16+
// DartDocTest(() {
17+
// for (int i = 0; i < 256; i++) {
18+
// if (isIdentifierCharAllowDollarTableLookup(i) !=
19+
// isIdentifierChar(i, true)) {
20+
// return false;
21+
// }
22+
// }
23+
// return true;
24+
// }(), true);
25+
@pragma("vm:prefer-inline")
26+
bool isIdentifierCharAllowDollarTableLookup(int next) {
27+
const List<bool> table = [
28+
// format hack.
29+
false, false, false, false, false, false, false, false,
30+
false, false, false, false, false, false, false, false,
31+
false, false, false, false, false, false, false, false,
32+
false, false, false, false, false, false, false, false,
33+
false, false, false, false, true, false, false, false,
34+
false, false, false, false, false, false, false, false,
35+
true, true, true, true, true, true, true, true,
36+
true, true, false, false, false, false, false, false,
37+
false, true, true, true, true, true, true, true,
38+
true, true, true, true, true, true, true, true,
39+
true, true, true, true, true, true, true, true,
40+
true, true, true, false, false, false, false, true,
41+
false, true, true, true, true, true, true, true,
42+
true, true, true, true, true, true, true, true,
43+
true, true, true, true, true, true, true, true,
44+
true, true, true, false, false, false, false, false,
45+
false, false, false, false, false, false, false, false,
46+
false, false, false, false, false, false, false, false,
47+
false, false, false, false, false, false, false, false,
48+
false, false, false, false, false, false, false, false,
49+
false, false, false, false, false, false, false, false,
50+
false, false, false, false, false, false, false, false,
51+
false, false, false, false, false, false, false, false,
52+
false, false, false, false, false, false, false, false,
53+
false, false, false, false, false, false, false, false,
54+
false, false, false, false, false, false, false, false,
55+
false, false, false, false, false, false, false, false,
56+
false, false, false, false, false, false, false, false,
57+
false, false, false, false, false, false, false, false,
58+
false, false, false, false, false, false, false, false,
59+
false, false, false, false, false, false, false, false,
60+
false, false, false, false, false, false, false, false,
61+
// format hack.
62+
];
63+
return table[next];
64+
}

pkg/_fe_analyzer_shared/lib/src/scanner/string_scanner.dart

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44

55
library dart2js.scanner.string_scanner;
66

7-
import 'characters.dart' show $EOF;
7+
import 'characters.dart' show $CR, $EOF, $LF, $SPACE;
8+
9+
import 'internal_utils.dart' show isIdentifierChar;
810

911
import 'token.dart'
1012
show
@@ -78,6 +80,49 @@ class StringScanner extends AbstractScanner {
7880
return _string.codeUnitAt(scanOffset);
7981
}
8082

83+
@override
84+
@pragma('vm:unsafe:no-bounds-checks')
85+
int current() {
86+
if (scanOffset > _stringLengthMinusOne) return $EOF;
87+
return _string.codeUnitAt(scanOffset);
88+
}
89+
90+
@override
91+
int passIdentifierCharAllowDollar() {
92+
while (true) {
93+
int next = advance();
94+
if (!isIdentifierChar(next, /* allowDollar = */ true)) {
95+
return next;
96+
}
97+
}
98+
}
99+
100+
@override
101+
bool scanUntilLineEnd() {
102+
bool asciiOnly = true;
103+
int next = advance();
104+
while (true) {
105+
if (next > 127) asciiOnly = false;
106+
if ($LF == next || $CR == next || $EOF == next) {
107+
return asciiOnly;
108+
}
109+
next = advance();
110+
}
111+
}
112+
113+
@override
114+
@pragma("vm:prefer-inline")
115+
int skipSpaces() {
116+
int next = advance();
117+
// Sequences of spaces are common, so advance through them fast.
118+
while (next == $SPACE) {
119+
// We don't invoke [:appendWhiteSpace(next):] here for efficiency,
120+
// assuming that it does not do anything for space characters.
121+
next = advance();
122+
}
123+
return next;
124+
}
125+
81126
@override
82127
@pragma('vm:unsafe:no-bounds-checks')
83128
int peek() {

0 commit comments

Comments
 (0)