Skip to content

Commit ca16a42

Browse files
jensjohaCommit Queue
authored and committed
[scanner] Replace KeywordState
As benchmarked with the AOT compiles of `scanner_benchmark.dart` called with `pkg/_fe_analyzer_shared/lib/src/parser/parser_impl.dart --bytes`: ``` N Min Max Median Avg Stddev x 25 138.56434 144.23326 142.64174 142.51626 1.2207074 + 25 148.45681 157.02163 156.44878 155.88958 1.7051997 Difference at 95.0% confidence 13.3733 +/- 0.843454 9.38372% +/- 0.59183% (Student's t, pooled s = 1.48287) ``` And compiling the CFE from December with the CFE, statistics on 25 runs each, run with `--cache --silent` (so 2 x 50 runs each) (in AOT mode): ``` msec task-clock:u: -0.9684% +/- 0.3536% (-58.65 +/- 21.42) page-faults:u: -0.1051% +/- 0.0818% (-125.36 +/- 97.49) cycles:u: -1.0001% +/- 0.3225% (-252905081.28 +/- 81554799.59) instructions:u: -0.5915% +/- 0.0007% (-181426590.44 +/- 209346.28) seconds time elapsed: -0.9657% +/- 0.3525% (-0.06 +/- 0.02) seconds user: -1.1336% +/- 0.4333% (-0.07 +/- 0.03) msec task-clock:u: -1.3115% +/- 0.3364% (-79.48 +/- 20.39) page-faults:u: -0.1900% +/- 0.0774% (-226.72 +/- 92.37) L1-icache-load-misses: 0.4990% +/- 0.2072% (2422435.64 +/- 1006075.94) LLC-loads: -2.3896% +/- 0.1707% (-1179245.64 +/- 84228.28) LLC-load-misses: -1.9244% +/- 0.2041% (-639169.84 +/- 67791.64) seconds time elapsed: -1.3128% +/- 0.3374% (-0.08 +/- 0.02) seconds user: -1.3311% +/- 0.4183% (-0.08 +/- 0.02) ``` Combined, the 3 CLs ending here give this result when compiling the CFE from December with the CFE, statistics on 25 runs each, run with `--cache --silent` (so 2 x 50 runs each) (in AOT mode): ``` msec task-clock:u: -1.7681% +/- 0.3034% (-106.37 +/- 18.25) page-faults:u: -0.1834% +/- 0.0863% (-218.80 +/- 103.00) cycles:u: -1.8163% +/- 0.2668% (-456090610.64 +/- 66985831.74) instructions:u: -1.1851% +/- 0.0007% (-365653999.24 +/- 220539.29) branch-misses:u: -2.8468% +/- 1.0580% (-2611811.72 +/- 970666.69) seconds time elapsed: -1.7687% +/- 0.3026% (-0.11 +/- 0.02) seconds user: -1.9723% +/- 0.4480% (-0.11 +/- 0.03) msec task-clock:u: -1.7787% +/- 0.3042% (-107.07 
+/- 18.31) page-faults:u: -0.2413% +/- 0.1050% (-288.12 +/- 125.39) L1-icache-load-misses: 0.5218% +/- 0.1599% (2523027.12 +/- 773363.93) LLC-loads: -2.2917% +/- 0.1613% (-1125147.16 +/- 79181.20) LLC-load-misses: -2.0256% +/- 0.2667% (-670484.64 +/- 88293.18) seconds time elapsed: -1.7793% +/- 0.3036% (-0.11 +/- 0.02) seconds user: -1.7392% +/- 0.3752% (-0.10 +/- 0.02) ``` And for the scanner benchmark, in AOT mode, called with `pkg/_fe_analyzer_shared/lib/src/parser/parser_impl.dart --bytes` for bytes per microsecond: ``` N Min Max Median Avg Stddev x 25 114.87867 117.70322 117.10106 116.85 0.67188864 + 25 153.512 156.99315 156.38671 155.95981 0.98376554 Difference at 95.0% confidence 39.1098 +/- 0.479146 33.4701% +/- 0.410053% (Student's t, pooled s = 0.842386) ``` Change-Id: Ica6d47d92ab0fb4c3a06aa6686b8c71f52a6aef8 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/407480 Commit-Queue: Jens Johansen <[email protected]> Reviewed-by: Johnni Winther <[email protected]>
1 parent 49a9d42 commit ca16a42

File tree

4 files changed

+298
-161
lines changed

4 files changed

+298
-161
lines changed

pkg/_fe_analyzer_shared/lib/src/scanner/abstract_scanner.dart

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import 'dart:typed_data' show Uint16List, Uint32List;
1313

1414
import 'internal_utils.dart' show isIdentifierChar;
1515

16+
import 'keyword_state.dart' show KeywordState, KeywordStateHelper;
17+
1618
import 'token.dart'
1719
show
1820
BeginToken,
@@ -47,8 +49,6 @@ import 'error_token.dart'
4749
UnterminatedString,
4850
UnterminatedToken;
4951

50-
import 'keyword_state.dart' show KeywordState;
51-
5252
import 'token_impl.dart' show DartDocToken, StringTokenImpl;
5353

5454
import 'token_constants.dart';
@@ -1711,23 +1711,18 @@ abstract class AbstractScanner implements Scanner {
17111711
}
17121712

17131713
int tokenizeKeywordOrIdentifier(int next, bool allowDollar) {
1714-
KeywordState? state = KeywordState.KEYWORD_STATE;
1714+
KeywordState state = KeywordStateHelper.table;
17151715
int start = scanOffset;
17161716
// We allow a leading capital character.
1717-
if ($A <= next && next <= $Z) {
1718-
state = state.nextCapital(next);
1719-
next = advance();
1720-
} else if ($a <= next && next <= $z) {
1721-
// Do the first next call outside the loop to avoid an additional test
1722-
// and to make the loop monomorphic.
1717+
if ($A <= next && next <= $z) {
17231718
state = state.next(next);
17241719
next = advance();
17251720
}
1726-
while (state != null && $a <= next && next <= $z) {
1721+
while (!state.isNull && $a <= next && next <= $z) {
17271722
state = state.next(next);
17281723
next = advance();
17291724
}
1730-
if (state == null) {
1725+
if (state.isNull) {
17311726
return tokenizeIdentifier(next, start, allowDollar);
17321727
}
17331728
Keyword? keyword = state.keyword;
Lines changed: 56 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -1,166 +1,72 @@
1-
// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
22
// for details. All rights reserved. Use of this source code is governed by a
33
// BSD-style license that can be found in the LICENSE file.
44

5-
library _fe_analyzer_shared.scanner.keywords;
5+
import 'dart:typed_data';
66

7-
import 'token.dart' as analyzer;
7+
import 'characters.dart';
8+
import 'token.dart';
89

9-
import 'characters.dart' show $a, $z, $A, $Z;
10+
extension type KeywordState._(int _offset) {
11+
static const int blockSize = 59;
1012

11-
/**
12-
* Abstract state in a state machine for scanning keywords.
13-
*/
14-
abstract class KeywordState {
15-
KeywordState? next(int c);
16-
KeywordState? nextCapital(int c);
13+
@pragma("vm:prefer-inline")
14+
bool get isNull => _offset == 0;
1715

18-
analyzer.Keyword? get keyword;
19-
20-
static KeywordState? _KEYWORD_STATE;
21-
static KeywordState get KEYWORD_STATE {
22-
if (_KEYWORD_STATE == null) {
23-
List<String> strings = analyzer.Keyword.values
24-
.map((keyword) => keyword.lexeme)
25-
.toList(growable: false);
26-
strings.sort((a, b) => a.compareTo(b));
27-
_KEYWORD_STATE = computeKeywordStateTable(
28-
/* start = */ 0,
29-
strings,
30-
/* offset = */ 0,
31-
strings.length,
32-
);
33-
}
34-
return _KEYWORD_STATE!;
16+
@pragma("vm:prefer-inline")
17+
Keyword? get keyword {
18+
// The 0'th index at the offset.
19+
int keywordIndexPlusOne = KeywordStateHelper._table![_offset];
20+
if (keywordIndexPlusOne == 0) return null;
21+
return Keyword.values[keywordIndexPlusOne - 1];
3522
}
3623

37-
static KeywordState computeKeywordStateTable(
38-
int start, List<String> strings, int offset, int length) {
39-
bool isLowercase = true;
40-
41-
List<KeywordState?> table =
42-
new List<KeywordState?>.filled($z - $A + 1, /* fill = */ null);
43-
assert(length != 0);
44-
int chunk = 0;
45-
int chunkStart = -1;
46-
bool isLeaf = false;
47-
for (int i = offset; i < offset + length; i++) {
48-
if (strings[i].length == start) {
49-
isLeaf = true;
50-
}
51-
if (strings[i].length > start) {
52-
int c = strings[i].codeUnitAt(start);
53-
if ($A <= c && c <= $Z) {
54-
isLowercase = false;
55-
}
56-
if (chunk != c) {
57-
if (chunkStart != -1) {
58-
assert(table[chunk - $A] == null);
59-
table[chunk - $A] = computeKeywordStateTable(
60-
start + 1, strings, chunkStart, i - chunkStart);
61-
}
62-
chunkStart = i;
63-
chunk = c;
64-
}
65-
}
66-
}
67-
if (chunkStart != -1) {
68-
assert(table[chunk - $A] == null);
69-
table[chunk - $A] = computeKeywordStateTable(
70-
start + 1, strings, chunkStart, offset + length - chunkStart);
71-
} else {
72-
assert(length == 1);
73-
return new LeafKeywordState(strings[offset]);
74-
}
75-
String? syntax = isLeaf ? strings[offset] : null;
76-
if (isLowercase) {
77-
table = table.sublist($a - $A);
78-
return new LowerCaseArrayKeywordState(table, syntax);
79-
} else {
80-
return new UpperCaseArrayKeywordState(table, syntax);
81-
}
24+
@pragma("vm:prefer-inline")
25+
KeywordState next(int next) {
26+
// The entry for next starts with A at index offset + 1 because offset + 0
27+
// is the (possible) keyword.
28+
return new KeywordState._(
29+
KeywordStateHelper._table![_offset + next - $A + 1]);
8230
}
8331
}
8432

85-
/**
86-
* A state with multiple outgoing transitions.
87-
*/
88-
abstract class ArrayKeywordState implements KeywordState {
89-
final List<KeywordState?> table;
90-
@override
91-
final analyzer.Keyword? keyword;
92-
93-
ArrayKeywordState(this.table, String? syntax)
94-
: keyword = ((syntax == null) ? null : analyzer.Keyword.keywords[syntax]);
95-
96-
@override
97-
KeywordState? next(int c);
98-
99-
@override
100-
KeywordState? nextCapital(int c);
101-
102-
@override
103-
String toString() {
104-
StringBuffer sb = new StringBuffer();
105-
sb.write("[");
106-
if (keyword != null) {
107-
sb.write("*");
108-
sb.write(keyword);
109-
sb.write(" ");
110-
}
111-
List<KeywordState?> foo = table;
112-
for (int i = 0; i < foo.length; i++) {
113-
if (foo[i] != null) {
114-
sb.write("${new String.fromCharCodes([i + $a])}: "
115-
"${foo[i]}; ");
33+
final class KeywordStateHelper {
34+
static Uint16List? _table;
35+
static KeywordState get table {
36+
if (_table == null) {
37+
// This is a fixed calculation, though if creating more keywords this
38+
// number of (double) bytes might have to change.
39+
Uint16List table = _table = new Uint16List(297 * KeywordState.blockSize);
40+
int nextEmpty = 2 * KeywordState.blockSize;
41+
for (int i = 0; i < Keyword.values.length; i++) {
42+
Keyword keyword = Keyword.values[i];
43+
String lexeme = keyword.lexeme;
44+
// At this point we're looking at the $blockSize bytes
45+
// $blockSize->(2 * $blockSize - 1).
46+
// The first blockSize bytes (0->($blockSize-1)) are all 0s,
47+
// being the "null leaf".
48+
int offset = KeywordState.blockSize;
49+
// For an offset, the 0'th byte is a link to the keyword
50+
// (+1, so 0 means no keyword) and the remaining 58 spots are table
51+
// entries for codeUnit - $A.
52+
for (int j = 0; j < lexeme.length; j++) {
53+
int charOffset = lexeme.codeUnitAt(j) - $A;
54+
int link = table[offset + 1 + charOffset];
55+
if (link == 0) {
56+
// New one
57+
table[offset + 1 + charOffset] = nextEmpty;
58+
offset = nextEmpty;
59+
nextEmpty += KeywordState.blockSize;
60+
} else {
61+
// Existing one.
62+
offset = link;
63+
}
64+
}
65+
// Position 0 of this offset points to the (i + 1)'th keyword.
66+
table[offset + 0] = i + 1;
11667
}
68+
assert(nextEmpty == table.length);
11769
}
118-
sb.write("]");
119-
return sb.toString();
120-
}
121-
}
122-
123-
class LowerCaseArrayKeywordState extends ArrayKeywordState {
124-
LowerCaseArrayKeywordState(List<KeywordState?> table, String? syntax)
125-
: super(table, syntax) {
126-
assert(table.length == $z - $a + 1);
127-
}
128-
129-
@override
130-
KeywordState? next(int c) => table[c - $a];
131-
132-
@override
133-
KeywordState? nextCapital(int c) => null;
134-
}
135-
136-
class UpperCaseArrayKeywordState extends ArrayKeywordState {
137-
UpperCaseArrayKeywordState(List<KeywordState?> table, String? syntax)
138-
: super(table, syntax) {
139-
assert(table.length == $z - $A + 1);
70+
return new KeywordState._(KeywordState.blockSize);
14071
}
141-
142-
@override
143-
KeywordState? next(int c) => table[c - $A];
144-
145-
@override
146-
KeywordState? nextCapital(int c) => table[c - $A];
147-
}
148-
149-
/**
150-
* A state that has no outgoing transitions.
151-
*/
152-
class LeafKeywordState implements KeywordState {
153-
@override
154-
final analyzer.Keyword keyword;
155-
156-
LeafKeywordState(String syntax)
157-
: keyword = analyzer.Keyword.keywords[syntax]!;
158-
159-
@override
160-
KeywordState? next(int c) => null;
161-
@override
162-
KeywordState? nextCapital(int c) => null;
163-
164-
@override
165-
String toString() => keyword.lexeme;
16672
}

0 commit comments

Comments
 (0)