Skip to content

Commit bb15e60

Browse files
mkustermannCommit Queue
authored andcommitted
[dart2wasm] Make json parser calculate string hash of keys while processing the json object key bytes
The JSON parser parses string literals in the JSON input. If those string literals are used as keys in JSON objects, we later construct a hash map for the JSON object and have to calculate the hash of every string key. Since we already process the bytes of the string key while scanning, we may as well eagerly calculate the hash at that point, to avoid going over the bytes again when creating the hash map.

Change-Id: If998be31394594a64cc6c352edc6e9d805720356
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/409661
Reviewed-by: Ömer Ağacan <[email protected]>
Commit-Queue: Martin Kustermann <[email protected]>
1 parent c554548 commit bb15e60

File tree

1 file changed

+97
-3
lines changed

1 file changed

+97
-3
lines changed

sdk/lib/_internal/wasm/lib/convert_patch.dart

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ import "dart:_js_helper" show jsStringToDartString;
1212
import "dart:_list"
1313
show GrowableList, WasmListBaseUnsafeExtensions, WasmListBase;
1414
import "dart:_string";
15+
import "dart:_string_helper";
1516
import "dart:_typed_data";
17+
import "dart:_object_helper";
1618
import "dart:_wasm";
1719
import "dart:typed_data" show Uint8List;
1820

@@ -694,6 +696,12 @@ mixin _ChunkedJsonParser<T> on _ChunkedJsonParserState {
694696
*/
695697
String getString(int start, int end, int bits);
696698

699+
/**
 * Same as [getString], but [hash] is the already computed hash of the
 * resulting string, so that implementations can record it on the string
 * instead of hashing the characters a second time.
 */
703+
String getStringWithHash(int start, int end, int bits, int hash);
704+
697705
/**
698706
* Parse a slice of the current chunk as a number.
699707
*
@@ -948,8 +956,15 @@ mixin _ChunkedJsonParser<T> on _ChunkedJsonParserState {
948956
switch (char) {
949957
case QUOTE:
950958
if ((state & ALLOW_STRING_MASK) != 0) fail(position);
959+
final calculateHash =
960+
isUtf16Input &&
961+
(state == STATE_OBJECT_EMPTY || state == STATE_OBJECT_COMMA);
951962
state |= VALUE_READ_BITS;
952-
position = parseString(position + 1);
963+
if (calculateHash) {
964+
position = parseStringWithHash(position + 1);
965+
} else {
966+
position = parseString(position + 1);
967+
}
953968
break;
954969
case LBRACKET:
955970
if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
@@ -1155,12 +1170,30 @@ mixin _ChunkedJsonParser<T> on _ChunkedJsonParserState {
11551170
* Returned position right after the final quote.
11561171
*/
11571172
int parseString(int position) =>
    // Plain string scan: `false` disables the eager hash computation.
    _parseStringWithHashInternal(position, false);
1175+
1176+
/**
 * Variant of [parseString] that also computes the hash of the scanned string
 * while its characters are being processed.
 *
 * The shared implementation asserts that [isUtf16Input] holds whenever the
 * hash is requested.
 */
int parseStringWithHash(int position) =>
    _parseStringWithHashInternal(position, true);
1182+
1183+
@pragma('wasm:prefer-inline')
1184+
int _parseStringWithHashInternal(int position, bool computeHash) {
1185+
// If the input is utf-8 encoded bytes and we process it byte by byte but
1186+
// don't accumulate the utf-16 code points then we cannot easily precompute
1187+
// the hash.
1188+
assert(!computeHash || isUtf16Input);
1189+
11581190
// Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"'
11591191
// Initial position is right after first '"'.
11601192
int start = position;
11611193
int end = chunkEnd;
11621194
int bits = 0;
11631195
int char = 0;
1196+
int hash = 0;
11641197
if (position < end) {
11651198
do {
11661199
// Caveat: do not combine the following two lines together. It helps
@@ -1170,24 +1203,48 @@ mixin _ChunkedJsonParser<T> on _ChunkedJsonParserState {
11701203
position++;
11711204
bits |= char; // Includes final '"', but that never matters.
11721205
if (isUtf16Input && char > 0xFF) {
1206+
if (computeHash) {
1207+
hash = stringCombineHashes(hash, char);
1208+
}
11731209
continue;
11741210
}
11751211
if ((_characterAttributes.readUnsigned(char) &
11761212
CHAR_SIMPLE_STRING_END) !=
11771213
0) {
11781214
break;
11791215
}
1216+
if (computeHash) {
1217+
hash = stringCombineHashes(hash, char);
1218+
}
11801219
} while (position < end);
11811220
if (char == QUOTE) {
11821221
int sliceEnd = position - 1;
1183-
listener.handleString(getString(start, sliceEnd, bits));
1222+
listener.handleString(
1223+
computeHash
1224+
? getStringWithHash(
1225+
start,
1226+
sliceEnd,
1227+
bits,
1228+
stringFinalizeHash(hash),
1229+
)
1230+
: getString(start, sliceEnd, bits),
1231+
);
11841232
return sliceEnd + 1;
11851233
}
11861234
if (char == BACKSLASH) {
11871235
int sliceEnd = position - 1;
11881236
beginString();
11891237
if (start < sliceEnd) {
1190-
addStringSliceToString(getString(start, sliceEnd, bits));
1238+
addStringSliceToString(
1239+
computeHash
1240+
? getStringWithHash(
1241+
start,
1242+
sliceEnd,
1243+
bits,
1244+
stringFinalizeHash(hash),
1245+
)
1246+
: getString(start, sliceEnd, bits),
1247+
);
11911248
}
11921249
return parseStringToBuffer(sliceEnd);
11931250
}
@@ -1591,6 +1648,18 @@ class _JsonOneByteStringParser extends _ChunkedJsonParserState
15911648
String getString(int start, int end, int bits) {
  // [bits] is not consulted here; the slice is taken straight from the
  // current chunk.
  return chunk.substringUnchecked(start, end);
}
15931650

1651+
/**
 * Copies the characters `[start, end)` of the current chunk into a new
 * [OneByteString] and records the precomputed [stringHash] on it via
 * [setIdentityHashField].
 *
 * [bits] is not used by this implementation.
 */
String getStringWithHash(int start, int end, int bits, int stringHash) {
  final count = end - start;
  final source = chunk.array;
  final copy = OneByteString.withLength(count);
  for (int offset = 0; offset < count; offset++) {
    copy.array.write(offset, source.readUnsigned(start + offset));
  }
  // Debug-only sanity check: the hash computed during scanning must match
  // the hash the string computes for itself.
  assert(copy.hashCode.toWasmI32() == stringHash.toWasmI32());
  setIdentityHashField(copy, stringHash);
  return copy;
}
1662+
15941663
/**
 * Called by the string scanner before slices are added to a multi-part
 * string (for example when an escape sequence is encountered).
 *
 * This implementation only checks, in debug mode, that [stringBuffer] is
 * empty at that point.
 */
void beginString() {
15951665
assert(stringBuffer.isEmpty);
15961666
}
@@ -1664,6 +1733,27 @@ class _JsonTwoByteStringParser extends _ChunkedJsonParserState
16641733
return result;
16651734
}
16661735

1736+
/**
 * Copies the characters `[start, end)` of the current chunk into a new string
 * and records the precomputed [stringHash] on it via [setIdentityHashField].
 *
 * [bits] is the bitwise OR of the scanned code units. If it does not exceed
 * 0x7f the result is built as a [OneByteString], otherwise as a
 * [TwoByteString].
 */
String getStringWithHash(int start, int end, int bits, int stringHash) {
  final sourceArray = chunk.array;
  final length = end - start;

  const asciiBits = 0x7f;
  if (bits <= asciiBits) {
    // Every code unit fits in one byte: narrow while copying.
    final result = OneByteString.withLength(length);
    for (int i = 0; i < length; ++i) {
      result.array.write(i, sourceArray.readUnsigned(start + i));
    }
    // Debug-only sanity check, matching the two-byte path below: the hash
    // computed during scanning must equal the string's own hash.
    assert(result.hashCode.toWasmI32() == stringHash.toWasmI32());
    setIdentityHashField(result, stringHash);
    return result;
  }

  final result = TwoByteString.withLength(length);
  result.array.copy(0, sourceArray, start, length);
  // Debug-only sanity check of the precomputed hash.
  assert(result.hashCode.toWasmI32() == stringHash.toWasmI32());
  setIdentityHashField(result, stringHash);
  return result;
}
1756+
16671757
/**
 * Called by the string scanner before slices are added to a multi-part
 * string (for example when an escape sequence is encountered).
 *
 * This implementation only checks, in debug mode, that [stringBuffer] is
 * empty at that point.
 */
void beginString() {
16681758
assert(stringBuffer.isEmpty);
16691759
}
@@ -1859,6 +1949,10 @@ class _JsonUtf8Parser extends _ChunkedJsonParserState
18591949
return result;
18601950
}
18611951

1952+
/**
 * Not supported by this parser.
 *
 * The scanner only requests a precomputed hash when `isUtf16Input` is true
 * (it asserts this before scanning), so this method is presumably never
 * reached for UTF-8 input. It throws an [UnsupportedError] rather than a
 * bare string so that an accidental call is reported as a proper error.
 */
String getStringWithHash(int start, int end, int bits, int stringHash) {
  throw UnsupportedError(
    'getStringWithHash is not used by the UTF-8 JSON parser',
  );
}
1955+
18621956
void beginString() {
18631957
decoder.reset();
18641958
stringBuffer.clear();

0 commit comments

Comments
 (0)