Skip to content

Commit c6362da

Browse files
osa1 authored and Commit Queue committed
[dart2wasm] Convert UTF-8 chunks to U8List before decoding
Currently `convertSingle` converts the input to `U8List`, but `convertChunked` works on `Uint8List`. This makes the functions common to both (`decode8`, `decode16`) polymorphic in their input. Update `convertChunked` to also convert the input to `U8List`. With this change, `decode8` and `decode16` become monomorphic in the input type. Also update array accesses in these methods to avoid bounds checks. Check for a few fast cases in `List<int>` to `U8List` copying. If the list is a `WasmI8ArrayBase` (used in typed data) or `WasmListBase` (used in lists), we avoid polymorphism, indirections, and bounds checks during copying. Golem reports up to 600% improvement in some chunked parsing micro-benchmarks. Change-Id: Iddf6dae1a5d77cf574be77313dff779b4715e283 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/395980 Commit-Queue: Ömer Ağacan <[email protected]> Reviewed-by: Slava Egorov <[email protected]>
1 parent c5b5090 commit c6362da

File tree

3 files changed

+178
-35
lines changed

3 files changed

+178
-35
lines changed

sdk/lib/_internal/wasm/lib/convert_patch.dart

Lines changed: 92 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ import "dart:_internal"
88
import "dart:_js_string_convert";
99
import "dart:_js_types";
1010
import "dart:_js_helper" show jsStringToDartString;
11-
import "dart:_list" show GrowableList, WasmListBaseUnsafeExtensions;
11+
import "dart:_list"
12+
show GrowableList, WasmListBaseUnsafeExtensions, WasmListBase;
1213
import "dart:_string";
1314
import "dart:_typed_data";
1415
import "dart:_wasm";
@@ -1987,11 +1988,11 @@ class _Utf8Decoder {
19871988
_bomIndex = -1;
19881989
}
19891990

1990-
int scan(Uint8List bytes, int start, int end) {
1991+
int scan(U8List bytes, int start, int end) {
19911992
int size = 0;
19921993
int flags = 0;
19931994
for (int i = start; i < end; i++) {
1994-
int t = scanTable.readUnsigned(bytes[i]);
1995+
int t = scanTable.readUnsigned(bytes.getUnchecked(i));
19951996
size += t & sizeMask;
19961997
flags |= t;
19971998
}
@@ -2104,14 +2105,13 @@ class _Utf8Decoder {
21042105
String convertChunked(List<int> codeUnits, int start, int? maybeEnd) {
21052106
int end = RangeError.checkValidRange(start, maybeEnd, codeUnits.length);
21062107

2107-
// Have bytes as Uint8List.
2108-
Uint8List bytes;
2108+
final U8List bytes;
21092109
int errorOffset;
2110-
if (codeUnits is Uint8List) {
2111-
bytes = unsafeCast<Uint8List>(codeUnits);
2110+
if (codeUnits is U8List) {
2111+
bytes = unsafeCast<U8List>(codeUnits);
21122112
errorOffset = 0;
21132113
} else {
2114-
bytes = _makeUint8List(codeUnits, start, end);
2114+
bytes = _makeU8List(codeUnits, start, end);
21152115
errorOffset = start;
21162116
end -= start;
21172117
start = 0;
@@ -2205,17 +2205,17 @@ class _Utf8Decoder {
22052205
return result;
22062206
}
22072207

2208-
int skipBomSingle(Uint8List bytes, int start, int end) {
2208+
int skipBomSingle(U8List bytes, int start, int end) {
22092209
if (end - start >= 3 &&
2210-
bytes[start] == 0xEF &&
2211-
bytes[start + 1] == 0xBB &&
2212-
bytes[start + 2] == 0xBF) {
2210+
bytes.getUnchecked(start) == 0xEF &&
2211+
bytes.getUnchecked(start + 1) == 0xBB &&
2212+
bytes.getUnchecked(start + 2) == 0xBF) {
22132213
return start + 3;
22142214
}
22152215
return start;
22162216
}
22172217

2218-
int skipBomChunked(Uint8List bytes, int start, int end) {
2218+
int skipBomChunked(U8List bytes, int start, int end) {
22192219
assert(start <= end);
22202220
int bomIndex = _bomIndex;
22212221
// Already skipped?
@@ -2229,7 +2229,7 @@ class _Utf8Decoder {
22292229
_bomIndex = bomIndex;
22302230
return start;
22312231
}
2232-
if (bytes[i++] != bomValues[bomIndex++]) {
2232+
if (bytes.getUnchecked(i++) != bomValues[bomIndex++]) {
22332233
// No BOM.
22342234
_bomIndex = -1;
22352235
return start;
@@ -2241,15 +2241,15 @@ class _Utf8Decoder {
22412241
return i;
22422242
}
22432243

2244-
String decode8(Uint8List bytes, int start, int end, int size) {
2244+
String decode8(U8List bytes, int start, int end, int size) {
22452245
assert(start < end);
22462246
OneByteString result = OneByteString.withLength(size);
22472247
int i = start;
22482248
int j = 0;
22492249
if (_state == X1) {
22502250
// Half-way through a 2-byte sequence
22512251
assert(_charOrIndex == 2 || _charOrIndex == 3);
2252-
final int e = bytes[i++] ^ 0x80;
2252+
final int e = bytes.getUnchecked(i++) ^ 0x80;
22532253
if (e >= 0x40) {
22542254
_state = errorMissingExtension;
22552255
_charOrIndex = i - 1;
@@ -2260,7 +2260,7 @@ class _Utf8Decoder {
22602260
}
22612261
assert(_state == accept);
22622262
while (i < end) {
2263-
int byte = bytes[i++];
2263+
int byte = bytes.getUnchecked(i++);
22642264
if (byte >= 0x80) {
22652265
if (byte < 0xC0) {
22662266
_state = errorUnexpectedExtension;
@@ -2273,7 +2273,7 @@ class _Utf8Decoder {
22732273
_charOrIndex = byte & 0x1F;
22742274
break;
22752275
}
2276-
final int e = bytes[i++] ^ 0x80;
2276+
final int e = bytes.getUnchecked(i++) ^ 0x80;
22772277
if (e >= 0x40) {
22782278
_state = errorMissingExtension;
22792279
_charOrIndex = i - 1;
@@ -2293,7 +2293,7 @@ class _Utf8Decoder {
22932293
return result;
22942294
}
22952295

2296-
String decode16(Uint8List bytes, int start, int end, int size) {
2296+
String decode16(U8List bytes, int start, int end, int size) {
22972297
assert(start < end);
22982298
final OneByteString transitionTable = unsafeCast<OneByteString>(
22992299
_Utf8Decoder.transitionTable,
@@ -2309,7 +2309,7 @@ class _Utf8Decoder {
23092309

23102310
// First byte
23112311
assert(!isErrorState(state));
2312-
final int byte = bytes[i++];
2312+
final int byte = bytes.getUnchecked(i++);
23132313
final int type = typeTable.codeUnitAtUnchecked(byte) & typeMask;
23142314
if (state == accept) {
23152315
char = byte & (shiftedByteMask >> type);
@@ -2320,7 +2320,7 @@ class _Utf8Decoder {
23202320
}
23212321

23222322
while (i < end) {
2323-
final int byte = bytes[i++];
2323+
final int byte = bytes.getUnchecked(i++);
23242324
final int type = typeTable.codeUnitAtUnchecked(byte) & typeMask;
23252325
if (state == accept) {
23262326
if (char >= 0x10000) {
@@ -2369,3 +2369,74 @@ class _Utf8Decoder {
23692369
return result;
23702370
}
23712371
}
2372+
2373+
/// Copies the elements `start..end` (end exclusive) of [codeUnits] into a
/// newly allocated [U8List].
///
/// Lists backed by a [WasmListBase] or a [WasmI8ArrayBase] are handed to a
/// specialised copy routine; every other `List<int>` is copied through its
/// index operator. In the generic path, any element that does not fit in
/// eight unsigned bits is stored as 0xFF.
U8List _makeU8List(List<int> codeUnits, int start, int end) {
  // Fast path: regular lists.
  if (codeUnits is WasmListBase) {
    final WasmListBase<int> list = unsafeCast<WasmListBase<int>>(codeUnits);
    return _makeU8ListFromWasmListBase(list, start, end);
  }

  // Fast path: 8-bit typed data.
  if (codeUnits is WasmI8ArrayBase) {
    final WasmI8ArrayBase typedData = unsafeCast<WasmI8ArrayBase>(codeUnits);
    return _makeU8ListFromWasmI8ArrayBase(typedData, start, end);
  }

  // Generic path: go through the list's own `operator []`.
  final U8List result = U8List(end - start);
  int target = 0;
  for (int source = start; source < end; source++) {
    final int unit = codeUnits[source];
    // Replace invalid byte values by FF, which is also invalid.
    result.setUnchecked(target++, (unit & ~0xFF) == 0 ? unit : 0xFF);
  }
  return result;
}
2402+
2403+
/// Copies the elements `start..end` (end exclusive) of a [WasmListBase] into
/// a newly allocated [U8List].
///
/// Both the source and the destination are accessed through their backing
/// `WasmArray`s rather than through the list index operators. Any element
/// that does not fit in eight unsigned bits is stored as 0xFF.
U8List _makeU8ListFromWasmListBase(
  WasmListBase<int> codeUnits,
  int start,
  int end,
) {
  final U8List result = U8List(end - start);
  final WasmArray<Object?> source = codeUnits.data;
  final WasmArray<WasmI8> target = result.data;

  int targetIndex = 0;
  for (int sourceIndex = start; sourceIndex < end; sourceIndex++) {
    final int unit = unsafeCast<int>(source[sourceIndex]);
    // Replace invalid byte values by FF, which is also invalid.
    target.write(targetIndex++, (unit & ~0xFF) == 0 ? unit : 0xFF);
  }
  return result;
}
2422+
2423+
/// Copies the elements `start..end` (end exclusive) of an 8-bit typed-data
/// list into a newly allocated [U8List], reading the backing array directly.
///
/// Unsigned lists ([U8List], [U8ClampedList]) are copied byte for byte. Any
/// other [WasmI8ArrayBase] is read as signed values, and elements outside
/// `0..255` (that is, negative ones) are stored as 0xFF so that the decoder
/// treats them as invalid.
U8List _makeU8ListFromWasmI8ArrayBase(
  WasmI8ArrayBase codeUnits,
  int start,
  int end,
) {
  final int length = end - start;
  final U8List bytes = U8List(length);
  final WasmArray<WasmI8> listData = codeUnits.data;
  // Index in the backing array of the first element to copy.
  final int firstIndex = codeUnits.offsetInBytes + start;
  final WasmArray<WasmI8> bytesData = bytes.data;

  if (codeUnits is U8List || codeUnits is U8ClampedList) {
    // These lists hold unsigned bytes. Reading them as signed would turn
    // every value >= 0x80 negative and wrongly replace it with 0xFF, so
    // valid multi-byte UTF-8 sequences would be rejected.
    for (int i = 0; i < length; i++) {
      bytesData.write(i, listData.readUnsigned(firstIndex + i));
    }
    return bytes;
  }

  // NOTE(review): assumes every remaining subtype exposes signed elements,
  // as I8List does - confirm if further WasmI8ArrayBase subclasses exist.
  for (int i = 0; i < length; i++) {
    int b = listData.readSigned(firstIndex + i);
    if ((b & ~0xFF) != 0) {
      // Replace invalid byte values by FF, which is also invalid.
      b = 0xFF;
    }
    bytesData.write(i, b);
  }
  return bytes;
}

sdk/lib/_internal/wasm/lib/typed_data.dart

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1976,8 +1976,8 @@ mixin _TypedIntListMixin<SpawnedType extends TypedDataList<int>>
19761976
final fromTypedData = unsafeCast<JSIntegerArrayBase>(from);
19771977

19781978
final fromElementSize = fromTypedData.elementSizeInBytes;
1979-
if (fromElementSize == 1 && this is _WasmI8ArrayBase) {
1980-
final destTypedData = unsafeCast<_WasmI8ArrayBase>(this);
1979+
if (fromElementSize == 1 && this is WasmI8ArrayBase) {
1980+
final destTypedData = unsafeCast<WasmI8ArrayBase>(this);
19811981
copyToWasmI8Array(
19821982
fromTypedData.toJSArrayExternRef()!,
19831983
skipCount,
@@ -2565,16 +2565,16 @@ mixin _UnmodifiableDoubleListMixin {
25652565
// Fast lists
25662566
//
25672567

2568-
abstract class _WasmI8ArrayBase extends WasmTypedDataBase {
2568+
abstract class WasmI8ArrayBase extends WasmTypedDataBase {
25692569
final WasmArray<WasmI8> _data;
25702570
final int _offsetInElements;
25712571
final int length;
25722572

2573-
_WasmI8ArrayBase(this.length)
2573+
WasmI8ArrayBase(this.length)
25742574
: _data = WasmArray(_newArrayLengthCheck(length)),
25752575
_offsetInElements = 0;
25762576

2577-
_WasmI8ArrayBase._(this._data, this._offsetInElements, this.length);
2577+
WasmI8ArrayBase._(this._data, this._offsetInElements, this.length);
25782578

25792579
int get elementSizeInBytes => 1;
25802580

@@ -2679,7 +2679,7 @@ abstract class _WasmF64ArrayBase extends WasmTypedDataBase {
26792679
_F64ByteBuffer get buffer => _F64ByteBuffer(_data);
26802680
}
26812681

2682-
extension WasmI8ArrayBaseExt on _WasmI8ArrayBase {
2682+
extension WasmI8ArrayBaseExt on WasmI8ArrayBase {
26832683
@pragma('wasm:prefer-inline')
26842684
WasmArray<WasmI8> get data => _data;
26852685

@@ -2719,7 +2719,7 @@ extension WasmF64ArrayBaseExt on _WasmF64ArrayBase {
27192719
int get offsetInElements => _offsetInElements;
27202720
}
27212721

2722-
class I8List extends _WasmI8ArrayBase
2722+
class I8List extends WasmI8ArrayBase
27232723
with
27242724
_IntListMixin,
27252725
_TypedIntListMixin<I8List>,
@@ -2761,15 +2761,15 @@ class I8List extends _WasmI8ArrayBase
27612761
}
27622762
}
27632763

2764-
class U8List extends _WasmI8ArrayBase
2764+
class U8List extends WasmI8ArrayBase
27652765
with
27662766
_IntListMixin,
27672767
_TypedIntListMixin<U8List>,
27682768
_TypedListCommonOperationsMixin
27692769
implements Uint8List {
27702770
U8List(int length) : super(length);
27712771

2772-
U8List._(WasmArray<WasmI8> data, int offsetInElements, int length)
2772+
U8List.withData(WasmArray<WasmI8> data, int offsetInElements, int length)
27732773
: super._(data, offsetInElements, length);
27742774

27752775
factory U8List._withMutability(
@@ -2779,7 +2779,7 @@ class U8List extends _WasmI8ArrayBase
27792779
bool mutable,
27802780
) =>
27812781
mutable
2782-
? U8List._(buffer, offsetInBytes, length)
2782+
? U8List.withData(buffer, offsetInBytes, length)
27832783
: UnmodifiableU8List._(buffer, offsetInBytes, length);
27842784

27852785
@override
@@ -2792,18 +2792,28 @@ class U8List extends _WasmI8ArrayBase
27922792
@pragma("wasm:prefer-inline")
27932793
int operator [](int index) {
27942794
indexCheck(index, length);
2795-
return _data.readUnsigned(_offsetInElements + index);
2795+
return getUnchecked(index);
27962796
}
27972797

27982798
@override
27992799
@pragma("wasm:prefer-inline")
28002800
void operator []=(int index, int value) {
28012801
indexCheck(index, length);
2802+
setUnchecked(index, value);
2803+
}
2804+
}
2805+
2806+
/// Element access on [U8List] that performs no index check.
///
/// Callers must guarantee that `index` is within the list's bounds.
extension U8ListUncheckedOperations on U8List {
  /// Reads the element at [index] as an unsigned value, without an index
  /// check.
  @pragma("wasm:prefer-inline")
  int getUnchecked(int index) {
    return _data.readUnsigned(_offsetInElements + index);
  }

  /// Writes [value] at [index], without an index check.
  @pragma("wasm:prefer-inline")
  void setUnchecked(int index, int value) =>
      _data.write(_offsetInElements + index, value);
}
28052815

2806-
class U8ClampedList extends _WasmI8ArrayBase
2816+
class U8ClampedList extends WasmI8ArrayBase
28072817
with
28082818
_IntListMixin,
28092819
_TypedIntListMixin<U8ClampedList>,
@@ -3200,10 +3210,10 @@ class UnmodifiableI8List extends I8List with _UnmodifiableIntListMixin {
32003210

32013211
class UnmodifiableU8List extends U8List with _UnmodifiableIntListMixin {
32023212
UnmodifiableU8List(U8List list)
3203-
: super._(list._data, list._offsetInElements, list.length);
3213+
: super.withData(list._data, list._offsetInElements, list.length);
32043214

32053215
UnmodifiableU8List._(WasmArray<WasmI8> data, int offsetInElements, int length)
3206-
: super._(data, offsetInElements, length);
3216+
: super.withData(data, offsetInElements, length);
32073217

32083218
@override
32093219
@pragma('wasm:prefer-inline')
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import "dart:convert";
6+
import "dart:typed_data";
7+
8+
import "package:expect/expect.dart";
9+
10+
/// Checks which byte containers the UTF-8 decoder accepts, for both one-shot
/// and chunked conversion.
///
/// The same two-byte encoding of "é" is supplied as a plain list and as
/// several typed-data lists, with the values written either as unsigned
/// (195, 169) or as negative (-61, -87) integers.
void main() {
  // "é"
  final bytes = [195, 169];

  // Same as `bytes` when interpreted as unsigned bytes.
  final negativeBytes = [-61, -87];

  final decoded = "é";

  // Inputs whose elements read back as the unsigned bytes of "é".
  final shouldSucceed = [
    bytes,
    Uint8List.fromList(bytes),
    Uint8List.fromList(negativeBytes),
  ];

  // Inputs whose elements read back as negative values.
  final shouldFail = [
    negativeBytes,
    Int8List.fromList(bytes),
    Int8List.fromList(negativeBytes),
  ];

  for (final input in shouldSucceed) {
    // `Expect.equals` takes the expected value first.
    Expect.equals(decoded, utf8.decoder.convert(input));

    final stringSink = StringSink();
    utf8.decoder.startChunkedConversion(stringSink)
      ..add(input)
      ..close();
    Expect.equals(decoded, stringSink.buffer.toString());
  }

  for (final input in shouldFail) {
    Expect.throwsFormatException(() => utf8.decoder.convert(input));

    final stringSink = StringSink();
    Expect.throwsFormatException(
      () => utf8.decoder.startChunkedConversion(stringSink)
        ..add(input)
        ..close(),
    );
  }
}
51+
52+
/// A [Sink] that appends every string it receives to [buffer].
class StringSink implements Sink<String> {
  /// Accumulates the strings passed to [add], in the order they were added.
  StringBuffer buffer = StringBuffer();

  StringSink();

  /// Appends [str] to [buffer].
  @override
  void add(String str) {
    buffer.write(str);
  }

  /// Does nothing; [buffer] stays readable after closing.
  @override
  void close() {}
}

0 commit comments

Comments
 (0)