[dart2wasm] Make json parser calculate string hash of keys while processing the json object key bytes

mkustermann · Commit Queue · commit bb15e60295b3 · 2025-02-13T07:01:28.000-08:00
The JSON parser parses string literals in the JSON input. If those string literals are used as keys in JSON objects, we later construct a hash map for the JSON object and have to calculate the hash of every string key. Since we already process the bytes of the string key while scanning, we may as well eagerly calculate the hash at that point, to avoid going over the bytes again when creating the hash map. Change-Id: If998be31394594a64cc6c352edc6e9d805720356 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/409661 Reviewed-by: Ömer Ağacan <omersa@google.com> Commit-Queue: Martin Kustermann <kustermann@google.com>
diff --git a/sdk/lib/_internal/wasm/lib/convert_patch.dart b/sdk/lib/_internal/wasm/lib/convert_patch.dart
@@ -12,7 +12,9 @@ import "dart:_js_helper" show jsStringToDartString;
 import "dart:_list"
     show GrowableList, WasmListBaseUnsafeExtensions, WasmListBase;
 import "dart:_string";
+import "dart:_string_helper";
 import "dart:_typed_data";
+import "dart:_object_helper";
 import "dart:_wasm";
 import "dart:typed_data" show Uint8List;
 
@@ -694,6 +696,12 @@ mixin _ChunkedJsonParser<T> on _ChunkedJsonParserState {
    */
   String getString(int start, int end, int bits);
 
+  /**
+   * Same as [getString] but with [hash] containing the already computed string
+   * hash.
+   *
+   * The parser passes the finalized hash it accumulated over the characters
+   * between [start] and [end] while scanning them, so an implementation can
+   * store it on the returned string instead of having it recomputed later.
+   */
+  String getStringWithHash(int start, int end, int bits, int hash);
+
   /**
    * Parse a slice of the current chunk as a number.
    *
@@ -948,8 +956,15 @@ mixin _ChunkedJsonParser<T> on _ChunkedJsonParserState {
       switch (char) {
         case QUOTE:
           if ((state & ALLOW_STRING_MASK) != 0) fail(position);
+          final calculateHash =
+              isUtf16Input &&
+              (state == STATE_OBJECT_EMPTY || state == STATE_OBJECT_COMMA);
           state |= VALUE_READ_BITS;
-          position = parseString(position + 1);
+          if (calculateHash) {
+            position = parseStringWithHash(position + 1);
+          } else {
+            position = parseString(position + 1);
+          }
           break;
         case LBRACKET:
           if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
@@ -1155,12 +1170,30 @@ mixin _ChunkedJsonParser<T> on _ChunkedJsonParserState {
    * Returned position right after the final quote.
    */
   int parseString(int position) {
+    // Plain variant: scan the string without accumulating a hash.
+    return _parseStringWithHashInternal(position, false);
+  }
+
+  /**
+   * Same as [parseString] but also calculates the string hash.
+   *
+   * The hash is accumulated while the characters are scanned and is handed to
+   * [getStringWithHash] together with the slice. The shared implementation
+   * asserts that [isUtf16Input] is true when the hash is requested.
+   */
+  int parseStringWithHash(int position) {
+    return _parseStringWithHashInternal(position, true);
+  }
+
+  @pragma('wasm:prefer-inline')
+  int _parseStringWithHashInternal(int position, bool computeHash) {
+    // If the input is utf-8 encoded bytes and we process it byte by byte but
+    // don't accumulate the utf-16 code points then we cannot easily precompute
+    // the hash.
+    assert(!computeHash || isUtf16Input);
+
     // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"'
     // Initial position is right after first '"'.
     int start = position;
     int end = chunkEnd;
     int bits = 0;
     int char = 0;
+    int hash = 0;
     if (position < end) {
       do {
         // Caveat: do not combine the following two lines together. It helps
@@ -1170,24 +1203,48 @@ mixin _ChunkedJsonParser<T> on _ChunkedJsonParserState {
         position++;
         bits |= char; // Includes final '"', but that never matters.
         if (isUtf16Input && char > 0xFF) {
+          if (computeHash) {
+            hash = stringCombineHashes(hash, char);
+          }
           continue;
         }
         if ((_characterAttributes.readUnsigned(char) &
                 CHAR_SIMPLE_STRING_END) !=
             0) {
           break;
         }
+        if (computeHash) {
+          hash = stringCombineHashes(hash, char);
+        }
       } while (position < end);
       if (char == QUOTE) {
         int sliceEnd = position - 1;
-        listener.handleString(getString(start, sliceEnd, bits));
+        listener.handleString(
+          computeHash
+              ? getStringWithHash(
+                start,
+                sliceEnd,
+                bits,
+                stringFinalizeHash(hash),
+              )
+              : getString(start, sliceEnd, bits),
+        );
         return sliceEnd + 1;
       }
       if (char == BACKSLASH) {
         int sliceEnd = position - 1;
         beginString();
         if (start < sliceEnd) {
-          addStringSliceToString(getString(start, sliceEnd, bits));
+          addStringSliceToString(
+            computeHash
+                ? getStringWithHash(
+                  start,
+                  sliceEnd,
+                  bits,
+                  stringFinalizeHash(hash),
+                )
+                : getString(start, sliceEnd, bits),
+          );
         }
         return parseStringToBuffer(sliceEnd);
       }
@@ -1591,6 +1648,18 @@ class _JsonOneByteStringParser extends _ChunkedJsonParserState
   String getString(int start, int end, int bits) =>
       chunk.substringUnchecked(start, end);
 
+  /**
+   * Copies the code units in `[start, end)` of the current chunk into a new
+   * one-byte string and stores the precomputed [stringHash] on it.
+   *
+   * [bits] is not used by this implementation.
+   */
+  String getStringWithHash(int start, int end, int bits, int stringHash) {
+    final count = end - start;
+    final source = chunk.array;
+    final string = OneByteString.withLength(count);
+    final target = string.array;
+    for (int offset = 0; offset < count; offset++) {
+      target.write(offset, source.readUnsigned(start + offset));
+    }
+    // Debug-only check that the scanner's hash matches the string's own hash.
+    assert(string.hashCode.toWasmI32() == stringHash.toWasmI32());
+    setIdentityHashField(string, stringHash);
+    return string;
+  }
+
   void beginString() {
     assert(stringBuffer.isEmpty);
   }
@@ -1664,6 +1733,27 @@ class _JsonTwoByteStringParser extends _ChunkedJsonParserState
     return result;
   }
 
+  /**
+   * Creates a string from the code units in `[start, end)` of the current
+   * chunk and stores the precomputed [stringHash] on it.
+   *
+   * If [bits] shows that every code unit is ASCII the result is a one-byte
+   * string, otherwise it is a two-byte string.
+   */
+  String getStringWithHash(int start, int end, int bits, int stringHash) {
+    final sourceArray = chunk.array;
+    final length = end - start;
+
+    // [bits] is the OR of the scanned code units, so a value that fits in 7
+    // bits means the slice can be narrowed to a one-byte string.
+    const asciiBits = 0x7f;
+    if (bits <= asciiBits) {
+      final result = OneByteString.withLength(length);
+      for (int i = 0; i < length; ++i) {
+        result.array.write(i, sourceArray.readUnsigned(start++));
+      }
+      // Debug-only check, the same one the two-byte path below performs.
+      assert(result.hashCode.toWasmI32() == stringHash.toWasmI32());
+      setIdentityHashField(result, stringHash);
+      return result;
+    }
+
+    final result = TwoByteString.withLength(length);
+    result.array.copy(0, sourceArray, start, length);
+    // Debug-only check that the scanner's hash matches the string's own hash.
+    assert(result.hashCode.toWasmI32() == stringHash.toWasmI32());
+    setIdentityHashField(result, stringHash);
+    return result;
+  }
+
   void beginString() {
     assert(stringBuffer.isEmpty);
   }
@@ -1859,6 +1949,10 @@ class _JsonUtf8Parser extends _ChunkedJsonParserState
     return result;
   }
 
+  /**
+   * Not supported by the UTF-8 parser.
+   *
+   * The scanner only requests a precomputed hash when `isUtf16Input` is true,
+   * so this method is not expected to be reached; it throws if it is.
+   */
+  String getStringWithHash(int start, int end, int bits, int stringHash) {
+    throw UnsupportedError('getStringWithHash is unused for UTF-8 input');
+  }
+
   void beginString() {
     decoder.reset();
     stringBuffer.clear();