Fuzzing: Allow running wasm exports in different orders in fuzz_shell.js (#7204)

kripken · web-flow · commit 444682c6c573 · 2025-01-10T09:01:43.000-08:00
Normally fuzz_shell.js runs the exports in the natural order, when callExports
is called. There is benefit to running them in different orders, just to get more
variety at runtime. To allow that, it now receives a random seed that, if provided,
it uses to determine the order at runtime.

This is primarily useful for ClusterFuzz, which adds more executions of
callExports. We now add such a seed to those.
diff --git a/scripts/clusterfuzz/run.py b/scripts/clusterfuzz/run.py
@@ -228,17 +228,24 @@ def get_js_file_contents(i, output_dir):
     extra_js_operations = [
         # Compile and link the wasm again. Each link adds more to the total
         # exports that we can call.
-        'build(binary);\n',
-        # Run all the exports we've accumulated.
-        'callExports();\n',
+        'build(binary)',
+        # Run all the exports we've accumulated. This is a placeholder, as we
+        # must pick a random seed for each (the placeholder would cause a JS
+        # error at runtime if we had a bug and did not replace it properly).
+        'CALL_EXPORTS',
     ]
     if has_second:
         extra_js_operations += [
-            'build(secondBinary);\n',
+            'build(secondBinary)',
         ]
 
     for i in range(num):
-        js += system_random.choice(extra_js_operations)
+        choice = system_random.choice(extra_js_operations)
+        if choice == 'CALL_EXPORTS':
+            # The random seed can be any unsigned 32-bit number.
+            seed = system_random.randint(0, 0xffffffff)
+            choice = f'callExports({seed})'
+        js += choice + ';\n'
 
     print(f'Created {bytes} wasm bytes')
 
diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js
@@ -371,20 +371,49 @@ function build(binary) {
   }
 }
 
-// Run the code by calling exports.
-/* async */ function callExports() {
+// Simple deterministic hashing, on an unsigned 32-bit seed. See e.g.
+// https://www.boost.org/doc/libs/1_55_0/doc/html/hash/reference.html#boost.hash_combine
+function hashCombine(seed, value) {
+  seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >>> 2);
+  return seed >>> 0;
+}
+
+// Run the code by calling exports. The optional |ordering| parameter indicates
+// how we should order the calls to the exports: if it is not provided, we call
+// them in the natural order, which allows our output to be compared to other
+// executions of the wasm (e.g. from wasm-opt --fuzz-exec). If |ordering| is
+// provided, it is a random seed we use to make deterministic choices on
+// the order of calls.
+/* async */ function callExports(ordering) {
   // Call the exports we were told, or if we were not given an explicit list,
   // call them all.
   var relevantExports = exportsToCall || exportList;
 
+  if (ordering !== undefined) {
+    // Copy the list, and shuffle it in the simple Fisher-Yates manner.
+    // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
+    relevantExports = relevantExports.slice(0);
+    for (var i = 0; i < relevantExports.length - 1; i++) {
+      // Pick the index of the item to place at index |i|.
+      ordering = hashCombine(ordering, i);
+      // The number of items to pick from begins at the full length, then
+      // decreases with i.
+      var j = i + (ordering % (relevantExports.length - i));
+      // Swap the item over here.
+      var t = relevantExports[j];
+      relevantExports[j] = relevantExports[i];
+      relevantExports[i] = t;
+    }
+  }
+
   for (var e of relevantExports) {
     var name, value;
     if (typeof e === 'string') {
       // We are given a string name to call. Look it up in the global namespace.
       name = e;
       value = exports[e];
     } else {
-      // We are given an object form exportList, which bas both a name and a
+      // We are given an object from exportList, which has both a name and a
       // value.
       name = e.name;
       value = e.value;
@@ -396,6 +425,8 @@ function build(binary) {
 
     try {
       console.log('[fuzz-exec] calling ' + name);
+      // TODO: Based on |ordering|, do not always await, leaving a promise
+      //       for later, so we interleave stacks.
       var result = /* await */ callFunc(value);
       if (typeof result !== 'undefined') {
         console.log('[fuzz-exec] note result: ' + name + ' => ' + printed(result));
diff --git a/test/lit/node/fuzz_shell_orders.wast b/test/lit/node/fuzz_shell_orders.wast
@@ -0,0 +1,53 @@
+;; Test that appending a run operation with a seed can lead to a different
+;; order of export calls.
+
+(module
+  (import "fuzzing-support" "log-i32" (func $log (param i32)))
+
+  (func $a (export "a") (result i32)
+    (i32.const 10)
+  )
+
+  (func $b (export "b") (result i32)
+    (i32.const 20)
+  )
+
+  (func $c (export "c") (result i32)
+    (i32.const 30)
+  )
+)
+
+;; Run normally: we should see a,b,c called in order.
+;;
+;; RUN: wasm-opt %s -o %t.wasm -q
+;; RUN: node %S/../../../scripts/fuzz_shell.js %t.wasm | filecheck %s
+;;
+;; CHECK: [fuzz-exec] calling a
+;; CHECK: [fuzz-exec] note result: a => 10
+;; CHECK: [fuzz-exec] calling b
+;; CHECK: [fuzz-exec] note result: b => 20
+;; CHECK: [fuzz-exec] calling c
+;; CHECK: [fuzz-exec] note result: c => 30
+
+;; Append another run with a seed that leads to a different order
+;;
+;; RUN: cp %S/../../../scripts/fuzz_shell.js %t.js
+;; RUN: echo "callExports(1337);" >> %t.js
+;; RUN: node %t.js %t.wasm | filecheck %s --check-prefix=APPENDED
+;;
+;; The original order: a,b,c
+;; APPENDED: [fuzz-exec] calling a
+;; APPENDED: [fuzz-exec] note result: a => 10
+;; APPENDED: [fuzz-exec] calling b
+;; APPENDED: [fuzz-exec] note result: b => 20
+;; APPENDED: [fuzz-exec] calling c
+;; APPENDED: [fuzz-exec] note result: c => 30
+
+;; A new order: b,c,a
+;; APPENDED: [fuzz-exec] calling b
+;; APPENDED: [fuzz-exec] note result: b => 20
+;; APPENDED: [fuzz-exec] calling c
+;; APPENDED: [fuzz-exec] note result: c => 30
+;; APPENDED: [fuzz-exec] calling a
+;; APPENDED: [fuzz-exec] note result: a => 10
+
diff --git a/test/unit/test_cluster_fuzz.py b/test/unit/test_cluster_fuzz.py
@@ -294,12 +294,19 @@ def test_file_contents(self):
         # one wasm in each testcase: each wasm has a chance.
         initial_content_regex = re.compile(r'[/][*] using initial content ([^ ]+) [*][/]')
 
+        # Some calls to callExports come with a random seed, so we have either
+        #
+        #  callExports();
+        #  callExports(123456);
+        #
+        call_exports_regex = re.compile(r'callExports[(](\d*)[)]')
+
         for i in range(1, N + 1):
             fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js')
             with open(fuzz_file) as f:
                 js = f.read()
             seen_builds.append(js.count('build(binary);'))
-            seen_calls.append(js.count('callExports();'))
+            seen_calls.append(re.findall(call_exports_regex, js))
             seen_second_builds.append(js.count('build(secondBinary);'))
 
             # If JSPI is enabled, the async and await keywords should be
@@ -331,12 +338,36 @@ def test_file_contents(self):
 
         print()
 
-        print('JS calls are distributed as ~ mean 4, stddev 5, median 2')
-        print(f'mean JS calls:   {statistics.mean(seen_calls)}')
-        print(f'stdev JS calls:  {statistics.stdev(seen_calls)}')
-        print(f'median JS calls: {statistics.median(seen_calls)}')
-        self.assertGreaterEqual(max(seen_calls), 2)
-        self.assertGreater(statistics.stdev(seen_calls), 0)
+        # Generate the counts of seen calls, for convenience. We convert
+        #  [['11', '22'], [], ['99']]
+        # into
+        #  [2, 0, 1]
+        num_seen_calls = [len(x) for x in seen_calls]
+        print('Num JS calls are distributed as ~ mean 4, stddev 5, median 2')
+        print(f'mean JS calls:   {statistics.mean(num_seen_calls)}')
+        print(f'stdev JS calls:  {statistics.stdev(num_seen_calls)}')
+        print(f'median JS calls: {statistics.median(num_seen_calls)}')
+        self.assertGreaterEqual(max(num_seen_calls), 2)
+        self.assertGreater(statistics.stdev(num_seen_calls), 0)
+
+        # The initial callExports have no seed (that makes the first, default,
+        # callExports behave deterministically, so we can compare to
+        # wasm-opt --fuzz-exec etc.), and all subsequent ones must have a seed.
+        seeds = []
+        for calls in seen_calls:
+            if calls:
+                self.assertEqual(calls[0], '')
+                for other in calls[1:]:
+                    self.assertNotEqual(other, '')
+                    seeds.append(int(other))
+
+        # The seeds are random numbers in 0..2^32-1, so overlap between them
+        # should be incredibly unlikely. Allow a few % of such overlap just to
+        # avoid extremely rare errors.
+        num_seeds = len(seeds)
+        num_unique_seeds = len(set(seeds))
+        print(f'unique JS call seeds: {num_unique_seeds} (should be almost {num_seeds})')
+        self.assertGreaterEqual(num_unique_seeds / num_seeds, 0.95)
 
         print()