Skip to content

Commit f12f084

Browse files
authored
Fuzzer: Add a flag to import another module in --translate-to-fuzz (#7949)
-ttf --fuzz-import=foo.wasm will generate a new fuzz file, and that file will import parts of the given foo.wasm. So far, this imports a subset of the functions of that other module. Use this in the Two fuzzer as well as in ClusterFuzz. Add fuzz_shell.js support for importing a second module as "primary". We had that for --fuzz-split mode, and this just expands that to any time we have a second module.
1 parent cc03f3d commit f12f084

File tree

11 files changed

+1170
-17
lines changed

11 files changed

+1170
-17
lines changed

scripts/clusterfuzz/run.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -123,16 +123,21 @@ def get_random_initial_content():
123123
# allows us to debug any such failures that we run into.
124124
retry = True
125125

126+
# Temporary files to clean up
127+
temp_files = []
128+
126129

127130
# Generate a random wasm file, and return a string that creates a typed array of
128131
# those bytes, suitable for use in a JS file, in the form
129132
#
130133
# new Uint8Array([..wasm_contents..])
131134
#
132135
# Receives the testcase index and the output dir.
133-
def get_wasm_contents(i, output_dir):
134-
input_data_file_path = os.path.join(output_dir, f'{i}.input')
135-
wasm_file_path = os.path.join(output_dir, f'{i}.wasm')
136+
#
137+
# Also returns the name of the wasm file.
138+
def get_wasm_contents(name, output_dir, extra_args=[]):
139+
input_data_file_path = os.path.join(output_dir, f'{name}.input')
140+
wasm_file_path = os.path.join(output_dir, f'{name}.wasm')
136141

137142
# wasm-opt may fail to run in rare cases (when the fuzzer emits code it
138143
# detects as invalid). Just try again in such a case.
@@ -144,7 +149,7 @@ def get_wasm_contents(i, output_dir):
144149

145150
# Generate a command to use wasm-opt with the proper args to generate
146151
# wasm content from the input data.
147-
cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
152+
cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS + extra_args
148153
cmd += ['-o', wasm_file_path, input_data_file_path]
149154

150155
# Sometimes use a file from the initial content testcases.
@@ -177,16 +182,19 @@ def get_wasm_contents(i, output_dir):
177182
with open(wasm_file_path, 'rb') as file:
178183
wasm_contents = file.read()
179184

180-
# Clean up temp files.
181-
os.remove(wasm_file_path)
182-
os.remove(input_data_file_path)
185+
# Note temp files.
186+
global temp_files
187+
temp_files += [
188+
wasm_file_path,
189+
input_data_file_path
190+
]
183191

184192
# Convert to a string, and wrap into a typed array.
185193
wasm_contents = ','.join([str(c) for c in wasm_contents])
186194
js = f'new Uint8Array([{wasm_contents}])'
187195
if initial_content:
188196
js = f'{js} /* using initial content {os.path.basename(initial_content)} */'
189-
return js
197+
return js, wasm_file_path
190198

191199

192200
# Returns the contents of a .js fuzz file, given the index of the testcase and
@@ -198,17 +206,22 @@ def get_js_file_contents(i, output_dir):
198206

199207
# Prepend the wasm contents, so they are used (rather than the normal
200208
# mechanism where the wasm file's name is provided in argv).
201-
wasm_contents = get_wasm_contents(i, output_dir)
209+
wasm_contents, wasm_file = get_wasm_contents(i, output_dir)
202210
pre = f'var binary = {wasm_contents};\n'
203211
bytes = wasm_contents.count(',')
204212

205213
# Sometimes add a second wasm file as well.
206214
has_second = False
207215
if system_random.random() < 0.333:
208216
has_second = True
209-
wasm_contents = get_wasm_contents(i, output_dir)
210-
pre += f'var secondBinary = {wasm_contents};\n'
211-
bytes += wasm_contents.count(',')
217+
# Most of the time, import the first file.
218+
args = []
219+
if system_random.random() < 0.8:
220+
args = [f'--fuzz-import={wasm_file}']
221+
second_wasm_contents, second_wasm_file = \
222+
get_wasm_contents(f'{i}_second', output_dir, args)
223+
pre += f'var secondBinary = {second_wasm_contents};\n'
224+
bytes += second_wasm_contents.count(',')
212225

213226
js = pre + '\n' + js
214227

@@ -243,7 +256,9 @@ def get_js_file_contents(i, output_dir):
243256
]
244257
if has_second:
245258
extra_js_operations += [
246-
'build(secondBinary)',
259+
# Build the second binary, marking it as second so it imports the
260+
# first.
261+
'build(secondBinary, true)',
247262
]
248263

249264
for i in range(num):
@@ -307,6 +322,11 @@ def main(argv):
307322

308323
print(f'Created {num} testcases.')
309324

325+
for temp in temp_files:
326+
os.remove(temp)
327+
328+
print('Cleaned up.')
329+
310330

311331
if __name__ == '__main__':
312332
main(sys.argv)

scripts/fuzz_opt.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,7 @@ def note_ignored_vm_run(reason, extra_text='', amount=1):
602602
ignored_vm_run_reasons[reason] += amount
603603

604604

605+
# Run a VM command, and filter out known issues.
605606
def run_vm(cmd):
606607
def filter_known_issues(output):
607608
known_issues = [
@@ -753,6 +754,8 @@ def __init__(self):
753754
# care about their relationship.
754755
def handle_pair(self, input, before_wasm, after_wasm, opts):
755756
self.handle(before_wasm)
757+
# Add some visual space between the independent parts.
758+
print('\n')
756759
self.handle(after_wasm)
757760

758761
def can_run_on_wasm(self, wasm):
@@ -1791,6 +1794,9 @@ def handle(self, wasm):
17911794
second_input = abspath('second_input.dat')
17921795
make_random_input(random_size(), second_input)
17931796
args = [second_input, '-ttf', '-o', second_wasm]
1797+
# Most of the time, use the first wasm as an import to the second.
1798+
if random.random() < 0.8:
1799+
args += ['--fuzz-import=' + wasm]
17941800
run([in_bin('wasm-opt')] + args + GEN_ARGS + FEATURE_OPTS)
17951801

17961802
# The binaryen interpreter only supports a single file, so we run them
@@ -1812,6 +1818,20 @@ def handle(self, wasm):
18121818
# We may fail to instantiate the modules for valid reasons, such as
18131819
# an active segment being out of bounds. There is no point to
18141820
# continue in such cases, as no exports are called.
1821+
1822+
# But, check 'primary' is not in the V8 error. That might indicate a
1823+
# problem in the imports of --fuzz-import. To do this, run the d8
1824+
# command directly, without the usual filtering of run_d8_wasm.
1825+
cmd = [shared.V8] + shared.V8_OPTS + get_v8_extra_flags() + [
1826+
get_fuzz_shell_js(),
1827+
'--',
1828+
wasm,
1829+
second_wasm
1830+
]
1831+
out = run(cmd)
1832+
assert '"primary"' not in out, out
1833+
1834+
note_ignored_vm_run('Two instantiate error')
18151835
return
18161836

18171837
# Make sure that fuzz_shell.js actually executed all exports from both
@@ -1845,6 +1865,7 @@ def can_run_on_wasm(self, wasm):
18451865
# mode. We also cannot run shared-everything code in d8 yet. We also
18461866
# cannot compare if there are NaNs (as optimizations can lead to
18471867
# different outputs).
1868+
# TODO: relax some of these
18481869
if CLOSED_WORLD:
18491870
return False
18501871
if NANS:

scripts/fuzz_shell.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ if (secondBinary) {
435435
// Compile and instantiate a wasm file. Receives the binary to build, and
436436
// whether it is the second one.
437437
function build(binary, isSecond) {
438-
if (fuzzSplit && isSecond) {
438+
if (isSecond) {
439439
assert(secondBinary);
440440
// Provide the primary module's exports to the secondary.
441441
imports['primary'] = exports;

src/tools/fuzzing.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ class TranslateToFuzzReader {
132132
void setPreserveImportsAndExports(bool preserveImportsAndExports_) {
133133
preserveImportsAndExports = preserveImportsAndExports_;
134134
}
135+
void setImportedModule(std::string importedModule_) {
136+
importedModule = importedModule_;
137+
}
135138

136139
void build();
137140

@@ -157,6 +160,9 @@ class TranslateToFuzzReader {
157160
// existing testcase (using initial-content).
158161
bool preserveImportsAndExports = false;
159162

163+
// An optional module to import from.
164+
std::optional<std::string> importedModule;
165+
160166
// Whether we allow the fuzzer to add unreachable code when generating changes
161167
// to existing code. This is randomized during startup, but could be an option
162168
// like the above options eventually if we find that useful.
@@ -365,6 +371,8 @@ class TranslateToFuzzReader {
365371

366372
void addHangLimitChecks(Function* func);
367373

374+
void useImportedModule();
375+
368376
// Recombination and mutation
369377

370378
// Recombination and mutation can replace a node with another node of the same

src/tools/fuzzing/fuzzing.cpp

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "ir/type-updating.h"
2525
#include "support/string.h"
2626
#include "tools/fuzzing/heap-types.h"
27+
#include "wasm-io.h"
2728

2829
namespace wasm {
2930

@@ -361,7 +362,12 @@ void TranslateToFuzzReader::build() {
361362
addImportLoggingSupport();
362363
addImportCallingSupport();
363364
addImportSleepSupport();
365+
366+
// First, modify initial functions. That includes removing imports. Then,
367+
// use the imported module, which adds function imports that we allow.
364368
modifyInitialFunctions();
369+
useImportedModule();
370+
365371
processFunctions();
366372
if (fuzzParams->HANG_LIMIT > 0) {
367373
addHangLimitSupport();
@@ -1158,6 +1164,39 @@ void TranslateToFuzzReader::addHashMemorySupport() {
11581164
}
11591165
}
11601166

1167+
void TranslateToFuzzReader::useImportedModule() {
1168+
if (!importedModule) {
1169+
return;
1170+
}
1171+
1172+
Module imported;
1173+
imported.features = FeatureSet::All;
1174+
ModuleReader().read(*importedModule, imported);
1175+
1176+
// Add some of the module's exported functions as imports, at a random rate.
1177+
auto rate = upTo(100);
1178+
for (auto& exp : imported.exports) {
1179+
if (exp->kind != ExternalKind::Function || upTo(100) > rate) {
1180+
continue;
1181+
}
1182+
1183+
auto* func = imported.getFunction(*exp->getInternalName());
1184+
auto name =
1185+
Names::getValidFunctionName(wasm, "primary_" + exp->name.toString());
1186+
// We can import it as its own type, or any (declared) supertype.
1187+
auto type = getSuperType(func->type);
1188+
auto import = builder.makeFunction(name, type, {});
1189+
import->module = "primary";
1190+
import->base = exp->name;
1191+
wasm.addFunction(std::move(import));
1192+
}
1193+
1194+
// TODO: All other imports: globals, memories, tables, etc. We must, as we do
1195+
// with functions, take care to run this *after* the removal of those
1196+
// imports (as normally we remove them all, as the fuzzer harness will
1197+
// not provide them, but an imported module is the exception).
1198+
}
1199+
11611200
TranslateToFuzzReader::FunctionCreationContext::FunctionCreationContext(
11621201
TranslateToFuzzReader& parent, Function* func)
11631202
: parent(parent), func(func) {
@@ -1553,7 +1592,7 @@ Function* TranslateToFuzzReader::addFunction() {
15531592
wasm.addExport(
15541593
Builder::makeExport(func->name, func->name, ExternalKind::Function));
15551594
}
1556-
// add some to an elem segment
1595+
// add some to an elem segment TODO we could do this for imported funcs too
15571596
while (oneIn(3) && !random.finished()) {
15581597
auto type = Type(func->type, NonNullable);
15591598
std::vector<ElementSegment*> compatibleSegments;

src/tools/wasm-opt.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ int main(int argc, const char* argv[]) {
8787
bool fuzzMemory = true;
8888
bool fuzzOOB = true;
8989
bool fuzzPreserveImportsAndExports = false;
90+
std::string fuzzImport;
9091
std::string emitSpecWrapper;
9192
std::string emitWasm2CWrapper;
9293
std::string inputSourceMapFilename;
@@ -211,6 +212,13 @@ For more on how to optimize effectively, see
211212
[&](Options* o, const std::string& arguments) {
212213
fuzzPreserveImportsAndExports = true;
213214
})
215+
.add(
216+
"--fuzz-import",
217+
"",
218+
"a module to use as an import in -ttf mode",
219+
WasmOptOption,
220+
Options::Arguments::One,
221+
[&](Options* o, const std::string& arguments) { fuzzImport = arguments; })
214222
.add("--emit-spec-wrapper",
215223
"-esw",
216224
"Emit a wasm spec interpreter wrapper file that can run the wasm with "
@@ -344,6 +352,9 @@ For more on how to optimize effectively, see
344352
reader.setAllowMemory(fuzzMemory);
345353
reader.setAllowOOB(fuzzOOB);
346354
reader.setPreserveImportsAndExports(fuzzPreserveImportsAndExports);
355+
if (!fuzzImport.empty()) {
356+
reader.setImportedModule(fuzzImport);
357+
}
347358
reader.build();
348359
if (options.passOptions.validate) {
349360
if (!WasmValidator().validate(wasm, options.passOptions)) {

test/lit/fuzz-import.wast

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
;; Test the flag to import from a given module in fuzzer generation.
2+
3+
;; Generate fuzz output using this wat as initial contents, and importing the
4+
;; side file.
5+
;; RUN: wasm-opt %s.ttf --initial-fuzz=%s -all -ttf --fuzz-import=%s.import \
6+
;; RUN: -S -o - | filecheck %s
7+
8+
(module
9+
;; This existing import will be made a non-import, but the ones from the
10+
;; imported module will be ok.
11+
(import "existing" "foo" (func $import))
12+
)
13+
14+
;; CHECK-NOT: (import "existing"
15+
16+
;; We must see an import from the primary module.
17+
;; XXX This does depend on random choices in the fuzzer. We do have many chances
18+
;; to emit one such import (see the large file on the side), but if this
19+
;; turns out to be too unlikely, we can remove this test in favor of a unit
20+
;; test or the fuzzer.
21+
;; CHECK: (import "primary"
22+

0 commit comments

Comments
 (0)