|
14 | 14 | # limitations under the License. |
15 | 15 |
|
16 | 16 | ''' |
17 | | -Wasm extractor for testcases generated by the ClusterFuzz run.py script. Usage: |
| 17 | +Wasm extractor for testcases generated by the ClusterFuzz run.py script. This is |
| 18 | +general enough to also handle Fuzzilli output. |
| 19 | +
|
| 20 | +Usage: |
18 | 21 |
|
19 | 22 | extract_wasms.py INFILE.js OUTFILE |
20 | 23 |
|
21 | 24 | That will find embedded wasm files in INFILE.js, of the form |
22 | 25 |
|
23 | | - var .. = new Uint8Array([..wasm_contents..]); |
| 26 | + new Uint8Array([..wasm_contents..]); |
24 | 27 |
|
25 | 28 | and extract them into OUTFILE.0.wasm, OUTFILE.1.wasm, etc. It also emits |
26 | 29 | OUTFILE.js which will no longer contain the embedded contents, after which the |
@@ -50,24 +53,41 @@ def get_wasm_filename(): |
50 | 53 | js = f.read() |
51 | 54 |
|
52 | 55 |
|
53 | | -def repl(text): |
| 56 | +def repl(match): |
| 57 | + text = match.group(0) |
| 58 | + |
54 | 59 | # We found something of the form |
55 | 60 | # |
56 | | - # var binary = new Uint8Array([..binary data as numbers..]); |
| 61 | + # new Uint8Array([..binary data as numbers..]); |
57 | 62 | # |
58 | | - # Parse out the numbers into a binary wasm file. |
59 | | - numbers = text.groups()[0] |
| 63 | + # See if the numbers begin with the wasm magic bytes, "\0asm". If so, we |
| 64 | + # assume it is wasm. (We are careful here because Fuzzilli output can |
| 65 | + # contain normal JavaScript Typed Arrays, which we do not want to touch.) |
| 66 | + numbers = match.groups()[0] |
60 | 67 | numbers = numbers.split(',') |
61 | | - numbers = [int(n) for n in numbers] |
| 68 | + |
| 69 | + try: |
| 70 | + # Handle both base 10 and 16 by passing in base 0. |
| 71 | + parsed = [int(n, 0) for n in numbers] |
| 72 | + binary = bytes(parsed) |
| 73 | + except ValueError: |
| 74 | + # Not parseable as a list of byte values, so not wasm; return the existing text. |
| 75 | + return text |
| 76 | + |
| 77 | + if binary[:4] != b'\0asm': |
| 78 | + return text |
| 79 | + |
| 80 | + # It is wasm. Write the already-parsed bytes out as a binary wasm file. |
62 | 81 | with open(get_wasm_filename(), 'wb') as f: |
63 | | - f.write(bytes(numbers)) |
| 82 | + f.write(binary) |
64 | 83 |
|
65 | | - # Replace it with nothing. |
66 | | - return '' |
| 84 | + # Replace the Uint8Array with undefined + a comment. |
| 85 | + return 'undefined /* extracted wasm */' |
67 | 86 |
|
68 | 87 |
|
69 | | -# Replace the wasm files and write them out. |
70 | | -js = re.sub(r'var \w+ = new Uint8Array\(\[([\d,]+)\]\)', repl, js) |
| 88 | +# Replace the wasm files and write them out. We investigate any new Uint8Array |
| 89 | +# on an array of values like [100, 200] or [0x61, 0x6D, 0x6a] etc. |
| 90 | +js = re.sub(r'new Uint8Array\(\[([\d,x a-fA-F]+)\]\)', repl, js) |
71 | 91 |
|
72 | 92 | # Write out the new JS. |
73 | 93 | with open(f'{out}.js', 'w') as f: |
|
0 commit comments