Skip to content

Commit d52213c

Browse files
authored
Function pointer cast emulation (#1468)
This adds a pass that implements "function pointer cast emulation": it allows indirect calls to go through even if the number of arguments or their types is incorrect. That is undefined behavior in C/C++, but in practice it somehow works on native architectures. It is even relied upon in e.g. Python. Emscripten already has such emulation for asm.js, which also worked for asm2wasm. This implements something like it in binaryen, which also allows the wasm backend to use it. As a result, Python should now be portable using the wasm backend. The mechanism used for the emulation is to make all indirect calls use a fixed number of arguments, all of type i64, and a return type that is also i64. Thunks that translate the arguments properly for the target, basically by reinterpreting to i64 and back, are then placed in the table. As a result, receiving an i64 when an i32 is sent will have the upper bits all zero, and the reverse would truncate the upper bits, etc. (Note that this is different from emscripten's existing emulation, which converts (as signed) to a double. That makes sense for JS, where doubles can contain all numeric values, but in wasm we have i64s. Also, bitwise conversion may be more like what native architectures do anyhow. It is enough for Python.) Also adds validation that a function's type matches the function's actual params and result (surprised we didn't have that before, but we didn't, and there was even a place in the test suite where that was wrong). Also simplifies the build script by moving two cpp files into the wasm/ subdir, so they can be built once and shared between the various tools.
1 parent 0008b79 commit d52213c

17 files changed

+623
-23
lines changed

CMakeLists.txt

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,6 @@ INSTALL(FILES bin/binaryen.js DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAM
172172

173173
SET(wasm-shell_SOURCES
174174
src/tools/wasm-shell.cpp
175-
src/wasm-interpreter.cpp
176175
)
177176
ADD_EXECUTABLE(wasm-shell
178177
${wasm-shell_SOURCES})
@@ -183,11 +182,10 @@ INSTALL(TARGETS wasm-shell DESTINATION ${CMAKE_INSTALL_BINDIR})
183182

184183
SET(wasm-opt_SOURCES
185184
src/tools/wasm-opt.cpp
186-
src/wasm-interpreter.cpp
187185
)
188186
ADD_EXECUTABLE(wasm-opt
189187
${wasm-opt_SOURCES})
190-
TARGET_LINK_LIBRARIES(wasm-opt wasm asmjs emscripten-optimizer passes ir cfg support)
188+
TARGET_LINK_LIBRARIES(wasm-opt wasm asmjs emscripten-optimizer passes ir cfg support wasm)
191189
SET_PROPERTY(TARGET wasm-opt PROPERTY CXX_STANDARD 11)
192190
SET_PROPERTY(TARGET wasm-opt PROPERTY CXX_STANDARD_REQUIRED ON)
193191
INSTALL(TARGETS wasm-opt DESTINATION ${CMAKE_INSTALL_BINDIR})
@@ -197,7 +195,7 @@ SET(wasm-merge_SOURCES
197195
)
198196
ADD_EXECUTABLE(wasm-merge
199197
${wasm-merge_SOURCES})
200-
TARGET_LINK_LIBRARIES(wasm-merge wasm asmjs emscripten-optimizer passes ir cfg support)
198+
TARGET_LINK_LIBRARIES(wasm-merge wasm asmjs emscripten-optimizer passes ir cfg support wasm)
201199
SET_PROPERTY(TARGET wasm-merge PROPERTY CXX_STANDARD 11)
202200
SET_PROPERTY(TARGET wasm-merge PROPERTY CXX_STANDARD_REQUIRED ON)
203201
INSTALL(TARGETS wasm-merge DESTINATION bin)
@@ -214,7 +212,6 @@ INSTALL(TARGETS wasm-metadce DESTINATION bin)
214212

215213
SET(asm2wasm_SOURCES
216214
src/tools/asm2wasm.cpp
217-
src/wasm-emscripten.cpp
218215
)
219216
ADD_EXECUTABLE(asm2wasm
220217
${asm2wasm_SOURCES})
@@ -235,7 +232,6 @@ INSTALL(TARGETS wasm2asm DESTINATION ${CMAKE_INSTALL_BINDIR})
235232

236233
SET(s2wasm_SOURCES
237234
src/tools/s2wasm.cpp
238-
src/wasm-emscripten.cpp
239235
src/wasm-linker.cpp
240236
)
241237
ADD_EXECUTABLE(s2wasm
@@ -247,7 +243,6 @@ INSTALL(TARGETS s2wasm DESTINATION ${CMAKE_INSTALL_BINDIR})
247243

248244
SET(wasm-emscripten-finalize_SOURCES
249245
src/tools/wasm-emscripten-finalize.cpp
250-
src/wasm-emscripten.cpp
251246
)
252247
ADD_EXECUTABLE(wasm-emscripten-finalize
253248
${wasm-emscripten-finalize_SOURCES})
@@ -290,7 +285,6 @@ IF (UNIX) # TODO: port to windows
290285

291286
SET(wasm-reduce_SOURCES
292287
src/tools/wasm-reduce.cpp
293-
src/wasm-interpreter.cpp
294288
)
295289
ADD_EXECUTABLE(wasm-reduce
296290
${wasm-reduce_SOURCES})

build-js.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ echo "building shared bitcode"
134134
$BINARYEN_SRC/wasm/wasm-type.cpp \
135135
$BINARYEN_SRC/wasm/wasm-validator.cpp \
136136
$BINARYEN_SRC/wasm/wasm.cpp \
137-
$BINARYEN_SRC/wasm-emscripten.cpp \
137+
$BINARYEN_SRC/wasm/wasm-emscripten.cpp \
138138
-I$BINARYEN_SRC \
139139
-o shared.bc
140140

src/passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ SET(passes_SOURCES
88
DuplicateFunctionElimination.cpp
99
ExtractFunction.cpp
1010
Flatten.cpp
11+
FuncCastEmulation.cpp
1112
Inlining.cpp
1213
LegalizeJSInterface.cpp
1314
LocalCSE.cpp

src/passes/FuncCastEmulation.cpp

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
/*
2+
* Copyright 2017 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
//
18+
// Instruments all indirect calls so that they work even if a function
19+
// pointer was cast incorrectly. For example, if you cast an int (int, float)
20+
// to an int (int, float, int) and call it natively, on most archs it will
21+
// happen to work, ignoring the extra param, whereas in wasm it will trap.
22+
// When porting code that relies on such casts working (like e.g. Python),
23+
// this pass may be useful. It sets a new "ABI" for indirect calls, in which
24+
// they all return an i64 and they have a fixed number of i64 params, and
25+
// the pass converts everything to go through that.
26+
//
27+
// This should work even with dynamic linking, however, the number of
28+
// params must be identical, i.e., the "ABI" must match.
29+
30+
#include <wasm.h>
31+
#include <wasm-builder.h>
32+
#include <asm_v_wasm.h>
33+
#include <pass.h>
34+
#include <wasm-emscripten.h>
35+
#include <ir/literal-utils.h>
36+
37+
namespace wasm {
38+
39+
// This should be enough for everybody. (As described above, we need this
40+
// to match when dynamically linking, and also dynamic linking is why we
41+
// can't just detect this automatically in the module we see.)
42+
static const int NUM_PARAMS = 15;
43+
44+
// Converts a value to the ABI type of i64.
45+
static Expression* toABI(Expression* value, Module* module) {
46+
Builder builder(*module);
47+
switch (value->type) {
48+
case i32: {
49+
value = builder.makeUnary(ExtendUInt32, value);
50+
break;
51+
}
52+
case i64: {
53+
// already good
54+
break;
55+
}
56+
case f32: {
57+
value = builder.makeUnary(
58+
ExtendUInt32,
59+
builder.makeUnary(ReinterpretFloat32, value)
60+
);
61+
break;
62+
}
63+
case f64: {
64+
value = builder.makeUnary(ReinterpretFloat64, value);
65+
break;
66+
}
67+
case none: {
68+
// the value is none, but we need a value here
69+
value = builder.makeSequence(
70+
value,
71+
LiteralUtils::makeZero(i64, *module)
72+
);
73+
break;
74+
}
75+
case unreachable: {
76+
// can leave it, the call isn't taken anyhow
77+
break;
78+
}
79+
default: {
80+
// SIMD may be interesting some day
81+
WASM_UNREACHABLE();
82+
}
83+
}
84+
return value;
85+
}
86+
87+
// Converts a value from the ABI type of i64 to the expected type
88+
static Expression* fromABI(Expression* value, Type type, Module* module) {
89+
Builder builder(*module);
90+
switch (type) {
91+
case i32: {
92+
value = builder.makeUnary(WrapInt64, value);
93+
break;
94+
}
95+
case i64: {
96+
// already good
97+
break;
98+
}
99+
case f32: {
100+
value = builder.makeUnary(
101+
ReinterpretInt32,
102+
builder.makeUnary(WrapInt64, value)
103+
);
104+
break;
105+
}
106+
case f64: {
107+
value = builder.makeUnary(ReinterpretInt64, value);
108+
break;
109+
}
110+
case none: {
111+
value = builder.makeDrop(value);
112+
}
113+
case unreachable: {
114+
// can leave it, the call isn't taken anyhow
115+
break;
116+
}
117+
default: {
118+
// SIMD may be interesting some day
119+
WASM_UNREACHABLE();
120+
}
121+
}
122+
return value;
123+
}
124+
125+
// Function-parallel walker that rewrites each call_indirect to the emulation
// ABI: every operand is converted with toABI, the operand list is padded with
// i64 zeros up to NUM_PARAMS, the call is retyped to return i64 with the ABI
// function type, and the result is converted back with fromABI.
struct ParallelFuncCastEmulation : public WalkerPass<PostWalker<ParallelFuncCastEmulation>> {
  ParallelFuncCastEmulation(Name ABIType) : ABIType(ABIType) {}

  bool isFunctionParallel() override { return true; }

  // Each created instance carries the same ABI type name.
  Pass* create() override { return new ParallelFuncCastEmulation(ABIType); }

  void visitCallIndirect(CallIndirect* curr) {
    auto& operands = curr->operands;
    if (operands.size() > NUM_PARAMS) {
      Fatal() << "FuncCastEmulation::NUM_PARAMS needs to be at least " <<
                 operands.size();
    }
    Module* wasm = getModule();
    // Convert the operands that are actually passed.
    for (Index i = 0; i < operands.size(); i++) {
      operands[i] = toABI(operands[i], wasm);
    }
    // Add extra operands as needed.
    while (operands.size() < NUM_PARAMS) {
      operands.push_back(LiteralUtils::makeZero(i64, *wasm));
    }
    // Remember what the call used to return, then set the new types.
    const Type originalType = curr->type;
    curr->type = i64;
    curr->fullType = ABIType;
    // Fix up return value
    Expression* fixedUp = fromABI(curr, originalType, wasm);
    replaceCurrent(fixedUp);
  }

private:
  // the name of a type for a call with the right params and return
  Name ABIType;
};
156+
157+
struct FuncCastEmulation : public Pass {
158+
void run(PassRunner* runner, Module* module) override {
159+
// we just need the one ABI function type for all indirect calls
160+
std::string sig = "j";
161+
for (Index i = 0; i < NUM_PARAMS; i++) {
162+
sig += 'j';
163+
}
164+
ABIType = ensureFunctionType(sig, module)->name;
165+
// Add a way for JS to call into the table (as our i64 ABI means an i64
166+
// is returned when there is a return value, which JS engines will fail on),
167+
// using dynCalls
168+
EmscriptenGlueGenerator generator(*module);
169+
generator.generateDynCallThunks();
170+
// Add a thunk for each function in the table, and do the call through it.
171+
std::unordered_map<Name, Name> funcThunks;
172+
for (auto& segment : module->table.segments) {
173+
for (auto& name : segment.data) {
174+
auto iter = funcThunks.find(name);
175+
if (iter == funcThunks.end()) {
176+
auto thunk = makeThunk(name, module);
177+
funcThunks[name] = thunk;
178+
name = thunk;
179+
} else {
180+
name = iter->second;
181+
}
182+
}
183+
}
184+
// update call_indirects
185+
PassRunner subRunner(module, runner->options);
186+
subRunner.setIsNested(true);
187+
subRunner.add<ParallelFuncCastEmulation>(ABIType);
188+
subRunner.run();
189+
}
190+
191+
private:
192+
// the name of a type for a call with the right params and return
193+
Name ABIType;
194+
195+
// Creates a thunk for a function, casting args and return value as needed.
196+
Name makeThunk(Name name, Module* module) {
197+
Name thunk = std::string("byn$fpcast-emu$") + name.str;
198+
if (module->getFunctionOrNull(thunk)) {
199+
Fatal() << "FuncCastEmulation::makeThunk seems a thunk name already in use. Was the pass already run on this code?";
200+
}
201+
// The item in the table may be a function or a function import.
202+
auto* func = module->getFunctionOrNull(name);
203+
Import* imp = nullptr;
204+
if (!func) imp = module->getImport(name);
205+
std::vector<Type>& params = func ? func->params : module->getFunctionType(imp->functionType)->params;
206+
Type type = func ? func->result : module->getFunctionType(imp->functionType)->result;
207+
Builder builder(*module);
208+
std::vector<Expression*> callOperands;
209+
for (Index i = 0; i < params.size(); i++) {
210+
callOperands.push_back(fromABI(builder.makeGetLocal(i, i64), params[i], module));
211+
}
212+
Expression* call = func ? (Expression*)builder.makeCall(name, callOperands, type)
213+
: (Expression*)builder.makeCallImport(name, callOperands, type);
214+
std::vector<Type> thunkParams;
215+
for (Index i = 0; i < NUM_PARAMS; i++) {
216+
thunkParams.push_back(i64);
217+
}
218+
auto* thunkFunc = builder.makeFunction(
219+
thunk,
220+
std::move(thunkParams),
221+
i64,
222+
{}, // no vars
223+
toABI(call, module)
224+
);
225+
thunkFunc->type = ABIType;
226+
module->addFunction(thunkFunc);
227+
return thunk;
228+
}
229+
};
230+
231+
// Factory for the function pointer cast emulation pass; returns a newly
// allocated FuncCastEmulation.
Pass* createFuncCastEmulationPass() {
  Pass* pass = new FuncCastEmulation();
  return pass;
}
234+
235+
} // namespace wasm

src/passes/pass.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ void PassRegistry::registerPasses() {
7474
registerPass("duplicate-function-elimination", "removes duplicate functions", createDuplicateFunctionEliminationPass);
7575
registerPass("extract-function", "leaves just one function (useful for debugging)", createExtractFunctionPass);
7676
registerPass("flatten", "flattens out code, removing nesting", createFlattenPass);
77+
registerPass("fpcast-emu", "emulates function pointer casts, allowing incorrect indirect calls to (sometimes) work", createFuncCastEmulationPass);
7778
registerPass("func-metrics", "reports function metrics", createFunctionMetricsPass);
7879
registerPass("inlining", "inline functions (you probably want inlining-optimizing)", createInliningPass);
7980
registerPass("inlining-optimizing", "inline functions and optimizes where we inlined", createInliningOptimizingPass);

src/passes/passes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Pass* createDeadCodeEliminationPass();
3131
Pass* createDuplicateFunctionEliminationPass();
3232
Pass* createExtractFunctionPass();
3333
Pass* createFlattenPass();
34+
Pass* createFuncCastEmulationPass();
3435
Pass* createFullPrinterPass();
3536
Pass* createFunctionMetricsPass();
3637
Pass* createI64ToI32LoweringPass();

src/wasm-builder.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,20 @@ class Builder {
4242

4343
// make* functions, create nodes
4444

45+
// Creates a function from plain lists of param and var types (no local
// names). The contents of |params| and |vars| are swapped into the new
// function. |body| may be left null.
Function* makeFunction(Name name,
                       std::vector<Type>&& params,
                       Type resultType,
                       std::vector<Type>&& vars,
                       Expression* body = nullptr) {
  auto* created = new Function;
  // take over the caller's type lists
  created->params.swap(params);
  created->vars.swap(vars);
  created->body = body;
  created->result = resultType;
  created->name = name;
  return created;
}
58+
4559
Function* makeFunction(Name name,
4660
std::vector<NameType>&& params,
4761
Type resultType,
@@ -51,7 +65,6 @@ class Builder {
5165
func->name = name;
5266
func->result = resultType;
5367
func->body = body;
54-
5568
for (auto& param : params) {
5669
func->params.push_back(param.type);
5770
Index index = func->localNames.size();
@@ -64,7 +77,6 @@ class Builder {
6477
func->localIndices[var.name] = index;
6578
func->localNames[index] = var.name;
6679
}
67-
6880
return func;
6981
}
7082

src/wasm-linker.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,13 @@ void Linker::makeDummyFunction() {
382382
if (!create) return;
383383
wasm::Builder wasmBuilder(out.wasm);
384384
Expression *unreachable = wasmBuilder.makeUnreachable();
385-
Function *dummy = wasmBuilder.makeFunction(Name(dummyFunction), {}, Type::none, {}, unreachable);
385+
Function *dummy = wasmBuilder.makeFunction(
386+
Name(dummyFunction),
387+
std::vector<Type>{},
388+
Type::none,
389+
std::vector<Type>{},
390+
unreachable
391+
);
386392
out.wasm.addFunction(dummy);
387393
getFunctionIndex(dummy->name);
388394
}

src/wasm/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ SET(wasm_SOURCES
22
literal.cpp
33
wasm.cpp
44
wasm-binary.cpp
5+
wasm-emscripten.cpp
6+
wasm-interpreter.cpp
57
wasm-io.cpp
68
wasm-s-parser.cpp
79
wasm-type.cpp

0 commit comments

Comments
 (0)