Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion mlir/docs/Dialects/GPU.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,25 @@ llvm.func @foo() {
// mlir-translate --mlir-to-llvmir:
@binary_bin_cst = internal constant [6 x i8] c"AMDGPU", align 8
@binary_func_kernel_name = private unnamed_addr constant [7 x i8] c"func\00", align 1
@binary_module = internal global ptr null
@llvm.global_ctors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_load, ptr null}]
@llvm.global_dtors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_unload, ptr null}]
define internal void @binary_load() section ".text.startup" {
entry:
%0 = call ptr @mgpuModuleLoad(ptr @binary_bin_cst)
store ptr %0, ptr @binary_module
...
}
define internal void @binary_unload() section ".text.startup" {
entry:
%0 = load ptr, ptr @binary_module, align 8
call void @mgpuModuleUnload(ptr %0)
...
}
...
define void @foo() {
...
%module = call ptr @mgpuModuleLoad(ptr @binary_bin_cst)
%module = load ptr, ptr @binary_module, align 8
%kernel = call ptr @mgpuModuleGetFunction(ptr %module, ptr @binary_func_kernel_name)
call void @mgpuLaunchKernel(ptr %kernel, ...) ; Launch the kernel
...
Expand Down
7 changes: 7 additions & 0 deletions mlir/include/mlir-c/ExecutionEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ MLIR_CAPI_EXPORTED MlirExecutionEngine mlirExecutionEngineCreate(
MlirModule op, int optLevel, int numPaths,
const MlirStringRef *sharedLibPaths, bool enableObjectDump);

/// Initialize the ExecutionEngine. Global constructors specified by
/// `llvm.mlir.global_ctors` will be run. One common scenario is that a kernel
/// binary compiled from `gpu.module` gets loaded during initialization. Make
/// sure all symbols are resolvable before initialization by calling
/// `mlirExecutionEngineRegisterSymbol` or including shared libraries.
/// Calling this more than once is safe: only the first call has an effect.
MLIR_CAPI_EXPORTED void mlirExecutionEngineInitialize(MlirExecutionEngine jit);

/// Destroy an ExecutionEngine instance.
MLIR_CAPI_EXPORTED void mlirExecutionEngineDestroy(MlirExecutionEngine jit);

Expand Down
9 changes: 9 additions & 0 deletions mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,13 @@ class ExecutionEngine {
llvm::function_ref<llvm::orc::SymbolMap(llvm::orc::MangleAndInterner)>
symbolMap);

/// Initialize the ExecutionEngine. Global constructors specified by
/// `llvm.mlir.global_ctors` will be run. One common scenario is that a kernel
/// binary compiled from `gpu.module` gets loaded during initialization. Make
/// sure all symbols are resolvable before initialization by calling
/// `registerSymbols` or including shared libraries.
///
/// Repeated calls are no-ops, and `invokePacked` calls this itself before
/// looking up the function. Initialization is currently skipped on AArch64
/// targets (see issue #71963).
void initialize();

private:
/// Ordering of llvmContext and jit is important for destruction purposes: the
/// jit must be destroyed before the context.
Expand All @@ -250,6 +257,8 @@ class ExecutionEngine {
/// Destroy functions in the libraries loaded by the ExecutionEngine that are
/// called when this ExecutionEngine is destructed.
SmallVector<LibraryDestroyFn> destroyFns;

/// Set by `initialize()` so that a second call returns without doing
/// anything.
bool isInitialized = false;
};

} // namespace mlir
Expand Down
13 changes: 12 additions & 1 deletion mlir/lib/Bindings/Python/ExecutionEngineModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//

#include "mlir-c/ExecutionEngine.h"
#include "mlir/Bindings/Python/NanobindAdaptors.h"
#include "mlir/Bindings/Python/Nanobind.h"
#include "mlir/Bindings/Python/NanobindAdaptors.h"

namespace nb = nanobind;
using namespace mlir;
Expand Down Expand Up @@ -124,6 +124,17 @@ NB_MODULE(_mlirExecutionEngine, m) {
},
nb::arg("name"), nb::arg("callback"),
"Register `callback` as the runtime symbol `name`.")
.def(
"initialize",
[](PyExecutionEngine &executionEngine) {
mlirExecutionEngineInitialize(executionEngine.get());
},
"Initialize the ExecutionEngine. Global constructors specified by "
"`llvm.mlir.global_ctors` will be run. One common scenario is that "
"kernel binary compiled from `gpu.module` gets loaded during "
"initialization. Make sure all symbols are resolvable before "
"initialization by calling `register_runtime` or including "
"shared libraries.")
.def(
"dump_to_object_file",
[](PyExecutionEngine &executionEngine, const std::string &fileName) {
Expand Down
9 changes: 6 additions & 3 deletions mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ mlirExecutionEngineCreate(MlirModule op, int optLevel, int numPaths,
return wrap(jitOrError->release());
}

/// C API entry point: forwards to `ExecutionEngine::initialize()` on the
/// engine wrapped by `jit`.
extern "C" void mlirExecutionEngineInitialize(MlirExecutionEngine jit) {
  auto *engine = unwrap(jit);
  engine->initialize();
}

/// C API entry point: deletes the engine wrapped by `jit`.
extern "C" void mlirExecutionEngineDestroy(MlirExecutionEngine jit) {
  auto *engine = unwrap(jit);
  delete engine;
}
Expand Down Expand Up @@ -106,9 +110,8 @@ extern "C" void mlirExecutionEngineRegisterSymbol(MlirExecutionEngine jit,
void *sym) {
unwrap(jit)->registerSymbols([&](llvm::orc::MangleAndInterner interner) {
llvm::orc::SymbolMap symbolMap;
symbolMap[interner(unwrap(name))] =
{ llvm::orc::ExecutorAddr::fromPtr(sym),
llvm::JITSymbolFlags::Exported };
symbolMap[interner(unwrap(name))] = {llvm::orc::ExecutorAddr::fromPtr(sym),
llvm::JITSymbolFlags::Exported};
return symbolMap;
});
}
Expand Down
20 changes: 12 additions & 8 deletions mlir/lib/ExecutionEngine/ExecutionEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ void ExecutionEngine::dumpToObjectFile(StringRef filename) {
}
// Compilation is lazy and it doesn't populate object cache unless requested.
// In case object dump is requested before cache is populated, we need to
// force compilation manually.
// force compilation manually.
if (cache->isEmpty()) {
for (std::string &functionName : functionNames) {
auto result = lookupPacked(functionName);
Expand Down Expand Up @@ -400,13 +400,6 @@ ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options,
return symbolMap;
};
engine->registerSymbols(runtimeSymbolMap);

// Execute the global constructors from the module being processed.
// TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a
// crash for AArch64 see related issue #71963.
if (!engine->jit->getTargetTriple().isAArch64())
cantFail(engine->jit->initialize(engine->jit->getMainJITDylib()));

return std::move(engine);
}

Expand Down Expand Up @@ -442,6 +435,7 @@ Expected<void *> ExecutionEngine::lookup(StringRef name) const {

Error ExecutionEngine::invokePacked(StringRef name,
MutableArrayRef<void *> args) {
initialize();
auto expectedFPtr = lookupPacked(name);
if (!expectedFPtr)
return expectedFPtr.takeError();
Expand All @@ -451,3 +445,13 @@ Error ExecutionEngine::invokePacked(StringRef name,

return Error::success();
}

/// Runs the JIT initialization for the main JITDylib the first time it is
/// called; every later call is a no-op.
void ExecutionEngine::initialize() {
  if (!isInitialized) {
    // TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a
    // crash for AArch64 see related issue #71963.
    const bool targetIsAArch64 = jit->getTargetTriple().isAArch64();
    if (!targetIsAArch64) {
      auto &mainDylib = jit->getMainJITDylib();
      // A failing initialization is treated as fatal by `cantFail`.
      cantFail(jit->initialize(mainDylib));
    }
    // The flag is set even when the AArch64 path skipped the JIT call.
    isInitialized = true;
  }
}
2 changes: 2 additions & 0 deletions mlir/lib/ExecutionEngine/JitRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ compileAndExecute(Options &options, Operation *module, StringRef entryPoint,

auto engine = std::move(*expectedEngine);

engine->initialize();

auto expectedFPtr = engine->lookupPacked(entryPoint);
if (!expectedFPtr)
return expectedFPtr.takeError();
Expand Down
1 change: 1 addition & 0 deletions mlir/python/mlir/_mlir_libs/_mlirExecutionEngine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ class ExecutionEngine:
def dump_to_object_file(self, file_name: str) -> None: ...
def raw_lookup(self, func_name: str) -> int: ...
def raw_register_runtime(self, name: str, callback: object) -> None: ...
# Stub for the `initialize` method registered by the nanobind module; it takes
# only the engine itself and returns nothing.
def initialize(self) -> None: ...
@property
def _CAPIPtr(self) -> object: ...
60 changes: 49 additions & 11 deletions mlir/test/python/execution_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,23 +339,61 @@ def callback(a):

run(testUnrankedMemRefWithOffsetCallback)

# Test JIT callback in global constructor
# CHECK-LABEL: TEST: testJITCallbackInGlobalCtor
def testJITCallbackInGlobalCtor():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we're gonna have the issue that this test would fail on aarch64?

init_cnt = 0

@ctypes.CFUNCTYPE(None)
def initCallback():
nonlocal init_cnt
init_cnt += 1

with Context():
module = Module.parse(r"""
llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero]
llvm.func @ctor() {
func.call @init_callback() : () -> ()
llvm.return
}
func.func private @init_callback() attributes { llvm.emit_c_interface }
""")

# Setup execution engine
execution_engine = ExecutionEngine(lowerToLLVM(module))

# Validate initialization hasn't run yet
assert init_cnt == 0

# Register callback
execution_engine.register_runtime("init_callback", initCallback)

# Initialize and verify
execution_engine.initialize()
assert init_cnt == 1
# Second initialization should be a no-op
execution_engine.initialize()
assert init_cnt == 1

run(testJITCallbackInGlobalCtor)

# Test addition of two memrefs.
# CHECK-LABEL: TEST: testMemrefAdd
def testMemrefAdd():
with Context():
module = Module.parse(
"""
module {
func.func @main(%arg0: memref<1xf32>, %arg1: memref<f32>, %arg2: memref<1xf32>) attributes { llvm.emit_c_interface } {
%0 = arith.constant 0 : index
%1 = memref.load %arg0[%0] : memref<1xf32>
%2 = memref.load %arg1[] : memref<f32>
%3 = arith.addf %1, %2 : f32
memref.store %3, %arg2[%0] : memref<1xf32>
return
}
} """
r"""
module {
func.func @main(%arg0: memref<1xf32>, %arg1: memref<f32>, %arg2: memref<1xf32>) attributes { llvm.emit_c_interface } {
%0 = arith.constant 0 : index
%1 = memref.load %arg0[%0] : memref<1xf32>
%2 = memref.load %arg1[] : memref<f32>
%3 = arith.addf %1, %2 : f32
memref.store %3, %arg2[%0] : memref<1xf32>
return
}
}
"""
)
arg1 = np.array([32.5]).astype(np.float32)
arg2 = np.array(6).astype(np.float32)
Expand Down
57 changes: 57 additions & 0 deletions mlir/unittests/ExecutionEngine/Invoke.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,4 +322,61 @@ TEST(NativeMemRefJit, MAYBE_JITCallback) {
ASSERT_EQ(elt, coefficient * count++);
}

// Number of times `initCallback` has been invoked.
static int initCnt = 0;
// Host-side hook that the JIT'ed global constructor calls; it only bumps the
// counter so the test can observe how often initialization ran.
static void initCallback() { ++initCnt; }

#if __has_feature(memory_sanitizer) || __has_feature(address_sanitizer) || \
__has_feature(hwaddress_sanitizer) || \
__has_feature(undefined_behavior_sanitizer)
#define MAYBE_JITCallbackInGlobalCtor DISABLED_JITCallbackInGlobalCtor
#else
#define MAYBE_JITCallbackInGlobalCtor SKIP_WITHOUT_JIT(JITCallbackInGlobalCtor)
#endif
TEST(MLIRExecutionEngine, MAYBE_JITCallbackInGlobalCtor) {
  // Module whose only global constructor calls `init_callback`, a private
  // function declared with the C-interface attribute so the host can supply
  // it.
  std::string ctorModuleText = R"mlir(
llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero]
llvm.func @ctor() {
func.call @init_callback() : () -> ()
llvm.return
}
func.func private @init_callback() attributes { llvm.emit_c_interface }
)mlir";

  // Context with all dialects plus the builtin and LLVM IR translations.
  DialectRegistry dialects;
  registerAllDialects(dialects);
  registerBuiltinDialectTranslation(dialects);
  registerLLVMDialectTranslation(dialects);
  MLIRContext ctx(dialects);

  auto parsedModule = parseSourceString<ModuleOp>(ctorModuleText, &ctx);
  ASSERT_TRUE(!!parsedModule);
  ASSERT_TRUE(succeeded(lowerToLLVMDialect(*parsedModule)));

  ExecutionEngineOptions engineOptions;
  auto engineOrError = ExecutionEngine::create(*parsedModule, engineOptions);
  ASSERT_TRUE(!!engineOrError);
  // Creating the engine alone must not have run the constructor.
  ASSERT_EQ(initCnt, 0);
  auto engine = std::move(engineOrError.get());

  // Register the callback symbol before initialization so the constructor can
  // resolve it.
  engine->registerSymbols([&](llvm::orc::MangleAndInterner mangle) {
    llvm::orc::SymbolMap hostSymbols;
    hostSymbols[mangle("_mlir_ciface_init_callback")] = {
        llvm::orc::ExecutorAddr::fromPtr(initCallback),
        llvm::JITSymbolFlags::Exported};
    return hostSymbols;
  });
  engine->initialize();

  // TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a
  // crash for AArch64 see related issue #71963.
  auto hostTMBuilder = llvm::orc::JITTargetMachineBuilder::detectHost();
  ASSERT_TRUE(!!hostTMBuilder);
  if (hostTMBuilder->getTargetTriple().isAArch64())
    return;

  // The first initialization ran the constructor exactly once.
  ASSERT_EQ(initCnt, 1);
  // A second initialization must leave the counter unchanged.
  engine->initialize();
  ASSERT_EQ(initCnt, 1);
}

#endif // _WIN32
Loading