-
Notifications
You must be signed in to change notification settings - Fork 790
[SYCL] fix for __sycl_unregister_lib() on Windows and tests #19633
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
c480714
8c1905e
8688a67
6742db0
154eaf0
44bef0d
d8bc95a
496c746
d1c48c8
c415c15
455bdf8
e2458f3
c6afa75
843a961
a6ef7e0
8c7d1d5
2a193e0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1293,6 +1293,50 @@ class BinaryWrapper { | |
appendToGlobalDtors(M, Func, /*Priority*/ 1); | ||
} | ||
|
||
// Emits SYCL registration code in which the unregister call is scheduled
// through `atexit` instead of being placed in the global destructors.
//
// Two internal `void()` functions are created in the module:
//   * "sycl.descriptor_unreg.atexit" calls __sycl_unregister_lib(BinDesc).
//   * "sycl.descriptor_reg" calls __sycl_register_lib(BinDesc) and then
//     hands the unregister function to `atexit`.
// Only "sycl.descriptor_reg" is appended to the global constructors; the
// unregister function is reached solely via the `atexit` registration.
void createSyclRegisterWithAtexitUnregister(GlobalVariable *BinDesc) {
  // Signature shared by both generated functions: void().
  auto *VoidFuncTy =
      FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
  // Signature shared by __sycl_register_lib and __sycl_unregister_lib:
  // void(ptr).
  auto *LibHookTy =
      FunctionType::get(Type::getVoidTy(C), getPtrTy(), /*isVarArg*/ false);

  // Unregister thunk: forwards the descriptor to __sycl_unregister_lib.
  auto *UnregFunc =
      Function::Create(VoidFuncTy, GlobalValue::InternalLinkage,
                       "sycl.descriptor_unreg.atexit", &M);
  UnregFunc->setSection(".text.startup");

  FunctionCallee UnregTargetC =
      M.getOrInsertFunction("__sycl_unregister_lib", LibHookTy);

  IRBuilder<> UnregBuilder(BasicBlock::Create(C, "entry", UnregFunc));
  UnregBuilder.CreateCall(UnregTargetC, BinDesc);
  UnregBuilder.CreateRetVoid();

  // Register function: registers the descriptor, then schedules the thunk.
  auto *RegFunc = Function::Create(VoidFuncTy, GlobalValue::InternalLinkage,
                                   "sycl.descriptor_reg", &M);
  RegFunc->setSection(".text.startup");

  FunctionCallee RegTargetC =
      M.getOrInsertFunction("__sycl_register_lib", LibHookTy);

  // `atexit` is `int atexit(void (*)())`. With opaque pointers the callback
  // parameter is just `ptr`, so the declaration is `i32 (ptr)`.
  FunctionType *AtExitTy =
      FunctionType::get(Type::getInt32Ty(C), getPtrTy(), /*isVarArg*/ false);
  FunctionCallee AtExitC = M.getOrInsertFunction("atexit", AtExitTy);

  IRBuilder<> RegBuilder(BasicBlock::Create(C, "entry", RegFunc));
  RegBuilder.CreateCall(RegTargetC, BinDesc);
  // The return value of atexit is intentionally not inspected.
  RegBuilder.CreateCall(AtExitC, UnregFunc);
  RegBuilder.CreateRetVoid();

  // Add the register function to the global constructors.
  // Match priority of __tgt_register_lib
  appendToGlobalCtors(M, RegFunc, /*Priority*/ 1);
}
|
||
public: | ||
BinaryWrapper(StringRef Target, StringRef ToolName, | ||
StringRef SymPropBCFiles = "") | ||
|
@@ -1370,8 +1414,13 @@ class BinaryWrapper { | |
|
||
if (EmitRegFuncs) { | ||
GlobalVariable *Desc = *DescOrErr; | ||
createRegisterFunction(Kind, Desc); | ||
createUnregisterFunction(Kind, Desc); | ||
if (Kind == OffloadKind::SYCL && | ||
Triple(M.getTargetTriple()).isOSWindows()) { | ||
createSyclRegisterWithAtexitUnregister(Desc); | ||
} else { | ||
createRegisterFunction(Kind, Desc); | ||
createUnregisterFunction(Kind, Desc); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't necessarily have to be part of this patch, but could you please document this approach in a design document? |
||
} | ||
} | ||
return &M; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2180,8 +2180,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { | |
m_VFSet2BinImage.erase(SetName); | ||
} | ||
|
||
m_DeviceGlobals.eraseEntries(Img); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The device global entries in the map often point to the global variables in host code corresponding to the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There has been some other change to SYCLOS, so I've downgraded this to draft. I've been rechecking things. Presently in SYCLOS there are TWO different code paths that lead to the PATH A: PATH B:
On Linux, during app shutdown On Windows, the order is reversed. Given that we don't support using the same device global in different shared libraries anyway, my fix should be correct and safe. BUT, we shall see. Like I said, there have been some other changes, and I am retreating to Draft to try and see what needs to be done now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But what happens if I load a shared library with a device global, then unload it and load it again. The addresses would be different. Would it be able to update the maps? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can't "share" a single device global between and app and a shared library right now. Or, at least, I can't. I can do that stunt with regular global variables, but not with device globals. I'd have to consult my notes to remember the exact failure, but I'm pretty sure it was linking. So, given that limitation, reloading a shared library with a device global should work correctly. But I'll test it to be sure and maybe expand the testing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @steffenlarsen - Reloading a shared library with a device global triggers an assertion in both SYCLOS and the fix to
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @steffenlarsen I might need your advice on this last bit. When we are reloading a shared lib with a device global, we first unload, which ends up calling The reason for that seems to be that the device globals from the shared lib don't originate from any
I didn't even know that was a possibility. I see no mechanism for removing device globals added in this way. We have a couple of options:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Discussed offline. Let's change the assert for now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I changed the assert last night and tested it Win and Lin. Everything was fine. But the CI was experiencing problems. I managed to reproduce and fix on battlemage, but afterwards the CI continues to have problems and I am unable to reproduce the problem locally (Win + battlemage). The CI seems to be able to reproduce it , but not for me. Passes every single time. So I'm reverting those changes back to where we had the discussion and the tests were passing. I think we should proceed with "ignore for now" option and we can open a ticket about the load/reload issue. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since it isn't a regression, I am okay with making it a follow-up. We will need a tracker for it though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. I'll make a tracker once this is merged so I can use its test as a reproducer. It demonstrates it easily when |
||
|
||
{ | ||
std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex); | ||
auto HostPipes = Img->getHostPipes(); | ||
|
@@ -3824,10 +3822,5 @@ extern "C" void __sycl_register_lib(sycl_device_binaries desc) { | |
|
||
// Executed as a part of current module's (.exe, .dll) static de-initialization | ||
extern "C" void __sycl_unregister_lib(sycl_device_binaries desc) { | ||
// Partial cleanup is not necessary at shutdown | ||
#ifndef _WIN32 | ||
if (!sycl::detail::GlobalHandler::instance().isOkToDefer()) | ||
return; | ||
sycl::detail::ProgramManager::getInstance().removeImages(desc); | ||
#endif | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
// UNSUPPORTED: hip | ||
// UNSUPPORTED-TRACKER: CMPLRLLVM-69478 | ||
|
||
// RUN: %{build} -o %t.out | ||
// RUN: %{run} %t.out | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#include <sycl/detail/core.hpp> | ||
|
||
#if defined(_WIN32) | ||
#define API_EXPORT __declspec(dllexport) | ||
#else | ||
#define API_EXPORT | ||
#endif | ||
|
||
#ifndef INC | ||
#define INC 1 | ||
#endif | ||
|
||
#ifndef CLASSNAME | ||
#define CLASSNAME same | ||
#endif | ||
|
||
// Adds INC to every element of `buf` by submitting a kernel named CLASSNAME
// to queue `q`. The call only submits the work; it does not wait for it.
//
// Exported with C linkage so the test executable can look it up by name after
// loading this shared library at run time.
extern "C" API_EXPORT void performIncrementation(sycl::queue &q,
                                                 sycl::buffer<int, 1> &buf) {
  sycl::range<1> r = buf.get_range();
  q.submit([&](sycl::handler &cgh) {
    // `+=` reads the previous value before storing the sum, so the accessor
    // must be read_write rather than write-only.
    auto acc = buf.get_access<sycl::access::mode::read_write>(cgh);
    cgh.parallel_for<class CLASSNAME>(
        r, [=](sycl::id<1> idx) { acc[idx] += INC; });
  });
}
cperkinsintel marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
// UNSUPPORTED: cuda || hip | ||
// UNSUPPORTED-TRACKER: CMPLRLLVM-69415 | ||
|
||
// REQUIRES: level_zero | ||
cperkinsintel marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
// DEFINE: %{fPIC_flag} = %if windows %{%} %else %{-fPIC%} | ||
// DEFINE: %{shared_lib_ext} = %if windows %{dll%} %else %{so%} | ||
|
||
// clang-format off | ||
// IMPORTANT: note the unusual quoting in -DSO_PATH='R"(%T)"' used below. | ||
againull marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
// We need to capture %T, the build directory, in a string | ||
// and the normal STRINGIFY() macros hack won't work. | ||
// Because on Windows, the path delimiters are \, | ||
// which C++ preprocessor converts to escape sequences, | ||
// which becomes a nightmare. | ||
// So the workaround here is to wrap the path in a C++ raw string literal | ||
againull marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
// and use single quotes, which Python forgivingly accepts. | ||
// clang-format on | ||
|
||
// RUN: %{build} %{fPIC_flag} -DSO_PATH='R"(%T)"' -o %t.out | ||
|
||
// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=1 -o %T/lib_a.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp | ||
// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=2 -o %T/lib_b.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp | ||
// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=4 -o %T/lib_c.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp | ||
|
||
// RUN: env UR_L0_LEAKS_DEBUG=1 %{run} %t.out | ||
|
||
// This test uses a kernel of the same name in three different shared libraries. | ||
// It loads each library, calls the kernel, and checks that the incrementation | ||
// is done correctly, and then unloads the library. | ||
// This test ensures that __sycl_register_lib() and __sycl_unregister_lib() | ||
// are called correctly, and that the device images are cleaned up properly. | ||
|
||
#include <sycl/detail/core.hpp> | ||
|
||
using namespace sycl::ext::oneapi::experimental; | ||
|
||
|
||
#ifdef _WIN32 | ||
#include <windows.h> | ||
|
||
void *loadOsLibrary(const std::string &LibraryPath) { | ||
HMODULE h = | ||
LoadLibraryExA(LibraryPath.c_str(), NULL, LOAD_WITH_ALTERED_SEARCH_PATH); | ||
return (void *)h; | ||
} | ||
int unloadOsLibrary(void *Library) { | ||
return FreeLibrary((HMODULE)Library) ? 0 : 1; | ||
} | ||
void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { | ||
return (void *)GetProcAddress((HMODULE)Library, FunctionName.c_str()); | ||
} | ||
|
||
#else | ||
#include <dlfcn.h> | ||
|
||
// Opens the shared object at LibraryPath with RTLD_NOW and returns the
// handle. When dlopen fails, the dlerror() text (or "unknown error") is
// written to std::cerr and a null handle is returned.
void *loadOsLibrary(const std::string &LibraryPath) {
  void *Handle = dlopen(LibraryPath.c_str(), RTLD_NOW);
  if (Handle)
    return Handle;

  const char *Reason = dlerror();
  if (!Reason)
    Reason = "unknown error";
  std::cerr << "dlopen(" << LibraryPath << ") failed with <" << Reason << ">"
            << std::endl;
  return Handle;
}
|
||
// Closes a handle obtained from loadOsLibrary and forwards dlclose's result.
int unloadOsLibrary(void *Library) {
  const int Status = dlclose(Library);
  return Status;
}
|
||
// Resolves FunctionName inside the shared object `Library` via dlsym and
// returns whatever address dlsym yields.
void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) {
  const char *Symbol = FunctionName.c_str();
  void *Address = dlsym(Library, Symbol);
  return Address;
}
#endif | ||
|
||
// Define the function pointer type for performIncrementation | ||
using IncFuncT = void(sycl::queue &, sycl::buffer<int, 1> &); | ||
|
||
// Sets every element of `buf` to zero through a host accessor.
void initializeBuffer(sycl::buffer<int, 1> &buf) {
  sycl::host_accessor<int, 1> hostView(buf);
  size_t idx = 0;
  while (idx < buf.size()) {
    hostView[idx] = 0;
    ++idx;
  }
}
|
||
// Prints each element of `buf` on one line (space separated) and asserts
// that every element equals `val`.
void checkIncrementation(sycl::buffer<int, 1> &buf, int val) {
  sycl::host_accessor<int, 1> acc(buf);
  size_t i = 0;
  while (i < buf.size()) {
    std::cout << acc[i] << " ";
    assert(acc[i] == val);
    ++i;
  }
  std::cout << std::endl;
}
|
||
int main() { | ||
sycl::queue q; | ||
|
||
sycl::range<1> r(8); | ||
sycl::buffer<int, 1> buf(r); | ||
initializeBuffer(buf); | ||
|
||
std::string base_path = SO_PATH; | ||
|
||
#ifdef _WIN32 | ||
std::string path_to_lib_a = base_path + "\\lib_a.dll"; | ||
std::string path_to_lib_b = base_path + "\\lib_b.dll"; | ||
std::string path_to_lib_c = base_path + "\\lib_c.dll"; | ||
#else | ||
std::string path_to_lib_a = base_path + "/lib_a.so"; | ||
std::string path_to_lib_b = base_path + "/lib_b.so"; | ||
std::string path_to_lib_c = base_path + "/lib_c.so"; | ||
#endif | ||
steffenlarsen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
std::cout << "paths: " << path_to_lib_a << std::endl; | ||
std::cout << "SO_PATH: " << SO_PATH << std::endl; | ||
|
||
void *lib_a = loadOsLibrary(path_to_lib_a); | ||
void *f = getOsLibraryFuncAddress(lib_a, "performIncrementation"); | ||
auto performIncrementationFuncA = reinterpret_cast<IncFuncT *>(f); | ||
performIncrementationFuncA(q, buf); // call the function from lib_a | ||
q.wait(); | ||
checkIncrementation(buf, 1); | ||
unloadOsLibrary(lib_a); | ||
std::cout << "lib_a done" << std::endl; | ||
|
||
void *lib_b = loadOsLibrary(path_to_lib_b); | ||
f = getOsLibraryFuncAddress(lib_b, "performIncrementation"); | ||
auto performIncrementationFuncB = reinterpret_cast<IncFuncT *>(f); | ||
performIncrementationFuncB(q, buf); // call the function from lib_b | ||
q.wait(); | ||
checkIncrementation(buf, 1 + 2); | ||
unloadOsLibrary(lib_b); | ||
std::cout << "lib_b done" << std::endl; | ||
|
||
void *lib_c = loadOsLibrary(path_to_lib_c); | ||
f = getOsLibraryFuncAddress(lib_c, "performIncrementation"); | ||
auto performIncrementationFuncC = reinterpret_cast<IncFuncT *>(f); | ||
q.wait(); | ||
performIncrementationFuncC(q, buf); // call the function from lib_c | ||
checkIncrementation(buf, 1 + 2 + 4); | ||
unloadOsLibrary(lib_c); | ||
std::cout << "lib_c done" << std::endl; | ||
|
||
return 0; | ||
} | ||
cperkinsintel marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
Uh oh!
There was an error while loading. Please reload this page.