-
Notifications
You must be signed in to change notification settings - Fork 39
Description
oneCCL and Intel MPI are considering using UMF via `dlopen`. In that case, it is not possible to use pools that are compiled as static libraries and are not part of the `libumf.so` binary.
For example, the disjoint pool is compiled into the `libdisjoint_pool.a` static library. If an application uses `libumf.so` via `dlopen` (i.e. it has no link-time dependency on it) and wants to use the disjoint pool, it will get a linker error because `libdisjoint_pool.a` requires UMF symbols that cannot be found.
There are no such issues with pools that are part of `libumf.so`. The reproducer below causes a link-time error if `pool_disjoint` is used and works fine in the case of `pool_proxy`.
Please provide a reproduction of the bug:
Consider the following code:
// reproducer.c:
#include <dlfcn.h>
#include <stdio.h>
#include <umf.h>
#include <umf/pools/pool_disjoint.h>
#include <umf/pools/pool_proxy.h>
#include <umf/providers/provider_os_memory.h>
/*
 * Table of pointers to UMF entry points, plus the single global instance
 * `umfFunc`. The members are filled in by initUmf() with dlsym(), so these
 * functions are called through the table rather than by name.
 * Each member has the type "pointer to" the identically named function
 * declared in the UMF headers (via typeof).
 */
struct umfFunc_t {
typeof(umfPoolCreate) *umfPoolCreateFn;
typeof(umfPoolDestroy) *umfPoolDestroyFn;
typeof(umfMemoryProviderCreate) *umfMemoryProviderCreateFn;
typeof(umfMemoryProviderDestroy) *umfMemoryProviderDestroyFn;
typeof(umfOsMemoryProviderOps) *umfOsMemoryProviderOpsFn;
typeof(umfProxyPoolOps) *umfProxyPoolOpsFn;
} umfFunc;
/*
 * Look up one symbol in the library referred to by umfHandle.
 *
 * On failure an error line is written to stderr and *missing is incremented.
 * The error is reported immediately after the failing dlsym() so that
 * dlerror() still describes this lookup; if dlerror() has nothing to say,
 * the symbol name is printed instead of passing NULL to "%s".
 *
 * Returns the symbol address, or NULL if the lookup failed.
 */
static void *umfLoadSymbol(void *umfHandle, const char *name, int *missing)
{
    /* Discard any stale error left over from an earlier dl* call. */
    (void)dlerror();

    void *sym = dlsym(umfHandle, name);
    if (sym == NULL) {
        const char *reason = dlerror();
        fprintf(stderr, "Error: %s\n", reason != NULL ? reason : name);
        ++*missing;
    }
    return sym;
}

/*
 * Fill the global umfFunc table with the UMF entry points found in the
 * library referred to by umfHandle (a handle obtained from dlopen()).
 *
 * Every member of the table is assigned, even when some lookups fail, and
 * each failed lookup is reported on stderr.
 *
 * Returns 0 when all symbols were resolved, 1 if at least one is missing.
 */
int initUmf(void *umfHandle)
{
    int missing = 0;

    umfFunc.umfPoolCreateFn =
        (typeof(umfPoolCreate) *)umfLoadSymbol(umfHandle, "umfPoolCreate", &missing);
    umfFunc.umfPoolDestroyFn =
        (typeof(umfPoolDestroy) *)umfLoadSymbol(umfHandle, "umfPoolDestroy", &missing);
    umfFunc.umfMemoryProviderCreateFn =
        (typeof(umfMemoryProviderCreate) *)umfLoadSymbol(umfHandle, "umfMemoryProviderCreate", &missing);
    umfFunc.umfMemoryProviderDestroyFn =
        (typeof(umfMemoryProviderDestroy) *)umfLoadSymbol(umfHandle, "umfMemoryProviderDestroy", &missing);
    umfFunc.umfOsMemoryProviderOpsFn =
        (typeof(umfOsMemoryProviderOps) *)umfLoadSymbol(umfHandle, "umfOsMemoryProviderOps", &missing);
    umfFunc.umfProxyPoolOpsFn =
        (typeof(umfProxyPoolOps) *)umfLoadSymbol(umfHandle, "umfProxyPoolOps", &missing);

    return missing != 0 ? 1 : 0;
}
/*
 * Reproducer entry point.
 *
 * Loads libumf.so with dlopen(), resolves the UMF entry points through
 * initUmf(), creates an OS memory provider and then a pool on top of it,
 * and tears everything down again in reverse order.
 *
 * Returns 0 only if every step succeeded; any failure is reported on stderr
 * and yields exit status 1.
 */
int main(void) {
    int exitCode = 1; /* pessimistic default; set to 0 only on full success */
    umf_result_t res;
    umf_memory_provider_handle_t hProvider = NULL;
    umf_memory_pool_handle_t hPool = NULL;
    umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault();
    umf_disjoint_pool_params_t disjoint_params = umfDisjointPoolParamsDefault();

    void *umfHandle = dlopen("libumf.so", RTLD_LAZY);
    if (!umfHandle) {
        fprintf(stderr, "Error: %s\n", dlerror());
        return 1;
    }

    if (initUmf(umfHandle) != 0) {
        goto err_dlclose;
    }

    // Create memory provider
    res = umfFunc.umfMemoryProviderCreateFn(umfFunc.umfOsMemoryProviderOpsFn(), &params, &hProvider);
    if (res != UMF_RESULT_SUCCESS) {
        fprintf(stderr, "ERROR: umfMemoryProviderCreate failed %d\n", res);
        goto err_dlclose;
    }

    // Create pool
    // The first branch calls umfDisjointPoolOps() directly (not through the
    // dlsym table); the second branch uses the proxy pool resolved by dlsym.
#if 1
    res = umfFunc.umfPoolCreateFn(umfDisjointPoolOps(), hProvider, &disjoint_params, 0, &hPool);
#else
    res = umfFunc.umfPoolCreateFn(umfFunc.umfProxyPoolOpsFn(), hProvider, NULL, 0, &hPool);
#endif
    if (res != UMF_RESULT_SUCCESS) {
        fprintf(stderr, "ERROR: umfPoolCreate failed %d\n", res);
        goto err_provider_destroy;
    }

    // Everything was created successfully; fall through the cleanup chain.
    exitCode = 0;
    umfFunc.umfPoolDestroyFn(hPool);

err_provider_destroy:
    umfFunc.umfMemoryProviderDestroyFn(hProvider);

err_dlclose:
    dlclose(umfHandle);

    // Only claim success when no error path was taken.
    if (exitCode == 0) {
        fprintf(stdout, "DONE! Example completed successfuly\n");
    }
    return exitCode;
}
To build `reproducer.c`:
#! /bin/sh
# Build reproducer.c against the static disjoint pool library and run it.
# libumf itself is not passed to the linker; the program loads it via dlopen.

# Stop at the first failing command so that a failed link is not followed by
# an attempt to run a binary that was never produced.
set -e

UMF_ROOT=/unified-memory-framework
UMF_INCLUDE=$UMF_ROOT/include
UMF_LIB=$UMF_ROOT/build/lib

rm -rf reproducer
gcc -o reproducer reproducer.c -I "${UMF_INCLUDE}" -L "${UMF_LIB}" -ldisjoint_pool -ldl -lstdc++
LD_LIBRARY_PATH="${UMF_LIB}" ./reproducer
How often bug is revealed:
always
Actual behavior:
Linker errors:
/unified-memory-framework/build/lib/libdisjoint_pool.a(pool_disjoint.cpp.o): in function `memoryProviderAlloc(umf_memory_provider_t*, unsigned long, unsigned long)':
/unified-memory-framework/src/pool/pool_disjoint.cpp:399: undefined reference to `umfMemoryProviderAlloc'
/usr/bin/ld: /unified-memory-framework/build/lib/libdisjoint_pool.a(pool_disjoint.cpp.o): in function `memoryProviderFree(umf_memory_provider_t*, void*)':
/unified-memory-framework/src/pool/pool_disjoint.cpp:413: undefined reference to `umfMemoryTrackerGetAllocInfo'
/usr/bin/ld: /unified-memory-framework/src/pool/pool_disjoint.cpp:419: undefined reference to `umfMemoryProviderFree'
/usr/bin/ld: /unified-memory-framework/build/lib/libdisjoint_pool.a(pool_disjoint.cpp.o): in function `Slab::~Slab()':
/unified-memory-framework/src/pool/pool_disjoint.cpp:462: undefined reference to `umfGetLastFailedMemoryProvider'
/usr/bin/ld: /unified-memory-framework/src/pool/pool_disjoint.cpp:462: undefined reference to `umfMemoryProviderGetLastNativeError'
/usr/bin/ld: /unified-memory-framework/build/lib/libdisjoint_pool.a(pool_disjoint.cpp.o): in function `DisjointPool::AllocImpl::AllocImpl(umf_memory_provider_t*, umf_disjoint_pool_params_t*)':
/unified-memory-framework/src/pool/pool_disjoint.cpp:354: undefined reference to `umfMemoryProviderGetMinPageSize'
collect2: error: ld returned 1 exit status
Expected behavior:
TBD
Details
Today oneCCL and Intel MPI use Level 0 directly for memory allocations. During migration to UMF they can therefore use `pool_proxy` (since it is part of the `libumf.so` binary), which will be equivalent to their current implementation on top of Level 0. Eventually, however, using a "real" pool manager (one that actually does pooling) might be beneficial for them.
We can make `pool_disjoint` part of `libumf.so` if we are able to get rid of its C++ runtime dependencies.
Requested priority:
Medium ?