Skip to content

Cannot use disjoint pool if UMF is used via dlopen #519

@vinser52

Description

@vinser52

oneCCL and Intel MPI are considering using UMF via dlopen. In that case, it is not possible to use pools that are compiled as a static library and are not part of the libumf.so binary.

For example, the disjoint pool is compiled to the libdisjoint_pool.a static library. If an application uses libumf.so via dlopen (has no link dependency) and wants to use the disjoint pool it will get a linker error because the libdisjoint_pool.a requires UMF symbols that cannot be found.

There are no such issues with pools that are part of libumf.so. The reproducer below causes a link-time error if pool_disjoint is used and works fine in the case of pool_proxy.

Please provide a reproduction of the bug:

Consider the following code below:

// reproducer.c:
#include <dlfcn.h>
#include <stdio.h>

#include <umf.h>
#include <umf/pools/pool_disjoint.h>
#include <umf/pools/pool_proxy.h>
#include <umf/providers/provider_os_memory.h>

struct umfFunc_t {
    typeof(umfPoolCreate) *umfPoolCreateFn;
    typeof(umfPoolDestroy) *umfPoolDestroyFn;
    typeof(umfMemoryProviderCreate) *umfMemoryProviderCreateFn;
    typeof(umfMemoryProviderDestroy) *umfMemoryProviderDestroyFn;
    typeof(umfOsMemoryProviderOps) *umfOsMemoryProviderOpsFn;
    typeof(umfProxyPoolOps) *umfProxyPoolOpsFn;
} umfFunc;

int initUmf(void *umfHandle)
{
    umfFunc.umfPoolCreateFn = (typeof(umfPoolCreate) *)dlsym(umfHandle, "umfPoolCreate");
    umfFunc.umfPoolDestroyFn = (typeof(umfPoolDestroy) *)dlsym(umfHandle, "umfPoolDestroy");
    umfFunc.umfMemoryProviderCreateFn = (typeof(umfMemoryProviderCreate) *)dlsym(umfHandle, "umfMemoryProviderCreate");
    umfFunc.umfMemoryProviderDestroyFn = (typeof(umfMemoryProviderDestroy) *)dlsym(umfHandle, "umfMemoryProviderDestroy");
    umfFunc.umfOsMemoryProviderOpsFn = (typeof(umfOsMemoryProviderOps) *)dlsym(umfHandle, "umfOsMemoryProviderOps");
    umfFunc.umfProxyPoolOpsFn = (typeof(umfProxyPoolOps) *)dlsym(umfHandle, "umfProxyPoolOps");

    if (!umfFunc.umfPoolCreateFn || !umfFunc.umfPoolDestroyFn || !umfFunc.umfMemoryProviderCreateFn ||
        !umfFunc.umfMemoryProviderDestroyFn || !umfFunc.umfOsMemoryProviderOpsFn || !umfFunc.umfProxyPoolOpsFn)
    {
        fprintf(stderr, "Error: %s\n", dlerror());
        return 1;
    }

    return 0;
}

int main() {
    umf_result_t res;
    umf_memory_provider_handle_t hProvider = NULL;
    umf_memory_pool_handle_t hPool = NULL;
    umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault();
    umf_disjoint_pool_params_t disjoint_params = umfDisjointPoolParamsDefault();

    void *umfHandle = dlopen("libumf.so", RTLD_LAZY);
    if (!umfHandle) {
        fprintf(stderr, "Error: %s\n", dlerror());
        return 1;
    }

    int ret = initUmf(umfHandle);
    if (ret != 0) {
        goto err_dlclose;
    }

    // Create memory provider
    
    res = umfFunc.umfMemoryProviderCreateFn(umfFunc.umfOsMemoryProviderOpsFn(), &params, &hProvider);
    if (res != UMF_RESULT_SUCCESS) {
        fprintf(stderr, "ERROR: umfMemoryProviderCreate failed %d\n", res);
        goto err_dlclose;
    }

    // Create pool
#if 1
    res = umfFunc.umfPoolCreateFn(umfDisjointPoolOps(), hProvider, &disjoint_params, 0, &hPool);
#else
    res = umfFunc.umfPoolCreateFn(umfFunc.umfProxyPoolOpsFn(), hProvider, NULL, 0, &hPool);
#endif
    if (res != UMF_RESULT_SUCCESS) {
        fprintf(stderr, "ERROR: umfPoolCreate failed %d\n", res);
        goto err_provider_destroy;
    }

err_pool_destroy:
    umfFunc.umfPoolDestroyFn(hPool);

err_provider_destroy:
    umfFunc.umfMemoryProviderDestroyFn(hProvider);

err_dlclose:
    dlclose(umfHandle);

    fprintf(stdout, "DONE! Example completed successfuly\n");
    
    return 0;
}

To build reproducer.c:

#! /bin/sh

UMF_ROOT=/unified-memory-framework
UMF_INCLUDE=$UMF_ROOT/include
UMF_LIB=$UMF_ROOT/build/lib

rm -rf reproducer

gcc -o reproducer reproducer.c -I ${UMF_INCLUDE} -L ${UMF_LIB} -ldisjoint_pool -ldl -lstdc++

LD_LIBRARY_PATH=${UMF_LIB} ./reproducer

How often bug is revealed:

always

Actual behavior:

Linker errors:

/unified-memory-framework/build/lib/libdisjoint_pool.a(pool_disjoint.cpp.o): in function `memoryProviderAlloc(umf_memory_provider_t*, unsigned long, unsigned long)':
/unified-memory-framework/src/pool/pool_disjoint.cpp:399: undefined reference to `umfMemoryProviderAlloc'
/usr/bin/ld: /unified-memory-framework/build/lib/libdisjoint_pool.a(pool_disjoint.cpp.o): in function `memoryProviderFree(umf_memory_provider_t*, void*)':
/unified-memory-framework/src/pool/pool_disjoint.cpp:413: undefined reference to `umfMemoryTrackerGetAllocInfo'
/usr/bin/ld: /unified-memory-framework/src/pool/pool_disjoint.cpp:419: undefined reference to `umfMemoryProviderFree'
/usr/bin/ld: /unified-memory-framework/build/lib/libdisjoint_pool.a(pool_disjoint.cpp.o): in function `Slab::~Slab()':
/unified-memory-framework/src/pool/pool_disjoint.cpp:462: undefined reference to `umfGetLastFailedMemoryProvider'
/usr/bin/ld: /unified-memory-framework/src/pool/pool_disjoint.cpp:462: undefined reference to `umfMemoryProviderGetLastNativeError'
/usr/bin/ld: /unified-memory-framework/build/lib/libdisjoint_pool.a(pool_disjoint.cpp.o): in function `DisjointPool::AllocImpl::AllocImpl(umf_memory_provider_t*, umf_disjoint_pool_params_t*)':
/unified-memory-framework/src/pool/pool_disjoint.cpp:354: undefined reference to `umfMemoryProviderGetMinPageSize'
collect2: error: ld returned 1 exit status

Expected behavior:

TBD

Details

Today oneCCL and Intel MPI directly use Level 0 for memory allocations. So during migration to UMF, they can use pool_proxy (since it is part of the libumf.so binary) and it will be equivalent to their current implementation on top of Level 0. But eventually, using some "real" (that do actually pooling) pool manager might be beneficial for them.

We can make pool_disjoint part of libumf.so if we will be able to get rid of C++ runtime dependencies.

Requested priority:

Medium ?

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions