diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 9b8c6102dbee2..5a89b8e2d9e5c 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1107,6 +1107,7 @@ extern omp_allocator_handle_t __kmp_def_allocator;
 #endif
 
 extern int __kmp_memkind_available;
+extern bool __kmp_hwloc_available;
 
 typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
 
@@ -1119,6 +1120,9 @@ typedef struct kmp_allocator_t {
   kmp_uint64 pool_size;
   kmp_uint64 pool_used;
   bool pinned;
+#if KMP_USE_HWLOC
+  omp_alloctrait_value_t membind;
+#endif
 } kmp_allocator_t;
 
 extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index c3d5ecf1345e8..f2520db145552 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -1444,6 +1444,7 @@ void KMPAffinity::pick_api() {
   if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
       __kmp_affinity.type != affinity_disabled) {
     affinity_dispatch = new KMPHwlocAffinity();
+    __kmp_hwloc_available = true;
   } else
 #endif
   {
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index fb1b0eb5f0fe5..783d9ffe88aa3 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -14,6 +14,20 @@
 #include "kmp_io.h"
 #include "kmp_wrapper_malloc.h"
 
+#if KMP_USE_HWLOC
+#if HWLOC_API_VERSION > 0x00020300
+#define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
+#elif HWLOC_API_VERSION == 0x00020300
+#define KMP_HWLOC_LOCATION_TYPE_CPUSET \
+  hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
+#else
+enum hwloc_memattr_id_e {
+  HWLOC_MEMATTR_ID_BANDWIDTH,
+  HWLOC_MEMATTR_ID_CAPACITY
+};
+#endif
+#endif // KMP_USE_HWLOC
+
 // Disable bget when it is not used
 #if KMP_USE_BGET
 
@@ -1356,6 +1370,75 @@ void __kmp_fini_memkind() {
 #endif
 }
 
+#if KMP_USE_HWLOC
+static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
+#if HWLOC_API_VERSION >= 0x00020300
+  const hwloc_topology_support *support;
+  support = hwloc_topology_get_support(__kmp_hwloc_topology);
+  if (support) {
+    if (policy == HWLOC_MEMBIND_BIND)
+      return (support->membind->alloc_membind &&
+              support->membind->bind_membind);
+    if (policy == HWLOC_MEMBIND_INTERLEAVE)
+      return (support->membind->alloc_membind &&
+              support->membind->interleave_membind);
+  }
+  return false;
+#else
+  return false;
+#endif
+}
+
+void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
+                                hwloc_membind_policy_t policy) {
+#if HWLOC_API_VERSION >= 0x00020300
+  void *ptr = NULL;
+  hwloc_obj_t node;
+  struct hwloc_location initiator;
+  int ret;
+  // TODO: We should make this more efficient by avoiding the calls to
+  // 'hwloc_bitmap_alloc' and 'hwloc_get_cpubind' to get affinity, and instead
+  // use the th_affin_mask field once it is capable of exposing the underlying
+  // mask implementation.
+  hwloc_cpuset_t mask = hwloc_bitmap_alloc();
+  ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
+  if (ret < 0) {
+    hwloc_bitmap_free(mask);
+    return ptr;
+  }
+  initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
+  initiator.location.cpuset = mask;
+  ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator,
+                                      0, &node, NULL);
+  hwloc_bitmap_free(mask); // initiator cpuset is no longer needed
+  if (ret < 0) {
+    return ptr;
+  }
+  return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
+                             HWLOC_MEMBIND_BYNODESET);
+#else
+  return NULL;
+#endif
+}
+
+void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
+                                 hwloc_membind_policy_t policy) {
+#if HWLOC_API_VERSION >= 0x00020300
+  void *ptr = NULL;
+  if (ms == omp_high_bw_mem_space) {
+    ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
+  } else if (ms == omp_large_cap_mem_space) {
+    ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
+  } else {
+    ptr = hwloc_alloc(__kmp_hwloc_topology, size);
+  }
+  return ptr;
+#else
+  return NULL;
+#endif
+}
+#endif // KMP_USE_HWLOC
+
 void __kmp_init_target_mem() {
   *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
   *(void **)(&kmp_target_alloc_shared) =
@@ -1412,6 +1494,13 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
       al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
       break;
     case omp_atk_partition:
+#if KMP_USE_HWLOC
+      al->membind = (omp_alloctrait_value_t)traits[i].value;
+      KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
+                       al->membind == omp_atv_nearest ||
+                       al->membind == omp_atv_blocked ||
+                       al->membind == omp_atv_interleaved);
+#endif
       al->memkind = RCAST(void **, traits[i].value);
       break;
     default:
@@ -1466,7 +1555,8 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
       __kmp_free(al);
       return omp_null_allocator;
     } else {
-      if (ms == omp_high_bw_mem_space) {
-        // cannot detect HBW memory presence without memkind library
+      if (!__kmp_hwloc_available &&
+          (ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) {
+        // cannot detect HBW or large-cap memory without hwloc or memkind
        __kmp_free(al);
        return omp_null_allocator;
@@ -1573,8 +1663,9 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
   if (allocator > kmp_max_mem_alloc)
     is_pinned = al->pinned;
 
-  // Use default allocator if libmemkind is not available
-  int use_default_allocator = (__kmp_memkind_available) ? false : true;
+  // Use the default allocator if neither hwloc nor libmemkind is available
+  int use_default_allocator =
+      (!__kmp_hwloc_available && !__kmp_memkind_available);
 
   if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
     // Use size input directly as the memory may not be accessible on host.
@@ -1610,38 +1701,152 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, } } - if (__kmp_memkind_available) { - if (allocator < kmp_max_mem_alloc) { - // pre-defined allocator - if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) { - ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a); - } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) { - ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a); +#if KMP_USE_HWLOC + if (__kmp_hwloc_available) { + if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) { + if (allocator < kmp_max_mem_alloc) { + // pre-defined allocator + if (allocator == omp_high_bw_mem_alloc) { + ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, + desc.size_a, HWLOC_MEMBIND_BIND); + if (ptr == NULL) + use_default_allocator = true; + } else if (allocator == omp_large_cap_mem_alloc) { + ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, + desc.size_a, HWLOC_MEMBIND_BIND); + if (ptr == NULL) + use_default_allocator = true; + } else { + use_default_allocator = true; + } + if (use_default_allocator) { + ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); + } + } else if (al->pool_size > 0) { + // custom allocator with pool size requested + kmp_uint64 used = + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); + if (used + desc.size_a > al->pool_size) { + // not enough space, need to go fallback path + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); + if (al->fb == omp_atv_default_mem_fb) { + al = (kmp_allocator_t *)omp_default_mem_alloc; + ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); + } else if (al->fb == omp_atv_abort_fb) { + KMP_ASSERT(0); // abort fallback requested + } else if (al->fb == omp_atv_allocator_fb) { + KMP_ASSERT(al != al->fb_data); + al = al->fb_data; + return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + } // else ptr == NULL; + } else { + // pool has enough space + if (al->membind == omp_atv_interleaved) { + if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) { + ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a, + HWLOC_MEMBIND_INTERLEAVE); + } + } else if (al->membind == omp_atv_environment) { + ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a, + HWLOC_MEMBIND_DEFAULT); + } else { + ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); + } + if (ptr == NULL) { + if (al->fb == omp_atv_default_mem_fb) { + al = (kmp_allocator_t *)omp_default_mem_alloc; + ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); + } else if (al->fb == omp_atv_abort_fb) { + KMP_ASSERT(0); // abort fallback requested + } else if (al->fb == omp_atv_allocator_fb) { + KMP_ASSERT(al != al->fb_data); + al = al->fb_data; + return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + } + } + } } else { - ptr = kmp_mk_alloc(*mk_default, desc.size_a); + // custom allocator, pool size not requested + if (al->membind == omp_atv_interleaved) { + if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) { + ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a, + HWLOC_MEMBIND_INTERLEAVE); + } + } else if (al->membind == omp_atv_environment) { + ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a, + HWLOC_MEMBIND_DEFAULT); + } else { + ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); + } + if (ptr == NULL) { + if (al->fb == omp_atv_default_mem_fb) { + al = (kmp_allocator_t *)omp_default_mem_alloc; + ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); + } else if (al->fb == omp_atv_abort_fb) { + KMP_ASSERT(0); // abort fallback requested 
+ } else if (al->fb == omp_atv_allocator_fb) { + KMP_ASSERT(al != al->fb_data); + al = al->fb_data; + return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + } + } } - } else if (al->pool_size > 0) { - // custom allocator with pool size requested - kmp_uint64 used = - KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); - if (used + desc.size_a > al->pool_size) { - // not enough space, need to go fallback path - KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); - if (al->fb == omp_atv_default_mem_fb) { - al = (kmp_allocator_t *)omp_default_mem_alloc; + } else { // alloc membind not supported, use hwloc_alloc + ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); + } + } else { +#endif + if (__kmp_memkind_available) { + if (allocator < kmp_max_mem_alloc) { + // pre-defined allocator + if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) { + ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a); + } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) { + ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a); + } else { ptr = kmp_mk_alloc(*mk_default, desc.size_a); - } else if (al->fb == omp_atv_abort_fb) { - KMP_ASSERT(0); // abort fallback requested - } else if (al->fb == omp_atv_allocator_fb) { - KMP_ASSERT(al != al->fb_data); - al = al->fb_data; - ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); - if (is_pinned && kmp_target_lock_mem) - kmp_target_lock_mem(ptr, size, default_device); - return ptr; - } // else ptr == NULL; + } + } else if (al->pool_size > 0) { + // custom allocator with pool size requested + kmp_uint64 used = + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); + if (used + desc.size_a > al->pool_size) { + // not enough space, need to go fallback path + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); + if (al->fb == omp_atv_default_mem_fb) { + al = (kmp_allocator_t *)omp_default_mem_alloc; + ptr = kmp_mk_alloc(*mk_default, desc.size_a); + } else if (al->fb == omp_atv_abort_fb) { + KMP_ASSERT(0); // abort fallback requested + } else if (al->fb == omp_atv_allocator_fb) { + KMP_ASSERT(al != al->fb_data); + al = al->fb_data; + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; + } // else ptr == NULL; + } else { + // pool has enough space + ptr = kmp_mk_alloc(*al->memkind, desc.size_a); + if (ptr == NULL) { + if (al->fb == omp_atv_default_mem_fb) { + al = (kmp_allocator_t *)omp_default_mem_alloc; + ptr = kmp_mk_alloc(*mk_default, desc.size_a); + } else if (al->fb == omp_atv_abort_fb) { + KMP_ASSERT(0); // abort fallback requested + } else if (al->fb == omp_atv_allocator_fb) { + KMP_ASSERT(al != al->fb_data); + al = al->fb_data; + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; + } + } + } } else { - // pool has enough space + // custom allocator, pool size not requested ptr = kmp_mk_alloc(*al->memkind, desc.size_a); if (ptr == NULL) { if (al->fb == omp_atv_default_mem_fb) { @@ -1659,13 +1864,39 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, } } } - } else { - // custom allocator, pool size not requested - ptr = kmp_mk_alloc(*al->memkind, desc.size_a); - if (ptr == NULL) { + } else if (allocator < kmp_max_mem_alloc) { + // pre-defined allocator + if (allocator == omp_high_bw_mem_alloc) { + KMP_WARNING(OmpNoAllocator, 
"omp_high_bw_mem_alloc"); + } else if (allocator == omp_large_cap_mem_alloc) { + KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc"); + } else if (allocator == omp_const_mem_alloc) { + KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc"); + } else if (allocator == omp_low_lat_mem_alloc) { + KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc"); + } else if (allocator == omp_cgroup_mem_alloc) { + KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc"); + } else if (allocator == omp_pteam_mem_alloc) { + KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc"); + } else if (allocator == omp_thread_mem_alloc) { + KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc"); + } else { // default allocator requested + use_default_allocator = true; + } + if (use_default_allocator) { + ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); + use_default_allocator = false; + } + } else if (al->pool_size > 0) { + // custom allocator with pool size requested + kmp_uint64 used = + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); + if (used + desc.size_a > al->pool_size) { + // not enough space, need to go fallback path + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); if (al->fb == omp_atv_default_mem_fb) { al = (kmp_allocator_t *)omp_default_mem_alloc; - ptr = kmp_mk_alloc(*mk_default, desc.size_a); + ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); } else if (al->fb == omp_atv_abort_fb) { KMP_ASSERT(0); // abort fallback requested } else if (al->fb == omp_atv_allocator_fb) { @@ -1675,66 +1906,25 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, if (is_pinned && kmp_target_lock_mem) kmp_target_lock_mem(ptr, size, default_device); return ptr; - } - } - } - } else if (allocator < kmp_max_mem_alloc) { - // pre-defined allocator - if (allocator == omp_high_bw_mem_alloc) { - KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc"); - } else if (allocator == omp_large_cap_mem_alloc) { - KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc"); - } else if (allocator == omp_const_mem_alloc) { - KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc"); - } else if (allocator == omp_low_lat_mem_alloc) { - KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc"); - } else if (allocator == omp_cgroup_mem_alloc) { - KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc"); - } else if (allocator == omp_pteam_mem_alloc) { - KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc"); - } else if (allocator == omp_thread_mem_alloc) { - KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc"); - } else { // default allocator requested - use_default_allocator = true; - } - if (use_default_allocator) { - ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); - use_default_allocator = false; - } - } else if (al->pool_size > 0) { - // custom allocator with pool size requested - kmp_uint64 used = - KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); - if (used + desc.size_a > al->pool_size) { - // not enough space, need to go fallback path - KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); - if (al->fb == omp_atv_default_mem_fb) { - al = (kmp_allocator_t *)omp_default_mem_alloc; + } // else ptr == NULL + } else { + // pool has enough space ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); - } else if (al->fb == omp_atv_abort_fb) { - KMP_ASSERT(0); // abort fallback requested - } else if (al->fb == omp_atv_allocator_fb) { - KMP_ASSERT(al != al->fb_data); - al = al->fb_data; - ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); - 
if (is_pinned && kmp_target_lock_mem) - kmp_target_lock_mem(ptr, size, default_device); - return ptr; - } // else ptr == NULL; + if (ptr == NULL && al->fb == omp_atv_abort_fb) { + KMP_ASSERT(0); // abort fallback requested + } // no sense to look for another fallback because of same internal + // alloc + } } else { - // pool has enough space + // custom allocator, pool size not requested ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); if (ptr == NULL && al->fb == omp_atv_abort_fb) { KMP_ASSERT(0); // abort fallback requested } // no sense to look for another fallback because of same internal alloc } - } else { - // custom allocator, pool size not requested - ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); - if (ptr == NULL && al->fb == omp_atv_abort_fb) { - KMP_ASSERT(0); // abort fallback requested - } // no sense to look for another fallback because of same internal alloc +#if KMP_USE_HWLOC } +#endif KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a)); if (ptr == NULL) return NULL; @@ -1864,34 +2054,48 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) { kmp_target_unlock_mem(desc.ptr_alloc, device); } - if (__kmp_memkind_available) { - if (oal < kmp_max_mem_alloc) { - // pre-defined allocator - if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) { - kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc); - } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) { - kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc); +#if KMP_USE_HWLOC + if (__kmp_hwloc_available) { + if (oal > kmp_max_mem_alloc && al->pool_size > 0) { + kmp_uint64 used = + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); + (void)used; // to suppress compiler warning + KMP_DEBUG_ASSERT(used >= desc.size_a); + } + hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a); + } else { +#endif + if (__kmp_memkind_available) { + if (oal < kmp_max_mem_alloc) { + // pre-defined allocator + if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) { + kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc); + } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) { + kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc); + } else { + kmp_mk_free(*mk_default, desc.ptr_alloc); + } } else { - kmp_mk_free(*mk_default, desc.ptr_alloc); + if (al->pool_size > 0) { // custom allocator with pool size requested + kmp_uint64 used = + KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); + (void)used; // to suppress compiler warning + KMP_DEBUG_ASSERT(used >= desc.size_a); + } + kmp_mk_free(*al->memkind, desc.ptr_alloc); } } else { - if (al->pool_size > 0) { // custom allocator with pool size requested + if (oal > kmp_max_mem_alloc && al->pool_size > 0) { kmp_uint64 used = KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); (void)used; // to suppress compiler warning KMP_DEBUG_ASSERT(used >= desc.size_a); } - kmp_mk_free(*al->memkind, desc.ptr_alloc); + __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc); } - } else { - if (oal > kmp_max_mem_alloc && al->pool_size > 0) { - kmp_uint64 used = - KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); - (void)used; // to suppress compiler warning - KMP_DEBUG_ASSERT(used >= desc.size_a); - } - __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc); +#if KMP_USE_HWLOC } +#endif } /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. 
It causes
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 52e0fdbdfb1da..eb077bca4ce21 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -296,6 +296,7 @@ kmp_int32 __kmp_max_task_priority = 0;
 kmp_uint64 __kmp_taskloop_min_tasks = 0;
 
 int __kmp_memkind_available = 0;
+bool __kmp_hwloc_available = false;
 omp_allocator_handle_t const omp_null_allocator = NULL;
 omp_allocator_handle_t const omp_default_mem_alloc =
     (omp_allocator_handle_t const)1;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 8b6092cb1085c..392a02ebbd9aa 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -3767,7 +3767,7 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value,
     if (__kmp_match_str("omp_high_bw_mem_alloc", scan, &next)) {
       SKIP_WS(next);
       if (is_memalloc) {
-        if (__kmp_memkind_available) {
+        if (__kmp_hwloc_available || __kmp_memkind_available) {
           __kmp_def_allocator = omp_high_bw_mem_alloc;
           return;
         } else {
@@ -3780,7 +3780,7 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value,
     } else if (__kmp_match_str("omp_large_cap_mem_alloc", scan, &next)) {
       SKIP_WS(next);
       if (is_memalloc) {
-        if (__kmp_memkind_available) {
+        if (__kmp_hwloc_available || __kmp_memkind_available) {
           __kmp_def_allocator = omp_large_cap_mem_alloc;
           return;
         } else {
diff --git a/openmp/runtime/test/api/omp_alloc_hwloc.c b/openmp/runtime/test/api/omp_alloc_hwloc.c
new file mode 100644
index 0000000000000..7cdcae9b36e8a
--- /dev/null
+++ b/openmp/runtime/test/api/omp_alloc_hwloc.c
@@ -0,0 +1,25 @@
+// RUN: %libomp-compile && env KMP_TOPOLOGY_METHOD=hwloc %libomp-run
+// REQUIRES: hwloc
+
+#include <stdio.h>
+#include <omp.h>
+
+int main() {
+  void *p[2];
+#pragma omp parallel num_threads(2)
+  {
+    int i = omp_get_thread_num();
+    p[i] = omp_alloc(1024 * 1024, omp_get_default_allocator());
+#pragma omp barrier
+    printf("th %d, ptr %p\n", i, p[i]);
+    omp_free(p[i], omp_get_default_allocator());
+  }
+  // Both pointers should be non-NULL
+  if (p[0] != NULL && p[1] != NULL) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed: pointers %p %p\n", p[0], p[1]);
+    return 1;
+  }
+}
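For reference, the hwloc memory-attribute sequence that __kmp_hwloc_alloc_membind wraps can be exercised standalone as below. This is a minimal sketch, not part of the patch; it assumes hwloc >= 2.3 (for hwloc/memattrs.h and the plain-C HWLOC_LOCATION_TYPE_CPUSET constant) and a machine whose topology reports memory attributes. Build with `cc demo.c -lhwloc`.

// demo.c: query the best-bandwidth NUMA node for the calling thread and
// bind a 1 MiB allocation to it, mirroring the patch's allocation path.
#include <hwloc.h>
#include <hwloc/memattrs.h>
#include <stdio.h>

int main(void) {
  hwloc_topology_t topo;
  hwloc_topology_init(&topo);
  hwloc_topology_load(topo);

  // Use the calling thread's cpuset as the initiator, as the patch does.
  hwloc_cpuset_t mask = hwloc_bitmap_alloc();
  if (hwloc_get_cpubind(topo, mask, HWLOC_CPUBIND_THREAD) == 0) {
    struct hwloc_location initiator;
    initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
    initiator.location.cpuset = mask;
    hwloc_obj_t node = NULL;
    // Ask hwloc for the NUMA node with the best bandwidth from this thread.
    if (hwloc_memattr_get_best_target(topo, HWLOC_MEMATTR_ID_BANDWIDTH,
                                      &initiator, 0, &node, NULL) == 0) {
      // Bind a 1 MiB allocation to that node's nodeset.
      void *p = hwloc_alloc_membind(topo, 1 << 20, node->nodeset,
                                    HWLOC_MEMBIND_BIND,
                                    HWLOC_MEMBIND_BYNODESET);
      printf("1 MiB bound to NUMA node %u: %p\n", node->os_index, p);
      if (p)
        hwloc_free(topo, p, 1 << 20);
    }
  }
  hwloc_bitmap_free(mask);
  hwloc_topology_destroy(topo);
  return 0;
}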
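And a user-level sketch (also illustrative, not part of the patch) of how the new omp_atk_partition handling is reached: an allocator built with the omp_atv_interleaved partition trait will, when hwloc is available and interleave membind is supported, take the HWLOC_MEMBIND_INTERLEAVE path in __kmp_alloc, and otherwise fall back to the default allocation path. Only standard OpenMP 5.x allocator API calls are used here; the buffer size is arbitrary.

#include <omp.h>
#include <stdio.h>

int main(void) {
  // Request interleaved page placement across NUMA nodes.
  omp_alloctrait_t traits[1] = {{omp_atk_partition, omp_atv_interleaved}};
  omp_allocator_handle_t al =
      omp_init_allocator(omp_default_mem_space, 1, traits);
  if (al == omp_null_allocator)
    return 1; // allocator could not be created
  double *buf = (double *)omp_alloc(1024 * 1024, al);
  printf("interleaved buffer at %p\n", (void *)buf);
  omp_free(buf, al);
  omp_destroy_allocator(al);
  return 0;
}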