diff --git a/include/umf/memory_pool.h b/include/umf/memory_pool.h index d47448760..a93d400f9 100644 --- a/include/umf/memory_pool.h +++ b/include/umf/memory_pool.h @@ -147,7 +147,7 @@ umf_result_t umfFree(void *ptr); /// /// * The implementation of this function *should* be lock-free. /// @param hPool specified memory pool handle for which the last allocation error is returned -/// @return Error code desciribng the failure of the last failed allocation operation. +/// @return Error code describing the failure of the last failed allocation operation. /// The value is undefined if the previous allocation was successful. /// umf_result_t umfPoolGetLastAllocationError(umf_memory_pool_handle_t hPool); diff --git a/include/umf/memory_pool_ops.h b/include/umf/memory_pool_ops.h index 9ddb34700..b67238a46 100644 --- a/include/umf/memory_pool_ops.h +++ b/include/umf/memory_pool_ops.h @@ -22,7 +22,7 @@ extern "C" { /// calls. Each memory pool implementation should initialize all function /// pointers. /// -typedef struct umf_memory_pool_ops_t { +struct umf_memory_pool_ops_t { /// Version of the ops structure. /// Should be initialized using UMF_VERSION_CURRENT. uint32_t version; @@ -120,7 +120,7 @@ typedef struct umf_memory_pool_ops_t { /// The value is undefined if the previous allocation was successful. /// umf_result_t (*get_last_allocation_error)(void *pool); -} umf_memory_pool_ops_t; +}; #ifdef __cplusplus } diff --git a/include/umf/providers/provider_coarse.h b/include/umf/providers/provider_coarse.h new file mode 100644 index 000000000..610fc4a43 --- /dev/null +++ b/include/umf/providers/provider_coarse.h @@ -0,0 +1,73 @@ +// Copyright (C) 2023-2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef UMF_COARSE_PROVIDER_H +#define UMF_COARSE_PROVIDER_H + +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +/// @brief Coarse Memory Provider settings struct. +typedef struct coarse_memory_provider_params_t { + /// Handle to the upstream memory provider, could be NULL. + umf_memory_provider_handle_t upstream_memory_provider; + + /// When set, the init buffer would be pre-allocated (with + /// `init_buffer_size` bytes) during creation time. The memory used to + /// pre-allocate it would be taken either from the `init_buffer` or from + /// the `upstream_memory_provider`, so either one of them has to be set. + bool immediate_init; + + /// Init buffer used to pre-allocate memory at the creation time, could be + /// NULL. + void *init_buffer; + + /// Size of the pre-allocated buffer. If the `init_buffer` is set, the + /// `init_buffer_size` should be the size of this buffer. + size_t init_buffer_size; + + /// Enable extra tracing (TODO - move to CTL) + bool trace; + + /// If this flag is set, the Coarse Provider wouldn't ask the upstream + /// memory provider to free the memory during destruction. + bool WA_do_not_free_upstream; +} coarse_memory_provider_params_t; + +/// @brief Coarse Memory Provider stats (TODO move to CTL) +typedef struct coarse_memory_provider_stats_t { + /// Total allocation size. + size_t alloc_size; + + /// Size of used memory. + size_t used_size; + + /// Number of memory blocks allocated from the upstream provider. + size_t upstream_blocks_num; + + /// Total number of allocated memory blocks. + size_t blocks_num; + + /// Number of free memory blocks. 
+ size_t free_blocks_num; +} coarse_memory_provider_stats_t; + +umf_memory_provider_ops_t *umfCoarseMemoryProviderOps(void); + +// TODO use CTL +coarse_memory_provider_stats_t +umfCoarseMemoryProviderGetStats(umf_memory_provider_handle_t provider); + +umf_memory_provider_handle_t umfCoarseMemoryProviderGetUpstreamProvider( + umf_memory_provider_handle_t provider); + +#ifdef __cplusplus +} +#endif + +#endif // UMF_COARSE_PROVIDER_H diff --git a/scripts/docs_config/api.rst b/scripts/docs_config/api.rst index c434f0005..a80aca61f 100644 --- a/scripts/docs_config/api.rst +++ b/scripts/docs_config/api.rst @@ -80,6 +80,15 @@ and operate on the provider. .. doxygenfile:: memory_provider.h :sections: define enum typedef func var +Coarse Provider +------------------------------------------ + +A memory provider that should be used as a "cache" for pre-allocated buffer or +with additional upstream provider (e.g. OS Memory Provider). + +.. doxygenfile:: provider_coarse.h + :sections: define enum typedef func var + OS Memory Provider ------------------------------------------ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 663fe542f..e93f443f4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -54,14 +54,17 @@ set(BA_SOURCES set(UMF_SOURCES ${BA_SOURCES} ipc.c + critnib/critnib.c memory_pool.c memory_provider.c memory_provider_get_last_failed.c memory_target.c memspace.c - provider/provider_tracking.c - critnib/critnib.c pool/pool_proxy.c + provider/provider_coarse.c + provider/provider_os_memory.c + provider/provider_tracking.c + ravl/ravl.c topology.c) set(UMF_SOURCES_LINUX libumf_linux.c) @@ -75,14 +78,13 @@ set(UMF_PRIVATE_COMPILE_DEFINITIONS "") set(UMF_SOURCES_LINUX ${UMF_SOURCES_LINUX} - provider/provider_os_memory.c provider/provider_os_memory_linux.c memory_targets/memory_target_numa.c memspaces/memspace_numa.c memspaces/memspace_host_all.c memspaces/memspace_highest_capacity.c) -set(UMF_SOURCES_WINDOWS ${UMF_SOURCES_WINDOWS} provider/provider_os_memory.c +set(UMF_SOURCES_WINDOWS ${UMF_SOURCES_WINDOWS} provider/provider_os_memory_windows.c) set(UMF_LIBS ${UMF_LIBS} ${LIBHWLOC_LIBRARIES}) @@ -157,6 +159,7 @@ target_include_directories( umf PUBLIC $ $ + $ $ $ $ diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index c8ee202e8..965ca03b9 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -315,7 +315,7 @@ static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { } /* - * crinib_insert -- write a key:value pair to the critnib structure + * critnib_insert -- write a key:value pair to the critnib structure * * Returns: * • 0 on success diff --git a/src/libumf.def b/src/libumf.def index dafda36f1..9e1729f07 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -11,6 +11,9 @@ VERSION 1.0 EXPORTS DllMain umfCloseIPCHandle + umfCoarseMemoryProviderGetStats + umfCoarseMemoryProviderGetUpstreamProvider + umfCoarseMemoryProviderOps umfFree umfGetIPCHandle umfGetLastFailedMemoryProvider diff --git a/src/libumf.map b/src/libumf.map index 9a3152a4b..d49aa11c1 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -5,6 +5,9 @@ UMF_1.0 { global: umfCloseIPCHandle; + umfCoarseMemoryProviderGetStats; + umfCoarseMemoryProviderGetUpstreamProvider; + umfCoarseMemoryProviderOps; umfFree; umfGetIPCHandle; umfGetLastFailedMemoryProvider; diff --git a/src/memory_target.h b/src/memory_target.h index 4811cd527..e0d3b6007 100644 --- a/src/memory_target.h +++ b/src/memory_target.h @@ -12,15 +12,13 @@ #include +#include "base_alloc.h" +#include "memory_target_ops.h" + #ifdef 
__cplusplus extern "C" { #endif -#include "base_alloc.h" - -struct umf_memory_target_ops_t; -typedef struct umf_memory_target_ops_t umf_memory_target_ops_t; - typedef struct umf_memory_target_t { const umf_memory_target_ops_t *ops; void *priv; diff --git a/src/memory_target_ops.h b/src/memory_target_ops.h index c36a06326..ee25ea148 100644 --- a/src/memory_target_ops.h +++ b/src/memory_target_ops.h @@ -19,8 +19,6 @@ extern "C" { #endif -typedef struct umf_memory_target_t *umf_memory_target_handle_t; - typedef struct umf_memory_target_ops_t { /// Version of the ops structure. /// Should be initialized using UMF_VERSION_CURRENT diff --git a/src/provider/provider_coarse.c b/src/provider/provider_coarse.c new file mode 100644 index 000000000..1c61ab79d --- /dev/null +++ b/src/provider/provider_coarse.c @@ -0,0 +1,1405 @@ +/* + Copyright (c) 2023 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include +#include +#include +#include +#include +#include + +#include + +#include "base_alloc_global.h" +#include "memory_provider_internal.h" +#include "ravl.h" +#include "utils_concurrency.h" + +#ifndef BYTE +#define BYTE unsigned char +#endif + +static umf_result_t coarse_memory_provider_alloc(void *provider, size_t size, + size_t alignment, + void **resultPtr); + +static umf_result_t coarse_memory_provider_free(void *provider, void *ptr, + size_t bytes); + +static umf_result_t +coarse_memory_provider_get_stats(void *provider, + coarse_memory_provider_stats_t *stats); + +static void ravl_cb_count(void *data, void *arg); +static void ravl_cb_count_free(void *data, void *arg); + +typedef struct block_t { + size_t size; + BYTE *data; + + // Origin is the element of the provider's upstream_alloc that contains the + // beginning of data in current block. Note that data address could be + // higher than the origin - this means, that the origin allocation + // covers current block only partially. + // If the size of the block is greater than the size of the allocation, + // it means that there are multiple allocations. + // Note that provider's upstream_alloc doesn't use "origin" and "used" fields. + struct block_t *origin; + bool used; + + struct block_t *next; + struct block_t *prev; + + // Node in the list of free blocks of the same size pointing to this block. + // The list is located in the (coarse_provider->free_blocks) RAVL tree. + struct ravl_free_blocks_elem_t *free_list_ptr; +} block_t; + +// A general node in a RAVL tree. 
+// 1) coarse_provider->all_blocks RAVL tree (tree of all blocks - sorted by an address of data): +// key - pointer (block_t->data) to the beginning of the block data +// value - pointer (block_t) to the block of the allocation +// 2) coarse_provider->free_blocks RAVL tree (tree of free blocks - sorted by a size of data): +// key - size of the allocation (block_t->size) +// value - pointer (ravl_free_blocks_head_t) to the head of the list of free blocks of the same size +typedef struct ravl_data_t { + uintptr_t key; + void *value; +} ravl_data_t; + +// The head of the list of free blocks of the same size, +// so there is a separate mutex for each size. +typedef struct ravl_free_blocks_head_t { + struct ravl_free_blocks_elem_t *head; + struct os_mutex_t mutex; +} ravl_free_blocks_head_t; + +// The node of the list of free blocks of the same size +typedef struct ravl_free_blocks_elem_t { + struct block_t *block; + struct ravl_free_blocks_elem_t *next; + struct ravl_free_blocks_elem_t *prev; +} ravl_free_blocks_elem_t; + +// The compare function of a RAVL tree +static int ravl_comp(const void *lhs, const void *rhs) { + ravl_data_t *lhs_ravl = (ravl_data_t *)lhs; + ravl_data_t *rhs_ravl = (ravl_data_t *)rhs; + + if (lhs_ravl->key < rhs_ravl->key) { + return -1; + } else if (lhs_ravl->key == rhs_ravl->key) { + return 0; + } else { + return 1; + } +} + +// The functions "ravl_tree_*" handle lists of blocks: +// - coarse_provider->all_blocks and coarse_provider->upstream_alloc +// sorted by a pointer (block_t->data) to the beginning of the block data. +// +// ravl_tree_add_new - allocate and add a new block to the tree +// and link this block to the next and the previous one. +static block_t *ravl_tree_add_new(struct ravl *rtree, BYTE *data, size_t size) { + assert(rtree); + assert(data); + assert(size); + + // TODO add valgrind annotations + block_t *block = (block_t *)umf_ba_global_alloc(sizeof(block_t)); + if (block == NULL) { + return NULL; + } + + block->data = data; + block->size = size; + block->next = NULL; + block->prev = NULL; + block->free_list_ptr = NULL; + + ravl_data_t rdata = {(uintptr_t)block->data, block}; + assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); + int ret = ravl_emplace_copy(rtree, &rdata); + if (ret) { + umf_ba_global_free(block); + return NULL; + } + + struct ravl_node *ravl_node = + ravl_find(rtree, &rdata, RAVL_PREDICATE_EQUAL); + + assert(ravl_node != NULL); + + struct ravl_node *ravl_next = ravl_node_successor(ravl_node); + if (ravl_next) { + ravl_data_t *node_data = ravl_data(ravl_next); + assert(node_data); + block->next = node_data->value; + assert(block->next); + } + + struct ravl_node *ravl_prev = ravl_node_predecessor(ravl_node); + if (ravl_prev) { + ravl_data_t *node_data = ravl_data(ravl_prev); + assert(node_data); + block->prev = node_data->value; + assert(block->prev); + } + + if (block->next) { + assert(block->next->prev == block->prev); + block->next->prev = block; + } + + if (block->prev) { + assert(block->prev->next == block->next); + block->prev->next = block; + } + + return block; +} + +// ravl_tree_find - find the block in the tree +static block_t *ravl_tree_find(struct ravl *rtree, void *ptr) { + ravl_data_t data = {(uintptr_t)ptr, NULL}; + struct ravl_node *node; + node = ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); + if (node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + return (block_t *)node_data->value; + } + return NULL; +} + +// ravl_tree_find - remove the block from the tree +static block_t 
*ravl_tree_rm(struct ravl *rtree, void *ptr) { + ravl_data_t data = {(uintptr_t)ptr, NULL}; + struct ravl_node *node; + node = ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); + if (node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + block_t *block = node_data->value; + assert(block); + ravl_remove(rtree, node); + assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); + return block; + } + return NULL; +} + +// The functions "node_list_*" handle lists of free block of the same size. +// The heads (ravl_free_blocks_head_t) of those lists are stored in nodes of +// the coarse_provider->free_blocks RAVL tree. +// +// node_list_add - add a free block to the list of free blocks of the same size +static ravl_free_blocks_elem_t * +node_list_add(ravl_free_blocks_head_t *head_node, struct block_t *block) { + assert(head_node); + assert(block); + + ravl_free_blocks_elem_t *node = + (ravl_free_blocks_elem_t *)umf_ba_global_alloc( + sizeof(ravl_free_blocks_elem_t)); + if (node == NULL) { + return NULL; + } + + util_mutex_lock(&head_node->mutex); + + if (head_node->head) { + head_node->head->prev = node; + } + + node->block = block; + node->next = head_node->head; + node->prev = NULL; + head_node->head = node; + + util_mutex_unlock(&head_node->mutex); + + return node; +} + +// node_list_rm_first - remove the first free block from the list of free blocks of the same size +static block_t *node_list_rm_first(ravl_free_blocks_head_t *head_node) { + assert(head_node); + + util_mutex_lock(&head_node->mutex); + + if (!head_node->head) { + util_mutex_unlock(&head_node->mutex); + return NULL; + } + + ravl_free_blocks_elem_t *node = head_node->head; + assert(node->prev == NULL); + if (node->next) { + node->next->prev = NULL; + } + + head_node->head = node->next; + util_mutex_unlock(&head_node->mutex); + + struct block_t *block = node->block; + block->free_list_ptr = NULL; + umf_ba_global_free(node); + + return block; +} + +// node_list_rm - remove the given free block from the list of free blocks of the same size +static block_t *node_list_rm(ravl_free_blocks_head_t *head_node, + ravl_free_blocks_elem_t *node) { + assert(head_node); + assert(node); + + util_mutex_lock(&head_node->mutex); + + if (!head_node->head) { + util_mutex_unlock(&head_node->mutex); + return NULL; + } + + if (node == head_node->head) { + assert(node->prev == NULL); + head_node->head = node->next; + } + + ravl_free_blocks_elem_t *node_next = node->next; + ravl_free_blocks_elem_t *node_prev = node->prev; + if (node_next) { + node_next->prev = node_prev; + } + + if (node_prev) { + node_prev->next = node_next; + } + + util_mutex_unlock(&head_node->mutex); + struct block_t *block = node->block; + block->free_list_ptr = NULL; + umf_ba_global_free(node); + + return block; +} + +// The functions "free_blocks_*" handle the coarse_provider->free_blocks RAVL tree +// sorted by a size of the allocation (block_t->size). +// This is a tree of heads (ravl_free_blocks_head_t) of lists of free block of the same size. 
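+// Illustrative example: after blocks of 4096, 4096 and 65536 bytes are freed,
+// the tree holds two keys - 4096 (a two-element list) and 65536 (a one-element
+// list) - so free_blocks_rm_ge() can satisfy any request up to 65536 bytes
+// with a single RAVL_PREDICATE_GREATER_EQUAL lookup.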
+// +// free_blocks_add - add a free block to the list of free blocks of the same size +static int free_blocks_add(struct ravl *free_blocks, block_t *block) { + ravl_free_blocks_head_t *head_node = NULL; + int rv; + + ravl_data_t head_node_data = {(uintptr_t)block->size, NULL}; + struct ravl_node *node; + node = ravl_find(free_blocks, &head_node_data, RAVL_PREDICATE_EQUAL); + if (node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + head_node = node_data->value; + assert(head_node); + } + + if (!head_node) { + head_node = umf_ba_global_alloc(sizeof(ravl_free_blocks_head_t)); + if (!head_node) { + return -1; + } + + head_node->head = NULL; + if (util_mutex_init(&head_node->mutex) == NULL) { + umf_ba_global_free(head_node); + return -1; + } + + ravl_data_t data = {(uintptr_t)block->size, head_node}; + assert(NULL == ravl_find(free_blocks, &data, RAVL_PREDICATE_EQUAL)); + rv = ravl_emplace_copy(free_blocks, &data); + if (rv) { + umf_ba_global_free(head_node); + return -1; + } + } + + block->free_list_ptr = node_list_add(head_node, block); + if (!block->free_list_ptr) { + return -1; + } + + assert(block->free_list_ptr->block->size == block->size); + + return 0; +} + +// free_blocks_rm_ge - remove the first free block of a size greater or equal to the given size. +// If it was the last block, the head node is freed and removed from the tree. +// It is used during memory allocation (looking for a free block). +static block_t *free_blocks_rm_ge(struct ravl *free_blocks, size_t size) { + ravl_data_t data = {(uintptr_t)size, NULL}; + struct ravl_node *node; + node = ravl_find(free_blocks, &data, RAVL_PREDICATE_GREATER_EQUAL); + if (!node) { + return NULL; + } + + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + assert(node_data->key >= size); + + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + + block_t *block = node_list_rm_first(head_node); + + if (head_node->head == NULL) { + umf_ba_global_free(head_node); + ravl_remove(free_blocks, node); + } + + return block; +} + +// free_blocks_rm_node - remove the free block pointed by the given node. +// If it was the last block, the head node is freed and removed from the tree. +// It is used during merging free blocks and destroying the coarse_provider->free_blocks tree. +static block_t *free_blocks_rm_node(struct ravl *free_blocks, + ravl_free_blocks_elem_t *node) { + assert(free_blocks); + assert(node); + size_t size = node->block->size; + ravl_data_t data = {(uintptr_t)size, NULL}; + struct ravl_node *ravl_node; + ravl_node = ravl_find(free_blocks, &data, RAVL_PREDICATE_EQUAL); + assert(ravl_node); + + ravl_data_t *node_data = ravl_data(ravl_node); + assert(node_data); + assert(node_data->key == size); + + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + + block_t *block = node_list_rm(head_node, node); + + if (head_node->head == NULL) { + umf_ba_global_free(head_node); + ravl_remove(free_blocks, ravl_node); + } + + return block; +} + +// free_block_merge_with_prev - merge the given free block +// with the previous one if both are unused and have continuous data. +// Remove the merged block from the tree of free blocks. 
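+// The merge is attempted only when the upstream provider confirms (via
+// umfMemoryProviderAllocationMerge) that the two underlying allocations can be
+// combined; otherwise both blocks are left in place. Illustrative example:
+// unused neighbours [0x1000, 0x1100) and [0x1100, 0x1300) become one unused
+// block [0x1000, 0x1300) and the redundant block_t is removed and freed.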
+static block_t *free_block_merge_with_prev( + umf_memory_provider_handle_t upstream_memory_provider, + struct ravl *all_blocks, struct ravl *free_blocks, block_t *block) { + + assert(all_blocks); + assert(free_blocks); + assert(block); + assert(block->used == false); + + // check if blocks could be merged by the upstream provider + umf_result_t merge_success = UMF_RESULT_ERROR_UNKNOWN; + if (upstream_memory_provider && block->prev && block->prev->used == false && + (block->prev->data + block->prev->size == block->data)) { + + merge_success = umfMemoryProviderAllocationMerge( + upstream_memory_provider, block->prev->data, block->data, + block->prev->size + block->size); + } + + if (merge_success == UMF_RESULT_SUCCESS) { + block_t *to_free = block; + + if (block->prev->free_list_ptr) { + free_blocks_rm_node(free_blocks, block->prev->free_list_ptr); + block->prev->free_list_ptr = NULL; + } + + // set neighbors + block->prev->next = block->next; + block->prev->size += block->size; + + if (block->next) { + block->next->prev = block->prev; + } + + block = block->prev; + block_t *block_rm = ravl_tree_rm(all_blocks, to_free->data); + assert(block_rm == to_free); + (void)block_rm; // WA for unused variable error + umf_ba_global_free(to_free); + } + + return block; +} + +// free_block_merge_with_next - merge the given free block +// with the next one if both are unused and have continuous data. +// Remove the merged block from the tree of free blocks. +static block_t *free_block_merge_with_next( + umf_memory_provider_handle_t upstream_memory_provider, + struct ravl *all_blocks, struct ravl *free_blocks, block_t *block) { + + assert(all_blocks); + assert(free_blocks); + assert(block); + assert(block->used == false); + + // check if blocks could be merged by the upstream provider + umf_result_t merge_success = UMF_RESULT_ERROR_UNKNOWN; + if (upstream_memory_provider && block->next && block->next->used == false && + (block->data + block->size == block->next->data)) { + + merge_success = umfMemoryProviderAllocationMerge( + upstream_memory_provider, block->data, block->next->data, + block->size + block->next->size); + } + + if (merge_success == UMF_RESULT_SUCCESS) { + block_t *to_free = block->next; + + if (block->next->free_list_ptr) { + free_blocks_rm_node(free_blocks, block->next->free_list_ptr); + block->next->free_list_ptr = NULL; + } + + assert(block->data < block->next->data); + assert((block->data + block->size) == block->next->data); + + if (block->next->next) { + block->next->next->prev = block; + } + + // set neighbors + block->size += block->next->size; + block->next = block->next->next; + + block_t *block_rm = ravl_tree_rm(all_blocks, to_free->data); + assert(block_rm == to_free); + (void)block_rm; // WA for unused variable error + umf_ba_global_free(to_free); + } + + return block; +} + +// alloc_find_origin - find the upstream allocation that contains data with given offset. 
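+// It walks the upstream allocation list, subtracting each allocation's size
+// from the offset until the offset falls inside one of them. Illustrative
+// example: for consecutive upstream allocations of 100, 50 and 200 bytes,
+// offset 120 lands in the second allocation (at internal offset 20).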
+static block_t *alloc_find_origin(block_t *alloc, size_t offset) { + assert(alloc); + + while (offset >= alloc->size) { + offset -= alloc->size; + alloc = alloc->next; + assert(alloc); + } + + return alloc; +} + +typedef struct coarse_memory_provider_t { + umf_memory_provider_handle_t upstream_memory_provider; + void *init_buffer; + + size_t used_size; + size_t alloc_size; + + // upstream_alloc - tree of all blocks allocated from the upstream provider + struct ravl *upstream_alloc; + + // all_blocks - tree of all blocks - sorted by an address of data + struct ravl *all_blocks; + + // free_blocks - tree of free blocks - sorted by a size of data, + // each node contains a pointer (ravl_free_blocks_head_t) + // to the head of the list of free blocks of the same size + struct ravl *free_blocks; + + struct os_mutex_t lock; + + bool trace; + bool WA_do_not_free_upstream; +} coarse_memory_provider_t; + +#ifndef NDEBUG +// ravl_tree_get_head_block() - find the head (head->prev == NULL) of the all_blocks list. +// It is not used in the critical path. +static block_t *ravl_tree_get_head_block(struct ravl *rtree) { + // find head of blocks (head->prev == NULL) + block_t *block = NULL; + struct ravl_node *rnode = ravl_first(rtree); + if (!rnode) { + return NULL; + } + + ravl_data_t *rdata = ravl_data(rnode); + assert(rdata); + block = rdata->value; + assert(block); + // make sure it is really the head + assert(block->prev == NULL); + return block; +} + +static bool debug_check(coarse_memory_provider_t *provider) { + assert(provider); + + size_t sum_used = 0; + size_t sum_blocks_size = 0; + size_t sum_allocs_size = 0; + + coarse_memory_provider_stats_t stats = {0}; + coarse_memory_provider_get_stats(provider, &stats); + + // find the head (head->prev == NULL) of the all_blocks list + block_t *head = ravl_tree_get_head_block(provider->all_blocks); + if (stats.blocks_num == 0) { + assert(head == NULL); + } else { + assert(head != NULL); + } + + // tail of blocks (tail->next == NULL) + block_t *tail = NULL; + + // count blocks by next + size_t count_next = 0; + size_t count_free_next = 0; + block_t *block = head; + while (block) { + count_next++; + if (!block->used) { + count_free_next++; + } + tail = block; + block = block->next; + } + assert(count_next == stats.blocks_num); + assert(count_free_next == stats.free_blocks_num); + + // count blocks by prev + size_t count_prev = 0; + size_t count_free_prev = 0; + block = tail; + while (block) { + count_prev++; + if (!block->used) { + count_free_prev++; + } + block = block->prev; + } + assert(count_prev == stats.blocks_num); + assert(count_free_prev == stats.free_blocks_num); + + block = head; + while (block) { + assert(block->data); + assert(block->size > 0); + assert(block->origin); + assert(block->origin->data); + assert(block->data >= block->origin->data); + assert(block->data < (block->origin->data + block->origin->size)); + + // only the HEAD could have an empty prev + if (block != head) { + assert(block->prev); + } + + // check double-linking + if (block->prev) { + assert(block->prev->next == block); + } + + if (block->next) { + assert(block->next->prev == block); + } + + // there shouldn't be two adjacent not-used blocks + // if they allocs are continuous and could be merged + if (provider->upstream_memory_provider && block->prev && + block->prev->used == false && block->used == false && + (block->prev->data + block->prev->size == block->data)) { + + umf_result_t merge_success = umfMemoryProviderAllocationMerge( + provider->upstream_memory_provider, 
block->prev->data, + block->data, block->prev->size + block->size); + assert(merge_success != UMF_RESULT_SUCCESS); + } + + if (provider->upstream_memory_provider && block->next && + block->next->used == false && block->used == false && + (block->data + block->size == block->next->data)) { + + umf_result_t merge_success = umfMemoryProviderAllocationMerge( + provider->upstream_memory_provider, block->data, + block->next->data, block->size + block->next->size); + assert(merge_success != UMF_RESULT_SUCCESS); + } + + // data addresses in the list are in ascending order + if (block->prev) { + assert(block->prev->data < block->data); + } + + if (block->next) { + assert(block->data < block->next->data); + } + + // two block's data should not overlap + if (block->next) { + assert((block->data + block->size) <= block->next->data); + } + + // allocs used in block should be continuous + block_t *alloc = block->origin; + size_t alloc_offset = block->data - block->origin->data; + size_t block_size_w_off = block->size + alloc_offset; + size_t allocs_sum = alloc->size; + while (allocs_sum < block_size_w_off) { + assert(alloc->next); + assert((alloc->data + alloc->size) == alloc->next->data); + alloc = alloc->next; + allocs_sum += alloc->size; + } + + sum_blocks_size += block->size; + if (block->used) { + sum_used += block->size; + } + + block = block->next; + } + + assert(sum_used == provider->used_size); + assert(sum_blocks_size == provider->alloc_size); + assert(provider->alloc_size >= provider->used_size); + + count_next = 0; + + // find head of blocks (head->prev == NULL) + head = ravl_tree_get_head_block(provider->upstream_alloc); + block_t *alloc = head; + while (alloc) { + assert(alloc->data); + assert(alloc->size > 0); + + // only the HEAD could have an empty prev + if (alloc != head) { + assert(alloc->prev); + } + + // check double-linking + if (alloc->prev) { + assert(alloc->prev->next == alloc); + } + + if (alloc->next) { + assert(alloc->next->prev == alloc); + } + + // data addresses in the list are in ascending order + if (alloc->prev) { + assert(alloc->prev->data < alloc->data); + } + + if (alloc->next) { + assert(alloc->data < alloc->next->data); + } + + // data should not overlap + if (alloc->next) { + assert((alloc->data + alloc->size) <= alloc->next->data); + } + + sum_allocs_size += alloc->size; + count_next++; + + alloc = alloc->next; + } + + assert(sum_allocs_size == provider->alloc_size); + assert(count_next == stats.upstream_blocks_num); + + return true; +} +#endif + +static umf_result_t coarse_memory_provider_initialize(void *params, + void **provider) { + umf_result_t ret = UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + + if (provider == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (params == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_params_t *coarse_params = + (coarse_memory_provider_params_t *)params; + + // check params + // we should either provider an upstream provider or init buffer + if (coarse_params->upstream_memory_provider == NULL && + coarse_params->init_buffer == NULL) { + // TODO error message + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_t *coarse_provider = + (coarse_memory_provider_t *)umf_ba_global_alloc( + sizeof(coarse_memory_provider_t)); + + if (!coarse_provider) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + if (util_mutex_init(&coarse_provider->lock) == NULL) { + goto err_free_coarse_provider; + } + + coarse_provider->trace = coarse_params->trace; + 
coarse_provider->WA_do_not_free_upstream = + coarse_params->WA_do_not_free_upstream; + coarse_provider->upstream_memory_provider = + coarse_params->upstream_memory_provider; + coarse_provider->init_buffer = coarse_params->init_buffer; + + coarse_provider->upstream_alloc = + ravl_new_sized(ravl_comp, sizeof(ravl_data_t)); + if (coarse_provider->upstream_alloc == NULL) { + goto err_free_coarse_provider; + } + + coarse_provider->free_blocks = + ravl_new_sized(ravl_comp, sizeof(ravl_data_t)); + if (coarse_provider->free_blocks == NULL) { + goto err_delete_ravl_upstream_alloc; + } + + coarse_provider->all_blocks = + ravl_new_sized(ravl_comp, sizeof(ravl_data_t)); + if (coarse_provider->all_blocks == NULL) { + goto err_delete_ravl_free_blocks; + } + + coarse_provider->alloc_size = 0; + coarse_provider->used_size = 0; + + if (coarse_params->immediate_init) { + // allocate and immediately deallocate memory using the upstream + // provider + void *init_buffer = NULL; + coarse_memory_provider_alloc( + coarse_provider, coarse_params->init_buffer_size, 0, &init_buffer); + + if (init_buffer == NULL) { + goto err_delete_ravl_all_blocks; + } + + coarse_memory_provider_free(coarse_provider, init_buffer, + coarse_params->init_buffer_size); + + // since we use alloc and free functions, we have set the block as unused + assert(coarse_provider->used_size == 0); + assert(coarse_provider->alloc_size == coarse_params->init_buffer_size); + } else if (coarse_provider->init_buffer) { + block_t *alloc = ravl_tree_add_new(coarse_provider->upstream_alloc, + coarse_provider->init_buffer, + coarse_params->init_buffer_size); + if (alloc == NULL) { + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto err_delete_ravl_all_blocks; + } + + block_t *new_block = ravl_tree_add_new(coarse_provider->all_blocks, + coarse_provider->init_buffer, + coarse_params->init_buffer_size); + if (new_block == NULL) { + assert(0); + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto block_error; + } + + new_block->used = true; + new_block->origin = alloc; + + coarse_provider->alloc_size += coarse_params->init_buffer_size; + coarse_provider->used_size += coarse_params->init_buffer_size; + + if (coarse_provider->trace) { + printf("coarse_ALLOC (init) %zu used %zu alloc %zu\n", + coarse_params->init_buffer_size, coarse_provider->used_size, + coarse_provider->alloc_size); + } + + coarse_memory_provider_free(coarse_provider, + coarse_provider->init_buffer, + coarse_params->init_buffer_size); + + assert(coarse_provider->used_size == 0); + assert(coarse_provider->alloc_size == coarse_params->init_buffer_size); + } + + *provider = coarse_provider; + + assert(debug_check(coarse_provider)); + + return UMF_RESULT_SUCCESS; + +block_error: + ravl_tree_rm(coarse_provider->upstream_alloc, coarse_provider->init_buffer); +err_delete_ravl_all_blocks: + ravl_delete(coarse_provider->all_blocks); +err_delete_ravl_free_blocks: + ravl_delete(coarse_provider->free_blocks); +err_delete_ravl_upstream_alloc: + ravl_delete(coarse_provider->upstream_alloc); +err_free_coarse_provider: + umf_ba_global_free(coarse_provider); + return ret; +} + +static void ravl_cb_rm_upstream_alloc_node(void *data, void *arg) { + assert(data); + assert(arg); + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)arg; + ravl_data_t *node_data = data; + block_t *alloc = node_data->value; + assert(alloc); + + if (coarse_provider->upstream_memory_provider && + coarse_provider->WA_do_not_free_upstream == false) { + umf_result_t ret = + 
umfMemoryProviderFree(coarse_provider->upstream_memory_provider, + alloc->data, alloc->size); + + // We would continue to deallocate alloc blocks even if the upstream + // provider doesn't return success. + assert(ret == UMF_RESULT_SUCCESS); + (void)ret; + } + + assert(coarse_provider->alloc_size >= alloc->size); + coarse_provider->alloc_size -= alloc->size; + + umf_ba_global_free(alloc); +} + +static void ravl_cb_rm_all_blocks_node(void *data, void *arg) { + assert(data); + assert(arg); + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)arg; + ravl_data_t *node_data = data; + block_t *block = node_data->value; + assert(block); + + if (block->used) { + assert(coarse_provider->used_size >= block->size); + coarse_provider->used_size -= block->size; + } + + if (block->free_list_ptr) { + free_blocks_rm_node(coarse_provider->free_blocks, block->free_list_ptr); + } + + umf_ba_global_free(block); +} + +static void coarse_memory_provider_finalize(void *provider) { + if (provider == NULL) { + assert(0); + return; + } + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + ravl_foreach(coarse_provider->upstream_alloc, + ravl_cb_rm_upstream_alloc_node, coarse_provider); + assert(coarse_provider->alloc_size == 0); + + ravl_foreach(coarse_provider->all_blocks, ravl_cb_rm_all_blocks_node, + coarse_provider); + assert(coarse_provider->used_size == 0); + + ravl_delete(coarse_provider->upstream_alloc); + ravl_delete(coarse_provider->all_blocks); + ravl_delete(coarse_provider->free_blocks); + + umf_ba_global_free(coarse_provider); +} + +static umf_result_t coarse_memory_provider_alloc(void *provider, size_t size, + size_t alignment, + void **resultPtr) { + umf_result_t ret = UMF_RESULT_SUCCESS; + int rv; + + if (provider == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (resultPtr == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + assert(debug_check(coarse_provider)); + + if (util_mutex_lock(&coarse_provider->lock) != 0) { + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + // apply alignment + size += alignment; + + // Find first blocks with greater or equal size. + // If the block that we want to reuse has greater size, split it. + // Try to merge split part with the successor if it is not used. + block_t *curr = free_blocks_rm_ge(coarse_provider->free_blocks, size); + if (curr && curr->size > size) { + assert(curr->used == false); + + // In case of non-zero alignment create an aligned block what would be + // further used. + // TODO: we leave some free mem in current block - merge it with prev? 
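+        // Round the block start up to the requested alignment (the mask below
+        // assumes a power-of-two alignment); 'padding' is the number of
+        // leading bytes that stay behind in 'curr' as a small free block.
+        // Illustrative values: data = 0x1003, alignment = 8 ->
+        // aligned = 0x1008, padding = 5.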
+ uintptr_t original = (uintptr_t)curr->data; + uintptr_t aligned = (original + alignment - 1) & ~(alignment - 1); + size_t padding = aligned - original; + if (alignment > 0 && padding > 0) { + size_t curr_offset = curr->data - curr->origin->data; + block_t *origin = + alloc_find_origin(curr->origin, curr_offset + padding); + assert(origin); + + block_t *aligned_block = + ravl_tree_add_new(coarse_provider->all_blocks, + curr->data + padding, curr->size - padding); + if (aligned_block == NULL) { + if (util_mutex_unlock(&coarse_provider->lock) != 0) { + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + curr->used = false; + curr->size = padding; + aligned_block->origin = origin; + + rv = free_blocks_add(coarse_provider->free_blocks, curr); + if (rv) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // Calculate new size and use aligned block. + size -= padding; + curr = aligned_block; + assert(curr->size >= size); + } + + // Split the rest of block and put the new block after the one that we + // use. + // Find the origin of the new block. + size_t curr_offset = curr->data - curr->origin->data; + block_t *origin = alloc_find_origin(curr->origin, curr_offset + size); + assert(origin); + void *data = curr->data + size; + + block_t *new_block = ravl_tree_add_new(coarse_provider->all_blocks, + data, curr->size - size); + if (new_block == NULL) { + if (util_mutex_unlock(&coarse_provider->lock) != 0) { + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + new_block->origin = origin; + new_block->used = false; + + curr->used = true; + curr->size = size; + + *resultPtr = curr->data; + coarse_provider->used_size += size; + + // Try to merge new empty block with the next one. + new_block = free_block_merge_with_next( + coarse_provider->upstream_memory_provider, + coarse_provider->all_blocks, coarse_provider->free_blocks, + new_block); + rv = free_blocks_add(coarse_provider->free_blocks, new_block); + if (rv) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + if (coarse_provider->trace) { + printf("coarse_ALLOC (split_block) %zu used %zu alloc %zu\n", size, + coarse_provider->used_size, coarse_provider->alloc_size); + } + + if (util_mutex_unlock(&coarse_provider->lock) != 0) { + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + assert(debug_check(coarse_provider)); + return UMF_RESULT_SUCCESS; + } else if (curr && curr->size == size) { + assert(curr->used == false); + + // In case of non-zero alignment create an aligned block what would be + // further used. + // TODO: we leave some free mem in current block - merge it with prev? 
+ uintptr_t original = (uintptr_t)curr->data; + uintptr_t aligned = (original + alignment - 1) & ~(alignment - 1); + size_t padding = aligned - original; + if (alignment > 0 && padding > 0) { + + size_t curr_offset = curr->data - curr->origin->data; + block_t *origin = + alloc_find_origin(curr->origin, curr_offset + padding); + assert(origin); + + block_t *aligned_block = + ravl_tree_add_new(coarse_provider->all_blocks, + curr->data + padding, curr->size - padding); + if (aligned_block == NULL) { + if (util_mutex_unlock(&coarse_provider->lock) != 0) { + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + curr->used = false; + curr->size = padding; + aligned_block->origin = origin; + + rv = free_blocks_add(coarse_provider->free_blocks, curr); + if (rv) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // Calculate new size and use aligned block. + size -= padding; + curr = aligned_block; + assert(curr->size >= size); + } + + curr->used = true; + + *resultPtr = curr->data; + coarse_provider->used_size += size; + + if (coarse_provider->trace) { + printf("coarse_ALLOC (same_block) %zu used %zu alloc %zu\n", size, + coarse_provider->used_size, coarse_provider->alloc_size); + } + + if (util_mutex_unlock(&coarse_provider->lock) != 0) { + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + assert(debug_check(coarse_provider)); + return UMF_RESULT_SUCCESS; + } + + // no suitable block - try to get more memory from the upstream provider + if (coarse_provider->upstream_memory_provider == 0) { + // TODO: OOM + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + umfMemoryProviderAlloc(coarse_provider->upstream_memory_provider, size, + alignment, resultPtr); + if (*resultPtr == NULL) { + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + // TODO alignment + + block_t *alloc = + ravl_tree_add_new(coarse_provider->upstream_alloc, *resultPtr, size); + if (alloc == NULL) { + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto alloc_error; + } + + block_t *new_block = + ravl_tree_add_new(coarse_provider->all_blocks, *resultPtr, size); + if (new_block == NULL) { + assert(0); + ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + goto block_error; + } + + new_block->used = true; + new_block->origin = alloc; + + coarse_provider->alloc_size += size; + coarse_provider->used_size += size; + + if (coarse_provider->trace) { + printf("coarse_ALLOC (upstream) %zu used %zu alloc %zu\n", size, + coarse_provider->used_size, coarse_provider->alloc_size); + } + + if (util_mutex_unlock(&coarse_provider->lock) != 0) { + assert(0); + ret = UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + goto unlock_error; + } + + assert(debug_check(coarse_provider)); + return UMF_RESULT_SUCCESS; + +unlock_error: + ravl_tree_rm(coarse_provider->all_blocks, *resultPtr); + +block_error: + ravl_tree_rm(coarse_provider->upstream_alloc, *resultPtr); + +alloc_error: + + if (coarse_provider->WA_do_not_free_upstream == false) { + umfMemoryProviderFree(coarse_provider->upstream_memory_provider, + *resultPtr, size); + } + return ret; +} + +static umf_result_t coarse_memory_provider_free(void *provider, void *ptr, + size_t bytes) { + if (provider == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + assert(debug_check(coarse_provider)); + + if (util_mutex_lock(&coarse_provider->lock) != 0) { + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + block_t *block = 
ravl_tree_find(coarse_provider->all_blocks, ptr); + if (block == NULL) { + // the block was not found + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + if (bytes) { + assert(bytes == block->size); + } + + if (coarse_provider->trace) { + printf("coarse_FREE (return_block_to_pool) %zu used %zu alloc %zu\n", + block->size, coarse_provider->used_size - block->size, + coarse_provider->alloc_size); + } + + assert(coarse_provider->used_size >= block->size); + coarse_provider->used_size -= block->size; + + block->used = false; + + // Merge with prev and/or next block if they are unused and have continuous data. + block = free_block_merge_with_prev( + coarse_provider->upstream_memory_provider, coarse_provider->all_blocks, + coarse_provider->free_blocks, block); + block = free_block_merge_with_next( + coarse_provider->upstream_memory_provider, coarse_provider->all_blocks, + coarse_provider->free_blocks, block); + + int rv = free_blocks_add(coarse_provider->free_blocks, block); + if (rv) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + assert(debug_check(coarse_provider)); + + if (util_mutex_unlock(&coarse_provider->lock) != 0) { + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + + return UMF_RESULT_SUCCESS; +} + +static void coarse_memory_provider_get_last_native_error(void *provider, + const char **ppMessage, + int32_t *pError) { + if (provider == NULL) { + return; + } + + (void)ppMessage; + (void)pError; + assert(0); +} + +static umf_result_t coarse_memory_provider_get_min_page_size(void *provider, + void *ptr, + size_t *pageSize) { + if (provider == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + if (coarse_provider->upstream_memory_provider) { + umf_result_t ret = umfMemoryProviderGetMinPageSize( + coarse_provider->upstream_memory_provider, ptr, pageSize); + return ret; + } else { + *pageSize = 1024; + return UMF_RESULT_SUCCESS; + } +} + +static umf_result_t +coarse_memory_provider_get_recommended_page_size(void *provider, size_t size, + size_t *pageSize) { + if (provider == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + assert(coarse_provider->upstream_memory_provider); + umf_result_t ret = umfMemoryProviderGetRecommendedPageSize( + coarse_provider->upstream_memory_provider, size, pageSize); + + return ret; +} + +static const char *coarse_memory_provider_get_name(void *provider) { + (void)provider; + + return "coarse"; +} + +static umf_result_t +coarse_memory_provider_get_stats(void *provider, + coarse_memory_provider_stats_t *stats) { + if (provider == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (stats == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)provider; + + // count blocks + size_t upstream_blocks_num = 0; + ravl_foreach(coarse_provider->upstream_alloc, ravl_cb_count, + &upstream_blocks_num); + + size_t blocks_num = 0; + ravl_foreach(coarse_provider->all_blocks, ravl_cb_count, &blocks_num); + + size_t free_blocks_num = 0; + ravl_foreach(coarse_provider->free_blocks, ravl_cb_count_free, + &free_blocks_num); + + stats->alloc_size = coarse_provider->alloc_size; + stats->used_size = coarse_provider->used_size; + stats->upstream_blocks_num = upstream_blocks_num; + stats->blocks_num = blocks_num; + stats->free_blocks_num = free_blocks_num; + + 
return UMF_RESULT_SUCCESS; +} + +umf_memory_provider_ops_t UMF_COARSE_MEMORY_PROVIDER_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = coarse_memory_provider_initialize, + .finalize = coarse_memory_provider_finalize, + .alloc = coarse_memory_provider_alloc, + .free = coarse_memory_provider_free, + .get_last_native_error = coarse_memory_provider_get_last_native_error, + .get_recommended_page_size = + coarse_memory_provider_get_recommended_page_size, + .get_min_page_size = coarse_memory_provider_get_min_page_size, + .get_name = coarse_memory_provider_get_name, +}; + +umf_memory_provider_ops_t *umfCoarseMemoryProviderOps(void) { + return &UMF_COARSE_MEMORY_PROVIDER_OPS; +} + +static void ravl_cb_count(void *data, void *arg) { + assert(arg); + (void)data; /* unused */ + + size_t *blocks_num = arg; + (*blocks_num)++; +} + +static void ravl_cb_count_free(void *data, void *arg) { + assert(data); + assert(arg); + + ravl_data_t *node_data = data; + assert(node_data); + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + struct ravl_free_blocks_elem_t *free_block = head_node->head; + assert(free_block); + + size_t *blocks_num = arg; + while (free_block) { + (*blocks_num)++; + free_block = free_block->next; + } +} + +coarse_memory_provider_stats_t +umfCoarseMemoryProviderGetStats(umf_memory_provider_handle_t provider) { + assert(provider); + void *priv = umfMemoryProviderGetPriv(provider); + + coarse_memory_provider_stats_t stats = {0}; + coarse_memory_provider_get_stats(priv, &stats); + + return stats; +} + +umf_memory_provider_handle_t umfCoarseMemoryProviderGetUpstreamProvider( + umf_memory_provider_handle_t provider) { + assert(provider); + + coarse_memory_provider_t *coarse_provider = + (struct coarse_memory_provider_t *)umfMemoryProviderGetPriv(provider); + + return coarse_provider->upstream_memory_provider; +} diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index bf461d82b..26d171bb8 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -138,8 +138,6 @@ typedef struct umf_tracking_memory_provider_t { critnib *ipcCache; } umf_tracking_memory_provider_t; -typedef struct umf_tracking_memory_provider_t umf_tracking_memory_provider_t; - static umf_result_t trackingAlloc(void *hProvider, size_t size, size_t alignment, void **ptr) { umf_tracking_memory_provider_t *p = diff --git a/src/ravl/ravl.c b/src/ravl/ravl.c new file mode 100644 index 000000000..52602f889 --- /dev/null +++ b/src/ravl/ravl.c @@ -0,0 +1,551 @@ +/* + * + * Copyright (C) 2018-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * ravl.c -- implementation of a RAVL tree + * https://sidsen.azurewebsites.net//papers/ravl-trees-journal.pdf + */ + +#include "ravl.h" +#include "../src/utils/utils_common.h" +#include "../src/utils/utils_concurrency.h" +#include "assert.h" + +#include +#include +#include +#include + +#define RAVL_DEFAULT_DATA_SIZE (sizeof(void *)) + +enum ravl_slot_type { + RAVL_LEFT, + RAVL_RIGHT, + + MAX_SLOTS, + + RAVL_ROOT +}; + +struct ravl_node { + struct ravl_node *parent; + struct ravl_node *slots[MAX_SLOTS]; + int32_t rank; /* cannot be greater than height of the subtree */ + int32_t pointer_based; + char data[]; +}; + +struct ravl { + struct ravl_node *root; + ravl_compare *compare; + size_t data_size; +}; + +/* + * ravl_new -- creates a new ravl tree instance + */ +struct ravl *ravl_new_sized(ravl_compare *compare, size_t data_size) { + struct ravl *r = malloc(sizeof(*r)); + if (r == NULL) { + return NULL; + } + + r->compare = compare; + r->root = NULL; + r->data_size = data_size; + + return r; +} + +/* + * ravl_new -- creates a new tree that stores data pointers + */ +struct ravl *ravl_new(ravl_compare *compare) { + return ravl_new_sized(compare, RAVL_DEFAULT_DATA_SIZE); +} + +/* + * ravl_clear_node -- (internal) recursively clears the given subtree, + * calls callback in an in-order fashion. Optionally frees the given node. + */ +static void ravl_foreach_node(struct ravl_node *n, ravl_cb cb, void *arg, + int free_node) { + if (n == NULL) { + return; + } + + ravl_foreach_node(n->slots[RAVL_LEFT], cb, arg, free_node); + if (cb) { + cb((void *)n->data, arg); + } + ravl_foreach_node(n->slots[RAVL_RIGHT], cb, arg, free_node); + + if (free_node) { + free(n); + } +} + +/* + * ravl_clear -- clears the entire tree, starting from the root + */ +void ravl_clear(struct ravl *ravl) { + ravl_foreach_node(ravl->root, NULL, NULL, 1); + ravl->root = NULL; +} + +/* + * ravl_delete_cb -- clears and deletes the given ravl instance, calls callback + */ +void ravl_delete_cb(struct ravl *ravl, ravl_cb cb, void *arg) { + ravl_foreach_node(ravl->root, cb, arg, 1); + free(ravl); +} + +/* + * ravl_delete -- clears and deletes the given ravl instance + */ +void ravl_delete(struct ravl *ravl) { ravl_delete_cb(ravl, NULL, NULL); } + +/* + * ravl_foreach -- traverses the entire tree, calling callback for every node + */ +void ravl_foreach(struct ravl *ravl, ravl_cb cb, void *arg) { + ravl_foreach_node(ravl->root, cb, arg, 0); +} + +/* + * ravl_empty -- checks whether the given tree is empty + */ +int ravl_empty(struct ravl *ravl) { return ravl->root == NULL; } + +/* + * ravl_node_insert_constructor -- node data constructor for ravl_insert + */ +static void ravl_node_insert_constructor(void *data, size_t data_size, + const void *arg) { + /* suppress unused-parameter errors */ + (void)data_size; + + /* copy only the 'arg' pointer */ + memcpy(data, &arg, sizeof(arg)); +} + +/* + * ravl_node_copy_constructor -- node data constructor for ravl_emplace_copy + */ +static void ravl_node_copy_constructor(void *data, size_t data_size, + const void *arg) { + memcpy(data, arg, data_size); +} + +/* + * ravl_new_node -- (internal) allocates and initializes a new node + */ +static struct ravl_node *ravl_new_node(struct ravl *ravl, ravl_constr constr, + const void *arg) { + struct ravl_node *n = malloc(sizeof(*n) + ravl->data_size); + if (n == NULL) { + return NULL; + } + + n->parent = NULL; + n->slots[RAVL_LEFT] = NULL; + n->slots[RAVL_RIGHT] = NULL; + n->rank = 
0; + n->pointer_based = constr == ravl_node_insert_constructor; + constr(n->data, ravl->data_size, arg); + + return n; +} + +/* + * ravl_slot_opposite -- (internal) returns the opposite slot type, cannot be + * called for root type + */ +static enum ravl_slot_type ravl_slot_opposite(enum ravl_slot_type t) { + assert(t != RAVL_ROOT); + + return t == RAVL_LEFT ? RAVL_RIGHT : RAVL_LEFT; +} + +/* + * ravl_node_slot_type -- (internal) returns the type of the given node: + * left child, right child or root + */ +static enum ravl_slot_type ravl_node_slot_type(struct ravl_node *n) { + if (n->parent == NULL) { + return RAVL_ROOT; + } + + return n->parent->slots[RAVL_LEFT] == n ? RAVL_LEFT : RAVL_RIGHT; +} + +/* + * ravl_node_sibling -- (internal) returns the sibling of the given node, + * NULL if the node is root (has no parent) + */ +static struct ravl_node *ravl_node_sibling(struct ravl_node *n) { + enum ravl_slot_type t = ravl_node_slot_type(n); + if (t == RAVL_ROOT) { + return NULL; + } + + return n->parent->slots[t == RAVL_LEFT ? RAVL_RIGHT : RAVL_LEFT]; +} + +/* + * ravl_node_ref -- (internal) returns the pointer to the memory location in + * which the given node resides + */ +static struct ravl_node **ravl_node_ref(struct ravl *ravl, + struct ravl_node *n) { + enum ravl_slot_type t = ravl_node_slot_type(n); + + return t == RAVL_ROOT ? &ravl->root : &n->parent->slots[t]; +} + +/* + * ravl_rotate -- (internal) performs a rotation around a given node + * + * The node n swaps place with its parent. If n is right child, parent becomes + * the left child of n, otherwise parent becomes right child of n. + */ +static void ravl_rotate(struct ravl *ravl, struct ravl_node *n) { + assert(n->parent != NULL); + struct ravl_node *p = n->parent; + struct ravl_node **pref = ravl_node_ref(ravl, p); + + enum ravl_slot_type t = ravl_node_slot_type(n); + enum ravl_slot_type t_opposite = ravl_slot_opposite(t); + + n->parent = p->parent; + p->parent = n; + *pref = n; + + if ((p->slots[t] = n->slots[t_opposite]) != NULL) { + p->slots[t]->parent = p; + } + n->slots[t_opposite] = p; +} + +/* + * ravl_node_rank -- (internal) returns the rank of the node + * + * For the purpose of balancing, NULL nodes have rank -1. + */ +static int ravl_node_rank(struct ravl_node *n) { + return n == NULL ? -1 : n->rank; +} + +/* + * ravl_node_rank_difference_parent -- (internal) returns the rank different + * between parent node p and its child n + * + * Every rank difference must be positive. + * + * Either of these can be NULL. + */ +static int ravl_node_rank_difference_parent(struct ravl_node *p, + struct ravl_node *n) { + return ravl_node_rank(p) - ravl_node_rank(n); +} + +/* + * ravl_node_rank_difference - (internal) returns the rank difference between + * parent and its child + * + * Can be used to check if a given node is an i-child. 
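+ * For example, a child of rank 1 under a parent of rank 2 is a 1-child; a
+ * missing (NULL) child counts as rank -1, so a leaf of rank 0 under a rank-0
+ * parent is a 0-child, which ravl_balance() then repairs.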
+ */ +static int ravl_node_rank_difference(struct ravl_node *n) { + return ravl_node_rank_difference_parent(n->parent, n); +} + +/* + * ravl_node_is_i_j -- (internal) checks if a given node is strictly i,j-node + */ +static int ravl_node_is_i_j(struct ravl_node *n, int i, int j) { + return (ravl_node_rank_difference_parent(n, n->slots[RAVL_LEFT]) == i && + ravl_node_rank_difference_parent(n, n->slots[RAVL_RIGHT]) == j); +} + +/* + * ravl_node_is -- (internal) checks if a given node is i,j-node or j,i-node + */ +static int ravl_node_is(struct ravl_node *n, int i, int j) { + return ravl_node_is_i_j(n, i, j) || ravl_node_is_i_j(n, j, i); +} + +/* + * ravl_node_promote -- promotes a given node by increasing its rank + */ +static void ravl_node_promote(struct ravl_node *n) { n->rank += 1; } + +/* + * ravl_node_promote -- demotes a given node by increasing its rank + */ +static void ravl_node_demote(struct ravl_node *n) { + assert(n->rank > 0); + n->rank -= 1; +} + +/* + * ravl_balance -- balances the tree after insert + * + * This function must restore the invariant that every rank + * difference is positive. + */ +static void ravl_balance(struct ravl *ravl, struct ravl_node *n) { + /* walk up the tree, promoting nodes */ + while (n->parent && ravl_node_is(n->parent, 0, 1)) { + ravl_node_promote(n->parent); + n = n->parent; + } + + /* + * Either the rank rule holds or n is a 0-child whose sibling is an + * i-child with i > 1. + */ + struct ravl_node *s = ravl_node_sibling(n); + if (!(ravl_node_rank_difference(n) == 0 && + ravl_node_rank_difference_parent(n->parent, s) > 1)) { + return; + } + + struct ravl_node *y = n->parent; + /* if n is a left child, let z be n's right child and vice versa */ + enum ravl_slot_type t = ravl_slot_opposite(ravl_node_slot_type(n)); + struct ravl_node *z = n->slots[t]; + + if (z == NULL || ravl_node_rank_difference(z) == 2) { + ravl_rotate(ravl, n); + ravl_node_demote(y); + } else if (ravl_node_rank_difference(z) == 1) { + ravl_rotate(ravl, z); + ravl_rotate(ravl, z); + ravl_node_promote(z); + ravl_node_demote(n); + ravl_node_demote(y); + } +} + +/* + * ravl_insert -- insert data into the tree + */ +int ravl_insert(struct ravl *ravl, const void *data) { + return ravl_emplace(ravl, ravl_node_insert_constructor, data); +} + +/* + * ravl_insert -- copy construct data inside of a new tree node + */ +int ravl_emplace_copy(struct ravl *ravl, const void *data) { + return ravl_emplace(ravl, ravl_node_copy_constructor, data); +} + +/* + * ravl_emplace -- construct data inside of a new tree node + */ +int ravl_emplace(struct ravl *ravl, ravl_constr constr, const void *arg) { + struct ravl_node *n = ravl_new_node(ravl, constr, arg); + if (n == NULL) { + return -1; + } + + /* walk down the tree and insert the new node into a missing slot */ + struct ravl_node **dstp = &ravl->root; + struct ravl_node *dst = NULL; + while (*dstp != NULL) { + dst = (*dstp); + int cmp_result = ravl->compare(ravl_data(n), ravl_data(dst)); + if (cmp_result == 0) { + goto error_duplicate; + } + + dstp = &dst->slots[cmp_result > 0]; + } + n->parent = dst; + *dstp = n; + + ravl_balance(ravl, n); + + return 0; + +error_duplicate: + errno = EEXIST; + free(n); + return -1; +} + +/* + * ravl_node_type_most -- (internal) returns left-most or right-most node in + * the subtree + */ +static struct ravl_node *ravl_node_type_most(struct ravl_node *n, + enum ravl_slot_type t) { + while (n->slots[t] != NULL) { + n = n->slots[t]; + } + + return n; +} + +/* + * ravl_node_cessor -- (internal) returns the successor 
or predecessor of the + * node + */ +static struct ravl_node *ravl_node_cessor(struct ravl_node *n, + enum ravl_slot_type t) { + /* + * If t child is present, we are looking for t-opposite-most node + * in t child subtree + */ + if (n->slots[t]) { + return ravl_node_type_most(n->slots[t], ravl_slot_opposite(t)); + } + + /* otherwise get the first parent on the t path */ + while (n->parent != NULL && n == n->parent->slots[t]) { + n = n->parent; + } + + return n->parent; +} + +/* + * ravl_node_successor -- returns node's successor + * + * It's the first node larger than n. + */ +struct ravl_node *ravl_node_successor(struct ravl_node *n) { + return ravl_node_cessor(n, RAVL_RIGHT); +} + +/* + * ravl_node_predecessor -- returns node's predecessor + * + * It's the first node smaller than n. + */ +struct ravl_node *ravl_node_predecessor(struct ravl_node *n) { + return ravl_node_cessor(n, RAVL_LEFT); +} + +/* + * ravl_predicate_holds -- (internal) verifies the given predicate for + * the current node in the search path + * + * If the predicate holds for the given node or a node that can be directly + * derived from it, returns 1. Otherwise returns 0. + */ +static int ravl_predicate_holds(int result, struct ravl_node **ret, + struct ravl_node *n, + enum ravl_predicate flags) { + if (flags & RAVL_PREDICATE_EQUAL) { + if (result == 0) { + *ret = n; + return 1; + } + } + if (flags & RAVL_PREDICATE_GREATER) { + if (result < 0) { /* data < n->data */ + *ret = n; + return 0; + } else if (result == 0) { + *ret = ravl_node_successor(n); + return 1; + } + } + if (flags & RAVL_PREDICATE_LESS) { + if (result > 0) { /* data > n->data */ + *ret = n; + return 0; + } else if (result == 0) { + *ret = ravl_node_predecessor(n); + return 1; + } + } + + return 0; +} + +/* + * ravl_find -- searches for the node in the tree + */ +struct ravl_node *ravl_find(struct ravl *ravl, const void *data, + enum ravl_predicate flags) { + struct ravl_node *r = NULL; + struct ravl_node *n = ravl->root; + while (n) { + int result = ravl->compare(data, ravl_data(n)); + if (ravl_predicate_holds(result, &r, n, flags)) { + return r; + } + + n = n->slots[result > 0]; + } + + return r; +} + +/* + * ravl_remove -- removes the given node from the tree + */ +void ravl_remove(struct ravl *ravl, struct ravl_node *n) { + if (n->slots[RAVL_LEFT] != NULL && n->slots[RAVL_RIGHT] != NULL) { + /* if both children are present, remove the successor instead */ + struct ravl_node *s = ravl_node_successor(n); + memcpy(n->data, s->data, ravl->data_size); + + ravl_remove(ravl, s); + } else { + /* swap n with the child that may exist */ + struct ravl_node *r = + n->slots[RAVL_LEFT] ?
n->slots[RAVL_LEFT] : n->slots[RAVL_RIGHT]; + if (r != NULL) { + r->parent = n->parent; + } + + *ravl_node_ref(ravl, n) = r; + free(n); + } +} + +/* + * ravl_data -- returns the data contained within the node + */ +void *ravl_data(struct ravl_node *node) { + if (node->pointer_based) { + void *data; + memcpy(&data, node->data, sizeof(void *)); + return data; + } else { + return (void *)node->data; + } +} + +/* + * ravl_first -- returns first (left-most) node in the tree + */ +struct ravl_node *ravl_first(struct ravl *ravl) { + if (ravl->root) { + return ravl_node_type_most(ravl->root, RAVL_LEFT); + } + + return NULL; +} + +/* + * ravl_last -- returns last (right-most) node in the tree + */ +struct ravl_node *ravl_last(struct ravl *ravl) { + if (ravl->root) { + return ravl_node_type_most(ravl->root, RAVL_RIGHT); + } + + return NULL; +} diff --git a/src/ravl/ravl.h b/src/ravl/ravl.h new file mode 100644 index 000000000..741f20015 --- /dev/null +++ b/src/ravl/ravl.h @@ -0,0 +1,63 @@ +/* + * + * Copyright (C) 2018-2023 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * ravl.h -- internal definitions for ravl tree + */ + +#ifndef UMF_RAVL_H +#define UMF_RAVL_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct ravl; +struct ravl_node; + +enum ravl_predicate { + RAVL_PREDICATE_EQUAL = 1 << 0, + RAVL_PREDICATE_GREATER = 1 << 1, + RAVL_PREDICATE_LESS = 1 << 2, + RAVL_PREDICATE_LESS_EQUAL = RAVL_PREDICATE_EQUAL | RAVL_PREDICATE_LESS, + RAVL_PREDICATE_GREATER_EQUAL = + RAVL_PREDICATE_EQUAL | RAVL_PREDICATE_GREATER, +}; + +typedef int ravl_compare(const void *lhs, const void *rhs); +typedef void ravl_cb(void *data, void *arg); +typedef void ravl_constr(void *data, size_t data_size, const void *arg); + +struct ravl *ravl_new(ravl_compare *compare); +struct ravl *ravl_new_sized(ravl_compare *compare, size_t data_size); +void ravl_delete(struct ravl *ravl); +void ravl_delete_cb(struct ravl *ravl, ravl_cb cb, void *arg); +void ravl_foreach(struct ravl *ravl, ravl_cb cb, void *arg); +int ravl_empty(struct ravl *ravl); +void ravl_clear(struct ravl *ravl); +int ravl_insert(struct ravl *ravl, const void *data); +int ravl_emplace(struct ravl *ravl, ravl_constr constr, const void *arg); +int ravl_emplace_copy(struct ravl *ravl, const void *data); + +struct ravl_node *ravl_find(struct ravl *ravl, const void *data, + enum ravl_predicate predicate_flags); +struct ravl_node *ravl_first(struct ravl *ravl); +struct ravl_node *ravl_last(struct ravl *ravl); +void *ravl_data(struct ravl_node *node); +void ravl_remove(struct ravl *ravl, struct ravl_node *node); +struct ravl_node *ravl_node_successor(struct ravl_node *n); +struct ravl_node *ravl_node_predecessor(struct ravl_node *n); + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_RAVL_H */ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0a6fc27b3..b488901e5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -121,6 +121,15 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) NAME c_api_disjoint_pool SRCS c_api/disjoint_pool.c LIBS disjoint_pool) + # TODO fix case for win + shared lib + if(UMF_BUILD_SHARED_LIBRARY) + + else() + add_umf_test( + NAME disjointCoarseMallocPool + SRCS disjointCoarseMallocPool.cpp + LIBS disjoint_pool) + endif() endif() if(UMF_BUILD_LIBUMF_POOL_DISJOINT @@ -210,6 +219,7 @@ add_umf_test( NAME base_alloc SRCS ${BA_SOURCES_FOR_TEST} test_base_alloc.cpp LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( NAME 
base_alloc_linear SRCS ${BA_SOURCES_FOR_TEST} test_base_alloc_linear.cpp diff --git a/test/disjointCoarseMallocPool.cpp b/test/disjointCoarseMallocPool.cpp new file mode 100644 index 000000000..157ff50cc --- /dev/null +++ b/test/disjointCoarseMallocPool.cpp @@ -0,0 +1,610 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// This file contains tests for UMF provider API + +#include +#include +#include +#include + +#include "../src/memory_provider_internal.h" +#include "provider.hpp" +#include "test_helpers.h" + +#include +#include +#include + +using umf_test::test; + +// TODO move malloc provider somewhere + +constexpr int PAGE_SIZE = 4 * 1024; + +static enum umf_result_t mallocInitialize(void *params, void **pool) { + (void)params; + *pool = NULL; + return UMF_RESULT_SUCCESS; +} + +static void mallocFinalize(void *pool) { (void)pool; } + +static enum umf_result_t mallocAlloc(void *provider, size_t size, + size_t alignment, void **ptr) { + (void)provider; + (void)alignment; + *ptr = calloc(1, size); + return UMF_RESULT_SUCCESS; +} + +static enum umf_result_t mallocFree(void *provider, void *ptr, size_t size) { + (void)provider; + (void)size; + free(ptr); + return UMF_RESULT_SUCCESS; +} + +static void mallocGetLastError(void *provider, const char **ppMsg, + int32_t *pError) { + (void)provider; + (void)ppMsg; + (void)pError; + assert(0); +} + +static enum umf_result_t +mallocGetRecommendedPageSize(void *provider, size_t size, size_t *pageSize) { + (void)provider; + (void)size; + *pageSize = PAGE_SIZE; + return UMF_RESULT_SUCCESS; +} + +static enum umf_result_t mallocGetPageSize(void *provider, void *ptr, + + size_t *pageSize) { + (void)provider; + (void)ptr; + *pageSize = PAGE_SIZE; + return UMF_RESULT_SUCCESS; +} + +static enum umf_result_t mallocPurgeLazy(void *provider, void *ptr, + size_t size) { + (void)provider; + (void)ptr; + (void)size; + assert(0); + return UMF_RESULT_SUCCESS; +} + +static enum umf_result_t mallocPurgeForce(void *provider, void *ptr, + size_t size) { + (void)provider; + (void)ptr; + (void)size; + assert(0); + return UMF_RESULT_SUCCESS; +} + +static enum umf_result_t mallocAllocSplit(void *provider, void *ptr, + size_t totalSize, size_t firstSize) { + (void)provider; + (void)ptr; + (void)totalSize; + (void)firstSize; + + return UMF_RESULT_SUCCESS; +} + +static enum umf_result_t mallocAllocMerge(void *provider, void *ptr1, + void *ptr2, size_t totalSize) { + (void)provider; + (void)ptr1; + (void)ptr2; + (void)totalSize; + + return UMF_RESULT_SUCCESS; +} + +static const char *mallocName(void *provider) { + (void)provider; + return "malloc"; +} + +struct umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = { + UMF_VERSION_CURRENT, + mallocInitialize, + mallocFinalize, + mallocAlloc, + mallocFree, + mallocGetLastError, + mallocGetRecommendedPageSize, + mallocGetPageSize, + mallocName, + // ext + { + mallocPurgeLazy, + mallocPurgeForce, + mallocAllocMerge, + mallocAllocSplit, + }}; + +TEST_F(test, disjointCoarseMallocPool_basic) { + umf_memory_provider_handle_t malloc_memory_provider; + umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t KB = 1024; + const size_t MB = 1024 * KB; + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params = { + malloc_memory_provider, // 
upstream_memory_provider + true, // immediate_init + 0, + init_buffer_size, + true, // trace + false, // WA_do_not_free_upstream + }; + + umf_memory_provider_handle_t coarse_memory_provider; + umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_NE(coarse_memory_provider, nullptr); + + umf_disjoint_pool_params_t disjoint_memory_pool_params = {}; + disjoint_memory_pool_params.SlabMinSize = 4096; + disjoint_memory_pool_params.MaxPoolableSize = 4096; + disjoint_memory_pool_params.Capacity = 4; + disjoint_memory_pool_params.MinBucketSize = 64; + disjoint_memory_pool_params.PoolTrace = 1; + + umf_memory_pool_handle_t pool; + umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + &disjoint_memory_pool_params, UMF_POOL_CREATE_FLAG_NONE, + &pool); + ASSERT_NE(pool, nullptr); + + // test + + umf_memory_provider_handle_t prov = NULL; + umfPoolGetMemoryProvider(pool, &prov); + ASSERT_NE(prov, nullptr); + + // alloc 2x 2MB + void *p1 = umfPoolMalloc(pool, 2 * MB); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 2 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 2); + + void *p2 = umfPoolMalloc(pool, 2 * MB); + ASSERT_NE(p2, nullptr); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 4 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 3); + ASSERT_NE(p1, p2); + + // swap pointers to get p1 < p2 + if (p1 > p2) { + std::swap(p1, p2); + } + + // free + alloc first block + // the block should be reused + // currently there is no purging, so the alloc size shouldn't change + // there should be no block merging between used and not-used blocks + umf_result_t res = umfPoolFree(pool, p1); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 2 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 3); + + p1 = umfPoolMalloc(pool, 2 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 4 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 3); + + // free all allocs + // overall alloc size shouldn't change + // block p2 should merge with the prev free block p1 + // and the remaining init block + res = umfPoolFree(pool, p1); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 3); + res = umfPoolFree(pool, p2); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 0 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 1); + + // test allocations with alignment + // TODO: what about holes? 
+ p1 = umfPoolAlignedMalloc(pool, 1 * MB - 4, 128); + ASSERT_NE(p1, nullptr); + ASSERT_EQ((uintptr_t)p1 & 127, 0); + p2 = umfPoolAlignedMalloc(pool, 1 * MB - 4, 128); + ASSERT_NE(p2, nullptr); + ASSERT_EQ((uintptr_t)p2 & 127, 0); + res = umfPoolFree(pool, p1); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + res = umfPoolFree(pool, p2); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + // alloc whole buffer + // after this, there should be one single block + p1 = umfPoolMalloc(pool, init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 1); + + // free all memory + // alloc 2 MB block - the init block should be split + res = umfPoolFree(pool, p1); + p1 = umfPoolMalloc(pool, 2 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 2 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 2); + + // alloc additional 2 MB + // the non-used block should be used + p2 = umfPoolMalloc(pool, 2 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 4 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 3); + ASSERT_NE(p1, p2); + + // make sure that p1 < p2 + if (p1 > p2) { + std::swap(p1, p2); + } + + // free blocks in order: p2, p1 + // block p1 should merge with the next block p2 + umfPoolFree(pool, p2); + umfPoolFree(pool, p1); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 0 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 1); + + // alloc 10x 2 MB - this should occupy all allocated memory + constexpr int allocs_size = 10; + void *allocs[allocs_size] = {0}; + for (int i = 0; i < allocs_size; i++) { + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, i * 2 * MB); + allocs[i] = umfPoolMalloc(pool, 2 * MB); + ASSERT_NE(allocs[i], nullptr); + } + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 20 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + // there should be no block with free memory left + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, allocs_size); + + // free all memory + for (int i = 0; i < allocs_size; i++) { + res = umfPoolFree(pool, allocs[i]); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).blocks_num, 1); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).used_size, 0 * MB); + ASSERT_EQ(umfCoarseMemoryProviderGetStats(prov).alloc_size, + init_buffer_size); + + umfPoolDestroy(pool); + umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_F(test, disjointCoarseMallocPool_simple1) { + umf_memory_provider_handle_t malloc_memory_provider; + umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t KB = 1024; + const size_t MB = 1024 * KB; + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params = { + malloc_memory_provider, // upstream_memory_provider + true, // immediate_init + 0, + init_buffer_size, + true, // trace + false, //
WA_do_not_free_upstream + }; + + umf_memory_provider_handle_t coarse_memory_provider; + umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_NE(coarse_memory_provider, nullptr); + + umf_disjoint_pool_params_t disjoint_memory_pool_params = {}; + disjoint_memory_pool_params.SlabMinSize = 4096; + disjoint_memory_pool_params.MaxPoolableSize = 4096; + disjoint_memory_pool_params.Capacity = 4; + disjoint_memory_pool_params.MinBucketSize = 64; + disjoint_memory_pool_params.PoolTrace = 1; + + umf_memory_pool_handle_t pool; + umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + &disjoint_memory_pool_params, UMF_POOL_CREATE_FLAG_NONE, + &pool); + ASSERT_NE(pool, nullptr); + + umf_memory_provider_handle_t prov = NULL; + umfPoolGetMemoryProvider(pool, &prov); + ASSERT_NE(prov, nullptr); + + // test 1 + + size_t s1 = 74659 * KB; + size_t s2 = 8206 * KB; + + size_t max_alloc_size = 0; + + // s1 + for (int j = 0; j < 2; j++) { + void *t[6] = {0}; + for (int i = 0; i < 6; i++) { + t[i] = umfPoolMalloc(pool, s1); + ASSERT_NE(t[i], nullptr); + } + + if (max_alloc_size == 0) { + max_alloc_size = umfCoarseMemoryProviderGetStats(prov).alloc_size; + } + + for (int i = 0; i < 6; i++) { + umf_result_t res = umfPoolFree(pool, t[i]); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + } + } + + // s2 + for (int j = 0; j < 2; j++) { + void *t[6] = {0}; + for (int i = 0; i < 6; i++) { + t[i] = umfPoolMalloc(pool, s2); + ASSERT_NE(t[i], nullptr); + } + + // all s2 should fit into single block leaved after freeing s1 + ASSERT_LE(umfCoarseMemoryProviderGetStats(prov).alloc_size, + max_alloc_size); + + for (int i = 0; i < 6; i++) { + umf_result_t res = umfPoolFree(pool, t[i]); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + } + } + + umfPoolDestroy(pool); + umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_F(test, disjointCoarseMallocPool_simple2) { + + umf_memory_provider_handle_t malloc_memory_provider; + umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_NE(malloc_memory_provider, nullptr); + + const size_t KB = 1024; + const size_t MB = 1024 * KB; + + const size_t init_buffer_size = 20 * MB; + + coarse_memory_provider_params_t coarse_memory_provider_params = { + malloc_memory_provider, // upstream_memory_provider + true, // immediate_init + 0, + init_buffer_size, + true, // trace + false, // WA_do_not_free_upstream + }; + + umf_memory_provider_handle_t coarse_memory_provider; + umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_NE(coarse_memory_provider, nullptr); + + umf_disjoint_pool_params_t disjoint_memory_pool_params = {}; + disjoint_memory_pool_params.SlabMinSize = 4096; + disjoint_memory_pool_params.MaxPoolableSize = 4096; + disjoint_memory_pool_params.Capacity = 4; + disjoint_memory_pool_params.MinBucketSize = 64; + disjoint_memory_pool_params.PoolTrace = 1; + + umf_memory_pool_handle_t pool; + umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + &disjoint_memory_pool_params, UMF_POOL_CREATE_FLAG_NONE, + &pool); + ASSERT_NE(pool, nullptr); + + // test + double sizes[] = {2, 4, 0.5, 1, 8, 0.25}; + size_t alignment[] = {0, 4, 0, 16, 32, 128}; + for (int i = 0; i < 6; i++) { + size_t s = (size_t)(sizes[i] * MB); + void *t[8] = {0}; + for (int j = 0; j < 8; j++) { + t[j] = umfPoolAlignedMalloc(pool, s, alignment[i]); + ASSERT_NE(t[j], nullptr); + } + + for (int 
j = 0; j < 8; j++) { + umf_result_t res = umfPoolFree(pool, t[j]); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + } + } + + umfPoolDestroy(pool); + umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +struct alloc_ptr_size { + void *ptr; + size_t size; + + bool operator<(const alloc_ptr_size &other) const { + if (ptr == other.ptr) { + return size < other.size; + } + return ptr < other.ptr; + } +}; + +TEST_F(test, disjointCoarseMMapPool_random) { + + const size_t KB = 1024; + const size_t MB = 1024 * KB; + const size_t init_buffer_size = 200 * MB; + + // Preallocate some memory + void *buf = malloc(init_buffer_size); + ASSERT_NE(buf, nullptr); + memset(buf, 0, init_buffer_size); + + const unsigned char alloc_check_val = 11; + coarse_memory_provider_params_t coarse_memory_provider_params = { + NULL, // upstream_memory_provider + false, // immediate_init - this is used only with the upstream provider + buf, // ptr to preallocated memory + init_buffer_size, + true, // trace + false, // WA_do_not_free_upstream + }; + + umf_memory_provider_handle_t coarse_memory_provider; + umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), + &coarse_memory_provider_params, + &coarse_memory_provider); + ASSERT_NE(coarse_memory_provider, nullptr); + + umf_disjoint_pool_params_t disjoint_memory_pool_params = {}; + disjoint_memory_pool_params.SlabMinSize = 1024; + disjoint_memory_pool_params.MaxPoolableSize = 1024; + disjoint_memory_pool_params.Capacity = 2; + disjoint_memory_pool_params.MinBucketSize = 16; + disjoint_memory_pool_params.PoolTrace = 1; + + umf_memory_pool_handle_t pool; + umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + &disjoint_memory_pool_params, UMF_POOL_CREATE_FLAG_NONE, + &pool); + ASSERT_NE(pool, nullptr); + + // set constant seed so each test run will have the same scenario + uint32_t seed = 1234; + std::mt19937 mt(seed); + + // different sizes to alloc + std::vector<size_t> sizes = {15, 49, 588, 1025, + 2 * KB, 5 * KB, 160 * KB, 511 * KB, + 1000 * KB, MB, 3 * MB, 7 * MB}; + std::uniform_int_distribution<int> sizes_dist(0, (int)(sizes.size() - 1)); + + // each alloc would be done a few times + std::vector<size_t> counts = {1, 3, 4, 8, 9, 11}; + std::uniform_int_distribution<int> counts_dist(0, (int)(counts.size() - 1)); + + // action to take will be random + // alloc = <0, .5), free = <.5, 1) + std::uniform_real_distribution<float> actions_dist(0, 1); + + std::set<alloc_ptr_size> allocs; + for (size_t i = 0; i < 100; i++) { + size_t count = counts[counts_dist(mt)]; + float action = actions_dist(mt); + + if (action < 0.5) { + size_t size = sizes[sizes_dist(mt)]; + std::cout << "size: " << size << " count: " << count + << " action: alloc" << std::endl; + + // alloc + for (size_t j = 0; j < count; j++) { + void *ptr = umfPoolMalloc(pool, size); + ASSERT_NE(ptr, nullptr); + + if (ptr == nullptr) { + break; + } + + // check if first and last bytes are empty and fill them with control data + ASSERT_EQ(((unsigned char *)ptr)[0], 0); + ASSERT_EQ(((unsigned char *)ptr)[size - 1], 0); + ((unsigned char *)ptr)[0] = alloc_check_val; + ((unsigned char *)ptr)[size - 1] = alloc_check_val; + + allocs.insert({ptr, size}); + } + } else { + std::cout << "count: " << count << " action: free" << std::endl; + + // free random allocs + for (size_t j = 0; j < count; j++) { + if (allocs.size() == 0) { + continue; + } + + std::uniform_int_distribution<int> free_dist( + 0, (int)(allocs.size() - 1)); + size_t free_id = free_dist(mt); + auto it = allocs.begin(); + std::advance(it, free_id); + auto [ptr, size] =
(*it); + ASSERT_NE(ptr, nullptr); + + // check if control bytes are set and clean them + + ASSERT_EQ(((unsigned char *)ptr)[0], alloc_check_val); + ASSERT_EQ(((unsigned char *)ptr)[size - 1], alloc_check_val); + ((unsigned char *)ptr)[0] = 0; + ((unsigned char *)ptr)[size - 1] = 0; + + umf_result_t ret = umfPoolFree(pool, ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + allocs.erase((*it)); + } + } + } + + std::cout << "cleanup" << std::endl; + + while (allocs.size()) { + umf_result_t ret = umfPoolFree(pool, (*allocs.begin()).ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + allocs.erase(allocs.begin()); + } + + umfPoolDestroy(pool); + umfMemoryProviderDestroy(coarse_memory_provider); + + free(buf); +} diff --git a/test/supp/memcheck-umf_test-disjointCoarseMallocPool.supp b/test/supp/memcheck-umf_test-disjointCoarseMallocPool.supp new file mode 100644 index 000000000..f6da4a01c --- /dev/null +++ b/test/supp/memcheck-umf_test-disjointCoarseMallocPool.supp @@ -0,0 +1,16 @@ +{ + [TODO - verify and fix if needed] + Memcheck:Cond + fun:_ZN7testing8internal11CmpHelperEQIhiEENS_15AssertionResultEPKcS4_RKT_RKT0_ + fun:_ZN7testing8internal8EqHelper7CompareIhiLPv0EEENS_15AssertionResultEPKcS6_RKT_RKT0_ + fun:_ZN39test_disjointCoarseMMapPool_random_Test8TestBodyEv + fun:_ZN7testing8internal38HandleSehExceptionsInMethodIfSupportedINS_4TestEvEET0_PT_MS4_FS3_vEPKc + fun:_ZN7testing8internal35HandleExceptionsInMethodIfSupportedINS_4TestEvEET0_PT_MS4_FS3_vEPKc + fun:_ZN7testing4Test3RunEv + fun:_ZN7testing8TestInfo3RunEv + fun:_ZN7testing9TestSuite3RunEv + fun:_ZN7testing8internal12UnitTestImpl11RunAllTestsEv + fun:_ZN7testing8internal38HandleSehExceptionsInMethodIfSupportedINS0_12UnitTestImplEbEET0_PT_MS4_FS3_vEPKc + fun:_ZN7testing8internal35HandleExceptionsInMethodIfSupportedINS0_12UnitTestImplEbEET0_PT_MS4_FS3_vEPKc + fun:_ZN7testing8UnitTest3RunEv +} diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh index a3e7847ed..775f25744 100755 --- a/test/test_valgrind.sh +++ b/test/test_valgrind.sh @@ -89,6 +89,9 @@ for test in $(ls -1 umf_test-*); do umf_test-provider_os_memory_config) FILTER='--gtest_filter="-*protection_flag_none:*protection_flag_read:*providerConfigTestNumaMode*"' ;; + ./umf_test-disjointCoarseMallocPool) # TODO + FILTER='--gtest_filter="*disjointCoarseMallocPool*"' + ;; umf_test-memspace_highest_capacity) FILTER='--gtest_filter="-*highestCapacityVerify*"' ;;
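
Note for reviewers: a minimal usage sketch of the ravl tree added in src/ravl/ravl.c and src/ravl/ravl.h. It shows a sized tree driving a best-fit style lookup, the kind of ordered query a coarse block index needs; the block struct, the comparator, and the sizes are illustrative assumptions, not code from this patch.

#include <stddef.h>
#include <stdio.h>

#include "ravl.h"

struct block {
    size_t size;
};

/* total order on block sizes, matching the ravl_compare signature */
static int block_cmp(const void *lhs, const void *rhs) {
    const struct block *l = lhs;
    const struct block *r = rhs;
    return (l->size > r->size) - (l->size < r->size);
}

int main(void) {
    /* sized tree: each node copies sizeof(struct block) bytes on emplace */
    struct ravl *tree = ravl_new_sized(block_cmp, sizeof(struct block));
    if (tree == NULL) {
        return 1;
    }

    struct block b64 = {64}, b256 = {256}, b1024 = {1024};
    ravl_emplace_copy(tree, &b64);
    ravl_emplace_copy(tree, &b256);
    ravl_emplace_copy(tree, &b1024);

    /* best fit: the smallest stored block that can hold 128 bytes */
    struct block key = {128};
    struct ravl_node *n = ravl_find(tree, &key, RAVL_PREDICATE_GREATER_EQUAL);
    if (n != NULL) {
        struct block *found = ravl_data(n);
        printf("best fit: %zu bytes\n", found->size); /* prints 256 */
        ravl_remove(tree, n); /* take the block out of the index */
    }

    ravl_delete(tree);
    return 0;
}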
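
In the same spirit, a short sketch of the coarse provider configuration that disjointCoarseMMapPool_random exercises: no upstream provider, all memory served from a caller pre-allocated buffer. The direct umfMemoryProviderAlloc/umfMemoryProviderFree calls are assumed to follow the generic UMF provider API, and the 4 MB / 64 KB sizes are arbitrary; none of this is code from the patch itself.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include <umf/memory_provider.h>
#include <umf/providers/provider_coarse.h>

int main(void) {
    const size_t buffer_size = 4 * 1024 * 1024;
    void *buffer = malloc(buffer_size);
    if (buffer == NULL) {
        return 1;
    }

    coarse_memory_provider_params_t params = {
        NULL,        // upstream_memory_provider - none, the buffer is the only source
        false,       // immediate_init - meaningful only with an upstream provider
        buffer,      // init_buffer - caller-owned, pre-allocated memory
        buffer_size, // init_buffer_size
        false,       // trace
        false,       // WA_do_not_free_upstream
    };

    umf_memory_provider_handle_t coarse = NULL;
    if (umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), &params,
                                &coarse) != UMF_RESULT_SUCCESS) {
        free(buffer);
        return 1;
    }

    // carve a 64 KB chunk out of the buffer, inspect the stats, give it back
    void *ptr = NULL;
    umfMemoryProviderAlloc(coarse, 64 * 1024, 0, &ptr);

    coarse_memory_provider_stats_t stats = umfCoarseMemoryProviderGetStats(coarse);
    printf("used %zu of %zu bytes\n", stats.used_size, stats.alloc_size);

    umfMemoryProviderFree(coarse, ptr, 64 * 1024);
    umfMemoryProviderDestroy(coarse);
    free(buffer);
    return 0;
}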