diff --git a/mooncake-store/CMakeLists.txt b/mooncake-store/CMakeLists.txt index f5241faaf..8483085e8 100644 --- a/mooncake-store/CMakeLists.txt +++ b/mooncake-store/CMakeLists.txt @@ -20,4 +20,5 @@ include_directories( # Add subdirectories add_subdirectory(src) -add_subdirectory(tests) \ No newline at end of file +add_subdirectory(tests) +add_subdirectory(benchmarks) \ No newline at end of file diff --git a/mooncake-store/benchmarks/CMakeLists.txt b/mooncake-store/benchmarks/CMakeLists.txt new file mode 100644 index 000000000..0a78170c8 --- /dev/null +++ b/mooncake-store/benchmarks/CMakeLists.txt @@ -0,0 +1,3 @@ +# Add allocator benchmark executable +add_executable(allocator_bench allocator_bench.cpp) +target_link_libraries(allocator_bench PRIVATE mooncake_store) diff --git a/mooncake-store/benchmarks/allocator_bench.cpp b/mooncake-store/benchmarks/allocator_bench.cpp new file mode 100644 index 000000000..9cf848810 --- /dev/null +++ b/mooncake-store/benchmarks/allocator_bench.cpp @@ -0,0 +1,179 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "offset_allocator/offset_allocator.hpp" + +using namespace mooncake::offset_allocator; + +class OffsetAllocatorBenchHelper { + public: + OffsetAllocatorBenchHelper(uint64_t baseAddress, uint32_t poolSize, uint32_t maxAllocs) + : pool_size_(poolSize), + allocated_size_(0), + allocator_(OffsetAllocator::create(baseAddress, poolSize, maxAllocs)), + rd_(), + gen_(rd_()) {} + + void allocate(uint32_t size) { + while (true) { + auto handle = allocator_->allocate(size); + if (handle.has_value()) { + allocated_.push_back(std::move(*handle)); + allocated_sizes_.push_back(size); + allocated_size_ += size; + break; + } + if (allocated_.size() == 0) { + break; + } + std::uniform_int_distribution dist(0, + allocated_.size() - 1); + auto index = dist(gen_); + std::swap(allocated_[index], allocated_.back()); + std::swap(allocated_sizes_[index], allocated_sizes_.back()); + allocated_size_ -= 
allocated_sizes_.back(); + allocated_.pop_back(); + allocated_sizes_.pop_back(); + } + } + + double get_allocated_ratio() const { + return static_cast(allocated_size_) / pool_size_; + } + + private: + uint64_t pool_size_; + uint64_t allocated_size_; + std::shared_ptr allocator_; + std::vector allocated_; + std::vector allocated_sizes_; + std::random_device rd_; + std::mt19937 gen_; +}; + +template +void uniform_size_allocation_benchmark() { + std::cout << std::endl << "=== Uniform Size Allocation Benchmark ===" << std::endl; + const size_t max_pool_size = 2ull * 1024 * 1024 * 1024; + std::vector allocation_sizes; + for (uint32_t i = 32; i < (1 << 26); i *= 4) { + allocation_sizes.push_back(i); + } + for (uint32_t i = 32; i < (1 << 26); i *= 4) { + allocation_sizes.push_back(i - 17); + } + for (uint32_t i = 32; i < (1 << 26); i *= 4) { + allocation_sizes.push_back(i + 17); + } + for (uint32_t i = 32; i < (1 << 26); i *= 4) { + allocation_sizes.push_back(i * 0.9); + } + for (uint32_t i = 32; i < (1 << 26); i *= 4) { + allocation_sizes.push_back(i * 1.1); + } + + for (auto alloc_size : allocation_sizes) { + // For small allocation sizes, use a smaller pool size to avoid + // benchmark runs too slow. + size_t pool_size = + alloc_size < 1024 ? 
max_pool_size / 16 : max_pool_size; + size_t max_allocs = pool_size / alloc_size + 10; + BenchHelper bench_helper(0x1000, pool_size, max_allocs); + int warmup_num = pool_size / alloc_size; + for (int i = 0; i < warmup_num; i++) { + bench_helper.allocate(alloc_size); + } + + // START + auto start_time = std::chrono::high_resolution_clock::now(); + double min_util_ratio = 1.0; + double total_util_ratio = 0.0; + int benchmark_num = 1000000; + for (int i = 0; i < benchmark_num; i++) { + bench_helper.allocate(alloc_size); + double util_ratio = bench_helper.get_allocated_ratio(); + if (util_ratio < min_util_ratio) { + min_util_ratio = util_ratio; + } + total_util_ratio += util_ratio; + } + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end_time - start_time); + // END + double avg_util_ratio = total_util_ratio / benchmark_num; + std::cout << "Alloc size: " << alloc_size + << ", min util ratio: " << min_util_ratio + << ", avg util ratio: " << avg_util_ratio + << ", time: " << duration.count() / benchmark_num << " ns" << std::endl; + } +} + +template +void random_size_allocation_benchmark() { + std::cout << std::endl << "=== Random Size Allocation Benchmark ===" << std::endl; + const size_t pool_size = 2ull * 1024 * 1024 * 1024; + const size_t max_alloc_size = 1ull << 26; + const size_t min_alloc_size = 1024; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dist(min_alloc_size, max_alloc_size); + + // Warmup + size_t max_allocs = pool_size / min_alloc_size + 10; + BenchHelper bench_helper(0x1000, pool_size, max_allocs); + for (size_t warmup_size = 0; warmup_size < pool_size;) { + size_t alloc_size = dist(gen); + bench_helper.allocate(alloc_size); + warmup_size += alloc_size; + } + + int benchmark_num = 1000000; + std::vector util_ratios; + util_ratios.reserve(benchmark_num); + + // Run benchmark + auto start_time = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < 
benchmark_num; i++) { + size_t alloc_size = dist(gen); + bench_helper.allocate(alloc_size); + util_ratios.push_back(bench_helper.get_allocated_ratio()); + } + auto end_time = std::chrono::high_resolution_clock::now(); + + // Calculate metrics + const double avg_time_ns = + std::chrono::duration_cast(end_time - + start_time) + .count() / + static_cast(benchmark_num); + + std::sort(util_ratios.begin(), util_ratios.end()); + + const double min_util = util_ratios.front(); + const double max_util = util_ratios.back(); + const double p50 = util_ratios[util_ratios.size() * 0.50]; + const double p90 = util_ratios[util_ratios.size() * 0.10]; + const double p99 = util_ratios[util_ratios.size() * 0.01]; + + const double mean_util = + std::accumulate(util_ratios.begin(), util_ratios.end(), 0.0) / + util_ratios.size(); + + std::cout << std::fixed << std::setprecision(6); + std::cout << "util ratio (min / p99 / p90 / p50 / max / avg): " << min_util + << " / " << p99 << " / " << p90 << " / " << p50 << " / " + << max_util << " / " << mean_util << std::endl; + std::cout << "avg alloc time: " << avg_time_ns << " ns/op" << std::endl; +} + +int main() { + std::cout << "=== OffsetAllocator Benchmark ===" << std::endl; + uniform_size_allocation_benchmark(); + random_size_allocation_benchmark(); +} \ No newline at end of file diff --git a/mooncake-store/benchmarks/allocator_bench_result.md b/mooncake-store/benchmarks/allocator_bench_result.md new file mode 100644 index 000000000..2246903b2 --- /dev/null +++ b/mooncake-store/benchmarks/allocator_bench_result.md @@ -0,0 +1,176 @@ +# Allocator Memory Utilization Benchmark + +## Execution + +```bash +./mooncake-store/benchmarks/allocator_bench +``` + +## Result + +- alloc size: The size of each object +- utilization ratio: The total allocated size / total space +- time: time in nanoseconds for each object allocation +- OffsetAllocator optimization: whether round up the allocated size to a bin size + +### Uniform size, size equals power of 2 
+ +**OffsetAllocator (After Optimization)** + +``` +Alloc size: 32, min util ratio: 1, avg util ratio: 1, time: 544 ns +Alloc size: 128, min util ratio: 1, avg util ratio: 1, time: 417 ns +Alloc size: 512, min util ratio: 1, avg util ratio: 1, time: 174 ns +Alloc size: 2048, min util ratio: 1, avg util ratio: 1, time: 406 ns +Alloc size: 8192, min util ratio: 1, avg util ratio: 1, time: 180 ns +Alloc size: 32768, min util ratio: 1, avg util ratio: 1, time: 133 ns +Alloc size: 131072, min util ratio: 1, avg util ratio: 1, time: 109 ns +Alloc size: 524288, min util ratio: 1, avg util ratio: 1, time: 100 ns +Alloc size: 2097152, min util ratio: 1, avg util ratio: 1, time: 99 ns +Alloc size: 8388608, min util ratio: 1, avg util ratio: 1, time: 99 ns +Alloc size: 33554432, min util ratio: 1, avg util ratio: 1, time: 98 ns +``` + +**OffsetAllocator (Before Optimization)** + +``` +Alloc size: 32, min util ratio: 1, avg util ratio: 1, time: 539 ns +Alloc size: 128, min util ratio: 1, avg util ratio: 1, time: 419 ns +Alloc size: 512, min util ratio: 1, avg util ratio: 1, time: 217 ns +Alloc size: 2048, min util ratio: 1, avg util ratio: 1, time: 408 ns +Alloc size: 8192, min util ratio: 1, avg util ratio: 1, time: 175 ns +Alloc size: 32768, min util ratio: 1, avg util ratio: 1, time: 130 ns +Alloc size: 131072, min util ratio: 1, avg util ratio: 1, time: 107 ns +Alloc size: 524288, min util ratio: 1, avg util ratio: 1, time: 99 ns +Alloc size: 2097152, min util ratio: 1, avg util ratio: 1, time: 100 ns +Alloc size: 8388608, min util ratio: 1, avg util ratio: 1, time: 98 ns +Alloc size: 33554432, min util ratio: 1, avg util ratio: 1, time: 98 ns +``` + +### Uniform size, size equals power of 2 +/- 17 + +**OffsetAllocator (After Optimization)** + +``` +Alloc size: 15, min util ratio: 1, avg util ratio: 1, time: 568 ns +Alloc size: 111, min util ratio: 0.991071, avg util ratio: 0.991071, time: 441 ns +Alloc size: 495, min util ratio: 0.966797, avg util ratio: 0.966797, time: 
178 ns +Alloc size: 2031, min util ratio: 0.991699, avg util ratio: 0.991699, time: 418 ns +Alloc size: 8175, min util ratio: 0.997925, avg util ratio: 0.997925, time: 170 ns +Alloc size: 32751, min util ratio: 0.999481, avg util ratio: 0.999481, time: 133 ns +Alloc size: 131055, min util ratio: 0.99987, avg util ratio: 0.99987, time: 109 ns +Alloc size: 524271, min util ratio: 0.999968, avg util ratio: 0.999968, time: 100 ns +Alloc size: 2097135, min util ratio: 0.999992, avg util ratio: 0.999992, time: 99 ns +Alloc size: 8388591, min util ratio: 0.999998, avg util ratio: 0.999998, time: 98 ns +Alloc size: 33554415, min util ratio: 0.999999, avg util ratio: 0.999999, time: 99 ns +Alloc size: 49, min util ratio: 0.942308, avg util ratio: 0.942308, time: 508 ns +Alloc size: 145, min util ratio: 0.906249, avg util ratio: 0.906249, time: 372 ns +Alloc size: 529, min util ratio: 0.918399, avg util ratio: 0.918399, time: 172 ns +Alloc size: 2065, min util ratio: 0.896267, avg util ratio: 0.896267, time: 403 ns +Alloc size: 8209, min util ratio: 0.89073, avg util ratio: 0.89073, time: 174 ns +Alloc size: 32785, min util ratio: 0.889347, avg util ratio: 0.889347, time: 131 ns +Alloc size: 131089, min util ratio: 0.88897, avg util ratio: 0.88897, time: 105 ns +Alloc size: 524305, min util ratio: 0.888701, avg util ratio: 0.888701, time: 102 ns +Alloc size: 2097169, min util ratio: 0.888679, avg util ratio: 0.888679, time: 100 ns +Alloc size: 8388625, min util ratio: 0.886721, avg util ratio: 0.886721, time: 100 ns +Alloc size: 33554449, min util ratio: 0.875, avg util ratio: 0.875, time: 100 ns +``` + +**OffsetAllocator (Before Optimization)** + +``` +Alloc size: 15, min util ratio: 1, avg util ratio: 1, time: 566 ns +Alloc size: 111, min util ratio: 0.669866, avg util ratio: 0.710845, time: 703 ns +Alloc size: 495, min util ratio: 0.665779, avg util ratio: 0.676874, time: 238 ns +Alloc size: 2031, min util ratio: 0.668333, avg util ratio: 0.705411, time: 637 ns +Alloc 
size: 8175, min util ratio: 0.666175, avg util ratio: 0.676474, time: 242 ns +Alloc size: 32751, min util ratio: 0.664435, avg util ratio: 0.669078, time: 168 ns +Alloc size: 131055, min util ratio: 0.66062, avg util ratio: 0.667341, time: 124 ns +Alloc size: 524271, min util ratio: 0.653055, avg util ratio: 0.666993, time: 118 ns +Alloc size: 2097135, min util ratio: 0.64062, avg util ratio: 0.666873, time: 116 ns +Alloc size: 8388591, min util ratio: 0.605468, avg util ratio: 0.667812, time: 115 ns +Alloc size: 33554415, min util ratio: 0.5625, avg util ratio: 0.670944, time: 116 ns +Alloc size: 49, min util ratio: 0.692229, avg util ratio: 0.753062, time: 1122 ns +Alloc size: 145, min util ratio: 0.667789, avg util ratio: 0.700907, time: 572 ns +Alloc size: 529, min util ratio: 0.66577, avg util ratio: 0.676238, time: 238 ns +Alloc size: 2065, min util ratio: 0.667926, avg util ratio: 0.704884, time: 632 ns +Alloc size: 8209, min util ratio: 0.665708, avg util ratio: 0.676372, time: 239 ns +Alloc size: 32785, min util ratio: 0.664224, avg util ratio: 0.669058, time: 168 ns +Alloc size: 131089, min util ratio: 0.659631, avg util ratio: 0.667287, time: 129 ns +Alloc size: 524305, min util ratio: 0.652609, avg util ratio: 0.666884, time: 122 ns +Alloc size: 2097169, min util ratio: 0.638677, avg util ratio: 0.666516, time: 120 ns +Alloc size: 8388625, min util ratio: 0.60547, avg util ratio: 0.665131, time: 121 ns +Alloc size: 33554449, min util ratio: 0.546875, avg util ratio: 0.660917, time: 120 ns +``` + +### Uniform size, size equals power of 2 multiply 0.9 or 1.1 + +**OffsetAllocator (After Optimization)** + +``` +Alloc size: 28, min util ratio: 1, avg util ratio: 1, time: 543 ns +Alloc size: 115, min util ratio: 0.958333, avg util ratio: 0.958333, time: 418 ns +Alloc size: 460, min util ratio: 0.958332, avg util ratio: 0.958332, time: 189 ns +Alloc size: 1843, min util ratio: 0.959896, avg util ratio: 0.959896, time: 418 ns +Alloc size: 7372, min util ratio: 
0.959895, avg util ratio: 0.959895, time: 197 ns +Alloc size: 29491, min util ratio: 0.959993, avg util ratio: 0.959993, time: 135 ns +Alloc size: 117964, min util ratio: 0.959979, avg util ratio: 0.959979, time: 111 ns +Alloc size: 471859, min util ratio: 0.959985, avg util ratio: 0.959985, time: 100 ns +Alloc size: 1887436, min util ratio: 0.959765, avg util ratio: 0.959765, time: 99 ns +Alloc size: 7549747, min util ratio: 0.959766, avg util ratio: 0.959766, time: 99 ns +Alloc size: 30198988, min util ratio: 0.95625, avg util ratio: 0.95625, time: 99 ns +Alloc size: 35, min util ratio: 0.972222, avg util ratio: 0.972222, time: 531 ns +Alloc size: 140, min util ratio: 0.972222, avg util ratio: 0.972222, time: 397 ns +Alloc size: 563, min util ratio: 0.977427, avg util ratio: 0.977427, time: 180 ns +Alloc size: 2252, min util ratio: 0.97743, avg util ratio: 0.97743, time: 389 ns +Alloc size: 9011, min util ratio: 0.977752, avg util ratio: 0.977752, time: 183 ns +Alloc size: 36044, min util ratio: 0.977752, avg util ratio: 0.977752, time: 133 ns +Alloc size: 144179, min util ratio: 0.977739, avg util ratio: 0.977739, time: 106 ns +Alloc size: 576716, min util ratio: 0.977538, avg util ratio: 0.977538, time: 103 ns +Alloc size: 2306867, min util ratio: 0.977539, avg util ratio: 0.977539, time: 99 ns +Alloc size: 9227468, min util ratio: 0.975391, avg util ratio: 0.975391, time: 99 ns +Alloc size: 36909875, min util ratio: 0.9625, avg util ratio: 0.9625, time: 100 ns +``` + +**OffsetAllocator (Before Optimization)** + +``` +Alloc size: 28, min util ratio: 1, avg util ratio: 1, time: 539 ns +Alloc size: 115, min util ratio: 0.669299, avg util ratio: 0.709245, time: 701 ns +Alloc size: 460, min util ratio: 0.665825, avg util ratio: 0.677532, time: 255 ns +Alloc size: 1843, min util ratio: 0.669352, avg util ratio: 0.709202, time: 691 ns +Alloc size: 7372, min util ratio: 0.66619, avg util ratio: 0.677401, time: 260 ns +Alloc size: 29491, min util ratio: 0.664311, avg 
util ratio: 0.669511, time: 172 ns +Alloc size: 117964, min util ratio: 0.661812, avg util ratio: 0.667356, time: 133 ns +Alloc size: 471859, min util ratio: 0.654345, avg util ratio: 0.667048, time: 123 ns +Alloc size: 1887436, min util ratio: 0.640722, avg util ratio: 0.666447, time: 121 ns +Alloc size: 7549747, min util ratio: 0.611719, avg util ratio: 0.666847, time: 119 ns +Alloc size: 30198988, min util ratio: 0.548437, avg util ratio: 0.669799, time: 125 ns +Alloc size: 35, min util ratio: 0.7098, avg util ratio: 0.774162, time: 1306 ns +Alloc size: 140, min util ratio: 0.667934, avg util ratio: 0.702151, time: 599 ns +Alloc size: 563, min util ratio: 0.665599, avg util ratio: 0.675548, time: 239 ns +Alloc size: 2252, min util ratio: 0.667371, avg util ratio: 0.701623, time: 601 ns +Alloc size: 9011, min util ratio: 0.665485, avg util ratio: 0.675528, time: 244 ns +Alloc size: 36044, min util ratio: 0.663248, avg util ratio: 0.668912, time: 170 ns +Alloc size: 144179, min util ratio: 0.660308, avg util ratio: 0.666934, time: 127 ns +Alloc size: 576716, min util ratio: 0.654467, avg util ratio: 0.66679, time: 122 ns +Alloc size: 2306867, min util ratio: 0.633789, avg util ratio: 0.666159, time: 121 ns +Alloc size: 9227468, min util ratio: 0.597266, avg util ratio: 0.666037, time: 118 ns +Alloc size: 36909875, min util ratio: 0.55, avg util ratio: 0.669564, time: 121 ns +``` + +### Random Size + +**OffsetAllocator (After Optimization)** + +``` +util ratio (min / p99 / p90 / p50 / max / avg): 0.544250 / 0.713338 / 0.779739 / 0.847867 / 0.952591 / 0.841576 +avg alloc time: 145.575738 ns/op +``` + +**OffsetAllocator (Before Optimization)** + +``` +util ratio (min / p99 / p90 / p50 / max / avg): 0.569255 / 0.712076 / 0.781224 / 0.855046 / 0.976057 / 0.848873 +avg alloc time: 142.508508 ns/op +``` \ No newline at end of file diff --git a/mooncake-store/include/offset_allocator/offset_allocator.hpp b/mooncake-store/include/offset_allocator/offset_allocator.hpp new 
file mode 100644 index 000000000..93fa81fe3 --- /dev/null +++ b/mooncake-store/include/offset_allocator/offset_allocator.hpp @@ -0,0 +1,184 @@ +#pragma once +// (C) Sebastian Aaltonen 2023 +// MIT License (see file: LICENSE) + +#include +#include + +#include "mutex.h" + +namespace mooncake::offset_allocator { +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +using NodeIndex = uint32; + +// Forward declarations +class OffsetAllocator; +class __Allocator; + +static constexpr uint32 NUM_TOP_BINS = 32; +static constexpr uint32 BINS_PER_LEAF = 8; +static constexpr uint32 TOP_BINS_INDEX_SHIFT = 3; +static constexpr uint32 LEAF_BINS_INDEX_MASK = 0x7; +static constexpr uint32 NUM_LEAF_BINS = NUM_TOP_BINS * BINS_PER_LEAF; + +struct OffsetAllocation { + static constexpr uint32 NO_SPACE = 0xffffffff; + + uint32 offset = NO_SPACE; + NodeIndex metadata = NO_SPACE; // internal: node index +}; + +struct OffsetAllocStorageReport { + uint64_t totalFreeSpace; + uint64_t largestFreeRegion; +}; + +struct OffsetAllocStorageReportFull { + struct Region { + uint64_t size; + uint64_t count; + }; + + Region freeRegions[NUM_LEAF_BINS]; +}; + +// RAII Handle class for automatic deallocation +class OffsetAllocationHandle { + public: + // Constructor for valid allocation + OffsetAllocationHandle(std::shared_ptr allocator, + OffsetAllocation allocation, uint64_t base, uint64_t size); + + // Move constructor + OffsetAllocationHandle(OffsetAllocationHandle&& other) noexcept; + + // Move assignment operator + OffsetAllocationHandle& operator=(OffsetAllocationHandle&& other) noexcept; + + // Disable copy constructor and copy assignment + OffsetAllocationHandle(const OffsetAllocationHandle&) = delete; + OffsetAllocationHandle& operator=(const OffsetAllocationHandle&) = delete; + + // Destructor - automatically deallocates + ~OffsetAllocationHandle(); + + // Check if the allocation handle is valid + bool isValid() const { return !m_allocator.expired(); } + + 
// Get offset + uint64_t address() const { return real_base; } + + void* ptr() const { return reinterpret_cast(address()); } + + // Get size + uint64_t size() const { return requested_size; } + + private: + std::weak_ptr m_allocator; + // The offset in m_allocation may not be equal to the real offset. + OffsetAllocation m_allocation; + // The real base and requested size of the allocated memory. + uint64_t real_base; + uint64_t requested_size; +}; + +// Wrapper class for __Allocator, it 1) supports thread-safe allocation and +// deallocation, 2) supports creating a buffer or allocating a memory region +// that is larger than the largest bin size (3.75GB). The __allocator class is +// also optimized to round up the allocated size to a bin size. This will +// a) slightly decrease the memory utilization ratio in general cases, b) makes +// no difference when the allocated size is equal to a bin size, c) largely +// improve the memory utilization ratio when the allocated size is mostly +// uniform and not equal to any bin size. 
+class OffsetAllocator : public std::enable_shared_from_this { + public: + // Factory method to create shared_ptr + static std::shared_ptr create(uint64_t base, size_t size, + uint32 maxAllocs = 128 * 1024); + + // Disable copy constructor and copy assignment + OffsetAllocator(const OffsetAllocator&) = delete; + OffsetAllocator& operator=(const OffsetAllocator&) = delete; + + // Disable move constructor and move assignment + OffsetAllocator(OffsetAllocator&& other) noexcept = delete; + OffsetAllocator& operator=(OffsetAllocator&& other) noexcept = delete; + + // Destructor + ~OffsetAllocator() = default; + + // Allocate memory and return a Handle (thread-safe) + [[nodiscard]] + std::optional allocate(size_t size); + + // Get storage report (thread-safe) + [[nodiscard]] + OffsetAllocStorageReport storageReport() const; + + // Get full storage report (thread-safe) + [[nodiscard]] + OffsetAllocStorageReportFull storageReportFull() const; + + private: + friend class OffsetAllocationHandle; + + // Internal method for Handle to free allocation (thread-safe) + void freeAllocation(const OffsetAllocation& allocation); + + std::unique_ptr<__Allocator> m_allocator GUARDED_BY(m_mutex); + const uint64_t m_base; + // The real offset and size of the allocated memory need to be multiplied by + // m_multiplier + const uint64_t m_multiplier; + mutable Mutex m_mutex; + + // Private constructor - use create() factory method instead + OffsetAllocator(uint64_t base, size_t size, uint32 maxAllocs = 128 * 1024); +}; + +class __Allocator { + public: + __Allocator(uint32 size, uint32 maxAllocs = 128 * 1024); + __Allocator(__Allocator&& other); + ~__Allocator(); + void reset(); + + OffsetAllocation allocate(uint32 size); + void free(OffsetAllocation allocation); + + uint32 allocationSize(OffsetAllocation allocation) const; + OffsetAllocStorageReport storageReport() const; + OffsetAllocStorageReportFull storageReportFull() const; + + private: + uint32 insertNodeIntoBin(uint32 size, uint32 
dataOffset); + void removeNodeFromBin(uint32 nodeIndex); + + struct Node { + static constexpr NodeIndex unused = 0xffffffff; + + uint32 dataOffset = 0; + uint32 dataSize = 0; + NodeIndex binListPrev = unused; + NodeIndex binListNext = unused; + NodeIndex neighborPrev = unused; + NodeIndex neighborNext = unused; + bool used = false; // TODO: Merge as bit flag + }; + + uint32 m_size; + uint32 m_maxAllocs; + uint32 m_freeStorage; + + uint32 m_usedBinsTop; + uint8 m_usedBins[NUM_TOP_BINS]; + NodeIndex m_binIndices[NUM_LEAF_BINS]; + + Node* m_nodes; + NodeIndex* m_freeNodes; + uint32 m_freeOffset; + }; + +} // namespace mooncake::offset_allocator \ No newline at end of file diff --git a/mooncake-store/src/CMakeLists.txt b/mooncake-store/src/CMakeLists.txt index 8c1fa4741..157f9a6e1 100644 --- a/mooncake-store/src/CMakeLists.txt +++ b/mooncake-store/src/CMakeLists.txt @@ -19,6 +19,7 @@ set(MOONCAKE_STORE_SOURCES etcd_helper.cpp ha_helper.cpp rpc_service.cpp + offset_allocator.cpp ) # The cache_allocator library @@ -41,4 +42,4 @@ if (STORE_USE_ETCD) add_dependencies(mooncake_master build_etcd_wrapper) endif() -install(TARGETS mooncake_master DESTINATION bin) +install(TARGETS mooncake_master DESTINATION bin) \ No newline at end of file diff --git a/mooncake-store/src/offset_allocator.cpp b/mooncake-store/src/offset_allocator.cpp new file mode 100644 index 000000000..1b81b0135 --- /dev/null +++ b/mooncake-store/src/offset_allocator.cpp @@ -0,0 +1,600 @@ +// (C) Sebastian Aaltonen 2023 +// MIT License (see file: LICENSE) + +#include "offset_allocator/offset_allocator.hpp" + +#include "mutex.h" +#include + +#ifdef DEBUG +#include +#define ASSERT(x) assert(x) +// #define DEBUG_VERBOSE +#else +#define ASSERT(x) +#endif + +#ifdef DEBUG_VERBOSE +#include +#endif + +#ifdef _MSC_VER +#include +#endif + +#include + +namespace mooncake::offset_allocator { +inline uint32 lzcnt_nonzero(uint32 v) { +#ifdef _MSC_VER + unsigned long retVal; + _BitScanReverse(&retVal, v); + return 31 - 
retVal; +#else + return __builtin_clz(v); +#endif +} + +inline uint32 tzcnt_nonzero(uint32 v) { +#ifdef _MSC_VER + unsigned long retVal; + _BitScanForward(&retVal, v); + return retVal; +#else + return __builtin_ctz(v); +#endif +} + +namespace SmallFloat { +static constexpr uint32 MANTISSA_BITS = 3; +static constexpr uint32 MANTISSA_VALUE = 1 << MANTISSA_BITS; +static constexpr uint32 MANTISSA_MASK = MANTISSA_VALUE - 1; +static constexpr uint64_t MAX_BIN_SIZE = 4026531840ull; // 3.75GB + +// Bin sizes follow floating point (exponent + mantissa) distribution (piecewise +// linear log approx) This ensures that for each size class, the average +// overhead percentage stays the same +uint32 uintToFloatRoundUp(uint32 size) { + uint32 exp = 0; + uint32 mantissa = 0; + + if (size < MANTISSA_VALUE) { + // Denorm: 0..(MANTISSA_VALUE-1) + mantissa = size; + } else { + // Normalized: Hidden high bit always 1. Not stored. Just like float. + uint32 leadingZeros = lzcnt_nonzero(size); + uint32 highestSetBit = 31 - leadingZeros; + + uint32 mantissaStartBit = highestSetBit - MANTISSA_BITS; + exp = mantissaStartBit + 1; + mantissa = (size >> mantissaStartBit) & MANTISSA_MASK; + + uint32 lowBitsMask = (1 << mantissaStartBit) - 1; + + // Round up! + if ((size & lowBitsMask) != 0) mantissa++; + } + + return (exp << MANTISSA_BITS) + + mantissa; // + allows mantissa->exp overflow for round up +} + +uint32 uintToFloatRoundDown(uint32 size) { + uint32 exp = 0; + uint32 mantissa = 0; + + if (size < MANTISSA_VALUE) { + // Denorm: 0..(MANTISSA_VALUE-1) + mantissa = size; + } else { + // Normalized: Hidden high bit always 1. Not stored. Just like float. 
+ uint32 leadingZeros = lzcnt_nonzero(size); + uint32 highestSetBit = 31 - leadingZeros; + + uint32 mantissaStartBit = highestSetBit - MANTISSA_BITS; + exp = mantissaStartBit + 1; + mantissa = (size >> mantissaStartBit) & MANTISSA_MASK; + } + + return (exp << MANTISSA_BITS) | mantissa; +} + +uint32 floatToUint(uint32 floatValue) { + uint32 exponent = floatValue >> MANTISSA_BITS; + uint32 mantissa = floatValue & MANTISSA_MASK; + if (exponent == 0) { + // Denorms + return mantissa; + } else { + return (mantissa | MANTISSA_VALUE) << (exponent - 1); + } +} +} // namespace SmallFloat + +// Utility functions +uint32 findLowestSetBitAfter(uint32 bitMask, uint32 startBitIndex) { + uint32 maskBeforeStartIndex = (1 << startBitIndex) - 1; + uint32 maskAfterStartIndex = ~maskBeforeStartIndex; + uint32 bitsAfter = bitMask & maskAfterStartIndex; + if (bitsAfter == 0) return OffsetAllocation::NO_SPACE; + return tzcnt_nonzero(bitsAfter); +} + +// __Allocator... +__Allocator::__Allocator(uint32 size, uint32 maxAllocs) + : m_size(size), + m_maxAllocs(maxAllocs), + m_nodes(nullptr), + m_freeNodes(nullptr) { + if (sizeof(NodeIndex) == 2) { + ASSERT(maxAllocs <= 65536); + } + reset(); +} + +__Allocator::__Allocator(__Allocator&& other) + : m_size(other.m_size), + m_maxAllocs(other.m_maxAllocs), + m_freeStorage(other.m_freeStorage), + m_usedBinsTop(other.m_usedBinsTop), + m_nodes(other.m_nodes), + m_freeNodes(other.m_freeNodes), + m_freeOffset(other.m_freeOffset) { + memcpy(m_usedBins, other.m_usedBins, sizeof(uint8) * NUM_TOP_BINS); + memcpy(m_binIndices, other.m_binIndices, sizeof(NodeIndex) * NUM_LEAF_BINS); + + other.m_nodes = nullptr; + other.m_freeNodes = nullptr; + other.m_freeOffset = 0; + other.m_maxAllocs = 0; + other.m_usedBinsTop = 0; +} + +void __Allocator::reset() { + m_freeStorage = 0; + m_usedBinsTop = 0; + m_freeOffset = m_maxAllocs - 1; + + for (uint32 i = 0; i < NUM_TOP_BINS; i++) m_usedBins[i] = 0; + + for (uint32 i = 0; i < NUM_LEAF_BINS; i++) m_binIndices[i] = 
Node::unused; + + if (m_nodes) delete[] m_nodes; + if (m_freeNodes) delete[] m_freeNodes; + + m_nodes = new Node[m_maxAllocs]; + m_freeNodes = new NodeIndex[m_maxAllocs]; + + // Freelist is a stack. Nodes in inverse order so that [0] pops first. + for (uint32 i = 0; i < m_maxAllocs; i++) { + m_freeNodes[i] = m_maxAllocs - i - 1; + } + + // Start state: Whole storage as one big node + // Algorithm will split remainders and push them back as smaller nodes + insertNodeIntoBin(m_size, 0); +} + +__Allocator::~__Allocator() { + delete[] m_nodes; + delete[] m_freeNodes; +} + +OffsetAllocation __Allocator::allocate(uint32 size) { + // Out of allocations? + if (m_freeOffset == 0) { + return {.offset = OffsetAllocation::NO_SPACE, + .metadata = OffsetAllocation::NO_SPACE}; + } + + // Round up to bin index to ensure that alloc >= bin + // Gives us min bin index that fits the size + uint32 minBinIndex = SmallFloat::uintToFloatRoundUp(size); + + uint32 minTopBinIndex = minBinIndex >> TOP_BINS_INDEX_SHIFT; + uint32 minLeafBinIndex = minBinIndex & LEAF_BINS_INDEX_MASK; + + uint32 topBinIndex = minTopBinIndex; + uint32 leafBinIndex = OffsetAllocation::NO_SPACE; + + // If top bin exists, scan its leaf bin. This can fail (NO_SPACE). + if (m_usedBinsTop & (1 << topBinIndex)) { + leafBinIndex = + findLowestSetBitAfter(m_usedBins[topBinIndex], minLeafBinIndex); + } + + // If we didn't find space in top bin, we search top bin from +1 + if (leafBinIndex == OffsetAllocation::NO_SPACE) { + topBinIndex = findLowestSetBitAfter(m_usedBinsTop, minTopBinIndex + 1); + + // Out of space? + if (topBinIndex == OffsetAllocation::NO_SPACE) { + return {.offset = OffsetAllocation::NO_SPACE, + .metadata = OffsetAllocation::NO_SPACE}; + } + + // All leaf bins here fit the alloc, since the top bin was rounded up. + // Start leaf search from bit 0. NOTE: This search can't fail since at + // least one leaf bit was set because the top bit was set. 
+ leafBinIndex = tzcnt_nonzero(m_usedBins[topBinIndex]); + } + + uint32 binIndex = (topBinIndex << TOP_BINS_INDEX_SHIFT) | leafBinIndex; + + // Pop the top node of the bin. Bin top = node.next. + uint32 nodeIndex = m_binIndices[binIndex]; + Node& node = m_nodes[nodeIndex]; + uint32 nodeTotalSize = node.dataSize; + // Modified in Mooncake project: Round up to bin size. Otherwise when this + // node is freed, if it cannot be merged with neighbors, it will be inserted + // to a smaller bin. +#ifdef OFFSET_ALLOCATOR_NOT_ROUND_UP + uint32 roundupSize = size; +#else + // In default, round up to bin size. + uint32 roundupSize = SmallFloat::floatToUint(minBinIndex); +#endif + node.dataSize = roundupSize; + node.used = true; + m_binIndices[binIndex] = node.binListNext; + if (node.binListNext != Node::unused) + m_nodes[node.binListNext].binListPrev = Node::unused; + m_freeStorage -= nodeTotalSize; +#ifdef DEBUG_VERBOSE + printf("Free storage: %u (-%u) (allocate)\n", m_freeStorage, nodeTotalSize); +#endif + + // Bin empty? + if (m_binIndices[binIndex] == Node::unused) { + // Remove a leaf bin mask bit + m_usedBins[topBinIndex] &= ~(1 << leafBinIndex); + + // All leaf bins empty? 
+ if (m_usedBins[topBinIndex] == 0) { + // Remove a top bin mask bit + m_usedBinsTop &= ~(1 << topBinIndex); + } + } + + // Push back reminder N elements to a lower bin + uint32 reminderSize = nodeTotalSize - roundupSize; + if (reminderSize > 0) { + uint32 newNodeIndex = + insertNodeIntoBin(reminderSize, node.dataOffset + roundupSize); + + // Link nodes next to each other so that we can merge them later if both + // are free And update the old next neighbor to point to the new node + // (in middle) + if (node.neighborNext != Node::unused) + m_nodes[node.neighborNext].neighborPrev = newNodeIndex; + m_nodes[newNodeIndex].neighborPrev = nodeIndex; + m_nodes[newNodeIndex].neighborNext = node.neighborNext; + node.neighborNext = newNodeIndex; + } + + return {.offset = node.dataOffset, .metadata = nodeIndex}; +} + +void __Allocator::free(OffsetAllocation allocation) { + ASSERT(allocation.metadata != OffsetAllocation::NO_SPACE); + if (!m_nodes) return; + + uint32 nodeIndex = allocation.metadata; + Node& node = m_nodes[nodeIndex]; + + // Double delete check + ASSERT(node.used == true); + + // Merge with neighbors... + uint32 offset = node.dataOffset; + uint32 size = node.dataSize; + + if ((node.neighborPrev != Node::unused) && + (m_nodes[node.neighborPrev].used == false)) { + // Previous (contiguous) free node: Change offset to previous node + // offset. Sum sizes + Node& prevNode = m_nodes[node.neighborPrev]; + offset = prevNode.dataOffset; + size += prevNode.dataSize; + + // Remove node from the bin linked list and put it in the freelist + removeNodeFromBin(node.neighborPrev); + + ASSERT(prevNode.neighborNext == nodeIndex); + node.neighborPrev = prevNode.neighborPrev; + } + + if ((node.neighborNext != Node::unused) && + (m_nodes[node.neighborNext].used == false)) { + // Next (contiguous) free node: Offset remains the same. Sum sizes. 
+ Node& nextNode = m_nodes[node.neighborNext]; + size += nextNode.dataSize; + + // Remove node from the bin linked list and put it in the freelist + removeNodeFromBin(node.neighborNext); + + ASSERT(nextNode.neighborPrev == nodeIndex); + node.neighborNext = nextNode.neighborNext; + } + + uint32 neighborNext = node.neighborNext; + uint32 neighborPrev = node.neighborPrev; + + // Insert the removed node to freelist +#ifdef DEBUG_VERBOSE + printf("Putting node %u into freelist[%u] (free)\n", nodeIndex, + m_freeOffset + 1); +#endif + m_freeNodes[++m_freeOffset] = nodeIndex; + + // Insert the (combined) free node to bin + uint32 combinedNodeIndex = insertNodeIntoBin(size, offset); + + // Connect neighbors with the new combined node + if (neighborNext != Node::unused) { + m_nodes[combinedNodeIndex].neighborNext = neighborNext; + m_nodes[neighborNext].neighborPrev = combinedNodeIndex; + } + if (neighborPrev != Node::unused) { + m_nodes[combinedNodeIndex].neighborPrev = neighborPrev; + m_nodes[neighborPrev].neighborNext = combinedNodeIndex; + } +} + +uint32 __Allocator::insertNodeIntoBin(uint32 size, uint32 dataOffset) { + // Round down to bin index to ensure that bin >= alloc + uint32 binIndex = SmallFloat::uintToFloatRoundDown(size); + + uint32 topBinIndex = binIndex >> TOP_BINS_INDEX_SHIFT; + uint32 leafBinIndex = binIndex & LEAF_BINS_INDEX_MASK; + + // Bin was empty before? 
+ if (m_binIndices[binIndex] == Node::unused) { + // Set bin mask bits + m_usedBins[topBinIndex] |= 1 << leafBinIndex; + m_usedBinsTop |= 1 << topBinIndex; + } + + // Take a freelist node and insert on top of the bin linked list (next = old + // top) + uint32 topNodeIndex = m_binIndices[binIndex]; + uint32 nodeIndex = m_freeNodes[m_freeOffset--]; +#ifdef DEBUG_VERBOSE + printf("Getting node %u from freelist[%u]\n", nodeIndex, m_freeOffset + 1); +#endif + m_nodes[nodeIndex] = {.dataOffset = dataOffset, + .dataSize = size, + .binListNext = topNodeIndex}; + if (topNodeIndex != Node::unused) + m_nodes[topNodeIndex].binListPrev = nodeIndex; + m_binIndices[binIndex] = nodeIndex; + + m_freeStorage += size; +#ifdef DEBUG_VERBOSE + printf("Free storage: %u (+%u) (insertNodeIntoBin)\n", m_freeStorage, size); +#endif + + return nodeIndex; +} + +void __Allocator::removeNodeFromBin(uint32 nodeIndex) { + Node& node = m_nodes[nodeIndex]; + + if (node.binListPrev != Node::unused) { + // Easy case: We have previous node. Just remove this node from the + // middle of the list. + m_nodes[node.binListPrev].binListNext = node.binListNext; + if (node.binListNext != Node::unused) + m_nodes[node.binListNext].binListPrev = node.binListPrev; + } else { + // Hard case: We are the first node in a bin. Find the bin. + + // Round down to bin index to ensure that bin >= alloc + uint32 binIndex = SmallFloat::uintToFloatRoundDown(node.dataSize); + + uint32 topBinIndex = binIndex >> TOP_BINS_INDEX_SHIFT; + uint32 leafBinIndex = binIndex & LEAF_BINS_INDEX_MASK; + + m_binIndices[binIndex] = node.binListNext; + if (node.binListNext != Node::unused) + m_nodes[node.binListNext].binListPrev = Node::unused; + + // Bin empty? + if (m_binIndices[binIndex] == Node::unused) { + // Remove a leaf bin mask bit + m_usedBins[topBinIndex] &= ~(1 << leafBinIndex); + + // All leaf bins empty? 
+ if (m_usedBins[topBinIndex] == 0) { + // Remove a top bin mask bit + m_usedBinsTop &= ~(1 << topBinIndex); + } + } + } + + // Insert the node to freelist +#ifdef DEBUG_VERBOSE + printf("Putting node %u into freelist[%u] (removeNodeFromBin)\n", nodeIndex, + m_freeOffset + 1); +#endif + m_freeNodes[++m_freeOffset] = nodeIndex; + + m_freeStorage -= node.dataSize; +#ifdef DEBUG_VERBOSE + printf("Free storage: %u (-%u) (removeNodeFromBin)\n", m_freeStorage, + node.dataSize); +#endif +} + +uint32 __Allocator::allocationSize(OffsetAllocation allocation) const { + if (allocation.metadata == OffsetAllocation::NO_SPACE) return 0; + if (!m_nodes) return 0; + + return m_nodes[allocation.metadata].dataSize; +} + +OffsetAllocStorageReport __Allocator::storageReport() const { + uint32 largestFreeRegion = 0; + uint32 freeStorage = 0; + + // Out of allocations? -> Zero free space + if (m_freeOffset > 0) { + freeStorage = m_freeStorage; + if (m_usedBinsTop) { + uint32 topBinIndex = 31 - lzcnt_nonzero(m_usedBinsTop); + uint32 leafBinIndex = 31 - lzcnt_nonzero(m_usedBins[topBinIndex]); + largestFreeRegion = SmallFloat::floatToUint( + (topBinIndex << TOP_BINS_INDEX_SHIFT) | leafBinIndex); + ASSERT(freeStorage >= largestFreeRegion); + } + } + + return {.totalFreeSpace = freeStorage, + .largestFreeRegion = largestFreeRegion}; +} + +OffsetAllocStorageReportFull __Allocator::storageReportFull() const { + OffsetAllocStorageReportFull report; + for (uint32 i = 0; i < NUM_LEAF_BINS; i++) { + uint32 count = 0; + uint32 nodeIndex = m_binIndices[i]; + while (nodeIndex != Node::unused) { + nodeIndex = m_nodes[nodeIndex].binListNext; + count++; + } + report.freeRegions[i] = {.size = SmallFloat::floatToUint(i), + .count = count}; + } + return report; +} + +// OffsetAllocationHandle implementation +OffsetAllocationHandle::OffsetAllocationHandle(std::shared_ptr allocator, + OffsetAllocation allocation, uint64_t base, + uint64_t size) + : m_allocator(std::move(allocator)), + 
m_allocation(allocation), + real_base(base), + requested_size(size) {} + +OffsetAllocationHandle::OffsetAllocationHandle(OffsetAllocationHandle&& other) noexcept + : m_allocator(std::move(other.m_allocator)), + m_allocation(other.m_allocation), + real_base(other.real_base), + requested_size(other.requested_size) { + other.m_allocation = {OffsetAllocation::NO_SPACE, OffsetAllocation::NO_SPACE}; + other.real_base = 0; + other.requested_size = 0; +} + +OffsetAllocationHandle& OffsetAllocationHandle::operator=( + OffsetAllocationHandle&& other) noexcept { + if (this != &other) { + // Free current allocation if valid + auto allocator = m_allocator.lock(); + if (allocator) { + allocator->freeAllocation(m_allocation); + } + + // Move from other + m_allocator = std::move(other.m_allocator); + m_allocation = other.m_allocation; + real_base = other.real_base; + requested_size = other.requested_size; + + // Reset other + other.m_allocation = {OffsetAllocation::NO_SPACE, OffsetAllocation::NO_SPACE}; + other.real_base = 0; + other.requested_size = 0; + } + return *this; +} + +OffsetAllocationHandle::~OffsetAllocationHandle() { + auto allocator = m_allocator.lock(); + if (allocator) { + allocator->freeAllocation(m_allocation); + } +} + +// Helper function to calculate the multiplier +static uint64_t calculateMultiplier(size_t size) { + uint64_t multiplier = 1; + for (; SmallFloat::MAX_BIN_SIZE < size / multiplier; multiplier *= 2) { + } + return multiplier; +} + +// Thread-safe OffsetAllocator implementation +std::shared_ptr OffsetAllocator::create(uint64_t base, size_t size, + uint32 maxAllocs) { + // Call the private constructor directly via new (inaccessible to + // std::make_shared), then hand ownership to shared_ptr. + return std::shared_ptr(new OffsetAllocator(base, size, maxAllocs)); +} + +OffsetAllocator::OffsetAllocator(uint64_t base, size_t size, uint32 maxAllocs) + : m_base(base), m_multiplier(calculateMultiplier(size)) { + m_allocator = std::make_unique<__Allocator>(size / m_multiplier, maxAllocs); +} + +std::optional
OffsetAllocator::allocate(size_t size) { + if (size == 0) { + return std::nullopt; + } + + MutexLocker guard(&m_mutex); + if (!m_allocator) { + return std::nullopt; + } + + size_t fake_size = + m_multiplier > 1 ? (size + m_multiplier - 1) / m_multiplier : size; + + if (fake_size > SmallFloat::MAX_BIN_SIZE) { + return std::nullopt; + } + + OffsetAllocation allocation = m_allocator->allocate(fake_size); + if (allocation.offset == OffsetAllocation::NO_SPACE) { + return std::nullopt; + } + + // Use shared_from_this to get a shared_ptr to this OffsetAllocator + return OffsetAllocationHandle(shared_from_this(), allocation, + m_base + allocation.offset * m_multiplier, + size); +} + +OffsetAllocStorageReport OffsetAllocator::storageReport() const { + MutexLocker guard(&m_mutex); + if (!m_allocator) { + return {0, 0}; + } + OffsetAllocStorageReport report = m_allocator->storageReport(); + return {report.totalFreeSpace * m_multiplier, + report.largestFreeRegion * m_multiplier}; +} + +OffsetAllocStorageReportFull OffsetAllocator::storageReportFull() const { + MutexLocker lock(&m_mutex); + if (!m_allocator) { + OffsetAllocStorageReportFull report{}; + return report; + } + OffsetAllocStorageReportFull report = m_allocator->storageReportFull(); + for (uint32 i = 0; i < NUM_LEAF_BINS; i++) { + report.freeRegions[i] = {.size = report.freeRegions[i].size * m_multiplier, + .count = report.freeRegions[i].count}; + } + return report; +} + +void OffsetAllocator::freeAllocation(const OffsetAllocation& allocation) { + MutexLocker lock(&m_mutex); + if (m_allocator) { + m_allocator->free(allocation); + } +} + +} // namespace mooncake::offset_allocator \ No newline at end of file diff --git a/mooncake-store/tests/CMakeLists.txt b/mooncake-store/tests/CMakeLists.txt index 83ba7c3d8..5b1f4c84a 100644 --- a/mooncake-store/tests/CMakeLists.txt +++ b/mooncake-store/tests/CMakeLists.txt @@ -90,4 +90,15 @@ target_link_libraries(segment_test PUBLIC ) add_test(NAME segment_test COMMAND segment_test) 
+add_executable(offset_allocator_test offset_allocator_test.cpp) +target_link_libraries(offset_allocator_test PUBLIC + mooncake_store + cachelib_memory_allocator + glog + gtest + gtest_main + pthread +) +add_test(NAME offset_allocator_test COMMAND offset_allocator_test) + add_subdirectory(e2e) diff --git a/mooncake-store/tests/offset_allocator_test.cpp b/mooncake-store/tests/offset_allocator_test.cpp new file mode 100644 index 000000000..e75f164b9 --- /dev/null +++ b/mooncake-store/tests/offset_allocator_test.cpp @@ -0,0 +1,1130 @@ +#include + +#include +#include +#include + +#include "offset_allocator/offset_allocator.hpp" + +using namespace mooncake::offset_allocator; + +// 240 bins, according to https://github.com/sebbbi/OffsetAllocator +constexpr uint32 NUM_BINS = 240; +const uint32 bin_sizes[] = { + 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 18, + 20, 22, 24, 26, 28, 30, + 32, 36, 40, 44, 48, 52, + 56, 60, 64, 72, 80, 88, + 96, 104, 112, 120, 128, 144, + 160, 176, 192, 208, 224, 240, + 256, 288, 320, 352, 384, 416, + 448, 480, 512, 576, 640, 704, + 768, 832, 896, 960, 1024, 1152, + 1280, 1408, 1536, 1664, 1792, 1920, + 2048, 2304, 2560, 2816, 3072, 3328, + 3584, 3840, 4096, 4608, 5120, 5632, + 6144, 6656, 7168, 7680, 8192, 9216, + 10240, 11264, 12288, 13312, 14336, 15360, + 16384, 18432, 20480, 22528, 24576, 26624, + 28672, 30720, 32768, 36864, 40960, 45056, + 49152, 53248, 57344, 61440, 65536, 73728, + 81920, 90112, 98304, 106496, 114688, 122880, + 131072, 147456, 163840, 180224, 196608, 212992, + 229376, 245760, 262144, 294912, 327680, 360448, + 393216, 425984, 458752, 491520, 524288, 589824, + 655360, 720896, 786432, 851968, 917504, 983040, + 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, + 1835008, 1966080, 2097152, 2359296, 2621440, 2883584, + 3145728, 3407872, 3670016, 3932160, 4194304, 4718592, + 5242880, 5767168, 6291456, 6815744, 7340032, 7864320, + 8388608, 9437184, 10485760, 11534336, 12582912, 13631488, + 14680064, 
15728640, 16777216, 18874368, 20971520, 23068672, + 25165824, 27262976, 29360128, 31457280, 33554432, 37748736, + 41943040, 46137344, 50331648, 54525952, 58720256, 62914560, + 67108864, 75497472, 83886080, 92274688, 100663296, 109051904, + 117440512, 125829120, 134217728, 150994944, 167772160, 184549376, + 201326592, 218103808, 234881024, 251658240, 268435456, 301989888, + 335544320, 369098752, 402653184, 436207616, 469762048, 503316480, + 536870912, 603979776, 671088640, 738197504, 805306368, 872415232, + 939524096, 1006632960, 1073741824, 1207959552, 1342177280, 1476395008, + 1610612736, 1744830464, 1879048192, 2013265920, 2147483648, 2415919104, + 2684354560, 2952790016, 3221225472, 3489660928, 3758096384, 4026531840, +}; + +// Forward declaration +class AllocatorWrapper; + +// The wrapper will inform the AllocatorWrapper when the handle is destroyed. +class AllocationHandleWrapper { + public: + // Constructor for valid allocation + AllocationHandleWrapper(std::shared_ptr allocator_wrapper, + OffsetAllocationHandle handle) + : m_allocator_wrapper(std::move(allocator_wrapper)), + m_handle(std::move(handle)) {} + + // Move constructor + AllocationHandleWrapper(AllocationHandleWrapper&& other) noexcept + : m_allocator_wrapper(std::move(other.m_allocator_wrapper)), + m_handle(std::move(other.m_handle)) {} + + // Move assignment operator + AllocationHandleWrapper& operator=( + AllocationHandleWrapper&& other) noexcept; + + // Disable copy constructor and copy assignment + AllocationHandleWrapper(const AllocationHandleWrapper&) = delete; + AllocationHandleWrapper& operator=(const AllocationHandleWrapper&) = delete; + + // Destructor - automatically notifies allocator wrapper + ~AllocationHandleWrapper(); + + // Check if the allocation handle is valid + bool isValid() const { return m_handle.isValid(); } + + // Get address + uint64_t address() const { return m_handle.address(); } + + // Get size + uint64_t size() const { return m_handle.size(); } + + // Get the 
underlying handle + const OffsetAllocationHandle& getHandle() const { return m_handle; } + + private: + std::shared_ptr m_allocator_wrapper; + OffsetAllocationHandle m_handle; +}; + +// The wrapper will track the allocated memory spaces and check if the +// allocation is legal. +class AllocatorWrapper : public std::enable_shared_from_this { + public: + // Constructor + AllocatorWrapper(uint64_t base, size_t size, uint32 maxAllocs = 128 * 1024) + : m_allocator(OffsetAllocator::create(base, size, maxAllocs)), + m_base(base), + m_buffer_size(size) { + // The allocator is created with the specified base and size + // We can now properly track the allocation bounds + } + + AllocatorWrapper(const AllocatorWrapper&) = delete; + AllocatorWrapper& operator=(const AllocatorWrapper&) = delete; + AllocatorWrapper(AllocatorWrapper&& other) = default; + AllocatorWrapper& operator=(AllocatorWrapper&& other) = default; + + ~AllocatorWrapper() = default; + + // Allocate memory and return a wrapped handle + std::optional allocate(size_t size) { + if (!m_allocator) { + return std::nullopt; + } + + auto handle = m_allocator->allocate(size); + if (!handle.has_value()) { + return std::nullopt; + } + + // Validate the allocation + EXPECT_EQ(handle->size(), size) + << "Allocation size mismatch: " << handle->size() << " != " << size; + verifyAllocation(handle->address(), handle->address() + handle->size()); + + // Record the allocation + m_allocated_regions[handle->address()] = { + handle->address(), handle->address() + handle->size()}; + + return AllocationHandleWrapper(shared_from_this(), std::move(*handle)); + } + + // Get storage report + OffsetAllocStorageReport storageReport() const { + return m_allocator->storageReport(); + } + + private: + // Called by AllocationHandleWrapper when it's destroyed + void onHandleDeallocated(uint64_t address, uint64_t size) { + ASSERT_TRUE(m_allocated_regions.find(address) != + m_allocated_regions.end()) + << "Allocation not found in tracking: " << 
address; + ASSERT_EQ(m_allocated_regions[address].end, address + size) + << "Allocation size mismatch: " << m_allocated_regions[address].end + << " != " << address + size; + m_allocated_regions.erase(address); + } + + // Check if an allocation is legal (within bounds and doesn't overlap) + void verifyAllocation(uint64_t begin, uint64_t end) const { + // Check bounds + ASSERT_TRUE(begin >= m_base && end <= m_base + m_buffer_size) + << "Allocation is out of bounds: " << "Begin: " << begin + << ", End: " << end << ", Base: " << m_base + << ", Buffer Size: " << m_buffer_size; + + // Check for overlap with existing allocations using O(log(N)) algorithm + // Find the first region that starts >= begin + auto it = m_allocated_regions.lower_bound(begin); + + // Check if the previous region (if exists) overlaps with our allocation + if (it != m_allocated_regions.begin()) { + auto prev_it = std::prev(it); + if (prev_it->second.end > begin) { + ASSERT_TRUE(false) + << "Allocation overlaps with previous region: " + << "New allocation [" << begin << ", " << end << ") " + << "overlaps with existing region [" + << prev_it->second.begin << ", " << prev_it->second.end + << ")"; + } + } + + // Check if the current region (if exists) overlaps with our allocation + if (it != m_allocated_regions.end() && it->second.begin < end) { + ASSERT_TRUE(false) + << "Allocation overlaps with current region: " + << "New allocation [" << begin << ", " << end << ") " + << "overlaps with existing region [" << it->second.begin << ", " + << it->second.end << ")"; + } + } + + struct AllocatedRegion { + uint64_t begin; + uint64_t end; + }; + + std::shared_ptr m_allocator; + uint64_t m_base; + uint64_t m_buffer_size; + std::map m_allocated_regions; + + friend class AllocationHandleWrapper; +}; + +// Implementation of AllocationHandleWrapper methods that need AllocatorWrapper +// to be fully defined +AllocationHandleWrapper::~AllocationHandleWrapper() { + if (m_allocator_wrapper && m_handle.isValid()) { + 
m_allocator_wrapper->onHandleDeallocated(m_handle.address(), + m_handle.size()); + } +} + +AllocationHandleWrapper& AllocationHandleWrapper::operator=( + AllocationHandleWrapper&& other) noexcept { + if (this != &other) { + // Notify allocator wrapper about deallocation + if (m_allocator_wrapper && m_handle.isValid()) { + m_allocator_wrapper->onHandleDeallocated(m_handle.address(), + m_handle.size()); + } + + // Move from other + m_allocator_wrapper = std::move(other.m_allocator_wrapper); + m_handle = std::move(other.m_handle); + } + return *this; +} + +class OffsetAllocatorTest : public ::testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} +}; + +// Test basic allocation and deallocation +TEST_F(OffsetAllocatorTest, BasicAllocation) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024 * 1024; // 1GB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Allocate handle + auto handle = allocator->allocate(ALLOCATOR_SIZE); + ASSERT_TRUE(handle.has_value()); + EXPECT_TRUE(handle->isValid()); + EXPECT_NE(handle->address(), OffsetAllocation::NO_SPACE); + EXPECT_EQ(handle->size(), ALLOCATOR_SIZE); + + // Try allocate new handle + auto handle2 = allocator->allocate(ALLOCATOR_SIZE); + ASSERT_FALSE(handle2.has_value()); + + // Release handle + handle.reset(); + + // Try allocate again + handle2 = allocator->allocate(ALLOCATOR_SIZE); + ASSERT_TRUE(handle2.has_value()); + EXPECT_TRUE(handle2->isValid()); + EXPECT_NE(handle2->address(), OffsetAllocation::NO_SPACE); +} + +// Test allocation failure when out of space +TEST_F(OffsetAllocatorTest, AllocationFailure) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024 * 1024; // 1GB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Try to allocate more than available space + auto handle = + allocator->allocate(2 * ALLOCATOR_SIZE); // 2GB > 1GB available + 
EXPECT_FALSE(handle.has_value()); +} + +// Test multiple allocations +TEST_F(OffsetAllocatorTest, MultipleAllocations) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024 * 1024; // 1GB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + std::vector handles; + + for (int i = 0; i < 10; ++i) { + auto handle = allocator->allocate(1000); + ASSERT_TRUE(handle.has_value()); + handles.push_back(std::move(*handle)); + } + + // All handles should be valid and have different offsets + for (size_t i = 0; i < handles.size(); ++i) { + EXPECT_TRUE(handles[i].isValid()); + EXPECT_EQ(handles[i].size(), 1000); + for (size_t j = i + 1; j < handles.size(); ++j) { + EXPECT_NE(handles[i].address(), handles[j].address()); + } + } +} + +// Test allocations with different sizes don't overlap +TEST_F(OffsetAllocatorTest, DifferentSizesNoOverlap) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024 * 1024; // 1GB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + std::vector handles; + std::vector sizes = {100, 500, 1000, 2000, 50, 1500, 800, 300}; + + for (uint32 size : sizes) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) << "Failed to allocate size: " << size; + EXPECT_EQ(handle->size(), size); + handles.push_back(std::move(*handle)); + } + + // Verify all handles are valid + for (const auto& handle : handles) { + EXPECT_TRUE(handle.isValid()); + } +} + +// Test storage reports +TEST_F(OffsetAllocatorTest, StorageReports) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024 * 1024; // 1GB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + OffsetAllocStorageReport report = allocator->storageReport(); + EXPECT_GT(report.totalFreeSpace, 0); + EXPECT_GT(report.largestFreeRegion, 0); + + // Allocate some space + auto handle = allocator->allocate(1000); + ASSERT_TRUE(handle.has_value()); + 
+ OffsetAllocStorageReport newReport = allocator->storageReport(); + EXPECT_LT(newReport.totalFreeSpace, report.totalFreeSpace); +} + +// Test continuous allocation and deallocation with random sizes +TEST_F(OffsetAllocatorTest, ContinuousRandomAllocationDeallocation) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024 * 1024; // 1GB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution size_dist(1, + 1024 * 64); // 1B to 64KB + + const int max_iterations = 20000; + + // Allocate and deallocate random sizes + for (int i = 0; i < max_iterations; ++i) { + uint32_t size = size_dist(gen); + auto handle = allocator->allocate(size); + EXPECT_TRUE(handle.has_value()) << "Failed to allocate size: " << size; + // It will free automatically when handle goes out of scope + } + + auto full_space_handle = allocator->allocate(ALLOCATOR_SIZE); + ASSERT_TRUE(full_space_handle.has_value()); + EXPECT_EQ(full_space_handle->size(), ALLOCATOR_SIZE); +} + +// Full size allocation is only possible when the buffer size is exactly the +// same as one of the bin sizes. 
+TEST_F(OffsetAllocatorTest, FullSizeAllocation) { + for (uint32 size : bin_sizes) { + if (size == 0) continue; // Skip 0 size + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, size, MAX_ALLOCS); + + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()); + } +} + +TEST_F(OffsetAllocatorTest, RepeatedLargeSizeAllocation) { + for (size_t i = 0; i < NUM_BINS; ++i) { + uint32_t bin_size = bin_sizes[i]; + if (bin_size < 1024) continue; // Skip small sizes + constexpr uint32_t MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, bin_size + 10, MAX_ALLOCS); + EXPECT_EQ(allocator->storageReport().totalFreeSpace, bin_size + 10); + + for (uint32_t i = 0; i < 10; i++) { + auto handle = allocator->allocate(bin_size - (10 - i)); + ASSERT_TRUE(handle.has_value()); + } + } +} + +// Can only allocate MAX_ALLOCS - 2 times. +TEST_F(OffsetAllocatorTest, MaxNumAllocations) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024 * 1024; + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + std::vector handles; + for (uint32 i = 0; i < MAX_ALLOCS - 2; ++i) { + auto handle = allocator->allocate(1024); + ASSERT_TRUE(handle.has_value()) + << "Failed to allocate size: " << 1024 << " at iteration: " << i; + handles.push_back(std::move(*handle)); + } + + auto handle = allocator->allocate(1024); + ASSERT_FALSE(handle.has_value()); +} + +TEST_F(OffsetAllocatorTest, FullAllocationAfterRandomAllocationAndFree) { + const uint32 ALLOCATOR_SIZE = bin_sizes[NUM_BINS - 1]; + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution size_dist(1, ALLOCATOR_SIZE / 1000); + + std::vector handles; + for (uint32 i = 0; i < MAX_ALLOCS; ++i) { + uint32 size = size_dist(gen); + auto handle = allocator->allocate(size); + if (handle.has_value()) { + 
handles.push_back(std::move(*handle)); + } + } + + handles.clear(); + auto handle = allocator->allocate(ALLOCATOR_SIZE); + ASSERT_TRUE(handle.has_value()); +} + +// The original implementation will fail this test. +TEST_F(OffsetAllocatorTest, AllocationSameSizeAfterFree) { + constexpr uint32 ALLOCATOR_SIZE = 2048; + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + auto handle = allocator->allocate(1023); + ASSERT_TRUE(handle.has_value()); + + auto handle2 = allocator->allocate(16); + ASSERT_TRUE(handle2.has_value()); + + handle.reset(); + handle = allocator->allocate(1023); + ASSERT_TRUE(handle.has_value()); +} + +// The original implementation will fail this test. +TEST_F(OffsetAllocatorTest, RandomRepeatAllocationSameSizeAfterFree) { + const uint32 ALLOCATOR_SIZE = bin_sizes[NUM_BINS - 1]; + constexpr uint32 MAX_ALLOCS = 10000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution size_dist(1, ALLOCATOR_SIZE / 100); + + std::vector handles; + std::vector alloc_sizes; + for (uint32 i = 0; i < 2000; ++i) { + uint32 size = size_dist(gen); + auto handle = allocator->allocate(size); + if (handle.has_value()) { + handles.push_back(std::move(*handle)); + alloc_sizes.push_back(size); + } + + std::uniform_int_distribution index_dist(0, handles.size() - 1); + uint32 index = index_dist(gen); + std::swap(handles[index], handles.back()); + std::swap(alloc_sizes[index], alloc_sizes.back()); + uint32 test_size = alloc_sizes.back(); + handles.pop_back(); + alloc_sizes.pop_back(); + + auto handle2 = allocator->allocate(test_size); + ASSERT_TRUE(handle2.has_value()); + handles.push_back(std::move(*handle2)); + alloc_sizes.push_back(test_size); + } +} + +// Test when the size multiplier is more than one. 
+TEST_F(OffsetAllocatorTest, BasicLargeAllocatorSize) { + // The size multiplier is larger than 1 when the allocator size is larger + // than MAX_BIN_SIZE. + constexpr size_t MIN_BUFFER_SIZE = (1ull << 30); + constexpr size_t MAX_BUFFER_SIZE = (1ull << 40); + constexpr uint32 MAX_ALLOCS = 10000; + + for (size_t buffer_size = MIN_BUFFER_SIZE; buffer_size <= MAX_BUFFER_SIZE; + buffer_size *= 2) { + auto allocator = + std::make_shared(0, buffer_size, MAX_ALLOCS); + size_t max_alloc_size = allocator->storageReport().largestFreeRegion; + // The largest free region equals buffer size only in this specific + // buffer size. + ASSERT_EQ(max_alloc_size, buffer_size); + + auto handle = allocator->allocate(1); + ASSERT_TRUE(handle.has_value()); + handle.reset(); + + handle = allocator->allocate(max_alloc_size - 1); + ASSERT_TRUE(handle.has_value()); + handle.reset(); + + handle = allocator->allocate(max_alloc_size); + ASSERT_TRUE(handle.has_value()); + EXPECT_EQ(handle->size(), max_alloc_size); + } +} + +// Test when the size multiplier is more than one. +TEST_F(OffsetAllocatorTest, PowerOfTwoLargeAllocatorSize) { + // The size multiplier is larger than 1 when the allocator size is larger + // than MAX_BIN_SIZE. 
+ constexpr size_t MIN_BUFFER_SIZE = (1ull << 30); + constexpr size_t MAX_BUFFER_SIZE = (1ull << 40); + constexpr uint32 MAX_ALLOCS = 10000; + std::random_device rd; + std::mt19937 gen(rd()); + for (size_t buffer_size = MIN_BUFFER_SIZE; buffer_size <= MAX_BUFFER_SIZE; + buffer_size *= 2) { + auto allocator = + std::make_shared(0, buffer_size, MAX_ALLOCS); + size_t max_alloc_size = buffer_size / 100; + + std::vector handles; + std::vector alloc_sizes; + std::uniform_int_distribution size_dist(1, max_alloc_size); + for (uint32 i = 0; i < 200; ++i) { + size_t size = size_dist(gen); + auto handle = allocator->allocate(size); + if (handle.has_value()) { + handles.push_back(std::move(*handle)); + alloc_sizes.push_back(size); + } + + std::uniform_int_distribution index_dist( + 0, handles.size() - 1); + uint32 index = index_dist(gen); + std::swap(handles[index], handles.back()); + std::swap(alloc_sizes[index], alloc_sizes.back()); + uint32 test_size = alloc_sizes.back(); + handles.pop_back(); + alloc_sizes.pop_back(); + + auto handle2 = allocator->allocate(test_size); + ASSERT_TRUE(handle2.has_value()); + handles.push_back(std::move(*handle2)); + alloc_sizes.push_back(test_size); + } + } +} + +// Test when the size multiplier is more than one. +TEST_F(OffsetAllocatorTest, MaxAllocSizeWithLargeAllocatorSize) { + // The size multiplier is larger than 1 when the allocator size is larger + // than MAX_BIN_SIZE. 
+ constexpr size_t MIN_BUFFER_SIZE = (1ull << 31) + 1; + constexpr size_t MAX_BUFFER_SIZE = (1ull << 40); + constexpr uint32 MAX_ALLOCS = 10000; + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution buffer_size_dist(MIN_BUFFER_SIZE, + MAX_BUFFER_SIZE); + for (int i = 0; i < 100; i++) { + size_t buffer_size = buffer_size_dist(gen); + auto allocator = + std::make_shared(0, buffer_size, MAX_ALLOCS); + size_t max_alloc_size = allocator->storageReport().largestFreeRegion; + ASSERT_GT(max_alloc_size, buffer_size / 2); + + auto handle = allocator->allocate(max_alloc_size); + ASSERT_TRUE(handle.has_value()); + } +} + +// Test when the size multiplier is more than one. +TEST_F(OffsetAllocatorTest, RandomSmallAllocWithLargeAllocatorSize) { + // The size multiplier is larger than 1 when the allocator size is larger + // than MAX_BIN_SIZE. + constexpr size_t MIN_BUFFER_SIZE = (1ull << 31) + 1; + constexpr size_t MAX_BUFFER_SIZE = (1ull << 40); + constexpr uint32 MAX_ALLOCS = 10000; + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution buffer_size_dist(MIN_BUFFER_SIZE, + MAX_BUFFER_SIZE); + for (int i = 0; i < 100; i++) { + size_t buffer_size = buffer_size_dist(gen); + auto allocator = + std::make_shared(0, buffer_size, MAX_ALLOCS); + size_t max_alloc_size = buffer_size / 100; + + std::vector handles; + std::vector alloc_sizes; + std::uniform_int_distribution size_dist(1, max_alloc_size); + for (uint32 i = 0; i < 200; ++i) { + size_t size = size_dist(gen); + auto handle = allocator->allocate(size); + if (handle.has_value()) { + handles.push_back(std::move(*handle)); + alloc_sizes.push_back(size); + } + + std::uniform_int_distribution index_dist( + 0, handles.size() - 1); + uint32 index = index_dist(gen); + std::swap(handles[index], handles.back()); + std::swap(alloc_sizes[index], alloc_sizes.back()); + uint32 test_size = alloc_sizes.back(); + handles.pop_back(); + alloc_sizes.pop_back(); + + auto handle2 = 
allocator->allocate(test_size); + ASSERT_TRUE(handle2.has_value()); + handles.push_back(std::move(*handle2)); + alloc_sizes.push_back(test_size); + } + } +} + +// ========== EDGE CASE TESTS, Generated by AI ========== + +// Test zero size allocation - should fail +TEST_F(OffsetAllocatorTest, ZeroSizeAllocation) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + auto handle = allocator->allocate(0); + EXPECT_FALSE(handle.has_value()) << "Zero size allocation should fail"; +} + +// Test allocation size of 1 byte (minimum valid size) +TEST_F(OffsetAllocatorTest, OneByteAllocation) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + auto handle = allocator->allocate(1); + ASSERT_TRUE(handle.has_value()); + EXPECT_TRUE(handle->isValid()); + EXPECT_EQ(handle->size(), 1); + EXPECT_NE(handle->address(), OffsetAllocation::NO_SPACE); +} + +// Test allocation at exact allocator capacity +TEST_F(OffsetAllocatorTest, ExactCapacityAllocation) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + auto handle = allocator->allocate(ALLOCATOR_SIZE); + ASSERT_TRUE(handle.has_value()); + EXPECT_EQ(handle->size(), ALLOCATOR_SIZE); + EXPECT_EQ(handle->address(), 0); // Should start at base address + + // Verify no more space available + auto handle2 = allocator->allocate(1); + EXPECT_FALSE(handle2.has_value()); +} + +// Test allocation slightly larger than capacity +TEST_F(OffsetAllocatorTest, OversizeAllocation) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + auto handle = allocator->allocate(ALLOCATOR_SIZE + 1); + 
EXPECT_FALSE(handle.has_value()) + << "Allocation larger than capacity should fail"; +} + +// Test allocation with size just below bin size +TEST_F(OffsetAllocatorTest, JustBelowBinSizeAllocation) { + constexpr uint32 ALLOCATOR_SIZE = 2048; + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Allocate size just below a bin size + auto handle = allocator->allocate(1023); + ASSERT_TRUE(handle.has_value()); + EXPECT_EQ(handle->size(), 1023); + + // Should still be able to allocate the remainder + auto handle2 = allocator->allocate(1024); + ASSERT_TRUE(handle2.has_value()); + EXPECT_EQ(handle2->size(), 1024); +} + +// Test maximum allocation count edge case +TEST_F(OffsetAllocatorTest, MaxAllocationCountEdgeCase) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 10; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + std::vector handles; + + // Allocate up to the limit + for (uint32 i = 0; i < MAX_ALLOCS - 2; ++i) { + auto handle = allocator->allocate(1024); + ASSERT_TRUE(handle.has_value()) << "Failed at iteration " << i; + handles.push_back(std::move(*handle)); + } + + // Try one more allocation - should fail + auto handle = allocator->allocate(1024); + EXPECT_FALSE(handle.has_value()) << "Should fail at max allocation count"; + + // Free one allocation + handles.pop_back(); + + // Should be able to allocate again + handle = allocator->allocate(1024); + EXPECT_TRUE(handle.has_value()); +} + +// Test very small allocator size +TEST_F(OffsetAllocatorTest, VerySmallAllocatorSize) { + constexpr uint32 ALLOCATOR_SIZE = 16; // Very small + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + auto handle = allocator->allocate(16); + ASSERT_TRUE(handle.has_value()); + EXPECT_EQ(handle->size(), 16); + + // Should not be able to allocate more + auto handle2 = allocator->allocate(1); + 
EXPECT_FALSE(handle2.has_value()); +} + +// Test allocation with size equal to allocator size minus 1 +TEST_F(OffsetAllocatorTest, AllocatorSizeMinusOne) { + constexpr uint32 ALLOCATOR_SIZE = 1024; + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + auto handle = allocator->allocate(ALLOCATOR_SIZE - 1); + ASSERT_TRUE(handle.has_value()); + EXPECT_EQ(handle->size(), ALLOCATOR_SIZE - 1); +} + +// Test allocation with size that is a power of 2 +TEST_F(OffsetAllocatorTest, PowerOfTwoAllocation) { + constexpr uint32 ALLOCATOR_SIZE = 2048; + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Test various power of 2 sizes + std::vector power_of_two_sizes = {1, 2, 4, 8, 16, 32, + 64, 128, 256, 512, 1024}; + + for (uint32 size : power_of_two_sizes) { + if (size <= ALLOCATOR_SIZE) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) + << "Failed to allocate size: " << size; + EXPECT_EQ(handle->size(), size); + // Handle will be automatically freed when it goes out of scope + } + } +} + +// ========== BIN SYSTEM TESTS, Generated by AI ========== + +// Test bin size calculations and selection +TEST_F(OffsetAllocatorTest, BinSizeCalculation) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Test that allocations are placed in appropriate bins + // SmallFloat::uintToFloatRoundUp should determine the bin + auto handle1 = allocator->allocate(100); + ASSERT_TRUE(handle1.has_value()); + + auto handle2 = allocator->allocate(200); + ASSERT_TRUE(handle2.has_value()); + + auto handle3 = allocator->allocate(500); + ASSERT_TRUE(handle3.has_value()); + + // All allocations should be valid and non-overlapping + EXPECT_TRUE(handle1->isValid()); + EXPECT_TRUE(handle2->isValid()); + EXPECT_TRUE(handle3->isValid()); +} + +// 
Test bin overflow scenarios +TEST_F(OffsetAllocatorTest, BinOverflowScenarios) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Fill up a specific bin size with many small allocations + std::vector handles; + uint32 small_size = 64; // Choose a small bin size + + // Allocate many small blocks to potentially overflow the bin + for (int i = 0; i < 100; ++i) { + auto handle = allocator->allocate(small_size); + if (handle.has_value()) { + handles.push_back(std::move(*handle)); + } else { + break; // Bin is full or out of memory + } + } + + // Verify all allocations are valid + for (const auto& handle : handles) { + EXPECT_TRUE(handle.isValid()); + EXPECT_EQ(handle.size(), small_size); + } +} + +// Test bin merging behavior when adjacent blocks are freed +TEST_F(OffsetAllocatorTest, BinMergingBehavior) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Allocate three adjacent blocks + auto handle1 = allocator->allocate(1024); + auto handle2 = allocator->allocate(1024); + auto handle3 = allocator->allocate(1024); + + ASSERT_TRUE(handle1.has_value()); + ASSERT_TRUE(handle2.has_value()); + ASSERT_TRUE(handle3.has_value()); + + // Free the middle block first + handle2 = std::nullopt; + + // Free the first block - should merge with the freed middle block + handle1 = std::nullopt; + + // Free the third block - should merge with the large freed block + handle3 = std::nullopt; + + // Now we should be able to allocate the entire space again + auto large_handle = allocator->allocate(ALLOCATOR_SIZE); + ASSERT_TRUE(large_handle.has_value()); + EXPECT_EQ(large_handle->size(), ALLOCATOR_SIZE); +} + +// Test bin selection for edge case sizes +TEST_F(OffsetAllocatorTest, BinSelectionEdgeCases) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; 
// 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Test sizes that are just below and above bin boundaries + std::vector edge_sizes = { + 1, // Minimum size + 2, // Power of 2 + 3, // Just above power of 2 + 7, // Just below power of 2 + 8, // Power of 2 + 15, // Just below power of 2 + 16, // Power of 2 + 31, // Just below power of 2 + 32, // Power of 2 + 63, // Just below power of 2 + 64, // Power of 2 + 127, // Just below power of 2 + 128, // Power of 2 + 255, // Just below power of 2 + 256, // Power of 2 + 511, // Just below power of 2 + 512, // Power of 2 + 1023, // Just below power of 2 + 1024, // Power of 2 + }; + + for (uint32 size : edge_sizes) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) << "Failed to allocate size: " << size; + EXPECT_EQ(handle->size(), size); + // Handle will be automatically freed when it goes out of scope + } +} + +// Test bin system with very large allocations +TEST_F(OffsetAllocatorTest, BinSystemLargeAllocations) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024 * 1024; // 1GB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Test large allocations that should go into high-numbered bins + std::vector large_sizes = { + 1024 * 1024, // 1MB + 2 * 1024 * 1024, // 2MB + 4 * 1024 * 1024, // 4MB + 8 * 1024 * 1024, // 8MB + 16 * 1024 * 1024, // 16MB + 32 * 1024 * 1024, // 32MB + 64 * 1024 * 1024, // 64MB + 128 * 1024 * 1024, // 128MB + 256 * 1024 * 1024, // 256MB + 512 * 1024 * 1024, // 512MB + }; + + for (uint32 size : large_sizes) { + if (size <= ALLOCATOR_SIZE) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) + << "Failed to allocate size: " << size; + EXPECT_EQ(handle->size(), size); + // Handle will be automatically freed when it goes out of scope + } + } +} + +// Test bin system with mixed allocation patterns +TEST_F(OffsetAllocatorTest, 
BinSystemMixedPatterns) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + std::vector handles; + + // Mix of small, medium, and large allocations + std::vector mixed_sizes = {16, 64, 256, 1024, 4096, 16384, 65536}; + + for (uint32 size : mixed_sizes) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) << "Failed to allocate size: " << size; + EXPECT_EQ(handle->size(), size); + handles.push_back(std::move(*handle)); + } + + // Verify all allocations are valid + for (const auto& handle : handles) { + EXPECT_TRUE(handle.isValid()); + } +} + +// Test bin system with repeated allocation/deallocation cycles +TEST_F(OffsetAllocatorTest, BinSystemRepeatedCycles) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + std::vector test_sizes = {64, 128, 256, 512, 1024, 2048, 4096}; + + // Perform multiple allocation/deallocation cycles + for (int cycle = 0; cycle < 10; ++cycle) { + std::vector cycle_handles; + + // Allocate blocks + for (uint32 size : test_sizes) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) + << "Failed to allocate size: " << size << " in cycle " << cycle; + EXPECT_EQ(handle->size(), size); + cycle_handles.push_back(std::move(*handle)); + } + + // Verify all allocations are valid + for (const auto& handle : cycle_handles) { + EXPECT_TRUE(handle.isValid()); + } + + // All handles will be automatically freed when cycle_handles goes out + // of scope + } + + // After all cycles, should be able to allocate the full size again + auto full_handle = allocator->allocate(ALLOCATOR_SIZE); + ASSERT_TRUE(full_handle.has_value()); + EXPECT_EQ(full_handle->size(), ALLOCATOR_SIZE); +} + +// Test bin system with allocation sizes that don't match bin boundaries 
+TEST_F(OffsetAllocatorTest, BinSystemNonAlignedSizes) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Test sizes that don't align with typical bin boundaries + std::vector non_aligned_sizes = { + 17, // Not a power of 2 + 33, // Not a power of 2 + 65, // Not a power of 2 + 129, // Not a power of 2 + 257, // Not a power of 2 + 513, // Not a power of 2 + 1025, // Just above power of 2 + 2049, // Just above power of 2 + 4097, // Just above power of 2 + }; + + for (uint32 size : non_aligned_sizes) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) << "Failed to allocate size: " << size; + EXPECT_EQ(handle->size(), size); + // Handle will be automatically freed when it goes out of scope + } +} + +// Test bin system with allocation sizes that are prime numbers +TEST_F(OffsetAllocatorTest, BinSystemPrimeSizes) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Test sizes that are prime numbers (should be challenging for bin system) + std::vector prime_sizes = { + 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, + 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, + 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, + 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, + 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, + 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, + 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, + 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, + 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, + 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, + 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 
883, 887, + 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, + 1009, 1013}; + + for (uint32 size : prime_sizes) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) + << "Failed to allocate prime size: " << size; + EXPECT_EQ(handle->size(), size); + // Handle will be automatically freed when it goes out of scope + } +} + +// Test bin system with Fibonacci sequence sizes +TEST_F(OffsetAllocatorTest, BinSystemFibonacciSizes) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Test sizes that follow the Fibonacci sequence + std::vector fibonacci_sizes = { + 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, + 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, + 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811}; + + for (uint32 size : fibonacci_sizes) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) + << "Failed to allocate Fibonacci size: " << size; + EXPECT_EQ(handle->size(), size); + // Handle will be automatically freed when it goes out of scope + } +} + +// Test bin system with allocation sizes that are multiples of common page sizes +TEST_F(OffsetAllocatorTest, BinSystemPageSizeMultiples) { + constexpr uint32 ALLOCATOR_SIZE = 1024 * 1024; // 1MB + constexpr uint32 MAX_ALLOCS = 1000; + auto allocator = + std::make_shared(0, ALLOCATOR_SIZE, MAX_ALLOCS); + + // Test sizes that are multiples of common page sizes (4KB, 8KB, 16KB, 64KB) + std::vector page_size_multiples = { + 4096, // 4KB + 8192, // 8KB + 16384, // 16KB + 32768, // 32KB + 65536, // 64KB + 131072, // 128KB + 262144, // 256KB + 524288, // 512KB + 1048576 // 1MB + }; + + for (uint32 size : page_size_multiples) { + auto handle = allocator->allocate(size); + ASSERT_TRUE(handle.has_value()) + << "Failed to allocate page size multiple: " << size; + EXPECT_EQ(handle->size(), size); + // Handle will be automatically freed 
when it goes out of scope + } +} + +int main(int argc, char** argv) { + // Initialize Google Test + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} \ No newline at end of file