Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mooncake-store/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ include_directories(

# Add subdirectories
add_subdirectory(src)
add_subdirectory(tests)
add_subdirectory(tests)
add_subdirectory(benchmarks)
3 changes: 3 additions & 0 deletions mooncake-store/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add allocator benchmark executable, built from the single source file
# allocator_bench.cpp in this directory.
add_executable(allocator_bench allocator_bench.cpp)
# Link the benchmark privately against the mooncake_store target.
target_link_libraries(allocator_bench PRIVATE mooncake_store)
179 changes: 179 additions & 0 deletions mooncake-store/benchmarks/allocator_bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#include <iostream>
#include <random>
#include <vector>
#include <chrono>
#include <algorithm>
#include <numeric>
#include <iomanip>

#include "offset_allocator/offset_allocator.hpp"

using namespace mooncake::offset_allocator;

class OffsetAllocatorBenchHelper {
public:
OffsetAllocatorBenchHelper(uint64_t baseAddress, uint32_t poolSize, uint32_t maxAllocs)
: pool_size_(poolSize),
allocated_size_(0),
allocator_(OffsetAllocator::create(baseAddress, poolSize, maxAllocs)),
rd_(),
gen_(rd_()) {}

void allocate(uint32_t size) {
while (true) {
auto handle = allocator_->allocate(size);
if (handle.has_value()) {
allocated_.push_back(std::move(*handle));
allocated_sizes_.push_back(size);
allocated_size_ += size;
break;
}
if (allocated_.size() == 0) {
break;
}
std::uniform_int_distribution<uint32_t> dist(0,
allocated_.size() - 1);
auto index = dist(gen_);
std::swap(allocated_[index], allocated_.back());
std::swap(allocated_sizes_[index], allocated_sizes_.back());
allocated_size_ -= allocated_sizes_.back();
allocated_.pop_back();
allocated_sizes_.pop_back();
}
}

double get_allocated_ratio() const {
return static_cast<double>(allocated_size_) / pool_size_;
}

private:
uint64_t pool_size_;
uint64_t allocated_size_;
std::shared_ptr<OffsetAllocator> allocator_;
std::vector<OffsetAllocationHandle> allocated_;
std::vector<uint32_t> allocated_sizes_;
std::random_device rd_;
std::mt19937 gen_;
};

/**
 * Runs the fixed-size allocation benchmark and prints one result line per
 * allocation size.
 *
 * For each size a fresh BenchHelper is created, warmed up with
 * pool_size / alloc_size allocations, and then timed over one million
 * further allocations while tracking the minimum and average utilization
 * ratio reported by the helper.
 *
 * @tparam BenchHelper type constructible from (base address, pool size,
 *         max allocations) that provides allocate(size) and
 *         get_allocated_ratio().
 */
template <typename BenchHelper>
void uniform_size_allocation_benchmark() {
    std::cout << std::endl << "=== Uniform Size Allocation Benchmark ===" << std::endl;
    const size_t max_pool_size = 2ull * 1024 * 1024 * 1024;

    // Sizes are derived from the bases 32, 128, 512, ... (multiplying by 4
    // while below 2^26). Each call appends one derived size per base, so the
    // list is grouped by variant: exact, -17, +17, *0.9, *1.1.
    std::vector<uint32_t> allocation_sizes;
    const auto append_sizes = [&allocation_sizes](auto derive) {
        for (uint32_t base = 32; base < (1 << 26); base *= 4) {
            allocation_sizes.push_back(derive(base));
        }
    };
    append_sizes([](uint32_t base) { return base; });
    append_sizes([](uint32_t base) { return base - 17; });
    append_sizes([](uint32_t base) { return base + 17; });
    // The scaled variants yield a double that is truncated to uint32_t when
    // stored.
    append_sizes([](uint32_t base) { return base * 0.9; });
    append_sizes([](uint32_t base) { return base * 1.1; });

    for (auto alloc_size : allocation_sizes) {
        // Small allocation sizes get a 16x smaller pool; otherwise the
        // benchmark would run too slowly.
        size_t pool_size;
        if (alloc_size < 1024) {
            pool_size = max_pool_size / 16;
        } else {
            pool_size = max_pool_size;
        }
        size_t max_allocs = pool_size / alloc_size + 10;
        BenchHelper bench_helper(0x1000, pool_size, max_allocs);

        // Warmup: issue as many requests as would fit in the pool if no
        // space were wasted.
        int warmup_num = pool_size / alloc_size;
        for (int done = 0; done < warmup_num; ++done) {
            bench_helper.allocate(alloc_size);
        }

        // START of the timed region.
        auto start_time = std::chrono::high_resolution_clock::now();
        double min_util_ratio = 1.0;
        double total_util_ratio = 0.0;
        int benchmark_num = 1000000;
        for (int iter = 0; iter < benchmark_num; ++iter) {
            bench_helper.allocate(alloc_size);
            const double util_ratio = bench_helper.get_allocated_ratio();
            min_util_ratio = std::min(min_util_ratio, util_ratio);
            total_util_ratio += util_ratio;
        }
        auto end_time = std::chrono::high_resolution_clock::now();
        // END of the timed region.

        const auto elapsed =
            std::chrono::duration_cast<std::chrono::nanoseconds>(end_time -
                                                                 start_time);
        const double avg_util_ratio = total_util_ratio / benchmark_num;
        // Integer division: the per-operation time is reported in whole
        // nanoseconds.
        const auto ns_per_op = elapsed.count() / benchmark_num;

        std::cout << "Alloc size: " << alloc_size
                  << ", min util ratio: " << min_util_ratio
                  << ", avg util ratio: " << avg_util_ratio
                  << ", time: " << ns_per_op << " ns" << std::endl;
    }
}

/**
 * Runs the random-size allocation benchmark and prints utilization
 * statistics plus the average time per allocation.
 *
 * Request sizes are drawn uniformly from [1024, 2^26]. The helper is warmed
 * up until the requested sizes add up to at least the pool size, then one
 * million further allocations are timed and the utilization ratio after each
 * one is recorded.
 *
 * @tparam BenchHelper type constructible from (base address, pool size,
 *         max allocations) that provides allocate(size) and
 *         get_allocated_ratio().
 */
template <typename BenchHelper>
void random_size_allocation_benchmark() {
    std::cout << std::endl << "=== Random Size Allocation Benchmark ===" << std::endl;
    const size_t pool_size = 2ull * 1024 * 1024 * 1024;
    const size_t max_alloc_size = 1ull << 26;
    const size_t min_alloc_size = 1024;

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<uint32_t> dist(min_alloc_size, max_alloc_size);

    // Size the allocator for the worst case where every request is minimal.
    size_t max_allocs = pool_size / min_alloc_size + 10;
    BenchHelper bench_helper(0x1000, pool_size, max_allocs);

    // Warmup: keep allocating until the total requested size reaches the
    // pool size.
    size_t warmup_size = 0;
    while (warmup_size < pool_size) {
        size_t alloc_size = dist(gen);
        bench_helper.allocate(alloc_size);
        warmup_size += alloc_size;
    }

    int benchmark_num = 1000000;
    std::vector<double> util_ratios;
    util_ratios.reserve(benchmark_num);

    // Timed region.
    auto start_time = std::chrono::high_resolution_clock::now();
    for (int iter = 0; iter < benchmark_num; ++iter) {
        size_t alloc_size = dist(gen);
        bench_helper.allocate(alloc_size);
        util_ratios.push_back(bench_helper.get_allocated_ratio());
    }
    auto end_time = std::chrono::high_resolution_clock::now();

    // Metrics.
    const auto elapsed_ns =
        std::chrono::duration_cast<std::chrono::nanoseconds>(end_time -
                                                             start_time)
            .count();
    const double avg_time_ns = elapsed_ns / static_cast<double>(benchmark_num);

    std::sort(util_ratios.begin(), util_ratios.end());

    // Reads the sorted (ascending) sample at the given fraction of its
    // length; the fractional index is truncated.
    const auto ratio_at = [&util_ratios](double fraction) {
        return util_ratios[util_ratios.size() * fraction];
    };

    const double min_util = util_ratios.front();
    const double max_util = util_ratios.back();
    // Percentiles are taken from the low end of the sorted ratios: "p90" is
    // the sample at the 10% position and "p99" the one at the 1% position.
    const double p50 = ratio_at(0.50);
    const double p90 = ratio_at(0.10);
    const double p99 = ratio_at(0.01);

    const double util_sum =
        std::accumulate(util_ratios.begin(), util_ratios.end(), 0.0);
    const double mean_util = util_sum / util_ratios.size();

    std::cout << std::fixed << std::setprecision(6);
    std::cout << "util ratio (min / p99 / p90 / p50 / max / avg): " << min_util
              << " / " << p99 << " / " << p90 << " / " << p50 << " / "
              << max_util << " / " << mean_util << std::endl;
    std::cout << "avg alloc time: " << avg_time_ns << " ns/op" << std::endl;
}

int main() {
std::cout << "=== OffsetAllocator Benchmark ===" << std::endl;
uniform_size_allocation_benchmark<OffsetAllocatorBenchHelper>();
random_size_allocation_benchmark<OffsetAllocatorBenchHelper>();
}
176 changes: 176 additions & 0 deletions mooncake-store/benchmarks/allocator_bench_result.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# Allocator Memory Utilization Benchmark
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Move it to doc dir?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. Will do in next PR


## Execution

```bash
./mooncake-store/benchmarks/allocator_bench
```

## Result

- alloc size: The size of each object
- utilization ratio: The total allocated size / total space
- time: time in nanoseconds for each object allocation
- OffsetAllocator optimization: whether the allocated size is rounded up to a bin size

### Uniform size, size equals power of 2

**OffsetAllocator (After Optimization)**

```
Alloc size: 32, min util ratio: 1, avg util ratio: 1, time: 544 ns
Alloc size: 128, min util ratio: 1, avg util ratio: 1, time: 417 ns
Alloc size: 512, min util ratio: 1, avg util ratio: 1, time: 174 ns
Alloc size: 2048, min util ratio: 1, avg util ratio: 1, time: 406 ns
Alloc size: 8192, min util ratio: 1, avg util ratio: 1, time: 180 ns
Alloc size: 32768, min util ratio: 1, avg util ratio: 1, time: 133 ns
Alloc size: 131072, min util ratio: 1, avg util ratio: 1, time: 109 ns
Alloc size: 524288, min util ratio: 1, avg util ratio: 1, time: 100 ns
Alloc size: 2097152, min util ratio: 1, avg util ratio: 1, time: 99 ns
Alloc size: 8388608, min util ratio: 1, avg util ratio: 1, time: 99 ns
Alloc size: 33554432, min util ratio: 1, avg util ratio: 1, time: 98 ns
```

**OffsetAllocator (Before Optimization)**

```
Alloc size: 32, min util ratio: 1, avg util ratio: 1, time: 539 ns
Alloc size: 128, min util ratio: 1, avg util ratio: 1, time: 419 ns
Alloc size: 512, min util ratio: 1, avg util ratio: 1, time: 217 ns
Alloc size: 2048, min util ratio: 1, avg util ratio: 1, time: 408 ns
Alloc size: 8192, min util ratio: 1, avg util ratio: 1, time: 175 ns
Alloc size: 32768, min util ratio: 1, avg util ratio: 1, time: 130 ns
Alloc size: 131072, min util ratio: 1, avg util ratio: 1, time: 107 ns
Alloc size: 524288, min util ratio: 1, avg util ratio: 1, time: 99 ns
Alloc size: 2097152, min util ratio: 1, avg util ratio: 1, time: 100 ns
Alloc size: 8388608, min util ratio: 1, avg util ratio: 1, time: 98 ns
Alloc size: 33554432, min util ratio: 1, avg util ratio: 1, time: 98 ns
```

### Uniform size, size equals power of 2 +/- 17

**OffsetAllocator (After Optimization)**

```
Alloc size: 15, min util ratio: 1, avg util ratio: 1, time: 568 ns
Alloc size: 111, min util ratio: 0.991071, avg util ratio: 0.991071, time: 441 ns
Alloc size: 495, min util ratio: 0.966797, avg util ratio: 0.966797, time: 178 ns
Alloc size: 2031, min util ratio: 0.991699, avg util ratio: 0.991699, time: 418 ns
Alloc size: 8175, min util ratio: 0.997925, avg util ratio: 0.997925, time: 170 ns
Alloc size: 32751, min util ratio: 0.999481, avg util ratio: 0.999481, time: 133 ns
Alloc size: 131055, min util ratio: 0.99987, avg util ratio: 0.99987, time: 109 ns
Alloc size: 524271, min util ratio: 0.999968, avg util ratio: 0.999968, time: 100 ns
Alloc size: 2097135, min util ratio: 0.999992, avg util ratio: 0.999992, time: 99 ns
Alloc size: 8388591, min util ratio: 0.999998, avg util ratio: 0.999998, time: 98 ns
Alloc size: 33554415, min util ratio: 0.999999, avg util ratio: 0.999999, time: 99 ns
Alloc size: 49, min util ratio: 0.942308, avg util ratio: 0.942308, time: 508 ns
Alloc size: 145, min util ratio: 0.906249, avg util ratio: 0.906249, time: 372 ns
Alloc size: 529, min util ratio: 0.918399, avg util ratio: 0.918399, time: 172 ns
Alloc size: 2065, min util ratio: 0.896267, avg util ratio: 0.896267, time: 403 ns
Alloc size: 8209, min util ratio: 0.89073, avg util ratio: 0.89073, time: 174 ns
Alloc size: 32785, min util ratio: 0.889347, avg util ratio: 0.889347, time: 131 ns
Alloc size: 131089, min util ratio: 0.88897, avg util ratio: 0.88897, time: 105 ns
Alloc size: 524305, min util ratio: 0.888701, avg util ratio: 0.888701, time: 102 ns
Alloc size: 2097169, min util ratio: 0.888679, avg util ratio: 0.888679, time: 100 ns
Alloc size: 8388625, min util ratio: 0.886721, avg util ratio: 0.886721, time: 100 ns
Alloc size: 33554449, min util ratio: 0.875, avg util ratio: 0.875, time: 100 ns
```

**OffsetAllocator (Before Optimization)**

```
Alloc size: 15, min util ratio: 1, avg util ratio: 1, time: 566 ns
Alloc size: 111, min util ratio: 0.669866, avg util ratio: 0.710845, time: 703 ns
Alloc size: 495, min util ratio: 0.665779, avg util ratio: 0.676874, time: 238 ns
Alloc size: 2031, min util ratio: 0.668333, avg util ratio: 0.705411, time: 637 ns
Alloc size: 8175, min util ratio: 0.666175, avg util ratio: 0.676474, time: 242 ns
Alloc size: 32751, min util ratio: 0.664435, avg util ratio: 0.669078, time: 168 ns
Alloc size: 131055, min util ratio: 0.66062, avg util ratio: 0.667341, time: 124 ns
Alloc size: 524271, min util ratio: 0.653055, avg util ratio: 0.666993, time: 118 ns
Alloc size: 2097135, min util ratio: 0.64062, avg util ratio: 0.666873, time: 116 ns
Alloc size: 8388591, min util ratio: 0.605468, avg util ratio: 0.667812, time: 115 ns
Alloc size: 33554415, min util ratio: 0.5625, avg util ratio: 0.670944, time: 116 ns
Alloc size: 49, min util ratio: 0.692229, avg util ratio: 0.753062, time: 1122 ns
Alloc size: 145, min util ratio: 0.667789, avg util ratio: 0.700907, time: 572 ns
Alloc size: 529, min util ratio: 0.66577, avg util ratio: 0.676238, time: 238 ns
Alloc size: 2065, min util ratio: 0.667926, avg util ratio: 0.704884, time: 632 ns
Alloc size: 8209, min util ratio: 0.665708, avg util ratio: 0.676372, time: 239 ns
Alloc size: 32785, min util ratio: 0.664224, avg util ratio: 0.669058, time: 168 ns
Alloc size: 131089, min util ratio: 0.659631, avg util ratio: 0.667287, time: 129 ns
Alloc size: 524305, min util ratio: 0.652609, avg util ratio: 0.666884, time: 122 ns
Alloc size: 2097169, min util ratio: 0.638677, avg util ratio: 0.666516, time: 120 ns
Alloc size: 8388625, min util ratio: 0.60547, avg util ratio: 0.665131, time: 121 ns
Alloc size: 33554449, min util ratio: 0.546875, avg util ratio: 0.660917, time: 120 ns
```

### Uniform size, size equals power of 2 multiplied by 0.9 or 1.1

**OffsetAllocator (After Optimization)**

```
Alloc size: 28, min util ratio: 1, avg util ratio: 1, time: 543 ns
Alloc size: 115, min util ratio: 0.958333, avg util ratio: 0.958333, time: 418 ns
Alloc size: 460, min util ratio: 0.958332, avg util ratio: 0.958332, time: 189 ns
Alloc size: 1843, min util ratio: 0.959896, avg util ratio: 0.959896, time: 418 ns
Alloc size: 7372, min util ratio: 0.959895, avg util ratio: 0.959895, time: 197 ns
Alloc size: 29491, min util ratio: 0.959993, avg util ratio: 0.959993, time: 135 ns
Alloc size: 117964, min util ratio: 0.959979, avg util ratio: 0.959979, time: 111 ns
Alloc size: 471859, min util ratio: 0.959985, avg util ratio: 0.959985, time: 100 ns
Alloc size: 1887436, min util ratio: 0.959765, avg util ratio: 0.959765, time: 99 ns
Alloc size: 7549747, min util ratio: 0.959766, avg util ratio: 0.959766, time: 99 ns
Alloc size: 30198988, min util ratio: 0.95625, avg util ratio: 0.95625, time: 99 ns
Alloc size: 35, min util ratio: 0.972222, avg util ratio: 0.972222, time: 531 ns
Alloc size: 140, min util ratio: 0.972222, avg util ratio: 0.972222, time: 397 ns
Alloc size: 563, min util ratio: 0.977427, avg util ratio: 0.977427, time: 180 ns
Alloc size: 2252, min util ratio: 0.97743, avg util ratio: 0.97743, time: 389 ns
Alloc size: 9011, min util ratio: 0.977752, avg util ratio: 0.977752, time: 183 ns
Alloc size: 36044, min util ratio: 0.977752, avg util ratio: 0.977752, time: 133 ns
Alloc size: 144179, min util ratio: 0.977739, avg util ratio: 0.977739, time: 106 ns
Alloc size: 576716, min util ratio: 0.977538, avg util ratio: 0.977538, time: 103 ns
Alloc size: 2306867, min util ratio: 0.977539, avg util ratio: 0.977539, time: 99 ns
Alloc size: 9227468, min util ratio: 0.975391, avg util ratio: 0.975391, time: 99 ns
Alloc size: 36909875, min util ratio: 0.9625, avg util ratio: 0.9625, time: 100 ns
```

**OffsetAllocator (Before Optimization)**

```
Alloc size: 28, min util ratio: 1, avg util ratio: 1, time: 539 ns
Alloc size: 115, min util ratio: 0.669299, avg util ratio: 0.709245, time: 701 ns
Alloc size: 460, min util ratio: 0.665825, avg util ratio: 0.677532, time: 255 ns
Alloc size: 1843, min util ratio: 0.669352, avg util ratio: 0.709202, time: 691 ns
Alloc size: 7372, min util ratio: 0.66619, avg util ratio: 0.677401, time: 260 ns
Alloc size: 29491, min util ratio: 0.664311, avg util ratio: 0.669511, time: 172 ns
Alloc size: 117964, min util ratio: 0.661812, avg util ratio: 0.667356, time: 133 ns
Alloc size: 471859, min util ratio: 0.654345, avg util ratio: 0.667048, time: 123 ns
Alloc size: 1887436, min util ratio: 0.640722, avg util ratio: 0.666447, time: 121 ns
Alloc size: 7549747, min util ratio: 0.611719, avg util ratio: 0.666847, time: 119 ns
Alloc size: 30198988, min util ratio: 0.548437, avg util ratio: 0.669799, time: 125 ns
Alloc size: 35, min util ratio: 0.7098, avg util ratio: 0.774162, time: 1306 ns
Alloc size: 140, min util ratio: 0.667934, avg util ratio: 0.702151, time: 599 ns
Alloc size: 563, min util ratio: 0.665599, avg util ratio: 0.675548, time: 239 ns
Alloc size: 2252, min util ratio: 0.667371, avg util ratio: 0.701623, time: 601 ns
Alloc size: 9011, min util ratio: 0.665485, avg util ratio: 0.675528, time: 244 ns
Alloc size: 36044, min util ratio: 0.663248, avg util ratio: 0.668912, time: 170 ns
Alloc size: 144179, min util ratio: 0.660308, avg util ratio: 0.666934, time: 127 ns
Alloc size: 576716, min util ratio: 0.654467, avg util ratio: 0.66679, time: 122 ns
Alloc size: 2306867, min util ratio: 0.633789, avg util ratio: 0.666159, time: 121 ns
Alloc size: 9227468, min util ratio: 0.597266, avg util ratio: 0.666037, time: 118 ns
Alloc size: 36909875, min util ratio: 0.55, avg util ratio: 0.669564, time: 121 ns
```

### Random Size

**OffsetAllocator (After Optimization)**

```
util ratio (min / p99 / p90 / p50 / max / avg): 0.544250 / 0.713338 / 0.779739 / 0.847867 / 0.952591 / 0.841576
avg alloc time: 145.575738 ns/op
```

**OffsetAllocator (Before Optimization)**

```
util ratio (min / p99 / p90 / p50 / max / avg): 0.569255 / 0.712076 / 0.781224 / 0.855046 / 0.976057 / 0.848873
avg alloc time: 142.508508 ns/op
```
Loading
Loading