Skip to content

Commit f4966ba

Browse files
issue/809 支持锁定、缓存的内存分配器
1 parent 0ead67f commit f4966ba

File tree

5 files changed

+222
-9
lines changed

5 files changed

+222
-9
lines changed
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
#include "pinnable_block_allocator.hpp"
2+
3+
#include "../../utils.hpp"
4+
5+
#include <algorithm>
6+
#include <infinirt.h>
7+
#include <stdexcept>
8+
9+
namespace infinicore {
10+
11+
// ------------------- Helper functions -------------------
12+
13+
// Rounds `size` up to the nearest multiple of `alignment`.
//
// @param size       Requested size in bytes (0 stays 0).
// @param alignment  Required granularity in bytes; must be non-zero.
// @return The smallest multiple of `alignment` that is >= `size`.
// @throws std::invalid_argument if `alignment` is 0 (would divide by zero).
// @throws std::overflow_error if the rounded value does not fit in size_t.
//         Without this check a request close to the size_t maximum wraps
//         around to a tiny value and the caller would hand out an undersized buffer.
inline size_t align_up(size_t size, size_t alignment) {
    if (alignment == 0) {
        throw std::invalid_argument("align_up: alignment must be non-zero");
    }

    const size_t remainder = size % alignment;
    if (remainder == 0) {
        return size; // already aligned
    }

    const size_t padding = alignment - remainder;
    const size_t max_size = static_cast<size_t>(-1);
    if (size > max_size - padding) {
        throw std::overflow_error("align_up: aligned size overflows size_t");
    }
    return size + padding;
}
17+
18+
// ------------------- Constructor -------------------
19+
// Creates an allocator bound to `device` and registers the fixed size classes
// (256 KB up to 256 MB), each starting with an empty free list.
// The classes are stored in ascending order: allocate() walks them front to
// back and serves a request from the first class that is large enough.
PinnableBlockAllocator::PinnableBlockAllocator(Device device)
    : device_(device) {
    constexpr size_t kKiB = 1024;
    constexpr size_t kMiB = 1024 * kKiB;
    constexpr size_t kClassSizes[] = {
        256 * kKiB, // 256 KB
        1 * kMiB,   // 1 MB
        4 * kMiB,   // 4 MB
        16 * kMiB,  // 16 MB
        64 * kMiB,  // 64 MB
        256 * kMiB, // 256 MB
    };

    for (const size_t class_bytes : kClassSizes) {
        size_classes_.push_back(SizeClass{class_bytes, {}});
    }
}
30+
31+
// ------------------- allocate -------------------
32+
std::byte *PinnableBlockAllocator::allocate(size_t size) {
33+
std::lock_guard<std::mutex> lock(mutex_);
34+
35+
// Align size to 256 bytes for GPU
36+
size = align_up(size, 256);
37+
38+
std::shared_ptr<Block> block;
39+
40+
// 1. Try size-class allocation for small/medium
41+
for (auto &cls : size_classes_) {
42+
if (size <= cls.block_size) {
43+
if (!cls.free_blocks.empty()) {
44+
block = cls.free_blocks.back();
45+
cls.free_blocks.pop_back();
46+
block->in_use = true;
47+
return reinterpret_cast<std::byte *>(block->ptr);
48+
}
49+
// Allocate a new block for this class
50+
block = std::make_shared<Block>();
51+
block->size = cls.block_size;
52+
block->frozen = pinned_mode_;
53+
block->in_use = true;
54+
55+
INFINICORE_CHECK_ERROR(infinirtMalloc(&block->ptr, block->size));
56+
57+
all_blocks_[block->ptr] = block;
58+
return reinterpret_cast<std::byte *>(block->ptr);
59+
}
60+
}
61+
62+
// 2. Large block allocation
63+
// Try to reuse a frozen or free large block
64+
auto it = std::find_if(large_blocks_.begin(), large_blocks_.end(),
65+
[size](const std::shared_ptr<Block> &b) { return b->size >= size && !b->in_use; });
66+
67+
if (it != large_blocks_.end()) {
68+
block = *it;
69+
block->in_use = true;
70+
block->frozen = block->frozen || pinned_mode_;
71+
return reinterpret_cast<std::byte *>(block->ptr);
72+
}
73+
74+
// Allocate new large block
75+
block = std::make_shared<Block>();
76+
block->size = size;
77+
block->frozen = pinned_mode_;
78+
block->in_use = true;
79+
80+
INFINICORE_CHECK_ERROR(infinirtMalloc(&block->ptr, block->size));
81+
82+
large_blocks_.push_back(block);
83+
all_blocks_[block->ptr] = block;
84+
85+
return reinterpret_cast<std::byte *>(block->ptr);
86+
}
87+
88+
// ------------------- deallocate -------------------
89+
void PinnableBlockAllocator::deallocate(std::byte *ptr) {
90+
if (!ptr) {
91+
return;
92+
}
93+
94+
std::lock_guard<std::mutex> lock(mutex_);
95+
96+
auto it = all_blocks_.find(reinterpret_cast<void *>(ptr));
97+
if (it == all_blocks_.end()) {
98+
throw std::runtime_error("Pointer not allocated by this allocator");
99+
}
100+
101+
auto block = it->second;
102+
if (!block->in_use) {
103+
throw std::runtime_error("Double free detected in PinnableBlockAllocator");
104+
}
105+
106+
block->in_use = false;
107+
108+
if (!block->in_use) {
109+
for (auto &cls : size_classes_) {
110+
if (block->size == cls.block_size) {
111+
cls.free_blocks.push_back(block);
112+
break;
113+
}
114+
}
115+
}
116+
}
117+
118+
// ------------------- trim -------------------
119+
void PinnableBlockAllocator::trim() {
120+
std::lock_guard<std::mutex> lock(mutex_);
121+
// Free non-frozen size-class blocks
122+
for (auto &cls : size_classes_) {
123+
for (auto it = cls.free_blocks.begin(); it != cls.free_blocks.end();) {
124+
if (!(*it)->frozen) {
125+
INFINICORE_CHECK_ERROR(infinirtFree((*it)->ptr));
126+
all_blocks_.erase((*it)->ptr);
127+
it = cls.free_blocks.erase(it);
128+
} else {
129+
++it;
130+
}
131+
}
132+
}
133+
// Free non-frozen large blocks
134+
for (auto it = large_blocks_.begin(); it != large_blocks_.end();) {
135+
if (!(*it)->frozen && !(*it)->in_use) {
136+
INFINICORE_CHECK_ERROR(infinirtFree((*it)->ptr));
137+
all_blocks_.erase((*it)->ptr);
138+
it = large_blocks_.erase(it);
139+
} else {
140+
++it;
141+
}
142+
}
143+
}
144+
145+
// ------------------- Destructor -------------------
146+
// Frees every device block this allocator still tracks (frozen or not,
// in use or not) and empties all bookkeeping containers.
// The result of infinirtFree is intentionally not checked here, so a failed
// free cannot raise an error out of the destructor.
PinnableBlockAllocator::~PinnableBlockAllocator() {
    std::lock_guard<std::mutex> lock(mutex_);

    for (const auto &entry : all_blocks_) {
        void *const device_ptr = entry.second->ptr;
        if (device_ptr) {
            infinirtFree(device_ptr);
        }
    }

    // Drop all references to the (now released) blocks.
    for (auto &cls : size_classes_) {
        cls.free_blocks.clear();
    }
    large_blocks_.clear();
    all_blocks_.clear();
}
159+
160+
} // namespace infinicore
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#pragma once
2+
3+
#include "memory_allocator.hpp"
4+
5+
#include "../context_impl.hpp"
6+
7+
#include <mutex>
8+
#include <unordered_map>
9+
#include <vector>
10+
11+
namespace infinicore {
12+
class PinnableBlockAllocator : public MemoryAllocator {
13+
// Represents a single memory block
14+
struct Block {
15+
void *ptr = nullptr; // Device pointer
16+
size_t size = 0; // Block size in bytes
17+
bool frozen = false; // True if used in pinned/graph mode
18+
bool in_use = false; // Wether the block is currently in use
19+
};
20+
21+
// A simple size-class allocator for small/medium blocks
22+
struct SizeClass {
23+
size_t block_size; // Fixed size for this class
24+
std::vector<std::shared_ptr<Block>> free_blocks;
25+
};
26+
27+
public:
28+
explicit PinnableBlockAllocator(Device device);
29+
~PinnableBlockAllocator();
30+
31+
std::byte *allocate(size_t size) override;
32+
void deallocate(std::byte *ptr) override;
33+
34+
// Switch pinned/graph mode
35+
void set_pin_mode(bool pinned) { pinned_mode_ = pinned; }
36+
37+
// trim cached blocks back to GPU (not pinned)
38+
void trim();
39+
40+
private:
41+
Device device_;
42+
43+
bool pinned_mode_ = false;
44+
45+
std::vector<SizeClass> size_classes_;
46+
std::vector<std::shared_ptr<Block>> large_blocks_;
47+
std::unordered_map<void *, std::shared_ptr<Block>> all_blocks_;
48+
49+
std::mutex mutex_; // Thread safety
50+
};
51+
52+
} // namespace infinicore

src/infinicore/context/allocators/device_caching_allocator.cc renamed to src/infinicore/context/allocators/stream_ordered_allocator.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
1-
#include "device_caching_allocator.hpp"
1+
#include "stream_ordered_allocator.hpp"
22

33
#include <infinirt.h>
44

55
#include "../../utils.hpp"
66

77
namespace infinicore {
8-
DeviceCachingAllocator::DeviceCachingAllocator(Device device) : MemoryAllocator(), device_(device) {}
8+
StreamOrderedAllocator::StreamOrderedAllocator(Device device) : MemoryAllocator(), device_(device) {}
99

10-
std::byte *DeviceCachingAllocator::allocate(size_t size) {
10+
std::byte *StreamOrderedAllocator::allocate(size_t size) {
1111
void *ptr = nullptr;
1212
INFINICORE_CHECK_ERROR(infinirtMallocAsync(&ptr, size, context::getStream()));
1313
return (std::byte *)ptr;
1414
}
1515

16-
void DeviceCachingAllocator::deallocate(std::byte *ptr) {
16+
void StreamOrderedAllocator::deallocate(std::byte *ptr) {
1717
INFINICORE_CHECK_ERROR(infinirtFreeAsync(ptr, context::getStream()));
1818
}
1919
} // namespace infinicore

src/infinicore/context/allocators/device_caching_allocator.hpp renamed to src/infinicore/context/allocators/stream_ordered_allocator.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
#include "../context_impl.hpp"
66

77
namespace infinicore {
8-
class DeviceCachingAllocator : public MemoryAllocator {
8+
class StreamOrderedAllocator : public MemoryAllocator {
99
public:
10-
explicit DeviceCachingAllocator(Device device);
11-
~DeviceCachingAllocator() = default;
10+
explicit StreamOrderedAllocator(Device device);
11+
~StreamOrderedAllocator() = default;
1212

1313
std::byte *allocate(size_t size) override;
1414
void deallocate(std::byte *ptr) override;

src/infinicore/context/runtime/runtime.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
#include "../../utils.hpp"
44

5-
#include "../allocators/device_caching_allocator.hpp"
65
#include "../allocators/device_pinned_allocator.hpp"
76
#include "../allocators/host_allocator.hpp"
7+
#include "../allocators/pinnable_block_allocator.hpp"
8+
#include "../allocators/stream_ordered_allocator.hpp"
89

910
namespace infinicore {
1011
Runtime::Runtime(Device device) : device_(device) {
@@ -14,7 +15,7 @@ Runtime::Runtime(Device device) : device_(device) {
1415
if (device_.getType() == Device::Type::CPU) {
1516
device_memory_allocator_ = std::make_unique<HostAllocator>();
1617
} else {
17-
device_memory_allocator_ = std::make_unique<DeviceCachingAllocator>(device);
18+
device_memory_allocator_ = std::make_unique<PinnableBlockAllocator>(device);
1819
pinned_host_memory_allocator_ = std::make_unique<DevicePinnedHostAllocator>(device);
1920
}
2021
}

0 commit comments

Comments
 (0)