@@ -22,6 +22,8 @@ limitations under the License. */
#include "paddle/fluid/platform/gpu_info.h"

#ifdef PADDLE_WITH_CUDA
+ #include <cuda_runtime.h>
+
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);
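Note: the new cuda_runtime.h include is needed because the tests added further down call the CUDA runtime directly (cudaMalloc, cudaFree, cudaError_t) in addition to going through the allocator.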
@@ -31,29 +33,37 @@ namespace paddle {
namespace memory {
namespace detail {

- constexpr static int test_gpu_id = 0;
+ constexpr static int TEST_GPU_ID = 0;

- void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
+ int* TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes,
+                         bool use_system_allocator = false,
+                         bool free_ptr = true) {
  bool freed = false;
  size_t used_bytes = allocator->Used();

  if (size_bytes > 0) {
    void* p = allocator->Alloc(size_bytes);

    EXPECT_NE(p, nullptr);
+
#ifdef PADDLE_WITH_CUDA
-     if (size_bytes < platform::GpuMaxChunkSize()) {
+     if (size_bytes < allocator->GetMaxChunkSize()) {
#else
-     if (size_bytes < platform::CpuMaxChunkSize()) {
+     if (size_bytes < allocator->GetMaxChunkSize()) {
#endif
      // Not allocate from SystemAllocator
+       EXPECT_FALSE(use_system_allocator);
      EXPECT_GE(allocator->Used(), used_bytes + size_bytes);
    } else {
      // Allocate from SystemAllocator doesn't count in Used()
+       EXPECT_TRUE(use_system_allocator);
      EXPECT_EQ(allocator->Used(), used_bytes);
    }

    int* intp = static_cast<int*>(p);
+     if (!free_ptr) {
+       return intp;
+     }
    std::shared_ptr<int> ptr(intp, [&](void* p) {
      allocator->Free(intp);
      freed = true;
@@ -64,20 +74,30 @@ void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {

  EXPECT_EQ(used_bytes, allocator->Used());
  EXPECT_TRUE(freed);
+   return nullptr;
}
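The helper above now returns the raw pointer so that callers can hold an allocation across calls, and it reads the chunk limit from the allocator itself via GetMaxChunkSize() instead of the platform-wide query. A minimal sketch of the accessor this test assumes, where the member name max_chunk_size_ is an assumption and the real declaration lives in buddy_allocator.h:

    // Hypothetical accessor sketch; max_chunk_size_ is an assumed member name.
    size_t BuddyAllocator::GetMaxChunkSize() { return max_chunk_size_; }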

#ifdef PADDLE_WITH_CUDA
TEST(BuddyAllocator, GpuFraction) {
+   // On a 16 GB GPU, the pool size will be about 160 MB
  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
+   FLAGS_initial_gpu_memory_in_mb = 0;
+   FLAGS_reallocate_gpu_memory_in_mb = 0;

  BuddyAllocator buddy_allocator(
-     std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
+     std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());

+   // Less than pool size
  TestBuddyAllocator(&buddy_allocator, 10);
  TestBuddyAllocator(&buddy_allocator, 10 << 10);
  TestBuddyAllocator(&buddy_allocator, 10 << 20);
- TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
+
+   // Greater than max chunk size
+   TestBuddyAllocator(&buddy_allocator, 499 << 20,
+                      /* use_system_allocator = */ true);
+   TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
+                      /* use_system_allocator = */ true);
}
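A quick sanity check on the thresholds above, assuming the 16 GB card mentioned in the comment: 16384 MB × 0.01 ≈ 164 MB, so both the 499 MB and the 2 GB requests exceed the pool's max chunk size and have to be served by the SystemAllocator, which is why those two calls pass use_system_allocator = true.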

TEST(BuddyAllocator, InitRealloc) {
@@ -87,19 +107,19 @@ TEST(BuddyAllocator, InitRealloc) {
  EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(100 << 20));

  BuddyAllocator buddy_allocator(
-     std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
+     std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());

  // Less then initial size and reallocate size
  TestBuddyAllocator(&buddy_allocator, 10 << 20);
  // Between initial size and reallocate size and not exceed pool
  TestBuddyAllocator(&buddy_allocator, 80 << 20);
- // Less then reallocate size and exceed pool
- TestBuddyAllocator(&buddy_allocator, 40 << 20);
- // Greater then reallocate size and exceed pool
- TestBuddyAllocator(&buddy_allocator, 80 << 20);
- // Greater then initial size and reallocate size
- TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
+   TestBuddyAllocator(&buddy_allocator, 99 << 20);
+   // Greater than max chunk size
+   TestBuddyAllocator(&buddy_allocator, 101 << 20,
+                      /* use_system_allocator = */ true);
+   TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
+                      /* use_system_allocator = */ true);
}

TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
@@ -109,23 +129,112 @@ TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
  EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(10 << 20));

  BuddyAllocator buddy_allocator(
-     std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
+     std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());

- // Less then initial size and reallocate size
+   // Less than initial size and reallocate size
  TestBuddyAllocator(&buddy_allocator, 1 << 20);
- // Between initial size and reallocate size and not exceed pool
- TestBuddyAllocator(&buddy_allocator, 3 << 20);
- // Less then initial size and exceed pool
- TestBuddyAllocator(&buddy_allocator, 3 << 20);
- // Less then reallocate size and not exceed pool (now pool is 15 MB, used 7
- // MB)
- TestBuddyAllocator(&buddy_allocator, 7 << 20);
- // Less then reallocate size and exceed pool
+   // Between initial size and reallocate size and exceed pool
+   TestBuddyAllocator(&buddy_allocator, 6 << 20);
  TestBuddyAllocator(&buddy_allocator, 8 << 20);
- // Greater then initial size and reallocate size
- TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
+   TestBuddyAllocator(&buddy_allocator, 9 << 20);
+   // Greater than max chunk size
+   TestBuddyAllocator(&buddy_allocator, 11 << 20,
+                      /* use_system_allocator = */ true);
+   TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
+                      /* use_system_allocator = */ true);
+ }
+
+ TEST(BuddyAllocator, FractionRefillPool) {
+   FLAGS_fraction_of_gpu_memory_to_use = 0.6;
+   FLAGS_initial_gpu_memory_in_mb = 0;
+   FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+   size_t max_chunk_size = platform::GpuMaxChunkSize();
+   BuddyAllocator buddy_allocator(
+       std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+       platform::GpuMinChunkSize(), max_chunk_size);
+
+   // Less than pool size
+   int* p0 = TestBuddyAllocator(&buddy_allocator, max_chunk_size - 1000,
+                                /* use_system_allocator = */ false,
+                                /* free_ptr = */ false);
+   // Max chunk size should stay the same during allocation
+   EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+   size_t alloc =
+       platform::GpuAvailableMemToAlloc() * FLAGS_fraction_of_gpu_memory_to_use;
+   // Exceeding the pool triggers a refill sized as a fraction of the available
+   // GPU memory, so we should be able to alloc 60% of the remaining GPU memory
+   int* p1 = TestBuddyAllocator(&buddy_allocator, alloc,
+                                /* use_system_allocator = */ false,
+                                /* free_ptr = */ false);
+   // Max chunk size should stay the same during allocation
+   EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+   alloc =
+       platform::GpuAvailableMemToAlloc() * FLAGS_fraction_of_gpu_memory_to_use;
+   // Exceeding the pool triggers a refill sized as a fraction of the available
+   // GPU memory, so we should be able to alloc 60% of the remaining GPU memory
+   TestBuddyAllocator(&buddy_allocator, alloc,
+                      /* use_system_allocator = */ false);
+   // Max chunk size should stay the same during allocation
+   EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
+
+   buddy_allocator.Free(p0);
+   buddy_allocator.Free(p1);
+ }
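What this test exercises, in plain terms: with the fraction set to 0.6, the first chunk claims roughly 60% of the memory free at construction time, and the max_chunk_size - 1000 allocation nearly exhausts it. Each follow-up request asks for 60% of whatever platform::GpuAvailableMemToAlloc() still reports, which no longer fits in the pool and so forces the allocator to pull a fresh chunk from the SystemAllocator while GetMaxChunkSize() stays fixed. As a rough worked example, assuming 10 GB free at the start: the first chunk is about 6 GB, roughly 4 GB remains free on the device, and the next request of about 2.4 GB is what triggers the refill.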
+
+ TEST(BuddyAllocator, AllocFromAvailable) {
+   FLAGS_fraction_of_gpu_memory_to_use = 0.7;
+   FLAGS_initial_gpu_memory_in_mb = 0;
+   FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+   size_t total = 0, available = 0;
+   platform::SetDeviceId(TEST_GPU_ID);
+   platform::GpuMemoryUsage(&available, &total);
+
+   // Take up half of the available GPU memory directly via cudaMalloc
+   void* p;
+   cudaError_t result = cudaMalloc(&p, available >> 1);
+   EXPECT_TRUE(result == cudaSuccess);
+
+   // BuddyAllocator should still be able to allocate from the remaining GPU memory
+   BuddyAllocator buddy_allocator(
+       std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+       platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
+
+   TestBuddyAllocator(&buddy_allocator, 10);
+   TestBuddyAllocator(&buddy_allocator, 10 << 10);
+   TestBuddyAllocator(&buddy_allocator, 10 << 20);
+   TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(1 << 30));
+
+   if (p) {
+     EXPECT_TRUE(cudaFree(p) == cudaSuccess);
+   }
}
+
+ TEST(BuddyAllocator, AllocFromAvailableWhenFractionIsOne) {
+   FLAGS_fraction_of_gpu_memory_to_use = 1.0;
+   FLAGS_initial_gpu_memory_in_mb = 0;
+   FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+   void* p = nullptr;
+   EXPECT_TRUE(cudaMalloc(&p, static_cast<size_t>(4) << 30) == cudaSuccess);
+
+   // BuddyAllocator should still be able to allocate from the remaining GPU memory
+   BuddyAllocator buddy_allocator(
+       std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+       platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
+
+   TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(1) << 30);
+   TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(5) << 30);
+
+   if (p) {
+     EXPECT_TRUE(cudaFree(p) == cudaSuccess);
+   }
+ }
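Both of the tests above appear to rely on the pool sizing itself against the memory that is actually free when the allocator is constructed, even after a raw cudaMalloc has already claimed a large slice and with the fraction set to 1.0. A minimal sketch of the kind of availability query this depends on (platform::GpuMemoryUsage presumably wraps something equivalent; its implementation is not part of this diff):

    size_t free_bytes = 0, total_bytes = 0;
    // cudaMemGetInfo reports the free and total bytes on the current device.
    EXPECT_TRUE(cudaMemGetInfo(&free_bytes, &total_bytes) == cudaSuccess);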
+
#endif

}  // namespace detail