Skip to content

Commit c64d959

Browse files
authored
Merge pull request #16295 from zhhsplendid/zhenghuihuang-dev-2
Add support for init_memory and re-allocate_memory
2 parents b55dd32 + 124f1df commit c64d959

13 files changed

+378
-45
lines changed

paddle/fluid/memory/allocation/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,6 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat
6161

6262
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
6363

64-
cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)
64+
cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
65+
66+
cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/memory/allocation/allocator_facade.h"
16+
#include <gflags/gflags.h>
17+
#include <gtest/gtest.h>
18+
19+
#ifdef PADDLE_WITH_CUDA
20+
DECLARE_double(fraction_of_gpu_memory_to_use);
21+
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
22+
DECLARE_uint64(initial_gpu_memory_in_mb);
23+
DECLARE_uint64(reallocate_gpu_memory_in_mb);
24+
DECLARE_int64(gpu_allocator_retry_time);
25+
#endif
26+
27+
namespace paddle {
28+
namespace memory {
29+
namespace allocation {
30+
31+
//! Run allocate test cases for different places.
//!
//! Requests memory through AllocatorFacade::Instance() for the CPU place and,
//! when compiled with PADDLE_WITH_CUDA, for CUDAPlace(0) (1024 bytes and
//! 2 GB) and for the CUDA pinned place (1 MB). Every case asserts that the
//! returned allocation and its pointer are non-null and that the allocation
//! reports the requested place. The CPU case requires the size to match the
//! request exactly; the CUDA and pinned cases only require it to be at least
//! the requested size.
//!
//! The gtest ASSERT_* macros return from the enclosing function on failure,
//! so the first failing case skips the remaining ones.
32+
void AllocateTestCases() {
33+
auto &instance = AllocatorFacade::Instance();
34+
platform::Place place;
35+
size_t size = 1024;
36+
37+
// CPU place: the reported size must equal the request (ASSERT_EQ).
{
38+
place = platform::CPUPlace();
39+
size = 1024;
40+
auto cpu_allocation = instance.Alloc(place, size);
41+
ASSERT_NE(cpu_allocation, nullptr);
42+
ASSERT_NE(cpu_allocation->ptr(), nullptr);
43+
ASSERT_EQ(cpu_allocation->place(), place);
44+
ASSERT_EQ(cpu_allocation->size(), size);
45+
}
46+
47+
#ifdef PADDLE_WITH_CUDA
// GPU 0, 1024-byte request: only a lower bound on the size is checked.
48+
{
49+
place = platform::CUDAPlace(0);
50+
size = 1024;
51+
auto gpu_allocation = instance.Alloc(place, size);
52+
ASSERT_NE(gpu_allocation, nullptr);
53+
ASSERT_NE(gpu_allocation->ptr(), nullptr);
54+
ASSERT_EQ(gpu_allocation->place(), place);
55+
ASSERT_GE(gpu_allocation->size(), size);
56+
}
57+
58+
// GPU 0, large request. The shift is done in int (1 << 30 fits) and then
// widened to size_t before doubling, so the 2 GB value is computed in size_t.
{
59+
// Allocate 2GB gpu memory
60+
place = platform::CUDAPlace(0);
61+
size = 2 * static_cast<size_t>(1 << 30);
62+
auto gpu_allocation = instance.Alloc(place, size);
63+
ASSERT_NE(gpu_allocation, nullptr);
64+
ASSERT_NE(gpu_allocation->ptr(), nullptr);
65+
ASSERT_EQ(gpu_allocation->place(), place);
66+
ASSERT_GE(gpu_allocation->size(), size);
67+
}
68+
69+
// CUDA pinned place, 1 MB. The Alloc call below spells out the place and the
// size again instead of reusing the `place` / `size` locals; the values are
// the same, and the locals are what the assertions compare against.
{
70+
place = platform::CUDAPinnedPlace();
71+
size = (1 << 20);
72+
auto cuda_pinned_allocation =
73+
instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
74+
ASSERT_NE(cuda_pinned_allocation, nullptr);
75+
ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
76+
ASSERT_EQ(cuda_pinned_allocation->place(), place);
77+
ASSERT_GE(cuda_pinned_allocation->size(), size);
78+
}
79+
#endif
80+
}
81+
82+
// Runs AllocateTestCases() with the GPU memory flags given as absolute sizes
// in MB instead of as a fraction. The flags are only assigned in builds with
// PADDLE_WITH_CUDA; in other builds this test just calls AllocateTestCases()
// with whatever flag defaults are in effect.
TEST(Allocator, SpecifyGpuMemory) {
83+
#ifdef PADDLE_WITH_CUDA
84+
// Set to 0.0 to test FLAGS_initial_gpu_memory_in_mb and
85+
// FLAGS_reallocate_gpu_memory_in_mb
86+
FLAGS_fraction_of_gpu_memory_to_use = 0.0;
87+
// 512 MB
88+
FLAGS_initial_gpu_memory_in_mb = 512;
89+
// 4 MB
90+
FLAGS_reallocate_gpu_memory_in_mb = 4;
// NOTE(review): the unit of the retry time is not visible in this file;
// confirm against the flag definition before changing the value.
91+
FLAGS_gpu_allocator_retry_time = 500;
92+
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
93+
#endif
94+
95+
// Flags are assigned before the first use of the allocator in this test.
AllocateTestCases();
96+
}
97+
98+
} // namespace allocation
99+
} // namespace memory
100+
} // namespace paddle

paddle/fluid/memory/allocation/allocator_facade_test.cc renamed to paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,17 @@
1919
#ifdef PADDLE_WITH_CUDA
2020
DECLARE_double(fraction_of_gpu_memory_to_use);
2121
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
22+
DECLARE_uint64(initial_gpu_memory_in_mb);
23+
DECLARE_uint64(reallocate_gpu_memory_in_mb);
2224
DECLARE_int64(gpu_allocator_retry_time);
2325
#endif
2426

2527
namespace paddle {
2628
namespace memory {
2729
namespace allocation {
2830

29-
TEST(allocator, allocator) {
30-
#ifdef PADDLE_WITH_CUDA
31-
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
32-
FLAGS_gpu_allocator_retry_time = 500;
33-
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
34-
#endif
35-
31+
//! Run allocate test cases for different places
32+
void AllocateTestCases() {
3633
auto &instance = AllocatorFacade::Instance();
3734
platform::Place place;
3835
size_t size = 1024;
@@ -82,6 +79,16 @@ TEST(allocator, allocator) {
8279
#endif
8380
}
8481

82+
// Runs AllocateTestCases() with the GPU memory size configured through the
// fraction flags. The flags are only assigned in builds with
// PADDLE_WITH_CUDA; in other builds this test just calls AllocateTestCases().
TEST(Allocator, Allocator) {
83+
#ifdef PADDLE_WITH_CUDA
84+
// NOTE(review): presumably 0.01 means 1% of device memory; confirm against
// the flag definition.
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
85+
FLAGS_gpu_allocator_retry_time = 500;
86+
FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
87+
#endif
88+
89+
// Flags are assigned before the first use of the allocator in this test.
AllocateTestCases();
90+
}
91+
8592
} // namespace allocation
8693
} // namespace memory
8794
} // namespace paddle

paddle/fluid/memory/allocation/legacy_allocator.cc

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ DEFINE_bool(init_allocated_mem, false,
3737
"that initializing the allocated memory with a small value "
3838
"during unit testing.");
3939
DECLARE_double(fraction_of_gpu_memory_to_use);
40+
DECLARE_uint64(initial_gpu_memory_in_mb);
41+
DECLARE_uint64(reallocate_gpu_memory_in_mb);
4042
DECLARE_bool(benchmark);
4143

4244
namespace paddle {
@@ -153,12 +155,18 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
153155
platform::GpuMinChunkSize(),
154156
platform::GpuMaxChunkSize());
155157

156-
VLOG(10) << "\n\nNOTE: each GPU device use "
157-
<< FLAGS_fraction_of_gpu_memory_to_use * 100
158-
<< "% of GPU memory.\n"
159-
<< "You can set GFlags environment variable '"
160-
<< "FLAGS_fraction_of_gpu_memory_to_use"
161-
<< "' to change the fraction of GPU usage.\n\n";
158+
VLOG(10) << "\n\nNOTE:\n"
159+
<< "You can set GFlags environment variable "
160+
<< "'FLAGS_fraction_of_gpu_memory_to_use' "
161+
<< "or 'FLAGS_initial_gpu_memory_in_mb' "
162+
<< "or 'FLAGS_reallocate_gpu_memory_in_mb' "
163+
<< "to change the memory size for GPU usage.\n"
164+
<< "Current 'FLAGS_fraction_of_gpu_memory_to_use' value is "
165+
<< FLAGS_fraction_of_gpu_memory_to_use
166+
<< ". Current 'FLAGS_initial_gpu_memory_in_mb' value is "
167+
<< FLAGS_initial_gpu_memory_in_mb
168+
<< ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
169+
<< FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
162170
}
163171
});
164172

paddle/fluid/memory/detail/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@ endif(${WITH_GPU})
99
cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator)
1010

1111
cc_library(buddy_allocator SRCS buddy_allocator.cc DEPS memory_block system_allocator glog)
12+
13+
cc_test(buddy_allocator_test SRCS buddy_allocator_test.cc DEPS buddy_allocator)

paddle/fluid/memory/detail/buddy_allocator.cc

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

1515
#include "paddle/fluid/memory/detail/buddy_allocator.h"
16+
17+
#include <algorithm>
18+
#include <utility>
19+
1620
#include "glog/logging.h"
1721

1822
DEFINE_bool(free_idle_memory, false,
@@ -36,9 +40,10 @@ BuddyAllocator::~BuddyAllocator() {
3640
"have actually been freed";
3741
while (!pool_.empty()) {
3842
auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
39-
VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
43+
VLOG(10) << "Free from block (" << block << ", " << block->size(cache_)
44+
<< ")";
4045

41-
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
46+
system_allocator_->Free(block, block->size(cache_), block->index(cache_));
4247
cache_.invalidate(block);
4348
pool_.erase(pool_.begin());
4449
}
@@ -71,7 +76,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
7176

7277
// refill the pool if failure
7378
if (it == pool_.end()) {
74-
it = RefillPool();
79+
it = RefillPool(size);
7580
// if refilling the pool also fails, give up and return nullptr to the caller
7681
if (it == pool_.end()) {
7782
return nullptr;
@@ -184,38 +189,47 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
184189
return static_cast<MemoryBlock*>(p)->data();
185190
}
186191

187-
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
192+
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
193+
size_t request_bytes) {
194+
size_t allocate_bytes = max_chunk_size_;
195+
size_t index = 0;
196+
188197
#ifdef PADDLE_WITH_CUDA
189198
if (system_allocator_->UseGpu()) {
190199
if ((total_used_ + total_free_) == 0) {
191-
// Compute the maximum allocation size for the first allocation.
192-
max_chunk_size_ = platform::GpuMaxChunkSize();
200+
// Compute the allocation size for gpu for the first allocation.
201+
allocate_bytes = std::max(platform::GpuInitAllocSize(), request_bytes);
202+
} else {
203+
// Reallocation size
204+
if (realloc_size_ == 0) {
205+
realloc_size_ = platform::GpuReallocSize();
206+
}
207+
allocate_bytes = std::max(realloc_size_, request_bytes);
193208
}
194209
}
195210
#endif
196211

197-
// Allocate a new maximum sized block
198-
size_t index = 0;
199-
void* p = system_allocator_->Alloc(&index, max_chunk_size_);
212+
// Allocate a new block
213+
void* p = system_allocator_->Alloc(&index, allocate_bytes);
200214

201215
if (p == nullptr) return pool_.end();
202216

203217
VLOG(10) << "Creating and inserting new block " << p
204218
<< " from system allocator";
205219

206220
static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
207-
max_chunk_size_, nullptr, nullptr);
221+
allocate_bytes, nullptr, nullptr);
208222

209223
// gpu fallback allocation
210224
if (system_allocator_->UseGpu() &&
211225
static_cast<MemoryBlock*>(p)->index(cache_) == 1) {
212226
fallback_alloc_count_++;
213227
}
214228

215-
total_free_ += max_chunk_size_;
229+
total_free_ += allocate_bytes;
216230

217231
// dump the block into pool
218-
return pool_.insert(IndexSizeAddress(index, max_chunk_size_, p)).first;
232+
return pool_.insert(IndexSizeAddress(index, allocate_bytes, p)).first;
219233
}
220234

221235
BuddyAllocator::PoolSet::iterator BuddyAllocator::FindExistChunk(size_t size) {
@@ -286,12 +300,12 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
286300

287301
VLOG(10) << "Return block " << block << " to fallback allocator.";
288302

289-
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
303+
system_allocator_->Free(block, block->size(cache_), block->index(cache_));
290304
cache_.invalidate(block);
291305

292306
pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
293307

294-
total_free_ -= max_chunk_size_;
308+
total_free_ -= block->size(cache_);
295309
fallback_alloc_count_--;
296310

297311
// If no fallback allocation exists, return directly
@@ -322,12 +336,12 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
322336

323337
VLOG(10) << "Return block " << block << " to base allocator.";
324338

325-
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
339+
system_allocator_->Free(block, block->size(cache_), block->index(cache_));
326340
cache_.invalidate(block);
327341

328342
pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
329343

330-
total_free_ -= max_chunk_size_;
344+
total_free_ -= block->size(cache_);
331345

332346
if (!shall_free_alloc()) return;
333347
}

paddle/fluid/memory/detail/buddy_allocator.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class BuddyAllocator {
6060
void* SystemAlloc(size_t size);
6161

6262
/*! \brief If existing chunks are not suitable, refill pool */
63-
PoolSet::iterator RefillPool();
63+
PoolSet::iterator RefillPool(size_t request_bytes);
6464

6565
/**
6666
* \brief Find the suitable chunk from existing pool and split
@@ -89,6 +89,8 @@ class BuddyAllocator {
8989
size_t min_chunk_size_; // the minimum size of each chunk
9090
size_t max_chunk_size_; // the maximum size of each chunk
9191

92+
size_t realloc_size_ = 0; // the size of re-allocated chunk
93+
9294
private:
9395
/**
9496
* \brief A list of free allocation

0 commit comments

Comments
 (0)