Skip to content

Commit 644e8af

Browse files
authored
Merge pull request #16424 from sneaxiy/fix_allocator_bug
Fix allocator bug
2 parents 679a4c2 + 318072c commit 644e8af

31 files changed

+350
-223
lines changed

paddle/fluid/framework/operator.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -365,9 +365,6 @@ class ExecutionContext {
365365
auto shared_allocation = std::shared_ptr<memory::allocation::Allocation>(
366366
allocation_ptr, deleter);
367367

368-
PADDLE_ENFORCE(
369-
dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
370-
"The AllocationPtr must be TemporaryAllocation.");
371368
PADDLE_ENFORCE_GE(allocation_ptr->size(),
372369
framework::product(dim) * sizeof(T));
373370

paddle/fluid/memory/allocation/CMakeLists.txt

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
44
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
55
cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
66
cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler)
7+
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
78
cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
89

910
if (WITH_GPU)
@@ -37,30 +38,20 @@ else ()
3738
set(AllocatorFacadeDeps)
3839
endif()
3940

41+
list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator)
42+
4043
cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
4144
cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
42-
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
4345
cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator)
44-
cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags)
45-
cc_library(allocator_facade SRCS allocator_facade.cc DEPS
46-
${AllocatorFacadeDeps}
47-
cpu_allocator
48-
locked_allocator
49-
best_fit_allocator
50-
aligned_allocator
51-
auto_increment_allocator
52-
zero_size_allocator
53-
conditional_allocator
54-
retry_allocator
55-
buffered_allocator
56-
allocator_strategy
57-
legacy_allocator
58-
)
46+
cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps})
47+
cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy)
5948

6049
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
6150

6251
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
6352

53+
cc_test(naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade)
54+
6455
cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
6556

6657
cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)

paddle/fluid/memory/allocation/aligned_allocator.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#pragma once
1616
#include <memory>
17+
#include <utility>
1718
#include "paddle/fluid/memory/allocation/allocator.h"
1819

1920
namespace paddle {
@@ -93,6 +94,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
9394
underlying_allocator_->Allocate(size + kAlignment, attr);
9495
return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
9596
}
97+
98+
void FreeImpl(Allocation* allocation) override { delete allocation; }
9699
};
97100

98101
} // namespace allocation

paddle/fluid/memory/allocation/allocator.cc

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,24 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
2727

2828
AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
2929
auto ptr = AllocateImpl(size, attr);
30-
ptr->set_allocator(this);
30+
ptr->RegisterDecoratedAllocator(this);
3131
return AllocationPtr(ptr);
3232
}
3333

34-
void Allocator::Free(Allocation* allocation) { delete allocation; }
34+
void Allocator::FreeImpl(Allocation* allocation) {
35+
Allocator* allocator = allocation->TopDecoratedAllocator();
36+
allocator->Free(allocation);
37+
}
38+
39+
void Allocator::Free(Allocation* allocation) {
40+
allocation->PopDecoratedAllocator();
41+
FreeImpl(allocation);
42+
}
3543

3644
const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
3745

3846
void AllocationDeleter::operator()(Allocation* allocation) const {
39-
auto* allocator = allocation->allocator();
47+
Allocator* allocator = allocation->TopDecoratedAllocator();
4048
allocator->Free(allocation);
4149
}
4250

paddle/fluid/memory/allocation/allocator.h

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#pragma once
1616
#include <memory>
1717
#include <string>
18+
#include <utility>
19+
#include <vector>
1820
#include "paddle/fluid/platform/place.h"
1921

2022
namespace paddle {
@@ -44,13 +46,56 @@ class Allocator;
4446
// NOTE: this is the base class of Allocation. Each allocator can use its own
4547
// allocation object.
4648
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
49+
50+
/**
51+
* Allocation is returned by Allocator::Allocate() method.
52+
*
53+
* An allocator may be decorated by another allocator. For example, we can
54+
* decorate
55+
* a RetryAllocator to any allocator to perform allocation retry when first
56+
* allocation request fails.
57+
*
58+
* Explanations of Allocator design is as follows:
59+
*
60+
* Suppose we have an allocator which is decorated by several allocators:
61+
*
62+
* A(1) <- A(2) <- A(3) <- ... <- A(n)
63+
*
64+
* , and the public allocator is A(1).
65+
*
66+
* The allocation process would be:
67+
*
68+
* A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
69+
*
70+
* , and the free process would be:
71+
*
72+
* A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
73+
*
74+
* Therefore, we should record the allocator chain when allocating, so
75+
* that we can free the allocation in the reverse order of allocator chain.
76+
* The field `decorated_allocators_` is used to record this chain.
77+
*
78+
* Another example is that we want to add additional fields in Allocation,
79+
* e.g., something what is done in AlignedAllocator, etc.
80+
* In this case, we should declare a derived class of Allocation, which
81+
* contains an underlying Allocation allocated by the underlying allocator.
82+
* Therefore, `decorated_allocators_` of the new Allocation object would
83+
* be a new chain, differing from the underlying Allocation object.
84+
*/
4785
class Allocation {
4886
public:
4987
Allocation(void* ptr, size_t size, platform::Place place)
50-
: allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {}
88+
: ptr_(ptr), size_(size), place_(place) {
89+
// NOTE(zjl): Since decorated_allocators_ is usually a small vector
90+
// We reserve a small buffer to it to prevent frequent heap allocation
91+
// Not quite sure whether we need something like gtl vector.
92+
decorated_allocators_.reserve(8);
93+
}
5194

5295
Allocation(const Allocation& o) = delete;
5396
Allocation& operator=(const Allocation& o) = delete;
97+
Allocation(Allocation&& o) = delete;
98+
Allocation& operator=(Allocation&& o) = delete;
5499

55100
// Returns the holding pointer.
56101
// NOTE: For performance consideration, it is better not to make this method
@@ -72,17 +117,31 @@ class Allocation {
72117

73118
const platform::Place& place() const { return place_; }
74119

75-
Allocator* allocator() { return allocator_; }
120+
virtual ~Allocation();
76121

77-
void set_allocator(Allocator* allocator) { allocator_ = allocator; }
122+
private:
123+
const std::vector<Allocator*>& DecoratedAllocators() const {
124+
return decorated_allocators_;
125+
}
78126

79-
virtual ~Allocation();
127+
inline void RegisterDecoratedAllocator(Allocator* allocator) {
128+
decorated_allocators_.push_back(allocator);
129+
}
130+
131+
inline void PopDecoratedAllocator() { decorated_allocators_.pop_back(); }
132+
133+
inline Allocator* TopDecoratedAllocator() {
134+
return decorated_allocators_.back();
135+
}
80136

81137
private:
82-
Allocator* allocator_;
83138
void* ptr_;
84139
size_t size_;
85140
platform::Place place_;
141+
std::vector<Allocator*> decorated_allocators_;
142+
143+
friend class Allocator;
144+
friend class AllocationDeleter;
86145
};
87146

88147
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
@@ -132,9 +191,12 @@ class Allocator {
132191
// True if the `Allocate` is thread safe.
133192
virtual bool IsAllocThreadSafe() const;
134193

194+
// This function should not be called outside
195+
void Free(Allocation* allocation);
196+
135197
protected:
136-
virtual void Free(Allocation* allocation);
137198
virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;
199+
virtual void FreeImpl(Allocation* allocation);
138200

139201
private:
140202
friend class AllocationDeleter;

paddle/fluid/memory/allocation/allocator_facade.cc

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <map>
1818
#include <string>
1919
#include <unordered_map>
20+
#include <utility>
2021
#include <vector>
2122
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
2223
#include "paddle/fluid/memory/allocation/allocator_facade.h"
@@ -30,6 +31,7 @@
3031
#include "paddle/fluid/memory/allocation/retry_allocator.h"
3132
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
3233
#include "paddle/fluid/platform/cpu_info.h"
34+
#include "paddle/fluid/platform/enforce.h"
3335
#include "paddle/fluid/platform/place.h"
3436
#ifdef PADDLE_WITH_CUDA
3537
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
@@ -47,6 +49,17 @@ namespace paddle {
4749
namespace memory {
4850
namespace allocation {
4951

52+
static inline std::shared_ptr<Allocator> WrapRetryAllocator(
53+
std::shared_ptr<Allocator> allocator, int64_t retry_time) {
54+
if (retry_time > 0) {
55+
auto* retry_allocator =
56+
new RetryAllocator(std::move(allocator), retry_time);
57+
allocator.reset(retry_allocator);
58+
}
59+
60+
return allocator;
61+
}
62+
5063
// TODO(yy): Dirty code here. This class should be configurable in runtime.
5164
class CPUManagedAllocator : public Allocator {
5265
public:
@@ -110,14 +123,10 @@ class ChunkedAllocator : public Allocator {
110123
std::shared_ptr<Allocator> CreateAllocatorWithChunk() {
111124
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
112125
auto* allocation = chunks_.back().get();
113-
std::unique_ptr<Allocator> allocator(new LockedAllocator(
114-
std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
126+
std::shared_ptr<Allocator> allocator(new LockedAllocator(
127+
std::shared_ptr<Allocator>(new BestFitAllocator(allocation))));
115128

116-
if (retry_time_ > 0) {
117-
auto* retry_allocator =
118-
new RetryAllocator(std::move(allocator), retry_time_);
119-
allocator.reset(retry_allocator);
120-
}
129+
allocator = WrapRetryAllocator(allocator, retry_time_);
121130

122131
return std::make_shared<AlignedAllocator<64u>>(std::move(allocator));
123132
}
@@ -188,13 +197,23 @@ class AllocatorFacadePrivate {
188197
~AllocatorFacadePrivate() = default;
189198

190199
AllocatorFacadePrivate() {
191-
if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) {
192-
InitLegacyAllocator();
193-
} else {
194-
InitCPUAllocator();
195-
InitCUDAAllocator();
196-
InitCUDAPinnedAllocator();
197-
WrapZeroSizeAllocator();
200+
auto strategy = GetAllocatorStrategy();
201+
switch (strategy) {
202+
case AllocatorStrategy::kLegacy: {
203+
InitLegacyAllocator();
204+
break;
205+
}
206+
case AllocatorStrategy::kNaiveBestFit: {
207+
InitCPUAllocator();
208+
InitCUDAAllocator();
209+
InitCUDAPinnedAllocator();
210+
WrapZeroSizeAllocator();
211+
break;
212+
}
213+
default: {
214+
PADDLE_THROW("Unsupported allocator strategy: %d",
215+
static_cast<int>(strategy));
216+
}
198217
}
199218
}
200219

@@ -252,8 +271,7 @@ AllocatorFacade& AllocatorFacade::Instance() {
252271

253272
std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
254273
const platform::Place& place, size_t size, Allocator::Attr attr) {
255-
return std::shared_ptr<Allocation>(Alloc(place, size, attr).release(),
256-
AllocationDeleter());
274+
return std::shared_ptr<Allocation>(Alloc(place, size, attr));
257275
}
258276

259277
AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size,

paddle/fluid/memory/allocation/allocator_strategy.cc

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,27 @@
1414

1515
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
1616
#include "gflags/gflags.h"
17+
#include "paddle/fluid/platform/enforce.h"
1718

1819
DEFINE_string(
1920
allocator_strategy, "legacy",
2021
"The allocation strategy. Legacy means the original allocator of Fluid."
21-
"New means the experimental allocators of Fluid. in [legacy, new]");
22+
"naive_best_fit means the experimental best fit allocator. "
23+
"allocator. Enum in [legacy, naive_best_fit].");
2224

2325
namespace paddle {
2426
namespace memory {
2527
namespace allocation {
2628

2729
static AllocatorStrategy GetStrategyFromFlag() {
28-
return FLAGS_allocator_strategy == "legacy"
29-
? AllocatorStrategy::kLegacy
30-
: AllocatorStrategy::kNaiveBestFit;
30+
if (FLAGS_allocator_strategy == "legacy") {
31+
return AllocatorStrategy::kLegacy;
32+
} else if (FLAGS_allocator_strategy == "naive_best_fit") {
33+
return AllocatorStrategy::kNaiveBestFit;
34+
} else {
35+
PADDLE_THROW("Unsupported allocator strategy: %s",
36+
FLAGS_allocator_strategy);
37+
}
3138
}
3239

3340
AllocatorStrategy GetAllocatorStrategy() {

paddle/fluid/memory/allocation/best_fit_allocator.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
109109
}
110110
return num;
111111
}
112-
void BestFitAllocator::Free(Allocation* allocation) {
112+
void BestFitAllocator::FreeImpl(Allocation* allocation) {
113113
auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
114114
PADDLE_ENFORCE_NOT_NULL(bf_allocation,
115115
"The input allocation is not BestFitAllocation.");

paddle/fluid/memory/allocation/best_fit_allocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
119119
void InsertFreeNode(const ListIt& it);
120120

121121
protected:
122-
void Free(Allocation* allocation) override;
122+
void FreeImpl(Allocation* allocation) override;
123123
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;
124124

125125
private:

0 commit comments

Comments
 (0)