
Commit ea81f8e

Clean interface of allocator
Clean managed/unmanaged allocator
1 parent 0263196 commit ea81f8e

26 files changed: +347, -585 lines

paddle/fluid/memory/allocation/CMakeLists.txt

Lines changed: 1 addition & 5 deletions
@@ -29,9 +29,6 @@ else()
         cpu_allocator)
 endif()
 
-
-cc_library(naive_managed_allocator SRCS naive_managed_allocator.cc DEPS allocator)
-cc_test(naive_managed_allocator_test SRCS naive_managed_allocator_test.cc DEPS naive_managed_allocator)
 nv_library(pinned_allocator SRCS pinned_allocator.cc DEPS allocator)
 if (WITH_GPU)
   set(AllocatorFacadeDeps gpu_info cuda_allocator pinned_allocator cuda_device_guard)
@@ -49,7 +46,6 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
         cpu_allocator
         locked_allocator
         best_fit_allocator
-        naive_managed_allocator
        aligned_allocator
        auto_increment_allocator
        zero_size_allocator
@@ -61,6 +57,6 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
 
 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
 
-cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator naive_managed_allocator best_fit_allocator locked_allocator cpu_allocator)
+cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
 
 cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)

paddle/fluid/memory/allocation/aligned_allocator.cc

Lines changed: 1 addition & 6 deletions
@@ -19,14 +19,9 @@ namespace memory {
 namespace allocation {
 
 ThinAlignedAllocator::ThinAlignedAllocator(
-    std::shared_ptr<ManagedAllocator> underlyning_allocator)
+    std::shared_ptr<Allocator> underlyning_allocator)
     : underlying_allocator_(std::move(underlyning_allocator)) {}
 
-std::shared_ptr<Allocation> ThinAlignedAllocator::AllocateShared(
-    size_t size, Allocator::Attr attr) {
-  return std::shared_ptr<Allocation>(Allocate(size, attr).release());
-}
-
 bool ThinAlignedAllocator::IsAllocThreadSafe() const {
   return underlying_allocator_->IsAllocThreadSafe();
 }

paddle/fluid/memory/allocation/aligned_allocator.h

Lines changed: 3 additions & 5 deletions
@@ -70,17 +70,15 @@ class AlignedAllocation : public Allocation {
 //
 // NOTE(yy): This could be an over design. If it harms readability of code, it
 // could be removed later.
-class ThinAlignedAllocator : public ManagedAllocator {
+class ThinAlignedAllocator : public Allocator {
  public:
   explicit ThinAlignedAllocator(
-      std::shared_ptr<ManagedAllocator> underlyning_allocator);
-
-  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
+      std::shared_ptr<Allocator> underlyning_allocator);
 
   bool IsAllocThreadSafe() const;
 
  protected:
-  std::shared_ptr<ManagedAllocator> underlying_allocator_;
+  std::shared_ptr<Allocator> underlying_allocator_;
 };
 
 // An aligned allocator will allocate `size+kAlignment` allocation and adjust
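
For orientation, here is a minimal usage sketch of the reworked decorator; the AlignedCpuAlloc helper and its call pattern are assumptions for illustration, while CPUAllocator, AlignedAllocator<64u>, and Allocator::kDefault all appear elsewhere in this commit's diffs.

#include <memory>

#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"

namespace alloc = paddle::memory::allocation;

// Hypothetical call site: wrap a plain Allocator in the 64-byte aligned
// decorator, which after this change takes std::shared_ptr<Allocator>
// instead of std::shared_ptr<ManagedAllocator>.
std::unique_ptr<alloc::Allocation> AlignedCpuAlloc(size_t size) {
  std::shared_ptr<alloc::Allocator> base(new alloc::CPUAllocator());
  auto aligned = std::make_shared<alloc::AlignedAllocator<64u>>(base);
  // Allocate() still returns a unique_ptr<Allocation>; the wrapped pointer
  // is adjusted so that it is 64-byte aligned.
  return aligned->Allocate(size, alloc::Allocator::kDefault);
}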

paddle/fluid/memory/allocation/allocator.cc

Lines changed: 5 additions & 0 deletions
@@ -24,6 +24,11 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
 
 const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
 
+MannualFreeAllocation::~MannualFreeAllocation() { allocator_->Free(this); }
+std::unique_ptr<Allocation> MannualFreeAllocator::Allocate(
+    size_t size, Allocator::Attr attr) {
+  return std::unique_ptr<Allocation>(AllocateImpl(size, attr));
+}
 } // namespace allocation
 } // namespace memory
 } // namespace paddle
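
Taken together, these five added lines move deallocation into the allocation object itself: destroying a MannualFreeAllocation calls back into its owning allocator's Free(), while MannualFreeAllocator::Allocate merely wraps the raw result of AllocateImpl in a unique_ptr. This is what lets the separate AllocateShared entry points be deleted in the rest of the commit: a shared_ptr built from the released unique_ptr frees memory through the same destructor path.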

paddle/fluid/memory/allocation/allocator.h

Lines changed: 20 additions & 9 deletions
@@ -121,19 +121,30 @@ class Allocator {
   virtual bool IsAllocThreadSafe() const;
 };
 
-// User need to invoke `Free` or `FreeUniquePtr` manually if allocated by
-// a manally managed allocator.
-class UnmanagedAllocator : public Allocator {
+class MannualFreeAllocator;
+class MannualFreeAllocation : public Allocation {
  public:
-  virtual void FreeUniquePtr(std::unique_ptr<Allocation> allocation) = 0;
+  MannualFreeAllocation(MannualFreeAllocator* allocator, void* ptr, size_t size,
+                        platform::Place place)
+      : Allocation(ptr, size, place), allocator_(allocator) {}
+
+  ~MannualFreeAllocation();
+
+ private:
+  MannualFreeAllocator* allocator_;
 };
 
-// The allocation will be managed by smart pointers. i.e., users do not need
-// to free allocation manually.
-class ManagedAllocator : public Allocator {
+// User need to invoke `Free` or `FreeUniquePtr` manually if allocated by
+// a manally managed allocator.
+class MannualFreeAllocator : public Allocator {
  public:
-  virtual std::shared_ptr<Allocation> AllocateShared(
-      size_t size, Allocator::Attr attr = kDefault) = 0;
+  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) final;
+
+ protected:
+  virtual void Free(MannualFreeAllocation* allocation) = 0;
+  virtual MannualFreeAllocation* AllocateImpl(size_t size,
+                                              Allocator::Attr attr) = 0;
+  friend class MannualFreeAllocation;
 };
 
 } // namespace allocation
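
A minimal sketch of what a concrete allocator looks like under the new interface. The BufferAllocator name and its malloc/free body are illustrative assumptions, not part of the commit, and the sketch assumes Allocation exposes a ptr() getter and that PADDLE_ENFORCE and platform::CPUPlace are visible through allocator.h.

#include <cstdlib>

#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

// Hypothetical subclass: only the two protected hooks are implemented.
// Callers go through the public, final Allocator::Allocate; memory comes
// back when the MannualFreeAllocation is destroyed, because its destructor
// calls allocator_->Free(this).
class BufferAllocator : public MannualFreeAllocator {
 protected:
  MannualFreeAllocation* AllocateImpl(size_t size,
                                      Allocator::Attr attr) override {
    void* ptr = std::malloc(size);
    PADDLE_ENFORCE(ptr != nullptr, "Cannot allocate %d bytes", size);
    return new MannualFreeAllocation(this, ptr, size, platform::CPUPlace());
  }

  void Free(MannualFreeAllocation* allocation) override {
    // Release the underlying buffer only; the MannualFreeAllocation object
    // itself is being destroyed by its owner (unique_ptr or shared_ptr).
    std::free(allocation->ptr());
  }
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle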

paddle/fluid/memory/allocation/allocator_facade.cc

Lines changed: 15 additions & 24 deletions
@@ -24,7 +24,6 @@
 #include "paddle/fluid/memory/allocation/conditional_allocator.h"
 #include "paddle/fluid/memory/allocation/cpu_allocator.h"
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
-#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
 #include "paddle/fluid/memory/allocation/retry_allocator.h"
 #include "paddle/fluid/memory/allocation/zero_size_allocator.h"
 #include "paddle/fluid/platform/cpu_info.h"
@@ -46,34 +45,28 @@ namespace memory {
 namespace allocation {
 
 // TODO(yy): Dirty code here. This class should be configurable in runtime.
-class CPUManagedAllocator : public ManagedAllocator {
+class CPUManagedAllocator : public Allocator {
  public:
-  CPUManagedAllocator()
-      : normal_allocator_(NaiveManagedAllocator::Create(
-            std::unique_ptr<Allocator>(new CPUAllocator()))) {}
+  CPUManagedAllocator() : normal_allocator_(new CPUAllocator()) {}
 
   std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
     return normal_allocator_->Allocate(size, attr);
   }
 
-  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
-    return normal_allocator_->AllocateShared(size, attr);
-  }
-
   bool IsAllocThreadSafe() const override { return true; }
 
  private:
-  std::shared_ptr<ManagedAllocator> normal_allocator_;
+  std::shared_ptr<Allocator> normal_allocator_;
 };
 
 // TODO(yy): Dirty code here. This class should be configurable in runtime.
-class ChunkedManagedAllocator : public ManagedAllocator {
+class ChunkedManagedAllocator : public Allocator {
  public:
   explicit ChunkedManagedAllocator(std::unique_ptr<Allocator> system_allocator,
                                    size_t max_chunk_size, size_t capacity = 1,
                                    int64_t retry_time = -1)
      : max_chunk_size_(max_chunk_size), retry_time_(retry_time) {
-    raw_allocator_ = NaiveManagedAllocator::Create(std::move(system_allocator));
+    raw_allocator_ = std::move(system_allocator);
 
     if (max_chunk_size_ == 0) {
       default_allocator_ = raw_allocator_;
@@ -114,11 +107,7 @@ class ChunkedManagedAllocator : public ManagedAllocator {
     return default_allocator_->Allocate(size, attr);
   }
 
-  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
-    return default_allocator_->AllocateShared(size, attr);
-  }
-
-  std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
+  std::shared_ptr<Allocator> BestFitAllocatorCreator() {
     chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
     auto* allocation = chunks_.back().get();
     std::unique_ptr<Allocator> unmanaged_allocator(new LockedAllocator(
@@ -127,12 +116,13 @@ class ChunkedManagedAllocator : public ManagedAllocator {
     if (retry_time_ <= 0) {
       VLOG(10) << "Create NaiveManagedAllocator without retry";
       return std::make_shared<AlignedAllocator<64u>>(
-          NaiveManagedAllocator::Create(std::move(unmanaged_allocator)));
+          std::move(unmanaged_allocator));
     } else {
       VLOG(10) << "Create RetryAllocator with retry_time " << retry_time_
                << "ms";
-      return std::make_shared<AlignedAllocator<64u>>(RetryAllocator::Create(
-          std::move(unmanaged_allocator), static_cast<size_t>(retry_time_)));
+      auto tmp = std::make_shared<RetryAllocator>(
+          std::move(unmanaged_allocator), static_cast<size_t>(retry_time_));
+      return std::make_shared<AlignedAllocator<64u>>(tmp);
     }
   }
 
@@ -142,8 +132,8 @@ class ChunkedManagedAllocator : public ManagedAllocator {
   size_t max_chunk_size_;
   int64_t retry_time_;
   std::vector<std::unique_ptr<Allocation>> chunks_;
-  std::shared_ptr<ManagedAllocator> raw_allocator_;
-  std::shared_ptr<ManagedAllocator> default_allocator_;
+  std::shared_ptr<Allocator> raw_allocator_;
+  std::shared_ptr<Allocator> default_allocator_;
 };
 
 #ifdef PADDLE_WITH_CUDA
@@ -193,7 +183,7 @@ class CUDAPinnedManagedAllocator : public ChunkedManagedAllocator {
 
 class AllocatorFacadePrivate {
  public:
-  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
+  std::map<platform::Place, std::shared_ptr<Allocator>> allocators_;
 
   ~AllocatorFacadePrivate() = default;
 
@@ -245,7 +235,8 @@ AllocatorFacade& AllocatorFacade::Instance() {
 
 std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size, Allocator::Attr attr) {
-  return m_->allocators_.at(place)->AllocateShared(size, attr);
+  return std::shared_ptr<Allocation>(
+      m_->allocators_.at(place)->Allocate(size, attr).release());
 }
 
 std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
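
The AllocShared change is the visible payoff: because every Allocation now frees itself on destruction, promoting the unique_ptr to a shared_ptr is sufficient. A hedged caller-side sketch follows; the ShareOneBuffer function, the buffer size, and the allocator_facade.h include path are assumptions, while AllocatorFacade::Instance, AllocShared, and Allocator::kDefault come from the diff.

#include <memory>

#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/place.h"

namespace alloc = paddle::memory::allocation;

// Hypothetical caller: shared ownership of one allocation without any
// AllocateShared method on the allocator side.
void ShareOneBuffer() {
  auto& facade = alloc::AllocatorFacade::Instance();
  std::shared_ptr<alloc::Allocation> buf = facade.AllocShared(
      paddle::platform::CPUPlace(), 1 << 20, alloc::Allocator::kDefault);
  std::shared_ptr<alloc::Allocation> alias = buf;  // second owner
  // The memory is released once both buf and alias are gone, through the
  // Allocation destructor rather than an explicit Free call.
}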

paddle/fluid/memory/allocation/auto_increment_allocator.cc

Lines changed: 50 additions & 9 deletions
@@ -20,20 +20,61 @@ namespace allocation {
 
 std::unique_ptr<Allocation> AutoIncrementAllocator::Allocate(
     size_t size, Allocator::Attr attr) {
-  return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) {
-    return allocator.Allocate(size, attr);
-  });
-}
+  auto cur = prev_success_allocator_.load();
+  size_t retry_count = allocator_num_.load();
+  size_t allocator_num = retry_count;
+  while (retry_count-- > 0) {  // until there retry count is zero
+    try {
+      auto res = underlying_allocators_[cur]->Allocate(size, attr);
+      prev_success_allocator_ = cur;
+      return res;
+    } catch (BadAlloc&) {
+      if (++cur >= allocator_num) {
+        cur = 0;
+      }
+    } catch (...) {
+      // if there is another type of allocation, just rethrow it.
+      throw;
+    }
+  }
 
-std::shared_ptr<Allocation> AutoIncrementAllocator::AllocateShared(
-    size_t size, Allocator::Attr attr) {
-  return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) {
-    return allocator.AllocateShared(size, attr);
-  });
+  // This happens when the first allocator is exhausted and
+  // there are more than 1 allocation requests
+  // In this situation, the first allocation request would success
+  // and the second allocation request would fail if we do not use
+  // the newly created allocator by the first allocation request.
+  for (cur = allocator_num; cur < allocator_num_; ++cur) {
+    try {
+      auto ret = underlying_allocators_[cur]->Allocate(size, attr);
+      prev_success_allocator_ = cur;
+      return ret;
+    } catch (BadAlloc&) {
+    } catch (...) {
+      throw;
+    }
+  }
+  // No suitable allocator
+  return CreateNewAllocator()->Allocate(size, attr);
 }
 
 bool AutoIncrementAllocator::IsAllocThreadSafe() const { return true; }
 
+std::shared_ptr<Allocator> AutoIncrementAllocator::CreateNewAllocator() {
+  std::lock_guard<std::mutex> guard(mtx_);
+  auto old_size = allocator_num_.load();
+  PADDLE_ENFORCE_LT(old_size, underlying_allocators_.size(),
+                    "Allocator number exceeds capacity %d",
+                    underlying_allocators_.size());
+  underlying_allocators_[old_size] = creator_();
+  prev_success_allocator_ = old_size;
+  ++allocator_num_;
+  PADDLE_ENFORCE(
+      underlying_allocators_[old_size]->IsAllocThreadSafe(),
+      "the underlying allocator must be thread safe. This is a program "
+      "bug.");
+  return underlying_allocators_[old_size];
+}
+
 } // namespace allocation
 } // namespace memory
 } // namespace paddle
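
For reference, a sketch of how this allocator is wired up. The creator lambda and the capacity value are invented for illustration; in the commit the real creator is ChunkedManagedAllocator::BestFitAllocatorCreator in allocator_facade.cc, and CreateNewAllocator enforces that whatever the creator returns is thread safe.

#include <memory>

#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"

namespace alloc = paddle::memory::allocation;

// Hypothetical set-up: every call to the creator contributes one more
// underlying allocator. Allocate() retries over the existing ones starting
// from the last successful index and only calls CreateNewAllocator() when
// all of them throw BadAlloc.
std::shared_ptr<alloc::Allocator> MakeAutoIncrementAllocator() {
  alloc::AutoIncrementAllocator::AllocatorCreator creator = []() {
    // Assumed thread safe for this sketch; a real creator returns a locked
    // best-fit allocator over a freshly allocated chunk.
    return std::shared_ptr<alloc::Allocator>(new alloc::CPUAllocator());
  };
  // capacity bounds how many underlying allocators can ever be created.
  return std::make_shared<alloc::AutoIncrementAllocator>(std::move(creator),
                                                         /*capacity=*/8);
}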

paddle/fluid/memory/allocation/auto_increment_allocator.h

Lines changed: 5 additions & 61 deletions
@@ -46,76 +46,20 @@ namespace allocation {
 // thread-safe std::vector with varying size is hard to implement.
 // Fortunately, we can get the total GPU memory and each chunk size.
 // Therefore, we can get the suitable capacity of AutoIncrementAllocator.
-class AutoIncrementAllocator : public ManagedAllocator {
+class AutoIncrementAllocator : public Allocator {
  public:
   // Creator is the method to create ManagedAllocator
-  using AllocatorCreator = std::function<std::shared_ptr<ManagedAllocator>()>;
+  using AllocatorCreator = std::function<std::shared_ptr<Allocator>()>;
 
   explicit AutoIncrementAllocator(AllocatorCreator&& creator, size_t capacity)
       : creator_(std::move(creator)), underlying_allocators_(capacity) {}
+
   std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override;
-  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
+
   bool IsAllocThreadSafe() const override;
 
  private:
-  // NOTE: here use template Callback, it can be inlined when -O3
-  template <typename Callback>
-  inline typename std::result_of<Callback(ManagedAllocator&)>::type
-  InvokeOrCreateUnderlyingAllocator(Callback callback) {
-    auto cur = prev_success_allocator_.load();
-    size_t retry_count = allocator_num_.load();
-    size_t allocator_num = retry_count;
-    while (retry_count-- > 0) {  // until there retry count is zero
-      try {
-        auto res = callback(*underlying_allocators_[cur]);
-        prev_success_allocator_ = cur;
-        return std::move(res);
-      } catch (BadAlloc&) {
-        if (++cur >= allocator_num) {
-          cur = 0;
-        }
-      } catch (...) {
-        // if there is another type of allocation, just rethrow it.
-        throw;
-      }
-    }
-
-    // This happens when the first allocator is exhausted and
-    // there are more than 1 allocation requests
-    // In this situation, the first allocation request would success
-    // and the second allocation request would fail if we do not use
-    // the newly created allocator by the first allocation request.
-    for (cur = allocator_num; cur < allocator_num_; ++cur) {
-      try {
-        auto ret = callback(*underlying_allocators_[cur]);
-        prev_success_allocator_ = cur;
-        return std::move(ret);
-      } catch (BadAlloc&) {
-      } catch (...) {
-        throw;
-      }
-    }
-    // No suitable allocator
-
-    ManagedAllocator* new_allocator;
-    {
-      std::lock_guard<std::mutex> guard(mtx_);
-      auto old_size = allocator_num_.load();
-      PADDLE_ENFORCE_LT(old_size, underlying_allocators_.size(),
-                        "Allocator number exceeds capacity %d",
-                        underlying_allocators_.size());
-      underlying_allocators_[old_size] = creator_();
-      new_allocator = underlying_allocators_[old_size].get();
-      prev_success_allocator_ = old_size;
-      ++allocator_num_;
-    }
-
-    PADDLE_ENFORCE(
-        new_allocator->IsAllocThreadSafe(),
-        "the underlying allocator must be thread safe. This is a program "
-        "bug.");
-    return callback(*new_allocator);
-  }
+  std::shared_ptr<Allocator> CreateNewAllocator();
 
   AllocatorCreator creator_;
 