PaddlePaddle
diff --git a/‎paddle/fluid/memory/allocation/CMakeLists.txt
Lines changed: 3 additions & 1 deletion b/‎paddle/fluid/memory/allocation/CMakeLists.txt
Lines changed: 3 additions & 1 deletion
diff --git a/‎paddle/fluid/memory/allocation/allocator_facade_test.cc renamed to ‎paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc
Lines changed: 7 additions & 17 deletions b/‎paddle/fluid/memory/allocation/allocator_facade_test.cc renamed to ‎paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc
Lines changed: 7 additions & 17 deletions
diff --git a/‎paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc
Lines changed: 94 additions & 0 deletions b/‎paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc
Lines changed: 94 additions & 0 deletions
diff --git a/‎paddle/fluid/memory/allocation/legacy_allocator.cc
Lines changed: 8 additions & 8 deletions b/‎paddle/fluid/memory/allocation/legacy_allocator.cc
Lines changed: 8 additions & 8 deletions
diff --git a/‎paddle/fluid/memory/detail/CMakeLists.txt
Lines changed: 2 additions & 0 deletions b/‎paddle/fluid/memory/detail/CMakeLists.txt
Lines changed: 2 additions & 0 deletions
diff --git a/‎paddle/fluid/memory/detail/buddy_allocator.cc
Lines changed: 14 additions & 12 deletions b/‎paddle/fluid/memory/detail/buddy_allocator.cc
Lines changed: 14 additions & 12 deletions
diff --git a/‎paddle/fluid/memory/detail/buddy_allocator.h
Lines changed: 1 addition & 1 deletion b/‎paddle/fluid/memory/detail/buddy_allocator.h
Lines changed: 1 addition & 1 deletion
@@ -61,4 +61,6 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat
 
 cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)
 
-cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)
+cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
+
+cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade)
@@ -19,8 +19,8 @@
 #ifdef PADDLE_WITH_CUDA
 DECLARE_double(fraction_of_gpu_memory_to_use);
 DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
-DECLARE_uint64(gpu_init_memory_in_mb);
-DECLARE_uint64(gpu_reallocate_memory_in_mb);
+DECLARE_uint64(initial_gpu_memory_in_mb);
+DECLARE_uint64(reallocate_gpu_memory_in_mb);
 DECLARE_int64(gpu_allocator_retry_time);
 #endif
 
@@ -79,25 +79,15 @@ void AllocateTestCases() {
 #endif
 }
 
-TEST(allocator, allocator) {
+TEST(Allocator, SpecifyGpuMemory) {
 #ifdef PADDLE_WITH_CUDA
-  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
-  FLAGS_gpu_allocator_retry_time = 500;
-  FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
-#endif
-
-  AllocateTestCases();
-}
-
-TEST(allocator, specify_gpu_memory) {
-#ifdef PADDLE_WITH_CUDA
-  // Set to 0.0 to test FLAGS_gpu_init_memory_in_mb and
-  // FLAGS_gpu_reallocate_memory_in_mb
+  // Set to 0.0 to test FLAGS_initial_gpu_memory_in_mb and
+  // FLAGS_reallocate_gpu_memory_in_mb
   FLAGS_fraction_of_gpu_memory_to_use = 0.0;
   // 512 MB
-  FLAGS_gpu_init_memory_in_mb = 512;
+  FLAGS_initial_gpu_memory_in_mb = 512;
   // 4 MB
-  FLAGS_gpu_reallocate_memory_in_mb = 4;
+  FLAGS_reallocate_gpu_memory_in_mb = 4;
   FLAGS_gpu_allocator_retry_time = 500;
   FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
 #endif
 
@@ -0,0 +1,94 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/memory/allocation/allocator_facade.h"
+#include <gflags/gflags.h>
+#include <gtest/gtest.h>
+
+#ifdef PADDLE_WITH_CUDA
+DECLARE_double(fraction_of_gpu_memory_to_use);
+DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
+DECLARE_uint64(initial_gpu_memory_in_mb);
+DECLARE_uint64(reallocate_gpu_memory_in_mb);
+DECLARE_int64(gpu_allocator_retry_time);
+#endif
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+//! Run allocate test cases for different places
+void AllocateTestCases() {
+  auto &instance = AllocatorFacade::Instance();
+  platform::Place place;
+  size_t size = 1024;
+
+  {
+    place = platform::CPUPlace();
+    size = 1024;
+    auto cpu_allocation = instance.Alloc(place, size);
+    ASSERT_NE(cpu_allocation, nullptr);
+    ASSERT_NE(cpu_allocation->ptr(), nullptr);
+    ASSERT_EQ(cpu_allocation->place(), place);
+    ASSERT_EQ(cpu_allocation->size(), size);
+  }
+
+#ifdef PADDLE_WITH_CUDA
+  {
+    place = platform::CUDAPlace(0);
+    size = 1024;
+    auto gpu_allocation = instance.Alloc(place, size);
+    ASSERT_NE(gpu_allocation, nullptr);
+    ASSERT_NE(gpu_allocation->ptr(), nullptr);
+    ASSERT_EQ(gpu_allocation->place(), place);
+    ASSERT_GE(gpu_allocation->size(), size);
+  }
+
+  {
+    // Allocate 2GB gpu memory
+    place = platform::CUDAPlace(0);
+    size = 2 * static_cast<size_t>(1 << 30);
+    auto gpu_allocation = instance.Alloc(place, size);
+    ASSERT_NE(gpu_allocation, nullptr);
+    ASSERT_NE(gpu_allocation->ptr(), nullptr);
+    ASSERT_EQ(gpu_allocation->place(), place);
+    ASSERT_GE(gpu_allocation->size(), size);
+  }
+
+  {
+    place = platform::CUDAPinnedPlace();
+    size = (1 << 20);
+    auto cuda_pinned_allocation =
+        instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
+    ASSERT_NE(cuda_pinned_allocation, nullptr);
+    ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
+    ASSERT_EQ(cuda_pinned_allocation->place(), place);
+    ASSERT_GE(cuda_pinned_allocation->size(), size);
+  }
+#endif
+}
+
+TEST(Allocator, Allocator) {
+#ifdef PADDLE_WITH_CUDA
+  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
+  FLAGS_gpu_allocator_retry_time = 500;
+  FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
+#endif
+
+  AllocateTestCases();
+}
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
@@ -37,8 +37,8 @@ DEFINE_bool(init_allocated_mem, false,
             "that initializing the allocated memory with a small value "
             "during unit testing.");
 DECLARE_double(fraction_of_gpu_memory_to_use);
-DECLARE_uint64(gpu_init_memory_in_mb);
-DECLARE_uint64(gpu_reallocate_memory_in_mb);
+DECLARE_uint64(initial_gpu_memory_in_mb);
+DECLARE_uint64(reallocate_gpu_memory_in_mb);
 DECLARE_bool(benchmark);
 
 namespace paddle {
@@ -158,15 +158,15 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
       VLOG(10) << "\n\nNOTE:\n"
                << "You can set GFlags environment variable "
                << "'FLAGS_fraction_of_gpu_memory_to_use' "
-               << "or 'FLAGS_gpu_init_memory_in_mb' "
-               << "or 'FLAGS_gpu_reallocate_memory_in_mb' "
+               << "or 'FLAGS_initial_gpu_memory_in_mb' "
+               << "or 'FLAGS_reallocate_gpu_memory_in_mb' "
                << "to change the memory size for GPU usage.\n"
                << "Current 'FLAGS_fraction_of_gpu_memory_to_use' value is "
                << FLAGS_fraction_of_gpu_memory_to_use
-               << ". Current 'FLAGS_gpu_init_memory_in_mb' value is "
-               << FLAGS_gpu_init_memory_in_mb
-               << ". Current 'FLAGS_gpu_reallocate_memory_in_mb' value is "
-               << FLAGS_gpu_reallocate_memory_in_mb << "\n\n";
+               << ". Current 'FLAGS_initial_gpu_memory_in_mb' value is "
+               << FLAGS_initial_gpu_memory_in_mb
+               << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
+               << FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
     }
   });
 
 
@@ -9,3 +9,5 @@ endif(${WITH_GPU})
 cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator)
 
 cc_library(buddy_allocator SRCS buddy_allocator.cc DEPS memory_block system_allocator glog)
+
+cc_test(buddy_allocator_test SRCS buddy_allocator_test.cc DEPS buddy_allocator)
@@ -14,7 +14,8 @@ limitations under the License. */
 
 #include "paddle/fluid/memory/detail/buddy_allocator.h"
 
-#include <utility>  // for std::move
+#include <algorithm>
+#include <utility>
 
 #include "glog/logging.h"
 
@@ -39,9 +40,10 @@ BuddyAllocator::~BuddyAllocator() {
               "have actually been freed";
   while (!pool_.empty()) {
     auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
-    VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
+    VLOG(10) << "Free from block (" << block << ", " << block->size(cache_)
+             << ")";
 
-    system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
+    system_allocator_->Free(block, block->size(cache_), block->index(cache_));
     cache_.invalidate(block);
     pool_.erase(pool_.begin());
   }
@@ -74,7 +76,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
 
   // refill the pool if failure
   if (it == pool_.end()) {
-    it = RefillPool();
+    it = RefillPool(size);
     // if still failure, fail fatally
     if (it == pool_.end()) {
       return nullptr;
@@ -187,22 +189,22 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
   return static_cast<MemoryBlock*>(p)->data();
 }
 
-BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
+BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
+    size_t request_bytes) {
   size_t allocate_bytes = max_chunk_size_;
   size_t index = 0;
 
 #ifdef PADDLE_WITH_CUDA
   if (system_allocator_->UseGpu()) {
     if ((total_used_ + total_free_) == 0) {
       // Compute the allocation size for gpu for the first allocation.
-      max_chunk_size_ = platform::GpuMaxChunkSize();
-      allocate_bytes = platform::GpuInitAllocSize();
+      allocate_bytes = std::max(platform::GpuInitAllocSize(), request_bytes);
     } else {
       // Reallocation size
       if (realloc_size_ == 0) {
         realloc_size_ = platform::GpuReallocSize();
       }
-      allocate_bytes = realloc_size_;
+      allocate_bytes = std::max(realloc_size_, request_bytes);
     }
   }
 #endif
@@ -298,12 +300,12 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
 
     VLOG(10) << "Return block " << block << " to fallback allocator.";
 
-    system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
+    system_allocator_->Free(block, block->size(cache_), block->index(cache_));
     cache_.invalidate(block);
 
     pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
 
-    total_free_ -= max_chunk_size_;
+    total_free_ -= block->size(cache_);
     fallback_alloc_count_--;
 
     // If no fall allocation exists, return directly
@@ -334,12 +336,12 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
 
     VLOG(10) << "Return block " << block << " to base allocator.";
 
-    system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
+    system_allocator_->Free(block, block->size(cache_), block->index(cache_));
     cache_.invalidate(block);
 
     pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
 
-    total_free_ -= max_chunk_size_;
+    total_free_ -= block->size(cache_);
 
     if (!shall_free_alloc()) return;
   }
 
@@ -60,7 +60,7 @@ class BuddyAllocator {
   void* SystemAlloc(size_t size);
 
   /*! \brief If existing chunks are not suitable, refill pool */
-  PoolSet::iterator RefillPool();
+  PoolSet::iterator RefillPool(size_t request_bytes);
 
   /**
    *  \brief   Find the suitable chunk from existing pool and split