Skip to content

Commit cdb3d9a

Browse files
authored
Merge pull request #413 from drnikolaev/caffe-0.16
Fix for large batches
2 parents 150109e + 60a4a9c commit cdb3d9a

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

include/caffe/layers/cudnn_conv_layer.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ template<typename Ftype, typename Btype>
7878
class CuDNNConvolutionLayer : public ConvolutionLayer<Ftype, Btype> {
7979
// Using all of memory may result in failure of workspace reserve.
8080
// NOLINT_NEXT_LINE(build/storage_class)
81-
static constexpr size_t PAGE_SIZE = 16 * 1024 * 1024;
81+
static constexpr size_t PAGE_SIZE = 32 * 1024 * 1024;
8282
static constexpr int MAX_PARALLEL_GROUPS = 2;
8383
static constexpr int REQUEST_ALGO_COUNT = 1;
8484
static constexpr int ATTEMPTS_TO_RESERVE_WS = 3;

src/caffe/layers/cudnn_conv_layer.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -247,18 +247,20 @@ void CuDNNConvolutionLayer<Ftype, Btype>::AllocateFindExWorkspace() {
247247
if (map_val(dev, ws_released_, mv_)) {
248248
return;
249249
}
250-
GPUMemory::Workspace& ws = map_ptr(dev, workspace_, mv_);
251-
ws.release();
252-
253250
GPUMemory::Workspace& tmp_ws = map_ptr(dev, tmp_weights_, mv_);
254251
const size_t tmp_weights_size = map_val(dev,
255252
this->phase_ == TRAIN ? train_tmp_weights_mem_ : test_tmp_weights_mem_, mv_);
256253
tmp_ws.safe_reserve(tmp_weights_size);
257254

255+
GPUMemory::Workspace& ws = map_ptr(dev, workspace_, mv_);
258256
size_t bytes_available, bytes_total;
259257
GPUMemory::GetInfo(&bytes_available, &bytes_total, true);
260258
bytes_available = std::min(bytes_available, bytes_total / 2UL);
259+
// 2+ pages => reallocate
261260
size_t req_bytes = align_down<7>(bytes_available > PAGE_SIZE ? bytes_available - PAGE_SIZE : 0UL);
261+
if (static_cast<float>(req_bytes) <= PAGE_SIZE) {
262+
return;
263+
}
262264
int attempts = ATTEMPTS_TO_RESERVE_WS;
263265
while (!ws.try_reserve(req_bytes) && attempts > 0) {
264266
req_bytes = align_down<7>(req_bytes > PAGE_SIZE ? req_bytes - PAGE_SIZE : 0UL);
@@ -468,7 +470,7 @@ void CuDNNConvolutionLayer<Ftype, Btype>::Reshape(
468470
if (!map_val(dev, ws_released_, mv_) && map_val(dev, ws_allocated_, mv_) > 0UL) {
469471
// Housekeeping: release excessive amount of device memory after FindEx calls
470472
size_t mem_req = align_up<7>(std::max(map_val(dev, train_mem_req_all_grps_, mv_),
471-
map_val(dev, test_mem_req_all_grps_, mv_)) * 2UL);
473+
map_val(dev, test_mem_req_all_grps_, mv_)) + PAGE_SIZE);
472474
if (mem_req > 0UL && ws.size() > mem_req) {
473475
// Winner needs half less - release the rest
474476
LOG(INFO) << this->print_current_device()

0 commit comments

Comments
 (0)