@@ -247,18 +247,20 @@ void CuDNNConvolutionLayer<Ftype, Btype>::AllocateFindExWorkspace() {
247247 if (map_val (dev, ws_released_, mv_)) {
248248 return ;
249249 }
250- GPUMemory::Workspace& ws = map_ptr (dev, workspace_, mv_);
251- ws.release ();
252-
253250 GPUMemory::Workspace& tmp_ws = map_ptr (dev, tmp_weights_, mv_);
254251 const size_t tmp_weights_size = map_val (dev,
255252 this ->phase_ == TRAIN ? train_tmp_weights_mem_ : test_tmp_weights_mem_, mv_);
256253 tmp_ws.safe_reserve (tmp_weights_size);
257254
255+ GPUMemory::Workspace& ws = map_ptr (dev, workspace_, mv_);
258256 size_t bytes_available, bytes_total;
259257 GPUMemory::GetInfo (&bytes_available, &bytes_total, true );
260258 bytes_available = std::min (bytes_available, bytes_total / 2UL );
259+ // 2+ pages => reallocate
261260 size_t req_bytes = align_down<7 >(bytes_available > PAGE_SIZE ? bytes_available - PAGE_SIZE : 0UL );
261+ if (static_cast <float >(req_bytes) <= PAGE_SIZE) {
262+ return ;
263+ }
262264 int attempts = ATTEMPTS_TO_RESERVE_WS;
263265 while (!ws.try_reserve (req_bytes) && attempts > 0 ) {
264266 req_bytes = align_down<7 >(req_bytes > PAGE_SIZE ? req_bytes - PAGE_SIZE : 0UL );
@@ -468,7 +470,7 @@ void CuDNNConvolutionLayer<Ftype, Btype>::Reshape(
468470 if (!map_val (dev, ws_released_, mv_) && map_val (dev, ws_allocated_, mv_) > 0UL ) {
469471 // Housekeeping: release excessive amount of device memory after FindEx calls
470472 size_t mem_req = align_up<7 >(std::max (map_val (dev, train_mem_req_all_grps_, mv_),
471- map_val (dev, test_mem_req_all_grps_, mv_)) * 2UL );
473+ map_val (dev, test_mem_req_all_grps_, mv_)) + PAGE_SIZE );
472474 if (mem_req > 0UL && ws.size () > mem_req) {
473475 // Winner needs half less - release the rest
474476 LOG (INFO) << this ->print_current_device ()
0 commit comments