Fix cuDNN conv bug which occurs in the image classification demo on GTX GPUs

qingqing01 · qingqing01 · commit 95da095d108d · 2016-09-23T16:21:17.000+08:00
diff --git a/paddle/gserver/layers/CudnnConvLayer.cpp b/paddle/gserver/layers/CudnnConvLayer.cpp
@@ -85,6 +85,7 @@ bool CudnnConvLayer::init(const LayerMap &layerMap,
     biasOffset_ = numFilters_ / groups_[0];
   }
 
+  batchNum_ = 0;
   isSelectAlgo_ = false;
   return true;
 }
@@ -132,6 +133,9 @@ void CudnnConvLayer::reshape(int batchSize) {
   getOutput().setFrameHeight(outputH_);
   getOutput().setFrameWidth(outputW_);
 
+  isSelectAlgo_ = (batchSize == batchNum_);
+  batchNum_ = batchSize;
+
   size_t maxWorkSpace = 0;
   for (size_t i = 0; i < inputLayers_.size(); i++) {
     CHECK_EQ(inputLayers_[i]->getOutput().value->getWidth(),
@@ -160,6 +164,10 @@ void CudnnConvLayer::reshape(int batchSize) {
 
       maxWorkSpace = std::max(fwdLimitBytes_[i], bwdDataLimitBytes_[i]);
       maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_[i]);
+
+      VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_[i]
+                           << " / " << bwdDataAlgo_[i]
+                           << " / " << bwdFilterAlgo_[i];
     }
   }
 
diff --git a/paddle/gserver/layers/CudnnConvLayer.h b/paddle/gserver/layers/CudnnConvLayer.h
@@ -87,6 +87,10 @@ class CudnnConvLayer : public ConvBaseLayer {
  /// Whether the conv algorithm has already been selected.
   bool isSelectAlgo_;
 
+  /// batchNum_ records the batch size seen by the previous reshape() call.
+  /// If the batch size changes, the conv algorithms are selected again.
+  int batchNum_;
+
 public:
   explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {}
 

Original file line number	Diff line number	Diff line change
`@@ -85,6 +85,7 @@ bool CudnnConvLayer::init(const LayerMap &layerMap,`
`85`	`85`	`biasOffset_ = numFilters_ / groups_[0];`
`86`	`86`	`}`
`87`	`87`
	`88`	`+ batchNum_ = 0;`
`88`	`89`	`isSelectAlgo_ = false;`
`89`	`90`	`return true;`
`90`	`91`	`}`
`@@ -132,6 +133,9 @@ void CudnnConvLayer::reshape(int batchSize) {`
`132`	`133`	`getOutput().setFrameHeight(outputH_);`
`133`	`134`	`getOutput().setFrameWidth(outputW_);`
`134`	`135`
	`136`	`+ isSelectAlgo_ = (batchSize == batchNum_);`
	`137`	`+ batchNum_ = batchSize;`
	`138`	`+`
`135`	`139`	`size_t maxWorkSpace = 0;`
`136`	`140`	`for (size_t i = 0; i < inputLayers_.size(); i++) {`
`137`	`141`	`CHECK_EQ(inputLayers_[i]->getOutput().value->getWidth(),`
`@@ -160,6 +164,10 @@ void CudnnConvLayer::reshape(int batchSize) {`
`160`	`164`
`161`	`165`	`maxWorkSpace = std::max(fwdLimitBytes_[i], bwdDataLimitBytes_[i]);`
`162`	`166`	`maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_[i]);`
	`167`	`+`
	`168`	`+ VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_[i]`
	`169`	`+ << " / " << bwdDataAlgo_[i]`
	`170`	`+ << " / " << bwdFilterAlgo_[i];`
`163`	`171`	`}`
`164`	`172`	`}`
`165`	`173`