[evconvert] support NCHW data format and optimize NHWC data format

haifenghan · haifenghan · commit 6519a9ed6df5 · 2018-12-25T18:30:22.000+08:00
diff --git a/src/caffe/layers/depthtospace_layer.cpp b/src/caffe/layers/depthtospace_layer.cpp
@@ -18,10 +18,9 @@ void DepthToSpaceLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     this->output_top_shape.push_back(bottom_shape[2] * this->block_size);
     this->output_top_shape.push_back(bottom_shape[3] / (this->block_size*this->block_size));
   } else if(this->data_format == "NCHW"){
-    NOT_IMPLEMENTED;
-    // this->output_top_shape.push_back(bottom_shape[1] / (this->block_size*this->block_size));
-    // this->output_top_shape.push_back(bottom_shape[2] * this->block_size);
-    // this->output_top_shape.push_back(bottom_shape[3] * this->block_size);
+    this->output_top_shape.push_back(bottom_shape[1] / (this->block_size*this->block_size));
+    this->output_top_shape.push_back(bottom_shape[2] * this->block_size);
+    this->output_top_shape.push_back(bottom_shape[3] * this->block_size);
   }
 }
 
@@ -61,8 +60,40 @@ void DepthToSpaceLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
             (offset_h * this->block_size + offset_w) * output_depth;
           for (int d = 0; d < output_depth; ++d) {
             const int in_d = d + offset_d;
-            const int out_index = b*output_height*output_width*output_depth + h*output_width*output_depth + w*output_depth + d;
-            const int in_index = b*input_height*input_width*input_depth + in_h*input_width*input_depth + in_w*input_depth + in_d;
+            const int out_index = ((b*output_height + h)*output_width + w)*output_depth + d;
+            const int in_index = ((b*input_height + in_h)*input_width + in_w)*input_depth + in_d;
+            top_data[out_index] = bottom_data[in_index];
+          }
+        }
+      }
+    }
+  } else {
+    const int batch_size = this->output_top_shape[0];
+    const int output_depth = this->output_top_shape[1];
+    const int output_height = this->output_top_shape[2];
+    const int output_width = this->output_top_shape[3];
+
+    vector<int> bottom_shape = bottom[0]->shape();
+    const int input_depth = bottom_shape[1];
+    const int input_height = bottom_shape[2];
+    const int input_width = bottom_shape[3];
+
+    const Dtype* bottom_data = bottom[0]->cpu_data();
+    Dtype* top_data = top[0]->mutable_cpu_data();
+
+    for (int b = 0; b < batch_size; ++b) {
+      for (int h = 0; h < output_height; ++h) {
+        const int in_h = h / this->block_size;
+        const int offset_h = (h % this->block_size);
+        for (int w = 0; w < output_width; ++w) {
+          const int in_w = w / this->block_size;
+          const int offset_w = (w % this->block_size);
+          const int offset_d =
+            (offset_h * this->block_size + offset_w) * output_depth;
+          for (int d = 0; d < output_depth; ++d) {
+            const int in_d = d + offset_d;
+            const int out_index = ((b*output_depth + d)*output_height + h)*output_width + w;
+            const int in_index = ((b*input_depth + in_d)*input_height + in_h)*input_width + in_w;
             top_data[out_index] = bottom_data[in_index];
           }
         }