
Commit 1c58e27

wangyang59 committed:
fixed a bug in parse_conv in config_parser.py

1 parent af7a50c · commit 1c58e27
2 files changed: 188 additions, 5 deletions

python/paddle/trainer/config_parser.py

Lines changed: 12 additions & 5 deletions
@@ -649,7 +649,8 @@ def __init__(
 
         parse_conv(conv_conf,
                    input_layer_name,
-                   self.proj_conf.conv_conf)
+                   self.proj_conf.conv_conf,
+                   num_filters)
         # TODO: support rectangle input
         self.proj_conf.output_size = (self.proj_conf.conv_conf.output_x ** 2) * num_filters
 
@@ -730,7 +731,8 @@ def __init__(
 
         parse_conv(conv_conf,
                    MakeLayerNameInSubmodel(input_layer_names[0]),
-                   self.operator_conf.conv_conf)
+                   self.operator_conf.conv_conf,
+                   num_filters)
         self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x ** 2) * num_filters
 
         config_assert(len(input_layer_names) == 2, "Conv is binary operator")
@@ -1097,7 +1099,7 @@ def parse_norm(norm, input_layer_name, norm_conf):
 caffe_mode: compute the output size using floor instead of ceil,
 which is consistent of caffe and CuDNN's convention.
 '''
-def parse_conv(conv, input_layer_name, conv_conf, trans=False):
+def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
     conv_conf.filter_size = conv.filter_size
     conv_conf.filter_size_y = conv.filter_size_y
     conv_conf.channels = conv.channels
@@ -1106,10 +1108,11 @@ def parse_conv(conv, input_layer_name, conv_conf, trans=False):
     conv_conf.stride = conv.stride
     conv_conf.stride_y = conv.stride_y
     conv_conf.groups = conv.groups
-    conv_conf.filter_channels = conv.channels / conv.groups
     conv_conf.caffe_mode = conv.caffe_mode
 
     if not trans:
+        conv_conf.filter_channels = conv.channels / conv.groups
+
         img_pixels = g_layer_map[input_layer_name].size / conv.channels
         print('channels=%d size=%d'%(conv.channels,
                                      g_layer_map[input_layer_name].size))
@@ -1123,6 +1126,8 @@ def parse_conv(conv, input_layer_name, conv_conf, trans=False):
             conv_conf.img_size, conv_conf.filter_size,
             conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode)
     else:
+        conv_conf.filter_channels = num_filters / conv.groups
+
         outputSize = g_layer_map[input_layer_name].size / conv.channels
         print('channels=%d size=%d'%(conv.channels,
                                      g_layer_map[input_layer_name].size))
@@ -1616,7 +1621,8 @@ def __init__(
             parse_conv(
                 self.inputs[input_index].conv,
                 input_layer.name,
-                self.config.inputs[input_index].conv_conf)
+                self.config.inputs[input_index].conv_conf,
+                num_filters)
             conv_conf = self.config.inputs[input_index].conv_conf
             psize = self.calc_parameter_size(conv_conf)
             print("output size for %s is %d " % (name, conv_conf.output_x))
@@ -1676,6 +1682,7 @@ def __init__(
                 self.inputs[input_index].conv,
                 input_layer.name,
                 self.config.inputs[input_index].conv_conf,
+                num_filters,
                 trans=True)
             conv_conf = self.config.inputs[input_index].conv_conf
             psize = self.calc_parameter_size(conv_conf)
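
What the fix changes: filter_channels used to be set unconditionally to conv.channels / conv.groups, which is only correct for a forward convolution. For a transposed convolution (trans=True, the path that produces the exconvt layer) the input and output channel roles of the weight are swapped, so the value has to come from the layer's num_filters, which is why parse_conv now takes it as an argument. A minimal standalone sketch of the corrected rule (the helper name and the SimpleNamespace stand-in are illustrative, not PaddlePaddle APIs):

from types import SimpleNamespace

def filter_channels(channels, num_filters, groups, trans=False):
    """Mirror the fixed parse_conv rule for conv_conf.filter_channels."""
    if not trans:
        # forward convolution: each filter covers the input channels of its group
        return channels // groups
    # transposed convolution: the filter is laid out over the output channels,
    # so the count is derived from num_filters instead
    return num_filters // groups

# Values matching the generated config below: channels=1, num_filters=64, groups=1.
conv = SimpleNamespace(channels=1, num_filters=64, groups=1)
print(filter_channels(conv.channels, conv.num_filters, conv.groups, trans=False))  # 1
print(filter_channels(conv.channels, conv.num_filters, conv.groups, trans=True))   # 64

With these values, the old code would have produced filter_channels = 1 for the exconvt layer; the fixed code produces the 64 that appears in the generated config below.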
Lines changed: 176 additions & 0 deletions
@@ -0,0 +1,176 @@
+type: "nn"
+layers {
+  name: "image"
+  type: "data"
+  size: 51529
+  active_type: ""
+}
+layers {
+  name: "__conv_0__"
+  type: "exconvt"
+  size: 4194304
+  active_type: ""
+  inputs {
+    input_layer_name: "image"
+    input_parameter_name: "___conv_0__.w0"
+    conv_conf {
+      filter_size: 32
+      channels: 1
+      stride: 1
+      padding: 1
+      groups: 1
+      filter_channels: 64
+      output_x: 227
+      img_size: 256
+      caffe_mode: true
+      filter_size_y: 32
+      padding_y: 1
+      stride_y: 1
+    }
+  }
+  bias_parameter_name: "___conv_0__.wbias"
+  num_filters: 64
+  shared_biases: true
+}
+layers {
+  name: "__batch_norm_0__"
+  type: "batch_norm"
+  size: 4194304
+  active_type: "relu"
+  inputs {
+    input_layer_name: "__conv_0__"
+    input_parameter_name: "___batch_norm_0__.w0"
+    image_conf {
+      channels: 64
+      img_size: 256
+    }
+  }
+  inputs {
+    input_layer_name: "__conv_0__"
+    input_parameter_name: "___batch_norm_0__.w1"
+  }
+  inputs {
+    input_layer_name: "__conv_0__"
+    input_parameter_name: "___batch_norm_0__.w2"
+  }
+  bias_parameter_name: "___batch_norm_0__.wbias"
+  moving_average_fraction: 0.9
+}
+layers {
+  name: "__crmnorm_0__"
+  type: "norm"
+  size: 4194304
+  active_type: ""
+  inputs {
+    input_layer_name: "__batch_norm_0__"
+    norm_conf {
+      norm_type: "cmrnorm-projection"
+      channels: 64
+      size: 32
+      scale: 0.0004
+      pow: 0.75
+      output_x: 256
+      img_size: 256
+      blocked: false
+    }
+  }
+}
+layers {
+  name: "__pool_0__"
+  type: "pool"
+  size: 3240000
+  active_type: ""
+  inputs {
+    input_layer_name: "__conv_0__"
+    pool_conf {
+      pool_type: "max-projection"
+      channels: 64
+      size_x: 32
+      stride: 1
+      output_x: 225
+      img_size: 256
+      padding: 0
+      size_y: 32
+      stride_y: 1
+      output_y: 225
+      img_size_y: 256
+      padding_y: 0
+    }
+  }
+}
+parameters {
+  name: "___conv_0__.w0"
+  size: 65536
+  initial_mean: 0.0
+  initial_std: 0.0441941738242
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___conv_0__.wbias"
+  size: 64
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 64
+  dims: 1
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___batch_norm_0__.w0"
+  size: 64
+  initial_mean: 1.0
+  initial_std: 0.0
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___batch_norm_0__.w1"
+  size: 64
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 64
+  initial_strategy: 0
+  initial_smart: false
+  is_static: true
+  is_shared: true
+}
+parameters {
+  name: "___batch_norm_0__.w2"
+  size: 64
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 64
+  initial_strategy: 0
+  initial_smart: false
+  is_static: true
+  is_shared: true
+}
+parameters {
+  name: "___batch_norm_0__.wbias"
+  size: 64
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 64
+  initial_strategy: 0
+  initial_smart: false
+}
+input_layer_names: "image"
+output_layer_names: "__pool_0__"
+output_layer_names: "__crmnorm_0__"
+sub_models {
+  name: "root"
+  layer_names: "image"
+  layer_names: "__conv_0__"
+  layer_names: "__batch_norm_0__"
+  layer_names: "__crmnorm_0__"
+  layer_names: "__pool_0__"
+  input_layer_names: "image"
+  output_layer_names: "__pool_0__"
+  output_layer_names: "__crmnorm_0__"
+  is_recurrent_layer_group: false
+}
+

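For reference, the numbers in this generated config line up with the caffe-mode size relations that parse_conv relies on: the 227 x 227 "image" input (size 51529) corresponds to a 256 x 256 output for the transposed convolution with a 32 x 32 filter, stride 1 and padding 1, and the exconvt layer size is 256 * 256 * 64 = 4194304. A quick check of that arithmetic (the helper functions are illustrative sketches, not PaddlePaddle's own):

def conv_output_size(img_size, filter_size, padding, stride):
    # caffe-mode forward-convolution output size (floor division)
    return (img_size - filter_size + 2 * padding) // stride + 1

def conv_trans_img_size(output_size, filter_size, padding, stride):
    # inverse relation used for the transposed convolution
    return (output_size - 1) * stride + filter_size - 2 * padding

output_x = 227                                   # 227 * 227 = 51529, the "image" data layer size
img_size = conv_trans_img_size(output_x, 32, 1, 1)
print(img_size)                                  # 256, matches img_size in conv_conf
print(img_size * img_size * 64)                  # 4194304, matches the __conv_0__ layer size
print(conv_output_size(img_size, 32, 1, 1))      # 227, round-trips back to output_x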