class InferenceTranspiler:
    def transpile(self, program, scope, place):
        '''
-        Transpile the program to a inference program by fused batch normalization.
+        Transpile the program. Support only fuse batch normalization now.
+
+        :param program: program to transpile
+        :type program: Program
+        :param scope: inference scope
+        :type scope: Scope
+        :param place: inference place
+        :type place: Place
+        '''
+        self.fuse_batch_norm(program, scope, place)
+
+    def fuse_batch_norm(self, program, scope, place):
+        '''
+        Transpile the program by fused batch normalization.

        The batch normalization followed the convolution or fully connected layer
        can be integrated with them. Doing so will give us a forward acceleration,
@@ -57,8 +70,6 @@ def transpile(self, program, scope, place):
        :type scope: Scope
        :param place: inference place
        :type place: Place
-        :return: program by fused batch normalization
-        :rtype: Program
        '''
        self.scope = scope
        self.place = place
@@ -96,7 +107,7 @@ def transpile(self, program, scope, place):
        # TODO(luotao): use clone() method to flush the program.desc in force,
        # since some large program.desc will not be flushed immediately.
        # And a better solution will be considered later.
-        return program.clone()
+        program = program.clone()

    # ====================== private transpiler functions =====================
    def _insert_bias_op(self, index, current_op, bn_op):
@@ -142,11 +153,25 @@ def _fuse_param(self, current_op, bn_op, bias_op, with_bias):
        :type with_bias: Int
        '''

-        def _load_tensor(param_name):
-            return self.scope.find_var(param_name[0]).get_tensor()
+        def _update_param(op, old_param_name, new_param):
+            # For the sake of remaining the original variables the same as before,
+            # create new variables in scope to store the new parameters.
+            old_param_name = old_param_name[0]
+            old_var = self.block.vars[old_param_name]
+            new_param_name = old_param_name + '_fuse_bn'
+            new_var = self.block.create_parameter(
+                name=new_param_name.encode('ascii'),
+                type=old_var.type,
+                dtype=old_var.dtype,
+                shape=old_var.shape)
+            op.rename_input(old_param_name, new_param_name)
+            self.scope.var(new_param_name)
+
+            tensor = self.scope.find_var(new_param_name).get_tensor()
+            tensor.set(np.array(new_param), self.place)

        def _load_param(param_name):
-            return np.array(_load_tensor(param_name))
+            return np.array(self.scope.find_var(param_name[0]).get_tensor())

        bias_bn = _load_param(bn_op.input("Bias"))  # Bias
        scale_bn = _load_param(bn_op.input("Scale"))  # Scale
@@ -155,8 +180,6 @@ def _load_param(param_name):

        # TODO(luotao1): consider only conv2d now. fc would be delt later.
        current_param = _load_param(current_op.input("Filter"))
-        current_tensor = _load_tensor(current_op.input("Filter"))
-
        std_bn = np.float32(np.sqrt(np.add(var_bn, 1e-5)))
        tmp = np.float32(np.divide(scale_bn, std_bn))
@@ -167,17 +190,16 @@ def _load_param(param_name):
        bias = np.zeros(bias_bn.shape)
        bias = np.float32(
            np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))
-        bias_tensor = _load_tensor(bias_op.input("Y"))
-        bias_tensor.set(bias, self.place)

        # re-compute weight of conv2d
        tmp = tmp.reshape(tmp.shape[0], -1)
        dst_param = current_param.reshape((tmp.shape[0], -1))
        dst_param = np.float32(np.multiply(dst_param, tmp))
        dst_param = dst_param.reshape(current_param.shape)

-        # set the updated parameters
-        current_tensor.set(np.array(dst_param), self.place)
+        # update parameters
+        _update_param(current_op, current_op.input("Filter"), dst_param)
+        _update_param(bias_op, bias_op.input("Y"), bias)

        # collect the renamed input
        self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
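For reference, the arithmetic behind _fuse_param is the standard conv + batch-norm folding: y = scale * (conv(x) - mean) / sqrt(var + eps) + bias, which equals a convolution with a per-output-channel rescaled filter plus a folded bias. The following is a minimal numpy sketch (not part of the commit; shapes and names are made up for illustration) that checks the equivalence on a single pixel of a 1x1 convolution, using the same formulas as the diff above.

# Sketch only: verify that folding batch norm into the conv filter and a bias
# reproduces conv followed by batch norm. All values are hypothetical.
import numpy as np

np.random.seed(0)
eps = 1e-5

filt = np.random.rand(4, 3, 1, 1).astype(np.float32)   # conv2d "Filter", 4 out / 3 in channels
x = np.random.rand(3).astype(np.float32)                # one input pixel

# batch-norm parameters (Scale, Bias, Mean, Variance in the code above)
scale_bn = np.random.rand(4).astype(np.float32)
bias_bn = np.random.rand(4).astype(np.float32)
mean_bn = np.random.rand(4).astype(np.float32)
var_bn = np.random.rand(4).astype(np.float32)

# original path: conv, then batch norm
conv_out = filt.reshape(4, 3) @ x
bn_out = scale_bn * (conv_out - mean_bn) / np.sqrt(var_bn + eps) + bias_bn

# fused path, mirroring _fuse_param: rescale the filter per output channel
# and fold mean/scale/bias into a single bias term
tmp = scale_bn / np.sqrt(var_bn + eps)
fused_filter = (filt.reshape(4, -1) * tmp[:, None]).reshape(filt.shape)
fused_bias = (0.0 - mean_bn) * tmp + bias_bn
fused_out = fused_filter.reshape(4, 3) @ x + fused_bias

assert np.allclose(bn_out, fused_out, atol=1e-4)

Note the API change in this commit: transpile() no longer returns a new Program; the program is cloned and mutated in place, and the rescaled weights are written to new '_fuse_bn' variables so the original parameters in scope stay untouched.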