Adding L2 Regularization to Recognize digits MLP example (#5186)

abhinavarora · web-flow · commit 5906baa3f4ad · 2017-10-27T19:28:28.000-07:00
diff --git a/python/paddle/v2/framework/layer_helper.py b/python/paddle/v2/framework/layer_helper.py
@@ -131,12 +131,14 @@ def input_dtype(self, input_param_name='input'):
         return dtype
 
     def create_parameter(self, attr, shape, dtype, suffix='w'):
-        if attr['name'] is None:
-            attr['name'] = unique_name(".".join([self.name, suffix]))
+        # Deepcopy the attr so that parameters can be shared in program
+        attr_copy = copy.deepcopy(attr)
+        if attr_copy['name'] is None:
+            attr_copy['name'] = unique_name(".".join([self.name, suffix]))
         self.init_program.global_block().create_parameter(
-            dtype=dtype, shape=shape, **attr)
+            dtype=dtype, shape=shape, **attr_copy)
         return self.program.global_block().create_parameter(
-            name=attr['name'], dtype=dtype, shape=shape)
+            name=attr_copy['name'], dtype=dtype, shape=shape)
 
     def create_tmp_variable(self, dtype):
         return self.program.current_block().create_var(
diff --git a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
@@ -5,9 +5,11 @@
 
 from paddle.v2.framework.framework import Program, g_program
 from paddle.v2.framework.executor import Executor
+from paddle.v2.framework.regularizer import L2DecayRegularizer
 
 import numpy as np
 
+BATCH_SIZE = 128
 init_program = Program()
 program = Program()
 image = layers.data(
@@ -17,22 +19,35 @@
     program=program,
     init_program=init_program)
 
+param_attr = {
+    'name': None,
+    'init_attr': {
+        'type': 'uniform_random',
+        'min': -1.0,
+        'max': 1.0
+    },
+    'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE)
+}
+
 hidden1 = layers.fc(input=image,
                     size=128,
                     act='relu',
                     program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 hidden2 = layers.fc(input=hidden1,
                     size=64,
                     act='relu',
                     program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 
 predict = layers.fc(input=hidden2,
                     size=10,
                     act='softmax',
                     program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 
 label = layers.data(
     name='y',
@@ -48,8 +63,6 @@
 sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
 opts = sgd_optimizer.minimize(avg_cost)
 
-BATCH_SIZE = 128
-
 train_reader = paddle.batch(
     paddle.reader.shuffle(
         paddle.dataset.mnist.train(), buf_size=8192),