
Commit ffc3416

luotao1 authored and reyoung committed
Add parallel_nn api and unittest (#110)
* Add `device` parameter to ExtraAttr in trainer_config_helpers.
* Add a unittest for it.
1 parent 2289c14 commit ffc3416
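
In short, trainer configs can now pin individual layers to devices through ExtraAttr (the shorthand for ExtraLayerAttribute). A minimal sketch of the new API, assuming a GPU build and the trainer launched with the parallel_nn option described in the linked use_case document; the layer names here are illustrative:

    from paddle.trainer_config_helpers import *

    settings(batch_size=100)

    data = data_layer(name='input', size=3)

    # device=-1 places the layer on the CPU; device>=0 selects that GPU.
    cpu_fc = fc_layer(input=data, size=5,
                      layer_attr=ExtraAttr(device=-1),
                      act=SigmoidActivation())

    # Two sibling branches on different GPUs can run in parallel.
    gpu_fc0 = fc_layer(input=cpu_fc, size=10,
                       layer_attr=ExtraAttr(device=0),
                       act=SigmoidActivation())
    gpu_fc1 = fc_layer(input=cpu_fc, size=10,
                       layer_attr=ExtraAttr(device=1),
                       act=SigmoidActivation())

    out = fc_layer(input=[gpu_fc0, gpu_fc1], size=10,
                   layer_attr=ExtraAttr(device=0),
                   act=SoftmaxActivation())
    outputs(out)

The unittest config below follows exactly this pattern, fanning out from a CPU layer to GPUs 0 and 1 and joining the branches again.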

File tree: 3 files changed (+65, -113 lines)


paddle/trainer/tests/sample_trainer_config_parallel.conf

Lines changed: 44 additions & 107 deletions
@@ -13,137 +13,74 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
+from paddle.trainer_config_helpers import *
 
-TrainData(
-    SimpleData(
-        files = "trainer/tests/sample_filelist.txt",
-        feat_dim = 3,
-        context_len = 0,
-        buffer_capacity = 1000000,
-    )
-)
+TrainData(SimpleData(
+            files = "trainer/tests/sample_filelist.txt",
+            feat_dim = 3,
+            context_len = 0,
+            buffer_capacity = 1000000))
 
-TestData(
-    SimpleData(
-        files = "trainer/tests/sample_filelist.txt",
-        feat_dim = 3,
-        context_len = 0,
-        buffer_capacity = 1000000,
-    )
-)
+TestData(SimpleData(
+            files = "trainer/tests/sample_filelist.txt",
+            feat_dim = 3,
+            context_len = 0,
+            buffer_capacity = 1000000))
 
-Settings(
-    algorithm = "sgd",
-    num_batches_per_send_parameter = 1,
-    num_batches_per_get_parameter = 1,
-    batch_size = 100,
-    learning_rate = 0.001,
-    learning_rate_decay_a = 1e-5,
-    learning_rate_decay_b = 0.5,
-)
+settings(batch_size = 100)
 
-default_initial_std(0.2)
 # Output layer, label layer, cost layer, preferably set to the same environment.
 output_device = 0
 
-model_type("nn")
-
 # Input Layer does not need to specify the device number.
-Layer(
-    name = "input",
-    type = "data",
-    size = 3,
-)
+data = data_layer(name='input', size=3)
 
 # Calculate in the CPU.
-Layer(
-    name = "layer1_1",
-    type = "fc",
-    size = 5,
-    active_type = "sigmoid",
-    device = -1,
-    inputs = "input",
-)
+fc1 = fc_layer(input=data, size=5,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=-1),
+               act=SigmoidActivation())
 
 # Calculate in the GPU 0.
-Layer(
-    name = "layer2_1",
-    type = "fc",
-    size = 10,
-    active_type = "sigmoid",
-    device = 0,
-    inputs = "layer1_1",
-)
+fc2 = fc_layer(input=fc1, size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=0),
+               act=SigmoidActivation())
 
 # Calculate in the GPU 1.
-Layer(
-    name = "layer2_2",
-    type = "fc",
-    size = 10,
-    active_type = "sigmoid",
-    device = 1,
-    inputs = "layer1_1",
-)
+fc3 = fc_layer(input=fc1, size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=1),
+               act=SigmoidActivation())
 
 # Calculate in the GPU 0.
-Layer(
-    name = "layer3_1",
-    type = "fc",
-    size = 10,
-    device = 0,
-    active_type = "sigmoid",
-    inputs = ["layer2_1", "layer2_2"],
-)
+fc4 = fc_layer(input=[fc2,fc3], size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=0),
+               act=SigmoidActivation())
 
 # Calculate in the GPU 1.
-Layer(
-    name = "layer3_2",
-    type = "fc",
-    size = 10,
-    device = 1,
-    active_type = "sigmoid",
-    inputs = ["layer2_1", "layer2_2"],
-)
-
+fc5 = fc_layer(input=[fc2,fc3], size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=1),
+               act=SigmoidActivation())
 
-Layer(
-    name = "output",
-    type = "fc",
-    size = 10,
-    device = output_device,
-    active_type = "sigmoid",
-    inputs = ["layer3_1", "layer3_2"],
-)
+output = fc_layer(input=[fc4,fc5], size=10,
+                  bias_attr=True,
+                  layer_attr=ExtraAttr(device=output_device),
+                  act=SoftmaxActivation())
 
 if get_config_arg('with_cost', bool, True):
     # This is for training the neural network.
     # We need to have another data layer for label
     # and a layer for calculating cost
-    Layer(
-        name = "label",
-        type = "data",
-        device = output_device,
-        size = 1,
-    )
-
-    Layer(
-        name = "cost",
-        type = "multi-class-cross-entropy",
-        device = output_device,
-        inputs = ["output", "label"],
-    )
-
-    Evaluator(
-        name = "error",
-        type = "classification_error",
-        inputs = ["output", "label"])
-
-    Inputs("input", "label")
-    Outputs("cost")
-
+    lbl = data_layer(name='label', size=1,
+                     layer_attr=ExtraAttr(device=output_device))
+
+    outputs(classification_cost(input=output,
+                                label=lbl,
+                                layer_attr=ExtraAttr(device=output_device)))
 else:
     # This is for prediction where we don't have label
     # and don't need to calculate cost
-    Inputs("input")
-    Outputs("output")
+    outputs(output)
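
The with_cost branch above is toggled from the trainer command line through get_config_arg, so the same file serves both training and prediction. A small sketch of the pattern, reusing the `output` layer defined in the config; the --config_args=with_cost=false flag is assumed from PaddlePaddle's command-line conventions:

    # get_config_arg(name, type, default) reads values passed to the trainer
    # as, e.g.,  --config_args=with_cost=false
    with_cost = get_config_arg('with_cost', bool, True)

    if with_cost:
        # Training: declare a label input and optimize the cost layer.
        lbl = data_layer(name='label', size=1)
        outputs(classification_cost(input=output, label=lbl))
    else:
        # Prediction: expose the network output directly.
        outputs(output)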

python/paddle/trainer_config_helpers/attrs.py

Lines changed: 10 additions & 3 deletions
@@ -174,12 +174,16 @@ class ExtraLayerAttribute(object):
                       The dropout rate is the zero rate of this mask. The
                       details of what dropout is please refer to `here
                       <https://www.cs.toronto.edu/~hinton/absps/
-                      JMLRdropout.pdf>`_
+                      JMLRdropout.pdf>`_.
     :type drop_rate: float
-
+    :param device: device ID of layer. device=-1, use CPU. device>=0, use GPU.
+                   For details of device allocation in parallel_nn, please refer
+                   to `here <http://www.paddlepaddle.org/doc/ui/cmd_argument/
+                   use_case.html#case-2-specify-layers-in-different-devices>`_.
+    :type device: int
     """
 
-    def __init__(self, error_clipping_threshold=None, drop_rate=None):
+    def __init__(self, error_clipping_threshold=None, drop_rate=None, device=None):
         self.attr = dict()
         if isinstance(error_clipping_threshold, float):
             assert error_clipping_threshold > 0
@@ -189,6 +193,9 @@ def __init__(self, error_clipping_threshold=None, drop_rate=None):
             assert drop_rate > 0
             self.attr["drop_rate"] = drop_rate
 
+        if isinstance(device, int):
+            self.attr["device"] = device
+
     def check(self, layer_name):
         for key in self.attr:
             if not hasattr(self, 'can_%s' % key) or \
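
The constructor only records device when an int is given, leaving self.attr untouched otherwise. A hedged illustration of the resulting behavior; ExtraLayerAttribute.to_kwargs, used by the layer helpers, presumably returns an empty dict for a None attribute:

    from paddle.trainer_config_helpers.attrs import ExtraLayerAttribute

    attr = ExtraLayerAttribute(drop_rate=0.5, device=1)
    print(attr.attr)                   # {'drop_rate': 0.5, 'device': 1} (dict order may vary)

    no_device = ExtraLayerAttribute(drop_rate=0.5)
    print('device' in no_device.attr)  # False

    # Layer helpers splat these stored attributes into the underlying call:
    #   Layer(..., **ExtraLayerAttribute.to_kwargs(attr))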

python/paddle/trainer_config_helpers/layers.py

Lines changed: 11 additions & 3 deletions
@@ -201,6 +201,7 @@ def __str__(self):
 
 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
+DEVICE = 'device'
 
 
 def check_input(input):
@@ -223,10 +224,12 @@ def check_input(input):
 
 
 def layer_support(*attrs):
+    attrs_list = list(attrs)
+    attrs_list.append(DEVICE)
     def decorator(method):
         @functools.wraps(method)
         def wrapper(*args, **kwargs):
-            for attr in attrs:
+            for attr in attrs_list:
                 for each in args:
                     if isinstance(each, ExtraLayerAttribute):
                         setattr(each, '_'.join(['can', attr]), True)
@@ -2625,9 +2628,11 @@ def regression_cost(input, label, cost='square_error', name=None):
 
 
 @wrap_name_default("cost")
+@layer_support()
 def classification_cost(input, label, name=None,
                         cost="multi-class-cross-entropy",
-                        evaluator=classification_error_evaluator):
+                        evaluator=classification_error_evaluator,
+                        layer_attr=None):
     """
     classification cost Layer.
@@ -2640,13 +2645,16 @@ def classification_cost(input, label, name=None,
     :param cost: cost method.
     :type cost: basestring
     :param evaluator: Evaluator method.
+    :param layer_attr: layer's extra attribute.
+    :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
     assert input.layer_type != LayerType.DATA
     assert isinstance(input.activation, SoftmaxActivation)
     assert label.layer_type == LayerType.DATA
-    Layer(name=name, type=cost, inputs=[Input(input.name), Input(label.name)])
+    Layer(name=name, type=cost, inputs=[Input(input.name), Input(label.name)],
+          **ExtraLayerAttribute.to_kwargs(layer_attr))
 
     def __add_evaluator__(e):
         assert callable(e)
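
With @layer_support() applied (which appends DEVICE to every decorated layer's supported attributes) and layer_attr threaded through to Layer, the cost layer can now be pinned to a device like any other layer. A minimal config sketch of calling the updated API; layer names are illustrative and sizes arbitrary:

    from paddle.trainer_config_helpers import *

    data = data_layer(name='input', size=3)
    lbl = data_layer(name='label', size=1)
    # classification_cost asserts a softmax-activated input.
    prob = fc_layer(input=data, size=10, act=SoftmaxActivation())

    # The new layer_attr parameter places the cost computation on GPU 0.
    cost = classification_cost(input=prob, label=lbl,
                               layer_attr=ExtraAttr(device=0))
    outputs(cost)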
