From 07fe0eea5e4ef088c21549a5056650ee66827bcd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 10 Jan 2017 14:19:38 +0800 Subject: [PATCH 01/15] Init commit --- .../paddle/trainer_config_helpers/__init__.py | 1 + python/paddle/v2/__init__.py | 18 ++++++++++++++++++ python/paddle/v2/config.py | 12 ++++++++++++ python/paddle/v2/data.py | 11 +++++++++++ python/setup.py.in | 3 ++- 5 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 python/paddle/v2/__init__.py create mode 100644 python/paddle/v2/config.py create mode 100644 python/paddle/v2/data.py diff --git a/python/paddle/trainer_config_helpers/__init__.py b/python/paddle/trainer_config_helpers/__init__.py index 13155ebddbb49c..a7f5c02802587c 100644 --- a/python/paddle/trainer_config_helpers/__init__.py +++ b/python/paddle/trainer_config_helpers/__init__.py @@ -21,5 +21,6 @@ from optimizers import * from attrs import * from config_parser_utils import * + # This will enable operator overload for LayerOutput import layer_math diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py new file mode 100644 index 00000000000000..aefddaf08af108 --- /dev/null +++ b/python/paddle/v2/__init__.py @@ -0,0 +1,18 @@ +""" +This is an experimental package for Paddle new API. + +Currently, we should always use + +.. code-block:: python + + import paddle.v2 as paddle + +as our import statement. The API is in flux, never use this package in +production. 
+""" + +import paddle.trainer_config_helpers as config +import paddle.v2.data as data +import py_paddle.swig_paddle as raw + +__all__ = ['config', 'data', 'raw'] diff --git a/python/paddle/v2/config.py b/python/paddle/v2/config.py new file mode 100644 index 00000000000000..843bfdb981cb2a --- /dev/null +++ b/python/paddle/v2/config.py @@ -0,0 +1,12 @@ +from paddle.trainer_config_helpers import * +from paddle.trainer.config_parser import parse_config as parse +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_optimizer_config as parse_optimizer + +import paddle.trainer_config_helpers as tmp + +__all__ = ['parse', 'parse_network', 'parse_optimizer'] + +__all__.extend(tmp.__all__) diff --git a/python/paddle/v2/data.py b/python/paddle/v2/data.py new file mode 100644 index 00000000000000..c1d32f87173615 --- /dev/null +++ b/python/paddle/v2/data.py @@ -0,0 +1,11 @@ +from paddle.trainer.PyDataProvider2 import * +from py_paddle.dataprovider_converter import DataProviderConverter + +__all__ = [ + 'dense_vector', 'dense_vector_sequence', 'dense_vector_sub_sequence', + 'integer_value', 'integer_sequence', 'integer_value_sub_sequence', + 'sparse_binary_vector', 'sparse_binary_vector_sequence', + 'sparse_binary_vector_sub_sequence', 'sparse_vector', + 'sparse_vector_sequence', 'sparse_vector_sub_sequence', 'provider', + 'CacheType', 'DataProviderConverter' +] diff --git a/python/setup.py.in b/python/setup.py.in index b66a42e87c7870..9126b05216ca10 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -4,7 +4,8 @@ packages=['paddle', 'paddle.proto', 'paddle.trainer', 'paddle.trainer_config_helpers', - 'paddle.utils'] + 'paddle.utils', + 'paddle.v2'] setup(name='paddle', version='${PADDLE_VERSION}', From 1935b34272ade24601781af78f1dbbeb86fcfad7 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 10 Jan 2017 15:15:12 +0800 Subject: [PATCH 02/15] Simple 
extract paddle.v2 api. --- demo/mnist/api_train.py | 67 ++++++++++++++++-------------------- python/CMakeLists.txt | 5 +-- python/paddle/v2/__init__.py | 4 +-- python/paddle/v2/config.py | 2 +- 4 files changed, 36 insertions(+), 42 deletions(-) diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py index f301da382ff8a5..3dd1089aa72918 100644 --- a/demo/mnist/api_train.py +++ b/demo/mnist/api_train.py @@ -6,42 +6,33 @@ The user api could be simpler and carefully designed. """ -import py_paddle.swig_paddle as api -from py_paddle import DataProviderConverter -import paddle.trainer.PyDataProvider2 as dp -import numpy as np import random + +import paddle.v2 as paddle +import py_paddle.swig_paddle as api + from mnist_util import read_from_mnist -from paddle.trainer_config_helpers import * def optimizer_config(): - settings( + paddle.config.settings( learning_rate=1e-4, - learning_method=AdamOptimizer(), + learning_method=paddle.config.AdamOptimizer(), batch_size=1000, - model_average=ModelAverage(average_window=0.5), - regularization=L2Regularization(rate=0.5)) + model_average=paddle.config.ModelAverage(average_window=0.5), + regularization=paddle.config.L2Regularization(rate=0.5)) def network_config(): - imgs = data_layer(name='pixel', size=784) - hidden1 = fc_layer(input=imgs, size=200) - hidden2 = fc_layer(input=hidden1, size=200) - inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation()) - cost = classification_cost( - input=inference, label=data_layer( + imgs = paddle.config.data_layer(name='pixel', size=784) + hidden1 = paddle.config.fc_layer(input=imgs, size=200) + hidden2 = paddle.config.fc_layer(input=hidden1, size=200) + inference = paddle.config.fc_layer( + input=hidden2, size=10, act=paddle.config.SoftmaxActivation()) + cost = paddle.config.classification_cost( + input=inference, label=paddle.config.data_layer( name='label', size=10)) - outputs(cost) - - -def init_parameter(network): - assert isinstance(network, api.GradientMachine) - for 
each_param in network.getParameters(): - assert isinstance(each_param, api.Parameter) - array_size = len(each_param) - array = np.random.uniform(-1.0, 1.0, array_size).astype('float32') - each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array) + paddle.config.outputs(cost) def generator_to_batch(generator, batch_size): @@ -73,42 +64,44 @@ def input_order_converter(generator): def main(): - api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores + paddle.raw.initPaddle("-use_gpu=false", + "-trainer_count=4") # use 4 cpu cores # get enable_types for each optimizer. # enable_types = [value, gradient, momentum, etc] # For each optimizer(SGD, Adam), GradientMachine should enable different # buffers. - opt_config_proto = parse_optimizer_config(optimizer_config) - opt_config = api.OptimizationConfig.createFromProto(opt_config_proto) - _temp_optimizer_ = api.ParameterOptimizer.create(opt_config) + opt_config_proto = paddle.config.parse_optimizer(optimizer_config) + opt_config = paddle.raw.OptimizationConfig.createFromProto(opt_config_proto) + _temp_optimizer_ = paddle.raw.ParameterOptimizer.create(opt_config) enable_types = _temp_optimizer_.getParameterTypes() # Create Simple Gradient Machine. - model_config = parse_network_config(network_config) - m = api.GradientMachine.createFromConfigProto( + model_config = paddle.config.parse_network(network_config) + m = paddle.raw.GradientMachine.createFromConfigProto( model_config, api.CREATE_MODE_NORMAL, enable_types) # This type check is not useful. Only enable type hint in IDE. # Such as PyCharm - assert isinstance(m, api.GradientMachine) + assert isinstance(m, paddle.raw.GradientMachine) # Initialize Parameter by numpy. - init_parameter(network=m) + m.randParameters() # Create Local Updater. Local means not run in cluster. # For a cluster training, here we can change to createRemoteUpdater # in future. 
- updater = api.ParameterUpdater.createLocalUpdater(opt_config) - assert isinstance(updater, api.ParameterUpdater) + updater = paddle.raw.ParameterUpdater.createLocalUpdater(opt_config) + assert isinstance(updater, paddle.raw.ParameterUpdater) # Initialize ParameterUpdater. updater.init(m) # DataProvider Converter is a utility convert Python Object to Paddle C++ # Input. The input format is as same as Paddle's DataProvider. - converter = DataProviderConverter( - input_types=[dp.dense_vector(784), dp.integer_value(10)]) + converter = paddle.data.DataProviderConverter(input_types=[ + paddle.data.dense_vector(784), paddle.data.integer_value(10) + ]) train_file = './data/raw_data/train' test_file = './data/raw_data/t10k' diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 1cda4762eb2a55..1e660d13fdeaf9 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -4,11 +4,12 @@ set(OUTPUT_DIR file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py) - +file(GLOB V2_PY_FILES . ./paddle/v2/*.py) set(PY_FILES paddle/__init__.py ${TRAINER_PY_FILES} ${HELPERS_PY_FILES} - ${UTILS_PY_FILES}) + ${UTILS_PY_FILES} + ${V2_PY_FILES}) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index aefddaf08af108..2f35c88229ec51 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -11,8 +11,8 @@ production. 
""" -import paddle.trainer_config_helpers as config -import paddle.v2.data as data import py_paddle.swig_paddle as raw +import config +import data __all__ = ['config', 'data', 'raw'] diff --git a/python/paddle/v2/config.py b/python/paddle/v2/config.py index 843bfdb981cb2a..48873b26af0064 100644 --- a/python/paddle/v2/config.py +++ b/python/paddle/v2/config.py @@ -9,4 +9,4 @@ __all__ = ['parse', 'parse_network', 'parse_optimizer'] -__all__.extend(tmp.__all__) +__all__.extend(filter(lambda x: x[:2] != '__', dir(tmp))) From da970427b980096c8af9ebcb64467923fbef0627 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 10 Jan 2017 22:15:06 +0800 Subject: [PATCH 03/15] Follow comments --- demo/mnist/api_train.py | 11 +++++------ python/setup.py.in | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py index 3dd1089aa72918..325d140d461549 100644 --- a/demo/mnist/api_train.py +++ b/demo/mnist/api_train.py @@ -9,7 +9,6 @@ import random import paddle.v2 as paddle -import py_paddle.swig_paddle as api from mnist_util import read_from_mnist @@ -79,7 +78,7 @@ def main(): # Create Simple Gradient Machine. model_config = paddle.config.parse_network(network_config) m = paddle.raw.GradientMachine.createFromConfigProto( - model_config, api.CREATE_MODE_NORMAL, enable_types) + model_config, paddle.raw.CREATE_MODE_NORMAL, enable_types) # This type check is not useful. Only enable type hint in IDE. # Such as PyCharm @@ -123,7 +122,7 @@ def main(): # outArgs is Neural Network forward result. Here is not useful, just passed # to gradient_machine.forward - outArgs = api.Arguments.createArguments(0) + outArgs = paddle.raw.Arguments.createArguments(0) for pass_id in xrange(2): # we train 2 passes. updater.startPass() @@ -171,7 +170,7 @@ def main(): test_data_generator = input_order_converter(read_from_mnist(test_file)) for data_batch in generator_to_batch(test_data_generator, 512): # in testing stage, only forward is needed. 
- m.forward(converter(data_batch), outArgs, api.PASS_TEST) + m.forward(converter(data_batch), outArgs, paddle.raw.PASS_TEST) m.eval(test_evaluator) # print error rate for test data set @@ -182,8 +181,8 @@ def main(): updater.catchUpWith() params = m.getParameters() for each_param in params: - assert isinstance(each_param, api.Parameter) - value = each_param.getBuf(api.PARAMETER_VALUE) + assert isinstance(each_param, paddle.raw.Parameter) + value = each_param.getBuf(paddle.raw.PARAMETER_VALUE) value = value.copyToNumpyArray() # Here, we could save parameter to every where you want diff --git a/python/setup.py.in b/python/setup.py.in index 9126b05216ca10..1e1324eea825ab 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -5,7 +5,7 @@ packages=['paddle', 'paddle.trainer', 'paddle.trainer_config_helpers', 'paddle.utils', - 'paddle.v2'] + 'paddle.v2'] setup(name='paddle', version='${PADDLE_VERSION}', From 823eb1f8f5864ced16e2f098a4e6dc1f4b18a193 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 11 Jan 2017 17:15:47 +0800 Subject: [PATCH 04/15] Add proto to paddle.v2 --- python/paddle/v2/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 2f35c88229ec51..95d32832f07327 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -14,5 +14,6 @@ import py_paddle.swig_paddle as raw import config import data +import paddle.proto as proto -__all__ = ['config', 'data', 'raw'] +__all__ = ['config', 'data', 'raw', 'proto'] From 3bc8f99e7bc9d154f2513fc9663c3f7f2dbbbe5b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 15 Jan 2017 20:32:04 +0800 Subject: [PATCH 05/15] add layer abstract --- python/paddle/v2/__init__.py | 3 +- python/paddle/v2/layers.py | 160 +++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 python/paddle/v2/layers.py diff --git a/python/paddle/v2/__init__.py 
b/python/paddle/v2/__init__.py index 95d32832f07327..3e11b571fa8697 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -15,5 +15,6 @@ import config import data import paddle.proto as proto +import layers -__all__ = ['config', 'data', 'raw', 'proto'] +__all__ = ['config', 'data', 'raw', 'proto', 'layers'] diff --git a/python/paddle/v2/layers.py b/python/paddle/v2/layers.py new file mode 100644 index 00000000000000..93eed41705e933 --- /dev/null +++ b/python/paddle/v2/layers.py @@ -0,0 +1,160 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import uuid + +import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils +import paddle.trainer_config_helpers.layers as layers +import paddle.trainer_config_helpers.networks as networks + + +class Layer(object): + def __init__(self, layer=None, is_output=False, **kwargs): + self.uuid = uuid.uuid4() + self.parent_layer = layer + self.layer_output = None + self.network_config = None + self.is_output = is_output + + def _execute(self): + """ + recursively set parent's into proto + :return: + """ + if self.parent_layer is not None: + self.parent_layer.execute() + + def _mark_output_layers(self): + """ + find out all layers that is marked output and set them into proto + :return: + """ + print self.layers() + output_layers = filter(lambda layer: layer.is_output, self.layers()) + print output_layers + if len(output_layers) > 0: + networks.outputs( + map(lambda layer: layer.layer_output, output_layers)) + + def execute(self): + """ + function to set proto attribute + :return: + """ + pass + + def network(self): + """ + Construct the network according to this layer and all it's parent layers + :return: return a proto that represent this network. + """ + + def construct_network(): + self.execute() + self._mark_output_layers() + + if self.network_config is None: + self.network_config = config_parser_utils.parse_network_config( + construct_network) + return self.network_config + + def layers(self): + """ + get all layers that have relation to this layer. 
+ :return: + """ + all_layers = [] + if self.parent_layer is not None: + all_layers.extend(self.parent_layer.layers()) + all_layers.append(self) + return all_layers + + +class DataLayer(Layer): + def __init__(self, + name, + size, + height=None, + width=None, + layer_attr=None, + **kwargs): + self.name = name + self.size = size + self.height = height + self.width = width + self.layer_attr = layer_attr + super(DataLayer, self).__init__(**kwargs) + + def execute(self): + self._execute() + self.layer_output = \ + layers.data_layer(self.name, self.size, self.height, self.width, self.layer_attr) + + +class FcLayer(Layer): + def __init__(self, + layer, + size, + act=None, + name=None, + param_attr=None, + bias_attr=None, + layer_attr=None, + **kwargs): + self.parent_layer = layer + self.size = size + self.act = act + self.name = name + self.param_attr = param_attr + self.bias_attr = bias_attr + self.layer_attr = layer_attr + super(FcLayer, self).__init__(layer, **kwargs) + + def execute(self): + self._execute() + self.layer_output = \ + layers.fc_layer(input=self.parent_layer.layer_output, size=self.size, act=self.act, + name=self.name, param_attr=self.param_attr, bias_attr=self.bias_attr, + layer_attr=self.layer_attr) + + +class ClassificationCost(Layer): + def __init__(self, + layer, + label, + weight=None, + name=None, + evaluator=layers.classification_error_evaluator, + layer_attr=None, + is_output=False, + **kwargs): + assert isinstance(label, Layer) + self.parent_layer = layer + self.label = label + self.weight = weight + self.name = name + self.evaluator = evaluator + self.layer_attr = layer_attr + super(ClassificationCost, self).__init__(layer, is_output, **kwargs) + + def execute(self): + self._execute() + self.label.execute() + self.layer_output = \ + layers.classification_cost(input=self.parent_layer.layer_output, + label=self.label.layer_output, + weight=self.weight, + name=self.name, + evaluator=self.evaluator, + layer_attr=self.layer_attr) From 
26e7ca9104f26941291337b44966471bef085c12 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 16 Jan 2017 11:30:46 +0800 Subject: [PATCH 06/15] Refine layers in paddle.v2 --- python/paddle/v2/layers.py | 224 ++++++++++++++----------------------- 1 file changed, 85 insertions(+), 139 deletions(-) diff --git a/python/paddle/v2/layers.py b/python/paddle/v2/layers.py index 93eed41705e933..2ec8e7eb3ca4be 100644 --- a/python/paddle/v2/layers.py +++ b/python/paddle/v2/layers.py @@ -12,149 +12,95 @@ # See the License for the specific language governing permissions and # limitations under the License. -import uuid - -import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils -import paddle.trainer_config_helpers.layers as layers -import paddle.trainer_config_helpers.networks as networks +import paddle.trainer_config_helpers as conf_helps +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as __parse__ +from paddle.trainer_config_helpers.default_decorators import wrap_name_default class Layer(object): - def __init__(self, layer=None, is_output=False, **kwargs): - self.uuid = uuid.uuid4() - self.parent_layer = layer - self.layer_output = None - self.network_config = None - self.is_output = is_output - - def _execute(self): - """ - recursively set parent's into proto - :return: - """ - if self.parent_layer is not None: - self.parent_layer.execute() + def __init__(self, name, parent_layer): + assert isinstance(parent_layer, dict) + assert isinstance(name, basestring) + self.name = name + self.__parent_layer__ = parent_layer - def _mark_output_layers(self): - """ - find out all layers that is marked output and set them into proto - :return: - """ - print self.layers() - output_layers = filter(lambda layer: layer.is_output, self.layers()) - print output_layers - if len(output_layers) > 0: - networks.outputs( - map(lambda layer: layer.layer_output, output_layers)) - - def execute(self): + def to_proto(self, context): """ 
function to set proto attribute - :return: - """ - pass - - def network(self): - """ - Construct the network according to this layer and all it's parent layers - :return: return a proto that represent this network. """ - - def construct_network(): - self.execute() - self._mark_output_layers() - - if self.network_config is None: - self.network_config = config_parser_utils.parse_network_config( - construct_network) - return self.network_config - - def layers(self): - """ - get all layers that have relation to this layer. - :return: - """ - all_layers = [] - if self.parent_layer is not None: - all_layers.extend(self.parent_layer.layers()) - all_layers.append(self) - return all_layers - - -class DataLayer(Layer): - def __init__(self, - name, - size, - height=None, - width=None, - layer_attr=None, - **kwargs): - self.name = name - self.size = size - self.height = height - self.width = width - self.layer_attr = layer_attr - super(DataLayer, self).__init__(**kwargs) - - def execute(self): - self._execute() - self.layer_output = \ - layers.data_layer(self.name, self.size, self.height, self.width, self.layer_attr) - - -class FcLayer(Layer): - def __init__(self, - layer, - size, - act=None, - name=None, - param_attr=None, - bias_attr=None, - layer_attr=None, - **kwargs): - self.parent_layer = layer - self.size = size - self.act = act - self.name = name - self.param_attr = param_attr - self.bias_attr = bias_attr - self.layer_attr = layer_attr - super(FcLayer, self).__init__(layer, **kwargs) - - def execute(self): - self._execute() - self.layer_output = \ - layers.fc_layer(input=self.parent_layer.layer_output, size=self.size, act=self.act, - name=self.name, param_attr=self.param_attr, bias_attr=self.bias_attr, - layer_attr=self.layer_attr) - - -class ClassificationCost(Layer): - def __init__(self, - layer, - label, - weight=None, - name=None, - evaluator=layers.classification_error_evaluator, - layer_attr=None, - is_output=False, - **kwargs): - assert isinstance(label, Layer) 
- self.parent_layer = layer - self.label = label - self.weight = weight - self.name = name - self.evaluator = evaluator - self.layer_attr = layer_attr - super(ClassificationCost, self).__init__(layer, is_output, **kwargs) - - def execute(self): - self._execute() - self.label.execute() - self.layer_output = \ - layers.classification_cost(input=self.parent_layer.layer_output, - label=self.label.layer_output, - weight=self.weight, - name=self.name, - evaluator=self.evaluator, - layer_attr=self.layer_attr) + kwargs = dict() + for param_name in self.__parent_layer__: + param_value = self.__parent_layer__[param_name].to_proto( + context=context) + kwargs[param_name] = param_value + + if self.name not in context: + context[self.name] = self.to_proto_impl(**kwargs) + return context[self.name] + + def to_proto_impl(self, **kwargs): + raise NotImplementedError() + + +def parse_network(*outputs): + def __real_func__(): + context = dict() + real_output = [each.to_proto(context=context) for each in outputs] + conf_helps.outputs(real_output) + + return __parse__(__real_func__) + + +def __convert__(method_name, name_prefix, parent_names): + if name_prefix is not None: + wrapper = wrap_name_default(name_prefix=name_prefix) + else: + wrapper = None + + class __Impl__(Layer): + def __init__(self, name=None, **kwargs): + parent_layers = dict() + other_kwargs = dict() + for pname in parent_names: + parent_layers[pname] = kwargs[pname] + + for key in kwargs.keys(): + if key not in parent_names: + other_kwargs[key] = kwargs[key] + + super(__Impl__, self).__init__(name, parent_layers) + self.__other_kwargs__ = other_kwargs + + if wrapper is not None: + __init__ = wrapper(__init__) + + def to_proto_impl(self, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__other_kwargs__: + args[each] = self.__other_kwargs__[each] + return getattr(conf_helps, method_name)(name=self.name, **args) + + return __Impl__ + + +data_layer = 
__convert__('data_layer', None, []) +fc_layer = __convert__('fc_layer', name_prefix='fc', parent_names=['input']) +classification_cost = __convert__( + 'classification_cost', + name_prefix='classification_cost', + parent_names=['input', 'label']) + +__all__ = ['data_layer', 'fc_layer', 'classification_cost', 'parse_network'] + +if __name__ == '__main__': + data = data_layer(name='pixel', size=784) + hidden = fc_layer(input=data, size=100, act=conf_helps.SigmoidActivation()) + predict = fc_layer( + input=hidden, size=10, act=conf_helps.SoftmaxActivation()) + cost = classification_cost( + input=predict, label=data_layer( + name='label', size=10)) + print parse_network(cost) From 258fc5575a52b11b9084da076c19c86f2dfd502f Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 16 Jan 2017 11:57:46 +0800 Subject: [PATCH 07/15] Support multiple input in Paddle.v2.Layer --- python/paddle/v2/layers.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/layers.py b/python/paddle/v2/layers.py index 2ec8e7eb3ca4be..14efe9412c3582 100644 --- a/python/paddle/v2/layers.py +++ b/python/paddle/v2/layers.py @@ -16,6 +16,7 @@ from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ from paddle.trainer_config_helpers.default_decorators import wrap_name_default +import collections class Layer(object): @@ -31,8 +32,13 @@ def to_proto(self, context): """ kwargs = dict() for param_name in self.__parent_layer__: - param_value = self.__parent_layer__[param_name].to_proto( - context=context) + if not isinstance(self.__parent_layer__[param_name], + collections.Sequence): + param_value = self.__parent_layer__[param_name].to_proto( + context=context) + else: + param_value = map(lambda x: x.to_proto(context=context), + self.__parent_layer__[param_name]) kwargs[param_name] = param_value if self.name not in context: @@ -99,7 +105,7 @@ def to_proto_impl(self, **kwargs): data = data_layer(name='pixel', size=784) hidden = 
fc_layer(input=data, size=100, act=conf_helps.SigmoidActivation()) predict = fc_layer( - input=hidden, size=10, act=conf_helps.SoftmaxActivation()) + input=[hidden, data], size=10, act=conf_helps.SoftmaxActivation()) cost = classification_cost( input=predict, label=data_layer( name='label', size=10)) From 2b988b47768b017abf08e49298d72c17c8bf89ad Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 16 Jan 2017 14:06:30 +0800 Subject: [PATCH 08/15] Add Optimizer --- python/paddle/v2/optimizer.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 python/paddle/v2/optimizer.py diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py new file mode 100644 index 00000000000000..ff77ebf5f986a5 --- /dev/null +++ b/python/paddle/v2/optimizer.py @@ -0,0 +1,29 @@ +from paddle.trainer_config_helpers import * +import py_paddle.swig_paddle as api + + +class Optimizer(object): + def __init__(self, **kwargs): + if 'batch_size' in kwargs: + del kwargs['batch_size'] # not important for python library. 
+ + def __impl__(): + settings(batch_size=1, **kwargs) + + self.__opt_conf_proto__ = parse_optimizer_config(__impl__) + self.__opt_conf__ = api.OptimizationConfig.createFromProto( + self.__opt_conf_proto__) + + def enable_types(self): + tmp = api.ParameterOptimizer.create(self.__opt_conf__) + assert isinstance(tmp, api.ParameterOptimizer) + return tmp.getParameterTypes() + + def create_local_updater(self): + return api.ParameterUpdater.createLocalUpdater(self.__opt_conf__) + + +if __name__ == '__main__': + api.initPaddle('--use_gpu=false') + opt = Optimizer(learning_rate=1e-4, learning_method=AdamOptimizer()) + print opt.enable_types() From 5e1d187afceacf8172f607198097a334399529a6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 16 Jan 2017 14:17:35 +0800 Subject: [PATCH 09/15] Add optimizers --- demo/mnist/api_train.py | 22 +++++++--------------- python/CMakeLists.txt | 2 +- python/paddle/v2/__init__.py | 3 ++- python/paddle/v2/optimizer.py | 7 +++++++ 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py index 325d140d461549..58b27cde277e48 100644 --- a/demo/mnist/api_train.py +++ b/demo/mnist/api_train.py @@ -13,15 +13,6 @@ from mnist_util import read_from_mnist -def optimizer_config(): - paddle.config.settings( - learning_rate=1e-4, - learning_method=paddle.config.AdamOptimizer(), - batch_size=1000, - model_average=paddle.config.ModelAverage(average_window=0.5), - regularization=paddle.config.L2Regularization(rate=0.5)) - - def network_config(): imgs = paddle.config.data_layer(name='pixel', size=784) hidden1 = paddle.config.fc_layer(input=imgs, size=200) @@ -70,15 +61,16 @@ def main(): # enable_types = [value, gradient, momentum, etc] # For each optimizer(SGD, Adam), GradientMachine should enable different # buffers. 
- opt_config_proto = paddle.config.parse_optimizer(optimizer_config) - opt_config = paddle.raw.OptimizationConfig.createFromProto(opt_config_proto) - _temp_optimizer_ = paddle.raw.ParameterOptimizer.create(opt_config) - enable_types = _temp_optimizer_.getParameterTypes() + optimizer = paddle.optimizer.Optimizer( + learning_method=paddle.optimizer.AdamOptimizer(), + learning_rate=1e-4, + model_average=paddle.optimizer.ModelAverage(average_window=0.5), + regularization=paddle.optimizer.L2Regularization(rate=0.5)) # Create Simple Gradient Machine. model_config = paddle.config.parse_network(network_config) m = paddle.raw.GradientMachine.createFromConfigProto( - model_config, paddle.raw.CREATE_MODE_NORMAL, enable_types) + model_config, paddle.raw.CREATE_MODE_NORMAL, optimizer.enable_types()) # This type check is not useful. Only enable type hint in IDE. # Such as PyCharm @@ -90,7 +82,7 @@ def main(): # Create Local Updater. Local means not run in cluster. # For a cluster training, here we can change to createRemoteUpdater # in future. - updater = paddle.raw.ParameterUpdater.createLocalUpdater(opt_config) + updater = optimizer.create_local_updater() assert isinstance(updater, paddle.raw.ParameterUpdater) # Initialize ParameterUpdater. 
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 1e660d13fdeaf9..3a4c6bd76258c8 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -9,7 +9,7 @@ set(PY_FILES paddle/__init__.py ${TRAINER_PY_FILES} ${HELPERS_PY_FILES} ${UTILS_PY_FILES} - ${V2_PY_FILES}) + ${V2_PY_FILES}) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 3e11b571fa8697..0f78fb3d6198d9 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -16,5 +16,6 @@ import data import paddle.proto as proto import layers +import optimizer -__all__ = ['config', 'data', 'raw', 'proto', 'layers'] +__all__ = ['config', 'data', 'raw', 'proto', 'layers', 'optimizer'] diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index ff77ebf5f986a5..13bd2fd703e9fe 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,5 +1,12 @@ from paddle.trainer_config_helpers import * import py_paddle.swig_paddle as api +import paddle.trainer_config_helpers.optimizers as raw_opt_pacakge + +__all__ = ['Optimizer'] + +__all__.extend( + filter(lambda x: x not in ['Optimizer', 'BaseSGDOptimizer', 'settings'], + raw_opt_pacakge.__all__)) class Optimizer(object): From a2cf6356fa3ee6ecc4a67563a5c7761ad42284ae Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 16 Jan 2017 17:06:13 +0800 Subject: [PATCH 10/15] change network config in mnist/api_trian.py to v2 --- demo/mnist/api_train.py | 23 ++++++++++------------- python/paddle/v2/gradient_machine.py | 0 2 files changed, 10 insertions(+), 13 deletions(-) create mode 100644 python/paddle/v2/gradient_machine.py diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py index 58b27cde277e48..99a3ddcd092d70 100644 --- a/demo/mnist/api_train.py +++ b/demo/mnist/api_train.py @@ -13,18 +13,6 @@ from mnist_util import read_from_mnist -def network_config(): - imgs = 
paddle.config.data_layer(name='pixel', size=784) - hidden1 = paddle.config.fc_layer(input=imgs, size=200) - hidden2 = paddle.config.fc_layer(input=hidden1, size=200) - inference = paddle.config.fc_layer( - input=hidden2, size=10, act=paddle.config.SoftmaxActivation()) - cost = paddle.config.classification_cost( - input=inference, label=paddle.config.data_layer( - name='label', size=10)) - paddle.config.outputs(cost) - - def generator_to_batch(generator, batch_size): ret_val = list() for each_item in generator: @@ -67,8 +55,17 @@ def main(): model_average=paddle.optimizer.ModelAverage(average_window=0.5), regularization=paddle.optimizer.L2Regularization(rate=0.5)) + # define network + imgs = paddle.layers.data_layer(name='pixel', size=784) + hidden1 = paddle.layers.fc_layer(input=imgs, size=200) + hidden2 = paddle.layers.fc_layer(input=hidden1, size=200) + inference = paddle.layers.fc_layer( + input=hidden2, size=10, act=paddle.config.SoftmaxActivation()) + cost = paddle.layers.classification_cost( + input=inference, label=paddle.layers.data_layer( + name='label', size=10)) # Create Simple Gradient Machine. 
- model_config = paddle.config.parse_network(network_config) + model_config = paddle.layers.parse_network(cost) m = paddle.raw.GradientMachine.createFromConfigProto( model_config, paddle.raw.CREATE_MODE_NORMAL, optimizer.enable_types()) diff --git a/python/paddle/v2/gradient_machine.py b/python/paddle/v2/gradient_machine.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 From 7826fdfd3675e35c32d1e8aada286a1410ef2b4f Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 16 Jan 2017 17:31:05 +0800 Subject: [PATCH 11/15] rm gradient_machine.py --- python/paddle/v2/gradient_machine.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 python/paddle/v2/gradient_machine.py diff --git a/python/paddle/v2/gradient_machine.py b/python/paddle/v2/gradient_machine.py deleted file mode 100644 index e69de29bb2d1d6..00000000000000 From e7da4aeaafedcea0789e6cb604e6db956e06fd28 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 17 Jan 2017 12:39:31 +0800 Subject: [PATCH 12/15] Start define model api --- demo/mnist/api_train.py | 2 + python/paddle/v2/model.py | 96 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 python/paddle/v2/model.py diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py index 99a3ddcd092d70..c82e147c1b6ef7 100644 --- a/demo/mnist/api_train.py +++ b/demo/mnist/api_train.py @@ -64,8 +64,10 @@ def main(): cost = paddle.layers.classification_cost( input=inference, label=paddle.layers.data_layer( name='label', size=10)) + # Create Simple Gradient Machine. 
model_config = paddle.layers.parse_network(cost) + m = paddle.raw.GradientMachine.createFromConfigProto( model_config, paddle.raw.CREATE_MODE_NORMAL, optimizer.enable_types()) diff --git a/python/paddle/v2/model.py b/python/paddle/v2/model.py new file mode 100644 index 00000000000000..46100febe352ea --- /dev/null +++ b/python/paddle/v2/model.py @@ -0,0 +1,96 @@ +import layers as v2_layer +import optimizer as v2_optimizer +import py_paddle.swig_paddle as api +import collections + + +class Evaluator(object): + def __init__(self): + raise NotImplementedError() + + def start(self): + raise NotImplementedError() + + def finish(self): + raise NotImplementedError() + + def __str__(self): + raise NotImplementedError() + + +class Model(object): + """ + :type __gradient_machine__: api.GradientMachine + :type __updater__: api.ParameterUpdater + """ + + def __init__(self, layers, optimizer, is_local=True): + if not isinstance(layers, collections.Sequence): + layers = [layers] # layers could be many. + + assert is_local, 'Currently, only local mode is supported' + + for each in layers: + assert isinstance(each, v2_layer.Layer) + + assert isinstance(optimizer, v2_optimizer.Optimizer) + + # Create Proto. 
+ self.__gradient_machine__ = self.create_gradient_machine(layers, + optimizer) + + self.__updater__ = self.create_local_updater(optimizer) + + def rand_parameter(self): + raise NotImplementedError() + + def save_parameter(self, filename): + raise NotImplementedError() + + def load_parameter(self, filename): + raise NotImplementedError() + + def start(self): + raise NotImplementedError() + + def finish(self): + raise NotImplementedError() + + def start_pass(self): + raise NotImplementedError() + + def start_batch(self): + raise NotImplementedError() + + def finish_batch(self): + raise NotImplementedError() + + def train(self, data): + in_args = self.__data_converter(data) + out_args = api.Arguments.createArguments(0) + # forward/backward + # update + raise NotImplementedError() + + def test(self, data): + self.__updater__.catchUpWith() + in_args = self.__data_converter(data) + out_args = api.Arguments.createArguments(0) + # forward. + raise NotImplementedError() + + def finish_pass(self): + raise NotImplementedError() + + def complete(self): + return self.finish() + + def create_gradient_machine(self, layers, optimizer): + raise NotImplementedError() + + def create_local_updater(self, optimizer): + raise NotImplementedError() + + @property + def __data_converter(self): + raise NotImplementedError() From 6e4086c02d6c226edaad05273f22a1d092443816 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 17 Jan 2017 13:50:37 +0800 Subject: [PATCH 13/15] Add Data Interface --- demo/mnist/api_train.py | 217 +++++++++++++++++------------------ python/paddle/v2/__init__.py | 3 +- python/paddle/v2/data.py | 57 ++++++++- python/paddle/v2/model.py | 8 ++ 4 files changed, 171 insertions(+), 114 deletions(-) diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py index c82e147c1b6ef7..5b0efbb405c542 100644 --- a/demo/mnist/api_train.py +++ b/demo/mnist/api_train.py @@ -65,123 +65,116 @@ def main(): input=inference, label=paddle.layers.data_layer( name='label', size=10)) - # 
Create Simple Gradient Machine. - model_config = paddle.layers.parse_network(cost) + model = paddle.model.Model(layers=[cost], optimizer=optimizer) - m = paddle.raw.GradientMachine.createFromConfigProto( - model_config, paddle.raw.CREATE_MODE_NORMAL, optimizer.enable_types()) + model.rand_parameter() - # This type check is not useful. Only enable type hint in IDE. - # Such as PyCharm - assert isinstance(m, paddle.raw.GradientMachine) + model.start() - # Initialize Parameter by numpy. - m.randParameters() - - # Create Local Updater. Local means not run in cluster. - # For a cluster training, here we can change to createRemoteUpdater - # in future. - updater = optimizer.create_local_updater() - assert isinstance(updater, paddle.raw.ParameterUpdater) - - # Initialize ParameterUpdater. - updater.init(m) - - # DataProvider Converter is a utility convert Python Object to Paddle C++ - # Input. The input format is as same as Paddle's DataProvider. - converter = paddle.data.DataProviderConverter(input_types=[ - paddle.data.dense_vector(784), paddle.data.integer_value(10) - ]) + batch_evaluator = model.make_evaluator() + test_evaluator = model.make_evaluator() train_file = './data/raw_data/train' - test_file = './data/raw_data/t10k' - - # start gradient machine. - # the gradient machine must be started before invoke forward/backward. - # not just for training, but also for inference. - m.start() - - # evaluator can print error rate, etc. It is a C++ class. - batch_evaluator = m.makeEvaluator() - test_evaluator = m.makeEvaluator() - - # Get Train Data. - # TrainData will stored in a data pool. Currently implementation is not care - # about memory, speed. Just a very naive implementation. - train_data_generator = input_order_converter(read_from_mnist(train_file)) - train_data = BatchPool(train_data_generator, 512) - - # outArgs is Neural Network forward result. 
Here is not useful, just passed - # to gradient_machine.forward - outArgs = paddle.raw.Arguments.createArguments(0) - - for pass_id in xrange(2): # we train 2 passes. - updater.startPass() - - for batch_id, data_batch in enumerate(train_data()): - # data_batch is input images. - # here, for online learning, we could get data_batch from network. - - # Start update one batch. - pass_type = updater.startBatch(len(data_batch)) - - # Start BatchEvaluator. - # batch_evaluator can be used between start/finish. - batch_evaluator.start() - - # forwardBackward is a shortcut for forward and backward. - # It is sometimes faster than invoke forward/backward separately, - # because in GradientMachine, it may be async. - m.forwardBackward(converter(data_batch), outArgs, pass_type) - - for each_param in m.getParameters(): - updater.update(each_param) - - # Get cost. We use numpy to calculate total cost for this batch. - cost_vec = outArgs.getSlotValue(0) - cost_vec = cost_vec.copyToNumpyMat() - cost = cost_vec.sum() / len(data_batch) - - # Make evaluator works. - m.eval(batch_evaluator) - - # Print logs. - print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \ - cost, batch_evaluator - - batch_evaluator.finish() - # Finish batch. - # * will clear gradient. - # * ensure all values should be updated. - updater.finishBatch(cost) - - # testing stage. use test data set to test current network. - updater.apply() - test_evaluator.start() - test_data_generator = input_order_converter(read_from_mnist(test_file)) - for data_batch in generator_to_batch(test_data_generator, 512): - # in testing stage, only forward is needed. 
- m.forward(converter(data_batch), outArgs, paddle.raw.PASS_TEST) - m.eval(test_evaluator) - - # print error rate for test data set - print 'Pass', pass_id, ' test evaluator: ', test_evaluator - test_evaluator.finish() - updater.restore() - - updater.catchUpWith() - params = m.getParameters() - for each_param in params: - assert isinstance(each_param, paddle.raw.Parameter) - value = each_param.getBuf(paddle.raw.PARAMETER_VALUE) - value = value.copyToNumpyArray() - - # Here, we could save parameter to every where you want - print each_param.getName(), value - - updater.finishPass() - - m.finish() + for pass_id in xrange(2): + model.start_pass() + + model.finish_pass() + + # # DataProvider Converter is a utility convert Python Object to Paddle C++ + # # Input. The input format is as same as Paddle's DataProvider. + # converter = paddle.data.DataProviderConverter(input_types=[ + # paddle.data.dense_vector(784), paddle.data.integer_value(10) + # ]) + # + # train_file = './data/raw_data/train' + # test_file = './data/raw_data/t10k' + # + # # start gradient machine. + # # the gradient machine must be started before invoke forward/backward. + # # not just for training, but also for inference. + # m.start() + # + # # evaluator can print error rate, etc. It is a C++ class. + # batch_evaluator = m.makeEvaluator() + # test_evaluator = m.makeEvaluator() + # + # # Get Train Data. + # # TrainData will stored in a data pool. Currently implementation is not care + # # about memory, speed. Just a very naive implementation. + # train_data_generator = input_order_converter(read_from_mnist(train_file)) + # train_data = BatchPool(train_data_generator, 512) + # + # # outArgs is Neural Network forward result. Here is not useful, just passed + # # to gradient_machine.forward + # outArgs = paddle.raw.Arguments.createArguments(0) + # + # for pass_id in xrange(2): # we train 2 passes. 
+ # updater.startPass() + # + # for batch_id, data_batch in enumerate(train_data()): + # # data_batch is input images. + # # here, for online learning, we could get data_batch from network. + # + # # Start update one batch. + # pass_type = updater.startBatch(len(data_batch)) + # + # # Start BatchEvaluator. + # # batch_evaluator can be used between start/finish. + # batch_evaluator.start() + # + # # forwardBackward is a shortcut for forward and backward. + # # It is sometimes faster than invoke forward/backward separately, + # # because in GradientMachine, it may be async. + # m.forwardBackward(converter(data_batch), outArgs, pass_type) + # + # for each_param in m.getParameters(): + # updater.update(each_param) + # + # # Get cost. We use numpy to calculate total cost for this batch. + # cost_vec = outArgs.getSlotValue(0) + # cost_vec = cost_vec.copyToNumpyMat() + # cost = cost_vec.sum() / len(data_batch) + # + # # Make evaluator works. + # m.eval(batch_evaluator) + # + # # Print logs. + # print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \ + # cost, batch_evaluator + # + # batch_evaluator.finish() + # # Finish batch. + # # * will clear gradient. + # # * ensure all values should be updated. + # updater.finishBatch(cost) + # + # # testing stage. use test data set to test current network. + # updater.apply() + # test_evaluator.start() + # test_data_generator = input_order_converter(read_from_mnist(test_file)) + # for data_batch in generator_to_batch(test_data_generator, 512): + # # in testing stage, only forward is needed. 
+ # m.forward(converter(data_batch), outArgs, paddle.raw.PASS_TEST) + # m.eval(test_evaluator) + # + # # print error rate for test data set + # print 'Pass', pass_id, ' test evaluator: ', test_evaluator + # test_evaluator.finish() + # updater.restore() + # + # updater.catchUpWith() + # params = m.getParameters() + # for each_param in params: + # assert isinstance(each_param, paddle.raw.Parameter) + # value = each_param.getBuf(paddle.raw.PARAMETER_VALUE) + # value = value.copyToNumpyArray() + # + # # Here, we could save parameter to every where you want + # print each_param.getName(), value + # + # updater.finishPass() + + model.finish() if __name__ == '__main__': diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 0f78fb3d6198d9..675f32cec1d168 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -17,5 +17,6 @@ import paddle.proto as proto import layers import optimizer +import model -__all__ = ['config', 'data', 'raw', 'proto', 'layers', 'optimizer'] +__all__ = ['config', 'data', 'raw', 'proto', 'layers', 'optimizer', 'model'] diff --git a/python/paddle/v2/data.py b/python/paddle/v2/data.py index c1d32f87173615..5dc0b1eb00222a 100644 --- a/python/paddle/v2/data.py +++ b/python/paddle/v2/data.py @@ -7,5 +7,60 @@ 'sparse_binary_vector', 'sparse_binary_vector_sequence', 'sparse_binary_vector_sub_sequence', 'sparse_vector', 'sparse_vector_sequence', 'sparse_vector_sub_sequence', 'provider', - 'CacheType', 'DataProviderConverter' + 'CacheType', 'DataProviderConverter', 'chunk', 'IDataPool' ] + + +def chunk(iterable, size=1): + items = [None] * size # prealloc + for i, item in enumerate(iterable): + if i % size == 0 and i != 0: + yield items + items[i % size] = item + i += 1 # i is the total size. 
+ i %= size + if i == 0: + yield items + else: + yield items[:min(i + 1, size)] + + +class IDataPool(object): + """ + Interface of DataPool, but note that Python is using Duck-Typing, it is not + necessary to inherit this interface. + + NOTE: For Paddle developer, NEVER CHECK isinstance(obj, IDataPool). + + Basically contains two method, + + * next(): User should return the next batch of data in pool. raise + StopIteration if there is no more data in pool. + + * reset(): Reset the data pool to initial status. + + The basic usage of this api is as same as normal Python iterator, like + + .. code-block:: python + + pool = DataPool() + + for batch in pool: + process_batch(batch) + + + NOTE: The Data Pool API is not thread-safe. + """ + + def __iter__(self): + self.reset() + return self + + def next(self): + raise NotImplementedError() + + def __next__(self): + return self.next() + + def reset(self): + raise NotImplementedError() diff --git a/python/paddle/v2/model.py b/python/paddle/v2/model.py index 46100febe352ea..583b4aacf3a313 100644 --- a/python/paddle/v2/model.py +++ b/python/paddle/v2/model.py @@ -79,6 +79,14 @@ def test(self, data): # forward. raise NotImplementedError() + def make_evaluator(self): + """ + + :return: + :rtype: Evaluator + """ + raise NotImplementedError() + def finish_pass(self): raise NotImplementedError() From 286372b1b609df8ed88e9fa8514b2b49dedf147d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 17 Jan 2017 14:39:52 +0800 Subject: [PATCH 14/15] Add create_data_pool method. 
--- python/paddle/v2/data.py | 80 +++++++++++++++++++++++++++++++-------- python/paddle/v2/model.py | 4 ++ 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/python/paddle/v2/data.py b/python/paddle/v2/data.py index 5dc0b1eb00222a..3a84b6f3c86be6 100644 --- a/python/paddle/v2/data.py +++ b/python/paddle/v2/data.py @@ -1,5 +1,7 @@ from paddle.trainer.PyDataProvider2 import * from py_paddle.dataprovider_converter import DataProviderConverter +import random +import model as v2_model __all__ = [ 'dense_vector', 'dense_vector_sequence', 'dense_vector_sub_sequence', @@ -7,24 +9,11 @@ 'sparse_binary_vector', 'sparse_binary_vector_sequence', 'sparse_binary_vector_sub_sequence', 'sparse_vector', 'sparse_vector_sequence', 'sparse_vector_sub_sequence', 'provider', - 'CacheType', 'DataProviderConverter', 'chunk', 'IDataPool' + 'CacheType', 'DataProviderConverter', 'IDataPool', 'NaiveDataPool', + 'create_data_pool' ] -def chunk(iterable, size=1): - items = [None] * size # prealloc - for i, item in enumerate(iterable): - if i % size == 0 and i != 0: - yield items - items[i % size] = item - i += 1 # i is the total size. - i %= size - if i == 0: - yield items - else: - yield items[:min(i + 1, size)] - - class IDataPool(object): """ Interface of DataPool, but note that Python is using Duck-Typing, it is not @@ -64,3 +53,64 @@ def __next__(self): def reset(self): raise NotImplementedError() + + +def input_order_mapper(iterable, input_order): + assert isinstance(input_order, collections.Sequence) + for each_input_name in input_order: + assert isinstance(each_input_name, basestring) + + tmp = [None] * len(input_order) + for each_item in iterable: + for i in xrange(len(input_order)): + tmp[i] = each_item[input_order[i]] + yield tmp + + +class NaiveDataPool(IDataPool): + """ + Naive Data Pool means load all samples in memory. 
+ """ + + def __init__(self, iterable, batch_size, input_order, shuffle=True): + self.__pool__ = list( + input_order_mapper( + iterable=iterable, input_order=input_order)) + self.__batch_size__ = batch_size + self.__shuffle__ = shuffle + self.__idx__ = 0 + + def reset(self): + self.__idx__ = 0 + if self.__shuffle__: + random.shuffle(self.__pool__) + + def next(self): + if self.__idx__ >= len(self.__pool__): + raise StopIteration() + + begin = self.__idx__ + end = min(self.__idx__ + self.__batch_size__, len(self.__pool__)) + self.__idx__ = end + return self.__pool__[begin:end] + + +def create_data_pool(file_reader, + file_list, + model, + batch_size, + shuffle=True, + pool_class=NaiveDataPool): + assert isinstance(model, v2_model.Model) + + def __impl__(): + settings = object() + for each_file in file_list: + for each_sample in file_reader(settings, each_file): + yield each_sample + + return pool_class( + iterable=__impl__(), + batch_size=batch_size, + input_order=model.input_order, + shuffle=shuffle) diff --git a/python/paddle/v2/model.py b/python/paddle/v2/model.py index 583b4aacf3a313..e8efdb9fedd970 100644 --- a/python/paddle/v2/model.py +++ b/python/paddle/v2/model.py @@ -41,6 +41,10 @@ def __init__(self, layers, optimizer, is_local=True): self.__updater__ = self.create_local_updater(optimizer) + @property + def input_order(self): + raise NotImplementedError() + def rand_parameter(self): raise NotImplementedError() From 9360a1fc15dd6ddbf220416be2161381b30feed9 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 17 Jan 2017 15:03:07 +0800 Subject: [PATCH 15/15] Using new API refactor api_train. 
--- demo/mnist/api_train.py | 161 ++++++++------------------------------ python/paddle/v2/data.py | 6 +- python/paddle/v2/model.py | 4 + 3 files changed, 40 insertions(+), 131 deletions(-) diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py index 5b0efbb405c542..7e0f28d223019e 100644 --- a/demo/mnist/api_train.py +++ b/demo/mnist/api_train.py @@ -6,49 +6,16 @@ The user api could be simpler and carefully designed. """ -import random import paddle.v2 as paddle from mnist_util import read_from_mnist -def generator_to_batch(generator, batch_size): - ret_val = list() - for each_item in generator: - ret_val.append(each_item) - if len(ret_val) == batch_size: - yield ret_val - ret_val = list() - if len(ret_val) != 0: - yield ret_val - - -class BatchPool(object): - def __init__(self, generator, batch_size): - self.data = list(generator) - self.batch_size = batch_size - - def __call__(self): - random.shuffle(self.data) - for offset in xrange(0, len(self.data), self.batch_size): - limit = min(offset + self.batch_size, len(self.data)) - yield self.data[offset:limit] - - -def input_order_converter(generator): - for each_item in generator: - yield each_item['pixel'], each_item['label'] - - def main(): paddle.raw.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores - # get enable_types for each optimizer. - # enable_types = [value, gradient, momentum, etc] - # For each optimizer(SGD, Adam), GradientMachine should enable different - # buffers. 
optimizer = paddle.optimizer.Optimizer( learning_method=paddle.optimizer.AdamOptimizer(), learning_rate=1e-4, @@ -69,110 +36,44 @@ def main(): model.rand_parameter() - model.start() - batch_evaluator = model.make_evaluator() test_evaluator = model.make_evaluator() - train_file = './data/raw_data/train' + train_data = paddle.data.create_data_pool( + file_reader=read_from_mnist, + file_list=['./data/raw_data/train'], + model=model, + batch_size=128, + shuffle=True) + test_data = paddle.data.create_data_pool( + file_reader=read_from_mnist, + file_list=['./data/raw_data/test'], + model=model, + batch_size=128, + shuffle=False) + + # Training process. + model.start() + for pass_id in xrange(2): model.start_pass() - model.finish_pass() + for batch_id, data_batch in enumerate(train_data): + model.start_batch() + model.train(data_batch) + batch_evaluator.start() + model.evaluate(batch_evaluator) + batch_evaluator.finish() + print "Pass=%d, batch=%d" % (pass_id, batch_id), batch_evaluator + model.finish_batch() + + test_evaluator.start() + for _, data_batch in enumerate(test_data): + model.test(data_batch) + print "TEST Pass=%d" % pass_id, test_evaluator + test_evaluator.finish() - # # DataProvider Converter is a utility convert Python Object to Paddle C++ - # # Input. The input format is as same as Paddle's DataProvider. - # converter = paddle.data.DataProviderConverter(input_types=[ - # paddle.data.dense_vector(784), paddle.data.integer_value(10) - # ]) - # - # train_file = './data/raw_data/train' - # test_file = './data/raw_data/t10k' - # - # # start gradient machine. - # # the gradient machine must be started before invoke forward/backward. - # # not just for training, but also for inference. - # m.start() - # - # # evaluator can print error rate, etc. It is a C++ class. - # batch_evaluator = m.makeEvaluator() - # test_evaluator = m.makeEvaluator() - # - # # Get Train Data. - # # TrainData will stored in a data pool. 
Currently implementation is not care - # # about memory, speed. Just a very naive implementation. - # train_data_generator = input_order_converter(read_from_mnist(train_file)) - # train_data = BatchPool(train_data_generator, 512) - # - # # outArgs is Neural Network forward result. Here is not useful, just passed - # # to gradient_machine.forward - # outArgs = paddle.raw.Arguments.createArguments(0) - # - # for pass_id in xrange(2): # we train 2 passes. - # updater.startPass() - # - # for batch_id, data_batch in enumerate(train_data()): - # # data_batch is input images. - # # here, for online learning, we could get data_batch from network. - # - # # Start update one batch. - # pass_type = updater.startBatch(len(data_batch)) - # - # # Start BatchEvaluator. - # # batch_evaluator can be used between start/finish. - # batch_evaluator.start() - # - # # forwardBackward is a shortcut for forward and backward. - # # It is sometimes faster than invoke forward/backward separately, - # # because in GradientMachine, it may be async. - # m.forwardBackward(converter(data_batch), outArgs, pass_type) - # - # for each_param in m.getParameters(): - # updater.update(each_param) - # - # # Get cost. We use numpy to calculate total cost for this batch. - # cost_vec = outArgs.getSlotValue(0) - # cost_vec = cost_vec.copyToNumpyMat() - # cost = cost_vec.sum() / len(data_batch) - # - # # Make evaluator works. - # m.eval(batch_evaluator) - # - # # Print logs. - # print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \ - # cost, batch_evaluator - # - # batch_evaluator.finish() - # # Finish batch. - # # * will clear gradient. - # # * ensure all values should be updated. - # updater.finishBatch(cost) - # - # # testing stage. use test data set to test current network. 
- # updater.apply() - # test_evaluator.start() - # test_data_generator = input_order_converter(read_from_mnist(test_file)) - # for data_batch in generator_to_batch(test_data_generator, 512): - # # in testing stage, only forward is needed. - # m.forward(converter(data_batch), outArgs, paddle.raw.PASS_TEST) - # m.eval(test_evaluator) - # - # # print error rate for test data set - # print 'Pass', pass_id, ' test evaluator: ', test_evaluator - # test_evaluator.finish() - # updater.restore() - # - # updater.catchUpWith() - # params = m.getParameters() - # for each_param in params: - # assert isinstance(each_param, paddle.raw.Parameter) - # value = each_param.getBuf(paddle.raw.PARAMETER_VALUE) - # value = value.copyToNumpyArray() - # - # # Here, we could save parameter to every where you want - # print each_param.getName(), value - # - # updater.finishPass() + model.finish_pass() model.finish() diff --git a/python/paddle/v2/data.py b/python/paddle/v2/data.py index 3a84b6f3c86be6..223673e68c14cf 100644 --- a/python/paddle/v2/data.py +++ b/python/paddle/v2/data.py @@ -105,8 +105,12 @@ def create_data_pool(file_reader, def __impl__(): settings = object() + method = file_reader + if method.func_code.co_argcount == 2: # for backward capacity + method = functools.partial(method, settings) + for each_file in file_list: - for each_sample in file_reader(settings, each_file): + for each_sample in method(each_file): yield each_sample return pool_class( diff --git a/python/paddle/v2/model.py b/python/paddle/v2/model.py index e8efdb9fedd970..13c319aefb6f96 100644 --- a/python/paddle/v2/model.py +++ b/python/paddle/v2/model.py @@ -83,6 +83,10 @@ def test(self, data): # forward. raise NotImplementedError() + def evaluate(self, evaluator): + assert isinstance(evaluator, Evaluator) + raise NotImplementedError() + def make_evaluator(self): """