diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py
index f301da382ff8a5..7e0f28d223019e 100644
--- a/demo/mnist/api_train.py
+++ b/demo/mnist/api_train.py
@@ -6,199 +6,76 @@ The user api could be simpler and carefully designed.
 """
-import py_paddle.swig_paddle as api
-from py_paddle import DataProviderConverter
-import paddle.trainer.PyDataProvider2 as dp
-import numpy as np
-import random
-from mnist_util import read_from_mnist
-from paddle.trainer_config_helpers import *
+import paddle.v2 as paddle
 
-
-def optimizer_config():
-    settings(
-        learning_rate=1e-4,
-        learning_method=AdamOptimizer(),
-        batch_size=1000,
-        model_average=ModelAverage(average_window=0.5),
-        regularization=L2Regularization(rate=0.5))
-
-
-def network_config():
-    imgs = data_layer(name='pixel', size=784)
-    hidden1 = fc_layer(input=imgs, size=200)
-    hidden2 = fc_layer(input=hidden1, size=200)
-    inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
-    cost = classification_cost(
-        input=inference, label=data_layer(
-            name='label', size=10))
-    outputs(cost)
-
+from mnist_util import read_from_mnist
 
-def init_parameter(network):
-    assert isinstance(network, api.GradientMachine)
-    for each_param in network.getParameters():
-        assert isinstance(each_param, api.Parameter)
-        array_size = len(each_param)
-        array = np.random.uniform(-1.0, 1.0, array_size).astype('float32')
-        each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array)
 
+def main():
+    paddle.raw.initPaddle("-use_gpu=false",
+                          "-trainer_count=4")  # use 4 cpu cores
 
-def generator_to_batch(generator, batch_size):
-    ret_val = list()
-    for each_item in generator:
-        ret_val.append(each_item)
-        if len(ret_val) == batch_size:
-            yield ret_val
-            ret_val = list()
-    if len(ret_val) != 0:
-        yield ret_val
+    optimizer = paddle.optimizer.Optimizer(
+        learning_method=paddle.optimizer.AdamOptimizer(),
+        learning_rate=1e-4,
+        model_average=paddle.optimizer.ModelAverage(average_window=0.5),
+        regularization=paddle.optimizer.L2Regularization(rate=0.5))
+
+    # define network
+    imgs = paddle.layers.data_layer(name='pixel', size=784)
+    hidden1 = paddle.layers.fc_layer(input=imgs, size=200)
+    hidden2 = paddle.layers.fc_layer(input=hidden1, size=200)
+    inference = paddle.layers.fc_layer(
+        input=hidden2, size=10, act=paddle.config.SoftmaxActivation())
+    cost = paddle.layers.classification_cost(
+        input=inference, label=paddle.layers.data_layer(
+            name='label', size=10))
 
+    model = paddle.model.Model(layers=[cost], optimizer=optimizer)
 
-class BatchPool(object):
-    def __init__(self, generator, batch_size):
-        self.data = list(generator)
-        self.batch_size = batch_size
+    model.rand_parameter()
 
-    def __call__(self):
-        random.shuffle(self.data)
-        for offset in xrange(0, len(self.data), self.batch_size):
-            limit = min(offset + self.batch_size, len(self.data))
-            yield self.data[offset:limit]
+    batch_evaluator = model.make_evaluator()
+    test_evaluator = model.make_evaluator()
+    train_data = paddle.data.create_data_pool(
+        file_reader=read_from_mnist,
+        file_list=['./data/raw_data/train'],
+        model=model,
+        batch_size=128,
+        shuffle=True)
+    test_data = paddle.data.create_data_pool(
+        file_reader=read_from_mnist,
+        file_list=['./data/raw_data/test'],
+        model=model,
+        batch_size=128,
+        shuffle=False)
 
-def input_order_converter(generator):
-    for each_item in generator:
-        yield each_item['pixel'], each_item['label']
+    # Training process.
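+    # The calls below follow the intended v2 Model lifecycle (the methods
+    # are declared in python/paddle/v2/model.py and are mostly stubs in
+    # this patch):
+    #   start() -> start_pass() -> start_batch()/train()/finish_batch()
+    #   -> finish_pass() -> finish()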
+    model.start()
+    for pass_id in xrange(2):
+        model.start_pass()
 
-def main():
-    api.initPaddle("-use_gpu=false", "-trainer_count=4")  # use 4 cpu cores
-
-    # get enable_types for each optimizer.
-    # enable_types = [value, gradient, momentum, etc]
-    # For each optimizer(SGD, Adam), GradientMachine should enable different
-    # buffers.
-    opt_config_proto = parse_optimizer_config(optimizer_config)
-    opt_config = api.OptimizationConfig.createFromProto(opt_config_proto)
-    _temp_optimizer_ = api.ParameterOptimizer.create(opt_config)
-    enable_types = _temp_optimizer_.getParameterTypes()
-
-    # Create Simple Gradient Machine.
-    model_config = parse_network_config(network_config)
-    m = api.GradientMachine.createFromConfigProto(
-        model_config, api.CREATE_MODE_NORMAL, enable_types)
-
-    # This type check is not useful. Only enable type hint in IDE.
-    # Such as PyCharm
-    assert isinstance(m, api.GradientMachine)
-
-    # Initialize Parameter by numpy.
-    init_parameter(network=m)
-
-    # Create Local Updater. Local means not run in cluster.
-    # For a cluster training, here we can change to createRemoteUpdater
-    # in future.
-    updater = api.ParameterUpdater.createLocalUpdater(opt_config)
-    assert isinstance(updater, api.ParameterUpdater)
-
-    # Initialize ParameterUpdater.
-    updater.init(m)
-
-    # DataProvider Converter is a utility convert Python Object to Paddle C++
-    # Input. The input format is as same as Paddle's DataProvider.
-    converter = DataProviderConverter(
-        input_types=[dp.dense_vector(784), dp.integer_value(10)])
-
-    train_file = './data/raw_data/train'
-    test_file = './data/raw_data/t10k'
-
-    # start gradient machine.
-    # the gradient machine must be started before invoke forward/backward.
-    # not just for training, but also for inference.
-    m.start()
-
-    # evaluator can print error rate, etc. It is a C++ class.
-    batch_evaluator = m.makeEvaluator()
-    test_evaluator = m.makeEvaluator()
-
-    # Get Train Data.
-    # TrainData will stored in a data pool. Currently implementation is not care
-    # about memory, speed. Just a very naive implementation.
-    train_data_generator = input_order_converter(read_from_mnist(train_file))
-    train_data = BatchPool(train_data_generator, 512)
-
-    # outArgs is Neural Network forward result. Here is not useful, just passed
-    # to gradient_machine.forward
-    outArgs = api.Arguments.createArguments(0)
-
-    for pass_id in xrange(2):  # we train 2 passes.
-        updater.startPass()
-
-        for batch_id, data_batch in enumerate(train_data()):
-            # data_batch is input images.
-            # here, for online learning, we could get data_batch from network.
-
-            # Start update one batch.
-            pass_type = updater.startBatch(len(data_batch))
-
-            # Start BatchEvaluator.
-            # batch_evaluator can be used between start/finish.
+        for batch_id, data_batch in enumerate(train_data):
+            model.start_batch()
+            model.train(data_batch)
             batch_evaluator.start()
-
-            # forwardBackward is a shortcut for forward and backward.
-            # It is sometimes faster than invoke forward/backward separately,
-            # because in GradientMachine, it may be async.
-            m.forwardBackward(converter(data_batch), outArgs, pass_type)
-
-            for each_param in m.getParameters():
-                updater.update(each_param)
-
-            # Get cost. We use numpy to calculate total cost for this batch.
-            cost_vec = outArgs.getSlotValue(0)
-            cost_vec = cost_vec.copyToNumpyMat()
-            cost = cost_vec.sum() / len(data_batch)
-
-            # Make evaluator works.
-            m.eval(batch_evaluator)
-
-            # Print logs.
-            print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \
-                cost, batch_evaluator
-
+            model.evaluate(batch_evaluator)
             batch_evaluator.finish()
-            # Finish batch.
-            #  * will clear gradient.
-            #  * ensure all values should be updated.
-            updater.finishBatch(cost)
+            print "Pass=%d, batch=%d" % (pass_id, batch_id), batch_evaluator
+            model.finish_batch()
 
-        # testing stage. use test data set to test current network.
-        updater.apply()
         test_evaluator.start()
-        test_data_generator = input_order_converter(read_from_mnist(test_file))
-        for data_batch in generator_to_batch(test_data_generator, 512):
-            # in testing stage, only forward is needed.
-            m.forward(converter(data_batch), outArgs, api.PASS_TEST)
-            m.eval(test_evaluator)
-
-        # print error rate for test data set
-        print 'Pass', pass_id, ' test evaluator: ', test_evaluator
+        for data_batch in test_data:
+            model.test(data_batch)
+        print "TEST Pass=%d" % pass_id, test_evaluator
         test_evaluator.finish()
-        updater.restore()
-
-        updater.catchUpWith()
-        params = m.getParameters()
-        for each_param in params:
-            assert isinstance(each_param, api.Parameter)
-            value = each_param.getBuf(api.PARAMETER_VALUE)
-            value = value.copyToNumpyArray()
-
-            # Here, we could save parameter to every where you want
-            print each_param.getName(), value
-        updater.finishPass()
+        model.finish_pass()
 
-    m.finish()
+    model.finish()
 
 
 if __name__ == '__main__':
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 1cda4762eb2a55..3a4c6bd76258c8 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -4,11 +4,12 @@ set(OUTPUT_DIR
 file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py)
 file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py)
 file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py)
-
+file(GLOB V2_PY_FILES . ./paddle/v2/*.py)
 set(PY_FILES paddle/__init__.py
              ${TRAINER_PY_FILES}
              ${HELPERS_PY_FILES}
-             ${UTILS_PY_FILES})
+             ${UTILS_PY_FILES}
+             ${V2_PY_FILES})
 
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
     ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
diff --git a/python/paddle/trainer_config_helpers/__init__.py b/python/paddle/trainer_config_helpers/__init__.py
index 13155ebddbb49c..a7f5c02802587c 100644
--- a/python/paddle/trainer_config_helpers/__init__.py
+++ b/python/paddle/trainer_config_helpers/__init__.py
@@ -21,5 +21,6 @@
 from optimizers import *
 from attrs import *
 from config_parser_utils import *
+
 # This will enable operator overload for LayerOutput
 import layer_math
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
new file mode 100644
index 00000000000000..675f32cec1d168
--- /dev/null
+++ b/python/paddle/v2/__init__.py
@@ -0,0 +1,22 @@
+"""
+This is an experimental package for Paddle's new API.
+
+Currently, you should always use
+
+..  code-block:: python
+
+    import paddle.v2 as paddle
+
+as the import statement. The API is in flux; never use this package in
+production.
+""" + +import py_paddle.swig_paddle as raw +import config +import data +import paddle.proto as proto +import layers +import optimizer +import model + +__all__ = ['config', 'data', 'raw', 'proto', 'layers', 'optimizer', 'model'] diff --git a/python/paddle/v2/config.py b/python/paddle/v2/config.py new file mode 100644 index 00000000000000..48873b26af0064 --- /dev/null +++ b/python/paddle/v2/config.py @@ -0,0 +1,12 @@ +from paddle.trainer_config_helpers import * +from paddle.trainer.config_parser import parse_config as parse +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_optimizer_config as parse_optimizer + +import paddle.trainer_config_helpers as tmp + +__all__ = ['parse', 'parse_network', 'parse_optimizer'] + +__all__.extend(filter(lambda x: x[:2] != '__', dir(tmp))) diff --git a/python/paddle/v2/data.py b/python/paddle/v2/data.py new file mode 100644 index 00000000000000..223673e68c14cf --- /dev/null +++ b/python/paddle/v2/data.py @@ -0,0 +1,120 @@ +from paddle.trainer.PyDataProvider2 import * +from py_paddle.dataprovider_converter import DataProviderConverter +import random +import model as v2_model + +__all__ = [ + 'dense_vector', 'dense_vector_sequence', 'dense_vector_sub_sequence', + 'integer_value', 'integer_sequence', 'integer_value_sub_sequence', + 'sparse_binary_vector', 'sparse_binary_vector_sequence', + 'sparse_binary_vector_sub_sequence', 'sparse_vector', + 'sparse_vector_sequence', 'sparse_vector_sub_sequence', 'provider', + 'CacheType', 'DataProviderConverter', 'IDataPool', 'NaiveDataPool', + 'create_data_pool' +] + + +class IDataPool(object): + """ + Interface of DataPool, but note that Python is using Duck-Typing, it is not + necessary to inherit this interface. + + NOTE: For Paddle developer, NEVER CHECK isinstance(obj, IDataPool). + + Basically contains two method, + + * next(): User should return the next batch of data in pool. raise + StopIteration if there is no more data in pool. + + * reset(): Reset the data pool to initial status. + + The basic usage of this api is as same as normal Python iterator, like + + .. code-block:: python + + pool = DataPool() + + for batch in pool: + process_batch(batch) + + + NOTE: The Data Pool API is not thread-safe. + """ + + def __iter__(self): + self.reset() + return self + + def next(self): + raise NotImplementedError() + + def __next__(self): + return self.next() + + def reset(self): + raise NotImplementedError() + + +def input_order_mapper(iterable, input_order): + assert isinstance(input_order, collections.Sequence) + for each_input_name in input_order: + assert isinstance(each_input_name, basestring) + + tmp = [None] * len(input_order) + for each_item in iterable: + for i in xrange(len(input_order)): + tmp[i] = each_item[input_order[i]] + yield tmp + + +class NaiveDataPool(IDataPool): + """ + Naive Data Pool means load all samples in memory. 
+ """ + + def __init__(self, iterable, batch_size, input_order, shuffle=True): + self.__pool__ = list( + input_order_mapper( + iterable=iterable, input_order=input_order)) + self.__batch_size__ = batch_size + self.__shuffle__ = shuffle + self.__idx__ = 0 + + def reset(self): + self.__idx__ = 0 + if self.__shuffle__: + random.shuffle(self.__pool__) + + def next(self): + if self.__idx__ >= len(self.__pool__): + raise StopIteration() + + begin = self.__idx__ + end = min(self.__idx__ + self.__batch_size__, len(self.__pool__)) + self.__idx__ = end + return self.__pool__[begin:end] + + +def create_data_pool(file_reader, + file_list, + model, + batch_size, + shuffle=True, + pool_class=NaiveDataPool): + assert isinstance(model, v2_model.Model) + + def __impl__(): + settings = object() + method = file_reader + if method.func_code.co_argcount == 2: # for backward capacity + method = functools.partial(method, settings) + + for each_file in file_list: + for each_sample in method(each_file): + yield each_sample + + return pool_class( + iterable=__impl__(), + batch_size=batch_size, + input_order=model.input_order, + shuffle=shuffle) diff --git a/python/paddle/v2/layers.py b/python/paddle/v2/layers.py new file mode 100644 index 00000000000000..14efe9412c3582 --- /dev/null +++ b/python/paddle/v2/layers.py @@ -0,0 +1,112 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import paddle.trainer_config_helpers as conf_helps
+from paddle.trainer_config_helpers.config_parser_utils import \
+    parse_network_config as __parse__
+from paddle.trainer_config_helpers.default_decorators import wrap_name_default
+import collections
+
+
+class Layer(object):
+    def __init__(self, name, parent_layer):
+        assert isinstance(parent_layer, dict)
+        assert isinstance(name, basestring)
+        self.name = name
+        self.__parent_layer__ = parent_layer
+
+    def to_proto(self, context):
+        """
+        Generate this layer's config proto. Parent layers are converted
+        first; the result is memoized in `context` by layer name.
+        """
+        kwargs = dict()
+        for param_name in self.__parent_layer__:
+            if not isinstance(self.__parent_layer__[param_name],
+                              collections.Sequence):
+                param_value = self.__parent_layer__[param_name].to_proto(
+                    context=context)
+            else:
+                param_value = map(lambda x: x.to_proto(context=context),
+                                  self.__parent_layer__[param_name])
+            kwargs[param_name] = param_value
+
+        if self.name not in context:
+            context[self.name] = self.to_proto_impl(**kwargs)
+        return context[self.name]
+
+    def to_proto_impl(self, **kwargs):
+        raise NotImplementedError()
+
+
+def parse_network(*outputs):
+    def __real_func__():
+        context = dict()
+        real_output = [each.to_proto(context=context) for each in outputs]
+        conf_helps.outputs(real_output)
+
+    return __parse__(__real_func__)
+
+
+def __convert__(method_name, name_prefix, parent_names):
+    if name_prefix is not None:
+        wrapper = wrap_name_default(name_prefix=name_prefix)
+    else:
+        wrapper = None
+
+    class __Impl__(Layer):
+        def __init__(self, name=None, **kwargs):
+            parent_layers = dict()
+            other_kwargs = dict()
+            for pname in parent_names:
+                parent_layers[pname] = kwargs[pname]
+
+            for key in kwargs.keys():
+                if key not in parent_names:
+                    other_kwargs[key] = kwargs[key]
+
+            super(__Impl__, self).__init__(name, parent_layers)
+            self.__other_kwargs__ = other_kwargs
+
+        if wrapper is not None:
+            __init__ = wrapper(__init__)
+
+        def to_proto_impl(self, **kwargs):
+            args = dict()
+            for each in kwargs:
+                args[each] = kwargs[each]
+            for each in self.__other_kwargs__:
+                args[each] = self.__other_kwargs__[each]
+            return getattr(conf_helps, method_name)(name=self.name, **args)
+
+    return __Impl__
+
+
+data_layer = __convert__('data_layer', None, [])
+fc_layer = __convert__('fc_layer', name_prefix='fc', parent_names=['input'])
+classification_cost = __convert__(
+    'classification_cost',
+    name_prefix='classification_cost',
+    parent_names=['input', 'label'])
+
+__all__ = ['data_layer', 'fc_layer', 'classification_cost', 'parse_network']
+
+if __name__ == '__main__':
+    data = data_layer(name='pixel', size=784)
+    hidden = fc_layer(input=data, size=100, act=conf_helps.SigmoidActivation())
+    predict = fc_layer(
+        input=[hidden, data], size=10, act=conf_helps.SoftmaxActivation())
+    cost = classification_cost(
+        input=predict, label=data_layer(
+            name='label', size=10))
+    print parse_network(cost)
diff --git a/python/paddle/v2/model.py b/python/paddle/v2/model.py
new file mode 100644
index 00000000000000..13c319aefb6f96
--- /dev/null
+++ b/python/paddle/v2/model.py
@@ -0,0 +1,112 @@
+import layers as v2_layer
+import optimizer as v2_optimizer
+import py_paddle.swig_paddle as api
+import collections
+
+
+class Evaluator(object):
+    def __init__(self):
+        raise NotImplementedError()
+
+    def start(self):
+        raise NotImplementedError()
+
+    def finish(self):
+        raise NotImplementedError()
+
+    def __str__(self):
+        raise NotImplementedError()
+
+
+class Model(object):
+    """
+    :type __gradient_machine__: api.GradientMachine
+    :type __updater__: api.ParameterUpdater
+    """
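+
+    # Intended call order, mirroring the v1 GradientMachine/ParameterUpdater
+    # protocol this class wraps (see demo/mnist/api_train.py):
+    #   start() -> [start_pass() -> [start_batch() -> train(batch) ->
+    #   finish_batch()]* -> finish_pass()]* -> finish()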
+
+    def __init__(self, layers, optimizer, is_local=True):
+        if not isinstance(layers, collections.Sequence):
+            layers = [layers]  # layers could be many.
+
+        assert is_local, 'Currently, only local mode is supported'
+
+        for each in layers:
+            assert isinstance(each, v2_layer.Layer)
+
+        assert isinstance(optimizer, v2_optimizer.Optimizer)
+
+        # Create Proto.
+        self.__gradient_machine__ = self.create_gradient_machine(layers,
+                                                                 optimizer)
+
+        self.__updater__ = self.create_local_updater(optimizer)
+
+    @property
+    def input_order(self):
+        raise NotImplementedError()
+
+    def rand_parameter(self):
+        raise NotImplementedError()
+
+    def save_parameter(self, filename):
+        raise NotImplementedError()
+
+    def load_parameter(self, filename):
+        raise NotImplementedError()
+
+    def start(self):
+        raise NotImplementedError()
+
+    def finish(self):
+        raise NotImplementedError()
+
+    def start_pass(self):
+        raise NotImplementedError()
+
+    def start_batch(self):
+        raise NotImplementedError()
+
+    def finish_batch(self):
+        raise NotImplementedError()
+
+    def train(self, data):
+        in_args = self.__data_converter(data)
+        out_args = api.Arguments.createArguments(0)
+        # forward/backward
+        # update
+        raise NotImplementedError()
+
+    def test(self, data):
+        self.__updater__.catchUpWith()
+        in_args = self.__data_converter(data)
+        out_args = api.Arguments.createArguments(0)
+        # forward.
+        raise NotImplementedError()
+
+    def evaluate(self, evaluator):
+        assert isinstance(evaluator, Evaluator)
+        raise NotImplementedError()
+
+    def make_evaluator(self):
+        """
+        Create a new Evaluator bound to this model.
+
+        :rtype: Evaluator
+        """
+        raise NotImplementedError()
+
+    def finish_pass(self):
+        raise NotImplementedError()
+
+    def complete(self):
+        return self.finish()
+
+    def create_gradient_machine(self, layers, optimizer):
+        raise NotImplementedError()
+
+    def create_local_updater(self, optimizer):
+        raise NotImplementedError()
+
+    @property
+    def __data_converter(self):
+        raise NotImplementedError()
diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py
new file mode 100644
index 00000000000000..13bd2fd703e9fe
--- /dev/null
+++ b/python/paddle/v2/optimizer.py
@@ -0,0 +1,36 @@
+from paddle.trainer_config_helpers import *
+import py_paddle.swig_paddle as api
+import paddle.trainer_config_helpers.optimizers as raw_opt_package
+
+__all__ = ['Optimizer']
+
+__all__.extend(
+    filter(lambda x: x not in ['Optimizer', 'BaseSGDOptimizer', 'settings'],
+           raw_opt_package.__all__))
+
+
+class Optimizer(object):
+    def __init__(self, **kwargs):
+        if 'batch_size' in kwargs:
+            del kwargs['batch_size']  # batch size is owned by the data pool.
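+        # The v1 settings() parser is reused below to build the optimizer
+        # proto; batch_size is pinned to a dummy value of 1 because in v2
+        # the real batch size travels with the data pool instead.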
+
+        def __impl__():
+            settings(batch_size=1, **kwargs)
+
+        self.__opt_conf_proto__ = parse_optimizer_config(__impl__)
+        self.__opt_conf__ = api.OptimizationConfig.createFromProto(
+            self.__opt_conf_proto__)
+
+    def enable_types(self):
+        tmp = api.ParameterOptimizer.create(self.__opt_conf__)
+        assert isinstance(tmp, api.ParameterOptimizer)
+        return tmp.getParameterTypes()
+
+    def create_local_updater(self):
+        return api.ParameterUpdater.createLocalUpdater(self.__opt_conf__)
+
+
+if __name__ == '__main__':
+    api.initPaddle('--use_gpu=false')
+    opt = Optimizer(learning_rate=1e-4, learning_method=AdamOptimizer())
+    print opt.enable_types()
diff --git a/python/setup.py.in b/python/setup.py.in
index b66a42e87c7870..1e1324eea825ab 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -4,7 +4,8 @@ packages=['paddle',
           'paddle.proto',
           'paddle.trainer',
           'paddle.trainer_config_helpers',
-          'paddle.utils']
+          'paddle.utils',
+          'paddle.v2']
 
 setup(name='paddle',
       version='${PADDLE_VERSION}',
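
Note on the data-pool contract: it is duck-typed, so a pool does not have to
inherit IDataPool; anything with __iter__/next/reset works. A minimal sketch
of a streaming variant that never caches samples (`make_generator` stands for
any callable returning a fresh iterator of already-ordered rows; the names
are illustrative, not part of this patch):

    class StreamingPool(object):
        def __init__(self, make_generator, batch_size):
            self.__make_generator__ = make_generator
            self.__batch_size__ = batch_size
            self.__stream__ = iter([])

        def __iter__(self):
            self.reset()
            return self

        def reset(self):
            # A fresh pass re-reads the source instead of reshuffling memory.
            self.__stream__ = self.__make_generator__()

        def next(self):
            batch = []
            for row in self.__stream__:
                batch.append(row)
                if len(batch) == self.__batch_size__:
                    return batch
            if len(batch) != 0:
                return batch  # final, possibly smaller, batch
            raise StopIteration()

        def __next__(self):
            return self.next()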