Commit 22454ed

Merge pull request #971 from reyoung/feature/mnist_train_api
[Done] Feature/mnist train api
2 parents 4ccd5ea + eca4592

19 files changed: +633 −88 lines

demo/mnist/.gitignore

Lines changed: 1 addition & 0 deletions
@@ -4,3 +4,4 @@ mnist_vgg_model
 plot.png
 train.log
 *pyc
+.ipynb_checkpoints

demo/mnist/api_train.py

Lines changed: 205 additions & 0 deletions
@@ -0,0 +1,205 @@
"""
A very basic example of how to use the current raw SWIG API to train an
MNIST network.

The current implementation uses raw SWIG, which means API calls are passed
directly to the C++ side of Paddle.

The user API could be simpler and more carefully designed.
"""
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
import paddle.trainer.PyDataProvider2 as dp
import numpy as np
import random
from mnist_util import read_from_mnist
from paddle.trainer_config_helpers import *


def optimizer_config():
    settings(
        learning_rate=1e-4,
        learning_method=AdamOptimizer(),
        batch_size=1000,
        model_average=ModelAverage(average_window=0.5),
        regularization=L2Regularization(rate=0.5))


def network_config():
    imgs = data_layer(name='pixel', size=784)
    hidden1 = fc_layer(input=imgs, size=200)
    hidden2 = fc_layer(input=hidden1, size=200)
    inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
    cost = classification_cost(
        input=inference, label=data_layer(
            name='label', size=10))
    outputs(cost)


def init_parameter(network):
    assert isinstance(network, api.GradientMachine)
    for each_param in network.getParameters():
        assert isinstance(each_param, api.Parameter)
        array_size = len(each_param)
        array = np.random.uniform(-1.0, 1.0, array_size).astype('float32')
        each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array)


def generator_to_batch(generator, batch_size):
    ret_val = list()
    for each_item in generator:
        ret_val.append(each_item)
        if len(ret_val) == batch_size:
            yield ret_val
            ret_val = list()
    if len(ret_val) != 0:
        yield ret_val


class BatchPool(object):
    def __init__(self, generator, batch_size):
        self.data = list(generator)
        self.batch_size = batch_size

    def __call__(self):
        random.shuffle(self.data)
        for offset in xrange(0, len(self.data), self.batch_size):
            limit = min(offset + self.batch_size, len(self.data))
            yield self.data[offset:limit]


def input_order_converter(generator):
    for each_item in generator:
        yield each_item['pixel'], each_item['label']


def main():
    api.initPaddle("-use_gpu=false", "-trainer_count=4")  # use 4 CPU cores

    # Get enable_types for the optimizer.
    # enable_types = [value, gradient, momentum, etc.]
    # For each optimizer (SGD, Adam), the GradientMachine should enable
    # different buffers.
    opt_config_proto = parse_optimizer_config(optimizer_config)
    opt_config = api.OptimizationConfig.createFromProto(opt_config_proto)
    _temp_optimizer_ = api.ParameterOptimizer.create(opt_config)
    enable_types = _temp_optimizer_.getParameterTypes()

    # Create a simple GradientMachine.
    model_config = parse_network_config(network_config)
    m = api.GradientMachine.createFromConfigProto(
        model_config, api.CREATE_MODE_NORMAL, enable_types)

    # This type check is not strictly necessary; it only enables type hints
    # in IDEs such as PyCharm.
    assert isinstance(m, api.GradientMachine)

    # Initialize parameters with numpy.
    init_parameter(network=m)

    # Create a local updater ("local" means not running in a cluster).
    # For cluster training, this could be changed to createRemoteUpdater in
    # the future.
    updater = api.ParameterUpdater.createLocalUpdater(opt_config)
    assert isinstance(updater, api.ParameterUpdater)

    # Initialize the ParameterUpdater.
    updater.init(m)

    # DataProviderConverter is a utility that converts Python objects into
    # Paddle's C++ input. The input format is the same as Paddle's
    # DataProvider.
    converter = DataProviderConverter(
        input_types=[dp.dense_vector(784), dp.integer_value(10)])

    train_file = './data/raw_data/train'
    test_file = './data/raw_data/t10k'

    # Start the gradient machine. It must be started before invoking
    # forward/backward, not just for training but also for inference.
    m.start()

    # An evaluator can print the error rate, etc. It is a C++ class.
    batch_evaluator = m.makeEvaluator()
    test_evaluator = m.makeEvaluator()

    # Get the training data. It is stored in a data pool. The current
    # implementation cares about neither memory nor speed; it is just a
    # very naive implementation.
    train_data_generator = input_order_converter(read_from_mnist(train_file))
    train_data = BatchPool(train_data_generator, 512)

    # outArgs holds the neural network's forward result. It is not used
    # here; it is just passed to gradient_machine.forward.
    outArgs = api.Arguments.createArguments(0)

    for pass_id in xrange(2):  # we train 2 passes.
        updater.startPass()

        for batch_id, data_batch in enumerate(train_data()):
            # data_batch contains the input images.
            # For online learning, data_batch could instead be fetched from
            # the network.

            # Start updating one batch.
            pass_type = updater.startBatch(len(data_batch))

            # Start the batch evaluator; it can be used between start() and
            # finish().
            batch_evaluator.start()

            # forwardBackward is a shortcut for forward and backward. It is
            # sometimes faster than invoking forward/backward separately,
            # because the GradientMachine may run them asynchronously.
            m.forwardBackward(converter(data_batch), outArgs, pass_type)

            for each_param in m.getParameters():
                updater.update(each_param)

            # Get the cost. We use numpy to calculate the total cost for
            # this batch.
            cost_vec = outArgs.getSlotValue(0)
            cost_vec = cost_vec.copyToNumpyMat()
            cost = cost_vec.sum() / len(data_batch)

            # Accumulate evaluation statistics for this batch.
            m.eval(batch_evaluator)

            # Print logs.
            print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \
                cost, batch_evaluator

            batch_evaluator.finish()
            # Finish the batch.
            # * Clears the gradients.
            # * Ensures all parameter values are updated.
            updater.finishBatch(cost)

        # Testing stage: use the test data set to evaluate the current
        # network.
        updater.apply()
        test_evaluator.start()
        test_data_generator = input_order_converter(read_from_mnist(test_file))
        for data_batch in generator_to_batch(test_data_generator, 512):
            # In the testing stage, only forward is needed.
            m.forward(converter(data_batch), outArgs, api.PASS_TEST)
            m.eval(test_evaluator)

        # Print the error rate on the test data set.
        print 'Pass', pass_id, ' test evaluator: ', test_evaluator
        test_evaluator.finish()
        updater.restore()

        updater.catchUpWith()
        params = m.getParameters()
        for each_param in params:
            assert isinstance(each_param, api.Parameter)
            value = each_param.getBuf(api.PARAMETER_VALUE)
            value = value.copyToNumpyArray()

            # Here the parameter could be saved anywhere you want.
            print each_param.getName(), value

        updater.finishPass()

    m.finish()


if __name__ == '__main__':
    main()
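
The comments above note that the machine must be started even for inference, so a forward-only prediction pass can be driven by the same raw SWIG calls. The sketch below is not part of the commit: it assumes a started GradientMachine whose network config ends with outputs(inference) instead of outputs(cost), so that slot 0 of the output arguments holds softmax probabilities; infer_batch is a hypothetical helper name.

# Hypothetical sketch: forward-only inference with the raw SWIG API.
# Assumes `m` is a started api.GradientMachine whose output layer is the
# softmax (outputs(inference)), and `converter` is the
# DataProviderConverter built in api_train.py.
import numpy as np
import py_paddle.swig_paddle as api


def infer_batch(m, converter, images):
    # images: a list of 784-dim float32 vectors in the 'pixel' input format.
    out_args = api.Arguments.createArguments(0)
    batch = [(img, 0) for img in images]  # dummy label keeps the input order
    m.forward(converter(batch), out_args, api.PASS_TEST)
    prob = out_args.getSlotValue(0).copyToNumpyMat()  # (batch_size, 10)
    return np.argmax(prob, axis=1)  # predicted digit for each sample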

demo/mnist/mnist_provider.py

Lines changed: 3 additions & 25 deletions
@@ -1,5 +1,5 @@
 from paddle.trainer.PyDataProvider2 import *
-import numpy
+from mnist_util import read_from_mnist


 # Define a py data provider
@@ -8,27 +8,5 @@
              'label': integer_value(10)},
     cache=CacheType.CACHE_PASS_IN_MEM)
 def process(settings, filename):  # settings is not used currently.
-    imgf = filename + "-images-idx3-ubyte"
-    labelf = filename + "-labels-idx1-ubyte"
-    f = open(imgf, "rb")
-    l = open(labelf, "rb")
-
-    f.read(16)
-    l.read(8)
-
-    # Define number of samples for train/test
-    if "train" in filename:
-        n = 60000
-    else:
-        n = 10000
-
-    images = numpy.fromfile(
-        f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
-    images = images / 255.0 * 2.0 - 1.0
-    labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
-
-    for i in xrange(n):
-        yield {"pixel": images[i, :], 'label': labels[i]}
-
-    f.close()
-    l.close()
+    for each in read_from_mnist(filename):
+        yield each
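
With the reading logic factored out into mnist_util.read_from_mnist, the data provider and api_train.py consume the same generator. A quick sanity-check sketch, assuming the demo's data download step has populated ./data/raw_data:

# Sketch: sanity-check the shared reader (assumes ./data/raw_data/train
# exists, i.e. the demo's data download step has been run).
from mnist_util import read_from_mnist

sample = next(read_from_mnist('./data/raw_data/train'))
print sample['label']        # an int in [0, 9]
print sample['pixel'].shape  # (784,), float32 scaled to [-1.0, 1.0]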

demo/mnist/mnist_util.py

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
import numpy

__all__ = ['read_from_mnist']


def read_from_mnist(filename):
    imgf = filename + "-images-idx3-ubyte"
    labelf = filename + "-labels-idx1-ubyte"
    f = open(imgf, "rb")
    l = open(labelf, "rb")

    f.read(16)
    l.read(8)

    # Define number of samples for train/test
    if "train" in filename:
        n = 60000
    else:
        n = 10000

    images = numpy.fromfile(
        f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
    images = images / 255.0 * 2.0 - 1.0
    labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")

    for i in xrange(n):
        yield {"pixel": images[i, :], 'label': labels[i]}

    f.close()
    l.close()
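
read_from_mnist hard-codes the sample count (60000 or 10000) based on the filename, although the IDX files carry the count in their big-endian header. A hypothetical variation that reads the header instead (not the demo's code):

# Hypothetical variation: read the sample count and image shape from the
# IDX header instead of hard-coding 60000/10000.
import struct


def read_idx_image_header(filename):
    with open(filename + "-images-idx3-ubyte", "rb") as f:
        # IDX3 header: magic, item count, rows, cols, each a big-endian
        # int32 (16 bytes total, which is what read_from_mnist skips).
        magic, n, rows, cols = struct.unpack(">iiii", f.read(16))
    assert magic == 2051  # IDX3 magic number for image files
    return n, rows, cols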

paddle/api/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
@@ -1,10 +1,12 @@
 set(API_SOURCES
     Arguments.cpp
     ConfigParser.cpp
+    Evaluator.cpp
     GradientMachine.cpp
     Matrix.cpp
     Parameter.cpp
     ParameterOptimizer.cpp
+    ParameterUpdater.cpp
     SequenceGenerator.cpp
     Trainer.cpp
     Util.cpp
@@ -63,6 +65,15 @@ install(DIRECTORY ${PROJ_ROOT}/paddle/dist/

 add_custom_target(python_api_wheel ALL DEPENDS
     ${PROJ_ROOT}/paddle/dist/.timestamp)
+add_dependencies(python_api_wheel python_swig_sources
+                 paddle_parameter
+                 paddle_math
+                 paddle_utils
+                 paddle_gserver
+                 paddle_pserver
+                 paddle_trainer
+                 paddle_api
+                 paddle_cuda)

 if(WITH_TESTING)
     add_subdirectory(test)

paddle/api/Evaluator.cpp

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sstream>
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"

Evaluator::Evaluator() : m(new EvaluatorPrivate()) {}
Evaluator::~Evaluator() { delete m; }

void Evaluator::start() { m->rawPtr->start(); }

void Evaluator::finish() { m->rawPtr->finish(); }

std::string Evaluator::toString() {
  std::ostringstream sout;
  m->rawPtr->printStats(sout);
  return sout.str();
}
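
From Python, this wrapper is driven between start() and finish(), and printing it goes through toString() via the __repr__ rename in Paddle.swig. A minimal usage sketch, assuming m, converter, outArgs, and a data_batch as in api_train.py:

# Sketch: Python-side Evaluator usage (assumes `m`, `converter`, `outArgs`,
# and a `data_batch` as in api_train.py).
evaluator = m.makeEvaluator()
evaluator.start()
m.forward(converter(data_batch), outArgs, api.PASS_TEST)
m.eval(evaluator)  # accumulate statistics for this batch
print evaluator    # __repr__ -> Evaluator::toString -> printStats
evaluator.finish()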

paddle/api/GradientMachine.cpp

Lines changed: 14 additions & 0 deletions
@@ -64,6 +64,10 @@ GradientMachine* GradientMachine::createByModelConfig(
   return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types);
 }

+void GradientMachine::start() { m->machine->start(); }
+
+void GradientMachine::finish() { m->machine->finish(); }
+
 void GradientMachine::onPassEnd() { m->machine->onPassEnd(); }

 void GradientMachine::prefetch(const Arguments& inArgs) {
@@ -166,3 +170,13 @@ SequenceGenerator* GradientMachine::asSequenceGenerator(
   r->setBeamSize(beam_size);
   return r;
 }
+
+Evaluator* GradientMachine::makeEvaluator() {
+  auto ev = new Evaluator();
+  ev->m->rawPtr = m->machine->makeEvaluator();
+  return ev;
+}
+
+void GradientMachine::eval(Evaluator* evaluator) {
+  m->machine->eval(evaluator->m->rawPtr);
+}
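
The new start()/finish() bindings bracket the machine's lifetime from Python. A minimal lifecycle sketch, assuming model_config and enable_types are built as in api_train.py:

# Sketch: GradientMachine lifecycle via the new start()/finish() bindings
# (assumes `model_config` and `enable_types` as built in api_train.py).
m = api.GradientMachine.createFromConfigProto(
    model_config, api.CREATE_MODE_NORMAL, enable_types)
m.start()       # required before any forward/backward, even for inference
try:
    pass        # training or inference work goes here
finally:
    m.finish()  # release the machine's resources when done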

paddle/api/Paddle.swig

Lines changed: 5 additions & 1 deletion
@@ -96,7 +96,9 @@ namespace std {
 %rename(__getitem__) Vector::get;
 %rename(__setitem__) Vector::set;
 %rename(__len__) Vector::getSize;
+%rename(__len__) Parameter::getSize;
 %rename(__call__) ParameterTraverseCallback::apply;
+%rename(__repr__) Evaluator::toString;

 %apply (float* INPLACE_ARRAY2, int DIM1, int DIM2) {
   (float* data, int dim1, int dim2)
@@ -167,13 +169,15 @@ namespace std {
 %newobject GradientMachine::asSequenceGenerator;
 %newobject GradientMachine::getParameter;
 %newobject GradientMachine::getLayerOutput;
+%newobject GradientMachine::makeEvaluator;
 %newobject TrainerConfig::createFromTrainerConfigFile;
 %newobject TrainerConfig::getModelConfig;
 %newobject TrainerConfig::getOptimizationConfig;
 %newobject Parameter::getBuf;
 %newobject Parameter::getConfig;
 %newobject ParameterOptimizer::create;
 %newobject ParameterOptimizer::needSpecialTraversal;
+%newobject ParameterUpdater::createLocalUpdater;

 %feature("director") UpdateCallback;
 %feature("autodoc", 1); // To generate method stub, for code hint in ide
@@ -193,4 +197,4 @@ namespace std {
 %ignore OptimizationConfigPrivate;
 %ignore ParameterTraverseCallbackPrivate;
 %include "utils/GlobalConstants.h"
-%include "api/PaddleAPI.h"
+%include "api/PaddleAPI.h"
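
These directives shape the Python surface directly: len() works on a Parameter, printing an Evaluator shows its stats, and the objects returned by makeEvaluator and createLocalUpdater are owned by Python. A small sketch of the resulting behavior, assuming m is an api.GradientMachine as in api_train.py:

# Sketch: Python-side effect of the SWIG directives above
# (assumes `m` is an api.GradientMachine as in api_train.py).
for param in m.getParameters():
    print param.getName(), len(param)  # __len__ -> Parameter::getSize

evaluator = m.makeEvaluator()  # %newobject: Python owns this Evaluator
print evaluator                # __repr__ -> Evaluator::toString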
