Commit 906e256

Add acc test to image classification (#5336)
* add acc layer
* memory log level change from 3 to 10
* use gaussian random to init conv parameters
* use initializer
* fix import
* batch_norm use helper to create persistable var
* refine code
* train only 2 batches for test
* use g_program and g_init_program
* use XavierInitializer to init fc parameter
1 parent 7484915 commit 906e256

6 files changed (+63, −62 lines)

paddle/framework/operator.h

Lines changed: 0 additions & 2 deletions
@@ -408,7 +408,6 @@ class OperatorWithKernel : public OperatorBase {
   // indicate kernel DataType by input data. Defaultly all input data must be
   // same.
   virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
-    VLOG(3) << "Default IndicateDataType " << this->Type();
     auto& scope = ctx.scope();
     int data_type = -1;
     for (auto& input : this->inputs_) {
@@ -425,7 +424,6 @@ class OperatorWithKernel : public OperatorBase {
       }
       if (t != nullptr) {
         int tmp = static_cast<int>(ToDataType(t->type()));
-        VLOG(3) << "Input " << ipt_name << " with data_type " << tmp;
         PADDLE_ENFORCE(tmp == data_type || data_type == -1,
                        "DataType of Paddle Op %s must be the same.",
                        Type());

paddle/operators/batch_norm_op.cc

Lines changed: 4 additions & 1 deletion
@@ -51,6 +51,10 @@ class BatchNormOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), "");
     PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), "");
 
+    const float epsilon = ctx->Attrs().Get<float>("epsilon");
+    PADDLE_ENFORCE_GE(epsilon, 0.0, "epsilon should be larger than 0");
+    PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should not be too large");
+
     // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
     PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
                       "Mean and MeanOut should share the same memory");
@@ -297,7 +301,6 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
 
   framework::DataType IndicateDataType(
       const framework::ExecutionContext &ctx) const override {
-    VLOG(3) << "IndicateDataType " << this->Type();
     const auto *var = ctx.InputVar(framework::GradVarName("Y"));
     if (var == nullptr) {
       PADDLE_THROW("can't find Y@GRAD");
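The new bounds pin epsilon to a sensible range for the batch-norm transform y = scale * (x − mean) / sqrt(variance + epsilon) + bias, where epsilon only has to keep the denominator away from zero. A minimal NumPy sketch of the normalization the check protects (an illustration of the math, not the operator's actual kernel):

import numpy as np

def batch_norm_forward(x, scale, bias, epsilon=1e-5):
    # Normalize each channel over the batch, then apply the learned scale/bias.
    mean = x.mean(axis=0)
    variance = x.var(axis=0)
    x_hat = (x - mean) / np.sqrt(variance + epsilon)  # epsilon keeps the sqrt positive
    return scale * x_hat + bias

x = np.random.randn(128, 64).astype('float32')  # batch of 128 samples, 64 channels
y = batch_norm_forward(x, scale=np.ones(64), bias=np.zeros(64))

An epsilon of 1e05 (100000) would swamp the variance entirely, which is exactly what the 0.001 upper bound rejects; the matching Python default is corrected to 1e-05 in layers.py below.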

python/paddle/v2/framework/layer_helper.py

Lines changed: 4 additions & 1 deletion
@@ -112,9 +112,12 @@ def input_dtype(self, input_param_name='input'):
                 raise ValueError("Data Type mismatch")
         return dtype
 
-    def create_parameter(self, attr, shape, dtype, suffix='w'):
+    def create_parameter(self, attr, shape, dtype, suffix='w',
+                         initializer=None):
         # Deepcopy the attr so that parameters can be shared in program
         attr_copy = copy.deepcopy(attr)
+        if initializer is not None:
+            attr_copy['initializer'] = initializer
         if attr_copy['name'] is None:
             attr_copy['name'] = unique_name(".".join([self.name, suffix]))
         self.init_program.global_block().create_parameter(

python/paddle/v2/framework/layers.py

Lines changed: 28 additions & 22 deletions
@@ -1,8 +1,7 @@
-from paddle.v2.framework.layer_helper import LayerHelper, unique_name
 import paddle.v2.framework.core as core
-from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \
-    Operator
-from paddle.v2.framework.initializer import ConstantInitializer
+from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, Operator
+from paddle.v2.framework.initializer import ConstantInitializer, NormalInitializer
+from paddle.v2.framework.layer_helper import LayerHelper, unique_name
 import re
 
 __all__ = [
@@ -344,8 +343,13 @@ def conv2d(input,
 
     input_shape = input.shape
     filter_shape = [num_filters, num_filter_channels] + filter_size
+
+    std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
     filter = helper.create_parameter(
-        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
+        attr=helper.param_attr,
+        shape=filter_shape,
+        dtype=dtype,
+        initializer=NormalInitializer(0.0, std, 0))
     pre_bias = helper.create_tmp_variable(dtype)
 
     helper.append_op(
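The Gaussian standard deviation above follows a He/MSRA-style fan-in rule, with fan-in taken as filter_size[0]**2 * num_channels (square filters are assumed, since only the first filter dimension appears). A quick worked example of what the initializer receives:

# A 3x3 convolution over 64 input channels:
filter_size = [3, 3]
num_channels = 64
std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
print(std)  # ~0.0589, so filter weights are drawn from N(0.0, 0.0589)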
@@ -420,7 +424,7 @@ def batch_norm(input,
                act=None,
                is_test=False,
                momentum=0.9,
-               epsilon=1e05,
+               epsilon=1e-05,
                param_attr=None,
                bias_attr=None,
                data_layout='NCHW',
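This one-character fix matters: in Python, 1e05 is one hundred thousand, not a small smoothing term, so the old default would both wash out the variance and trip the new epsilon <= 0.001 check added in batch_norm_op.cc.

>>> 1e05
100000.0
>>> 1e-05
1e-05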
@@ -438,27 +442,29 @@ def batch_norm(input,
     else:
         raise ValueError("unsupported data layout:" + data_layout)
 
-    def create_persistable_var(dtype, shape, initializer=None):
-        name = unique_name(".".join([helper.name, "xxxx"]))
-        var = init_program.global_block().create_var(
-            dtype=dtype, shape=shape, name=name, persistable=True)
-        if initializer is not None:
-            initializer(var, var.block)
-        return program.global_block().create_var(
-            name=name, dtype=dtype, shape=shape, persistable=True)
-
     param_shape = [channel_num]
 
     # create parameter
     scale = helper.create_parameter(
-        attr=helper.param_attr, shape=param_shape, dtype=dtype)
+        attr=helper.param_attr,
+        shape=param_shape,
+        dtype=dtype,
+        initializer=ConstantInitializer(1.0))
     bias = helper.create_parameter(
-        attr=helper.param_attr, shape=param_shape, dtype=dtype)
-
-    # create input
-    mean = create_persistable_var(dtype, param_shape, ConstantInitializer(0.0))
-    variance = create_persistable_var(dtype, param_shape,
-                                      ConstantInitializer(1.0))
+        attr=helper.param_attr,
+        shape=param_shape,
+        dtype=dtype,
+        initializer=ConstantInitializer(0.0))
+
+    mean = helper.create_global_variable(
+        dtype=input.data_type, shape=param_shape, persistable=True)
+    helper.set_variable_initializer(
+        var=mean, initializer=ConstantInitializer(0.0))
+
+    variance = helper.create_global_variable(
+        dtype=input.data_type, shape=param_shape, persistable=True)
+    helper.set_variable_initializer(
+        var=variance, initializer=ConstantInitializer(1.0))
 
     # create output
     # mean and mean_out share the same memory
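mean and variance become persistable global variables because they are running statistics: each step folds the batch statistics into the same buffers, which is also why batch_norm_op.cc enforces that Mean/MeanOut and Variance/VarianceOut share memory. A rough NumPy sketch of that kind of update, under the usual moving-average convention (an illustration only, not the kernel code):

import numpy as np

# Persistable running statistics, matching the ConstantInitializer values above.
running_mean = np.zeros(64, dtype='float32')
running_variance = np.ones(64, dtype='float32')

# One training step folds the batch statistics into the running ones.
momentum = 0.9
batch = np.random.randn(128, 64).astype('float32')
running_mean = momentum * running_mean + (1 - momentum) * batch.mean(axis=0)
running_variance = momentum * running_variance + (1 - momentum) * batch.var(axis=0)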

python/paddle/v2/framework/tests/test_image_classification_train.py

Lines changed: 23 additions & 34 deletions
@@ -1,13 +1,12 @@
+import numpy as np
 import paddle.v2 as paddle
+import paddle.v2.framework.core as core
 import paddle.v2.framework.layers as layers
 import paddle.v2.framework.nets as nets
-import paddle.v2.framework.core as core
 import paddle.v2.framework.optimizer as optimizer
-
-from paddle.v2.framework.framework import Program, g_program
 from paddle.v2.framework.executor import Executor
-
-import numpy as np
+from paddle.v2.framework.framework import g_init_program, g_program
+from paddle.v2.framework.initializer import XavierInitializer
 
 
 def resnet_cifar10(input, depth=32, program=None, init_program=None):
@@ -124,7 +123,7 @@ def layer_warp(block_func, input, ch_in, ch_out, count, stride, program,
     return pool
 
 
-def vgg16_bn_drop(input, program, init_program):
+def vgg16_bn_drop(input, program=None, init_program=None):
     def conv_block(input,
                    num_filter,
                    groups,
@@ -155,6 +154,7 @@ def conv_block(input,
     fc1 = layers.fc(input=drop,
                     size=512,
                     act=None,
+                    param_attr={"initializer": XavierInitializer()},
                     program=program,
                     init_program=init_program)
     reshape1 = layers.reshape(
@@ -169,46 +169,34 @@ def conv_block(input,
     fc2 = layers.fc(input=drop2,
                     size=512,
                     act=None,
+                    param_attr={"initializer": XavierInitializer()},
                     program=program,
                     init_program=init_program)
     return fc2
 
 
-init_program = Program()
-program = Program()
-
 classdim = 10
 data_shape = [3, 32, 32]
 
-images = layers.data(
-    name='pixel', shape=data_shape, data_type='float32', program=program)
-
-label = layers.data(
-    name='label',
-    shape=[1],
-    data_type='int64',
-    program=program,
-    init_program=init_program)
+images = layers.data(name='pixel', shape=data_shape, data_type='float32')
+label = layers.data(name='label', shape=[1], data_type='int64')
 
 # Add neural network config
 # option 1. resnet
-net = resnet_cifar10(images, 32, program, init_program)
+# net = resnet_cifar10(images, 32)
 # option 2. vgg
-# net = vgg16_bn_drop(images, program, init_program)
+net = vgg16_bn_drop(images)
 
 # print(program)
 
-predict = layers.fc(input=net,
-                    size=classdim,
-                    act='softmax',
-                    program=program,
-                    init_program=init_program)
-cost = layers.cross_entropy(
-    input=predict, label=label, program=program, init_program=init_program)
-avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
+predict = layers.fc(input=net, size=classdim, act='softmax')
+cost = layers.cross_entropy(input=predict, label=label)
+avg_cost = layers.mean(x=cost)
+accuracy = layers.accuracy(input=predict, label=label)
 
-sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
-opts = sgd_optimizer.minimize(avg_cost, init_program)
+# optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
+optimizer = optimizer.AdamOptimizer(learning_rate=0.001)
+opts = optimizer.minimize(avg_cost)
 
 BATCH_SIZE = 128
 PASS_NUM = 1
@@ -221,7 +209,7 @@ def conv_block(input,
 place = core.CPUPlace()
 exe = Executor(place)
 
-exe.run(init_program, feed={}, fetch_list=[])
+exe.run(g_init_program, feed={}, fetch_list=[])
 
 for pass_id in range(PASS_NUM):
     batch_id = 0
@@ -239,14 +227,15 @@ def conv_block(input,
         tensor_img.set(img_data, place)
         tensor_y.set(y_data, place)
 
-        outs = exe.run(program,
+        outs = exe.run(g_program,
                        feed={"pixel": tensor_img,
                              "label": tensor_y},
-                       fetch_list=[avg_cost])
+                       fetch_list=[avg_cost, accuracy])
 
         loss = np.array(outs[0])
+        acc = np.array(outs[1])
         print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
-              " loss:" + str(loss))
+              " loss:" + str(loss) + " acc:" + str(acc))
         batch_id = batch_id + 1
 
         if batch_id > 1:
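Besides the loss, the test now fetches the new accuracy layer each step. layers.accuracy wraps the accuracy operator, which here amounts to the fraction of samples in the batch whose highest-scoring class matches the label (a NumPy sketch of that computation, assuming plain top-1 accuracy):

import numpy as np

def top1_accuracy(probs, labels):
    # probs: (batch, classdim) softmax output; labels: (batch, 1) int64
    pred = np.argmax(probs, axis=1)
    return float(np.mean(pred == labels.reshape(-1)))

probs = np.array([[0.1, 0.7, 0.2], [0.8, 0.1, 0.1]])
labels = np.array([[1], [2]])
print(top1_accuracy(probs, labels))  # 0.5: first sample correct, second wrong

The test also switches the fc parameters to XavierInitializer and the optimizer to Adam, and it still stops after two batches, so it remains a smoke test rather than a convergence check.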

python/paddle/v2/framework/tests/test_recognize_digits_mlp.py

Lines changed: 4 additions & 2 deletions
@@ -57,6 +57,8 @@
 cost = layers.cross_entropy(
     input=predict, label=label, program=program, init_program=init_program)
 avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
+accuracy = layers.accuracy(
+    input=predict, label=label, program=program, init_program=init_program)
 
 optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
 opts = optimizer.minimize(avg_cost, init_program)
@@ -87,9 +89,9 @@
         outs = exe.run(program,
                        feed={'x': tensor_x,
                              'y': tensor_y},
-                       fetch_list=[avg_cost])
+                       fetch_list=[avg_cost, accuracy])
         out = np.array(outs[0])
-
+        acc = np.array(outs[1])
         if out[0] < 5.0:
             exit(0)  # if avg cost less than 5.0, we think our code is good.
 exit(1)
