Skip to content

Commit 11b6473

Browse files
authored
Image classification & word2vec (#10738)
1 parent 40a2ee9 commit 11b6473

File tree

5 files changed

+152
-42
lines changed

5 files changed

+152
-42
lines changed

python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ endforeach()
88

99
add_subdirectory(fit_a_line)
1010
add_subdirectory(recognize_digits)
11+
add_subdirectory(image_classification)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Collect every test_*.py in this directory, as paths relative to it.
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
2+
# Drop the ".py" text so each entry can be used as a test name.
# NOTE: this replaces every occurrence of ".py" in the list, not only a
# trailing extension.
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
3+
4+
# Register each collected script through py_test (defined outside this file;
# presumably it adds one test per script -- confirm against its definition).
5+
foreach(src ${TEST_OPS})
6+
py_test(${src} SRCS ${src}.py)
7+
endforeach()
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""
15+
CIFAR dataset.
16+
17+
This module will download dataset from
18+
https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
19+
paddle reader creators.
20+
21+
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
22+
with 6000 images per class. There are 50000 training images and 10000 test
23+
images.
24+
25+
The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
26+
containing 600 images each. There are 500 training images and 100 testing
27+
images per class.
28+
29+
"""
30+
31+
import cPickle
32+
import itertools
33+
import numpy
34+
import paddle.v2.dataset.common
35+
import tarfile
36+
37+
__all__ = ['train10']
38+
39+
URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
40+
CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
41+
CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a'
42+
43+
44+
def reader_creator(filename, sub_name, batch_size=None):
    """
    Create a reader over the pickled batches stored in a CIFAR tar archive.

    :param filename: Path of the tar archive to read.
    :param sub_name: Only archive members whose name contains this substring
        are read.
    :param batch_size: When an int, the maximum number of samples that one
        pass of the reader yields; any other value (e.g. None) means no limit.
    :return: A no-argument callable; each call returns a generator of
        (float32 pixel array scaled by 1/255, int label) tuples.
    :rtype: callable
    """
    # cPickle exists only on Python 2. On Python 3 the archive's Python-2
    # pickles need encoding='latin1' so that the numpy arrays inside them
    # load correctly (and the dict keys come back as str).
    try:
        import cPickle as pickle
        load_kwargs = {}
    except ImportError:
        import pickle
        load_kwargs = {'encoding': 'latin1'}

    limited = isinstance(batch_size, int)

    def read_batch(batch):
        data = batch['data']
        # Some batches store their labels under 'fine_labels' instead.
        labels = batch.get('labels', batch.get('fine_labels', None))
        assert labels is not None
        # The builtin zip works on both Python 2 and 3 (itertools.izip is
        # Python 2 only).
        for sample, label in zip(data, labels):
            yield (sample / 255.0).astype(numpy.float32), int(label)

    def reader():
        # Nothing to produce: do not even open the archive.
        if limited and batch_size <= 0:
            return

        produced = 0
        with tarfile.open(filename, mode='r') as f:
            names = (each_item.name for each_item in f
                     if sub_name in each_item.name)

            for name in names:
                # NOTE(review): unpickling can execute arbitrary code; this
                # assumes the archive is trusted (e.g. checksum-verified by
                # the caller) -- confirm.
                batch = pickle.load(f.extractfile(name), **load_kwargs)
                for item in read_batch(batch):
                    yield item
                    produced += 1
                    # Stop the whole reader (not just this batch file) as
                    # soon as exactly batch_size samples were produced, so
                    # the remaining batch files are never unpickled.
                    if limited and produced >= batch_size:
                        return

    return reader
67+
68+
69+
def train10(batch_size=None):
    """
    Build the reader creator for the CIFAR-10 training batches.

    The archive at CIFAR10_URL is obtained through
    paddle.v2.dataset.common.download and only its 'data_batch' members are
    read. Each sample is image pixels in [0, 1] and a label in [0, 9].

    :param batch_size: Forwarded unchanged to reader_creator.
    :return: Training reader creator
    :rtype: callable
    """
    archive_path = paddle.v2.dataset.common.download(
        CIFAR10_URL, 'cifar', CIFAR10_MD5)
    return reader_creator(archive_path, 'data_batch', batch_size=batch_size)
Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import paddle
1818
import paddle.fluid as fluid
1919
import numpy
20+
import cifar10_small_test_set
2021

2122

2223
def resnet_cifar10(input, depth=32):
@@ -81,46 +82,50 @@ def train_network():
8182
cost = fluid.layers.cross_entropy(input=predict, label=label)
8283
avg_cost = fluid.layers.mean(cost)
8384
accuracy = fluid.layers.accuracy(input=predict, label=label)
84-
return avg_cost, accuracy
85+
return [avg_cost, accuracy]
8586

8687

87-
def train(use_cuda, save_path):
88+
def train(use_cuda, train_program, save_dirname):
8889
BATCH_SIZE = 128
8990
EPOCH_NUM = 1
9091

9192
train_reader = paddle.batch(
9293
paddle.reader.shuffle(
93-
paddle.dataset.cifar.train10(), buf_size=128 * 10),
94+
cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
9495
batch_size=BATCH_SIZE)
9596

9697
test_reader = paddle.batch(
9798
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE)
9899

99100
def event_handler(event):
100-
if isinstance(event, fluid.EndIteration):
101-
if (event.batch_id % 10) == 0:
102-
avg_cost, accuracy = trainer.test(reader=test_reader)
101+
if isinstance(event, fluid.EndStepEvent):
102+
avg_cost, accuracy = trainer.test(
103+
reader=test_reader, feed_order=['pixel', 'label'])
103104

104-
print('BatchID {1:04}, Loss {2:2.2}, Acc {3:2.2}'.format(
105-
event.batch_id + 1, avg_cost, accuracy))
105+
print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
106106

107-
if accuracy > 0.01: # Low threshold for speeding up CI
108-
trainer.params.save(save_path)
109-
return
107+
if accuracy > 0.01: # Low threshold for speeding up CI
108+
if save_dirname is not None:
109+
trainer.save_params(save_dirname)
110+
return
110111

111112
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
112113
trainer = fluid.Trainer(
113-
train_network,
114+
train_func=train_program,
114115
optimizer=fluid.optimizer.Adam(learning_rate=0.001),
115-
place=place,
116-
event_handler=event_handler)
117-
trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler)
116+
place=place)
118117

118+
trainer.train(
119+
reader=train_reader,
120+
num_epochs=EPOCH_NUM,
121+
event_handler=event_handler,
122+
feed_order=['pixel', 'label'])
119123

120-
def infer(use_cuda, save_path):
121-
params = fluid.Params(save_path)
124+
125+
def infer(use_cuda, inference_program, save_dirname=None):
122126
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
123-
inferencer = fluid.Inferencer(inference_network, params, place=place)
127+
inferencer = fluid.Inferencer(
128+
infer_func=inference_program, param_path=save_dirname, place=place)
124129

125130
# The input's dimension of conv should be 4-D or 5-D.
126131
# Use normalized image pixels as input data, which should be in the range
@@ -135,8 +140,14 @@ def main(use_cuda):
135140
if use_cuda and not fluid.core.is_compiled_with_cuda():
136141
return
137142
save_path = "image_classification_resnet.inference.model"
138-
train(use_cuda, save_path)
139-
infer(use_cuda, save_path)
143+
144+
train(
145+
use_cuda=use_cuda, train_program=train_network, save_dirname=save_path)
146+
147+
infer(
148+
use_cuda=use_cuda,
149+
inference_program=inference_network,
150+
save_dirname=save_path)
140151

141152

142153
if __name__ == '__main__':
Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import paddle
1818
import paddle.fluid as fluid
1919
import numpy
20+
import cifar10_small_test_set
2021

2122

2223
def vgg16_bn_drop(input):
@@ -60,46 +61,48 @@ def train_network():
6061
cost = fluid.layers.cross_entropy(input=predict, label=label)
6162
avg_cost = fluid.layers.mean(cost)
6263
accuracy = fluid.layers.accuracy(input=predict, label=label)
63-
return avg_cost, accuracy
64+
return [avg_cost, accuracy]
6465

6566

66-
def train(use_cuda, save_path):
67+
def train(use_cuda, train_program, save_dirname):
6768
BATCH_SIZE = 128
68-
EPOCH_NUM = 1
69-
7069
train_reader = paddle.batch(
7170
paddle.reader.shuffle(
72-
paddle.dataset.cifar.train10(), buf_size=128 * 10),
71+
cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
7372
batch_size=BATCH_SIZE)
7473

7574
test_reader = paddle.batch(
7675
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE)
7776

7877
def event_handler(event):
79-
if isinstance(event, fluid.EndIteration):
80-
if (event.batch_id % 10) == 0:
81-
avg_cost, accuracy = trainer.test(reader=test_reader)
78+
if isinstance(event, fluid.EndStepEvent):
79+
avg_cost, accuracy = trainer.test(
80+
reader=test_reader, feed_order=['pixel', 'label'])
8281

83-
print('BatchID {1:04}, Loss {2:2.2}, Acc {3:2.2}'.format(
84-
event.batch_id + 1, avg_cost, accuracy))
82+
print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
8583

86-
if accuracy > 0.01: # Low threshold for speeding up CI
87-
trainer.params.save(save_path)
88-
return
84+
if accuracy > 0.01: # Low threshold for speeding up CI
85+
if save_dirname is not None:
86+
trainer.save_params(save_dirname)
87+
return
8988

9089
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
9190
trainer = fluid.Trainer(
92-
train_network,
93-
optimizer=fluid.optimizer.Adam(learning_rate=0.001),
91+
train_func=train_program,
9492
place=place,
95-
event_handler=event_handler)
96-
trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler)
93+
optimizer=fluid.optimizer.Adam(learning_rate=0.001))
94+
95+
trainer.train(
96+
reader=train_reader,
97+
num_epochs=1,
98+
event_handler=event_handler,
99+
feed_order=['pixel', 'label'])
97100

98101

99-
def infer(use_cuda, save_path):
100-
params = fluid.Params(save_path)
102+
def infer(use_cuda, inference_program, save_dirname=None):
101103
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
102-
inferencer = fluid.Inferencer(inference_network, params, place=place)
104+
inferencer = fluid.Inferencer(
105+
infer_func=inference_program, param_path=save_dirname, place=place)
103106

104107
# The input's dimension of conv should be 4-D or 5-D.
105108
# Use normalized image pixels as input data, which should be in the range
@@ -114,8 +117,14 @@ def main(use_cuda):
114117
if use_cuda and not fluid.core.is_compiled_with_cuda():
115118
return
116119
save_path = "image_classification_vgg.inference.model"
117-
train(use_cuda, save_path)
118-
infer(use_cuda, save_path)
120+
121+
train(
122+
use_cuda=use_cuda, train_program=train_network, save_dirname=save_path)
123+
124+
infer(
125+
use_cuda=use_cuda,
126+
inference_program=inference_network,
127+
save_dirname=save_path)
119128

120129

121130
if __name__ == '__main__':

0 commit comments

Comments
 (0)