Skip to content

Commit f6543a1

Browse files
sidgoyal78 and daming-lu
authored and committed
[Test-driven] Implementing sentiment_analysis with new API (#10812)
1 parent f0c4088 commit f6543a1

File tree

3 files changed

+61
-37
lines changed

3 files changed

+61
-37
lines changed

python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ endforeach()
99
add_subdirectory(fit_a_line)
1010
add_subdirectory(recognize_digits)
1111
add_subdirectory(image_classification)
12+
add_subdirectory(understand_sentiment)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
2+
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
3+
4+
# default test
5+
foreach(src ${TEST_OPS})
6+
py_test(${src} SRCS ${src}.py)
7+
endforeach()
Lines changed: 53 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717
import paddle
1818
import paddle.fluid as fluid
1919
from functools import partial
20+
import numpy as np
2021

2122
CLASS_DIM = 2
2223
EMB_DIM = 128
2324
HID_DIM = 512
2425
STACKED_NUM = 3
26+
BATCH_SIZE = 128
2527

2628

2729
def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
@@ -50,7 +52,7 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
5052
return prediction
5153

5254

53-
def inference_network(word_dict):
55+
def inference_program(word_dict):
5456
data = fluid.layers.data(
5557
name="words", shape=[1], dtype="int64", lod_level=1)
5658

@@ -60,57 +62,71 @@ def inference_network(word_dict):
6062
return net
6163

6264

63-
def train_network(word_dict):
64-
prediction = inference_network(word_dict)
65+
def train_program(word_dict):
66+
prediction = inference_program(word_dict)
6567
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
6668
cost = fluid.layers.cross_entropy(input=prediction, label=label)
6769
avg_cost = fluid.layers.mean(cost)
6870
accuracy = fluid.layers.accuracy(input=prediction, label=label)
69-
return avg_cost, accuracy
71+
return [avg_cost, accuracy]
7072

7173

72-
def train(use_cuda, save_path):
73-
BATCH_SIZE = 128
74-
EPOCH_NUM = 5
74+
def train(use_cuda, train_program, save_dirname):
75+
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
76+
optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
7577

7678
word_dict = paddle.dataset.imdb.word_dict()
79+
trainer = fluid.Trainer(
80+
train_func=partial(train_program, word_dict),
81+
place=place,
82+
optimizer=optimizer)
7783

78-
train_data = paddle.batch(
84+
def event_handler(event):
85+
if isinstance(event, fluid.EndEpochEvent):
86+
test_reader = paddle.batch(
87+
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
88+
avg_cost, acc = trainer.test(
89+
reader=test_reader, feed_order=['words', 'label'])
90+
91+
print("avg_cost: %s" % avg_cost)
92+
print("acc : %s" % acc)
93+
94+
if acc > 0.2: # Smaller value to increase CI speed
95+
trainer.save_params(save_dirname)
96+
trainer.stop()
97+
98+
else:
99+
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
100+
event.epoch + 1, avg_cost, acc))
101+
if math.isnan(avg_cost):
102+
sys.exit("got NaN loss, training failed.")
103+
elif isinstance(event, fluid.EndStepEvent):
104+
print("Step {0}, Epoch {1} Metrics {2}".format(
105+
event.step, event.epoch, map(np.array, event.metrics)))
106+
if event.step == 1: # Run 2 iterations to speed CI
107+
trainer.save_params(save_dirname)
108+
trainer.stop()
109+
110+
train_reader = paddle.batch(
79111
paddle.reader.shuffle(
80-
paddle.dataset.imdb.train(word_dict), buf_size=1000),
112+
paddle.dataset.imdb.train(word_dict), buf_size=25000),
81113
batch_size=BATCH_SIZE)
82114

83-
test_data = paddle.batch(
84-
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
85-
86-
def event_handler(event):
87-
if isinstance(event, fluid.EndIteration):
88-
if (event.batch_id % 10) == 0:
89-
avg_cost, accuracy = trainer.test(reader=test_data)
90-
91-
print('BatchID {1:04}, Loss {2:2.2}, Acc {3:2.2}'.format(
92-
event.batch_id + 1, avg_cost, accuracy))
115+
trainer.train(
116+
num_epochs=1,
117+
event_handler=event_handler,
118+
reader=train_reader,
119+
feed_order=['words', 'label'])
93120

94-
if accuracy > 0.01: # Low threshold for speeding up CI
95-
trainer.params.save(save_path)
96-
return
97121

98-
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
99-
trainer = fluid.Trainer(
100-
partial(train_network, word_dict),
101-
optimizer=fluid.optimizer.Adagrad(learning_rate=0.002),
102-
place=place,
103-
event_handler=event_handler)
104-
105-
trainer.train(train_data, EPOCH_NUM, event_handler=event_handler)
106-
107-
108-
def infer(use_cuda, save_path):
109-
params = fluid.Params(save_path)
122+
def infer(use_cuda, inference_program, save_dirname=None):
110123
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
111124
word_dict = paddle.dataset.imdb.word_dict()
125+
112126
inferencer = fluid.Inferencer(
113-
partial(inference_network, word_dict), params, place=place)
127+
infer_func=partial(inference_program, word_dict),
128+
param_path=save_dirname,
129+
place=place)
114130

115131
def create_random_lodtensor(lod, place, low, high):
116132
data = np.random.random_integers(low, high,
@@ -131,8 +147,8 @@ def main(use_cuda):
131147
if use_cuda and not fluid.core.is_compiled_with_cuda():
132148
return
133149
save_path = "understand_sentiment_stacked_lstm.inference.model"
134-
train(use_cuda, save_path)
135-
infer(use_cuda, save_path)
150+
train(use_cuda, train_program, save_path)
151+
infer(use_cuda, inference_program, save_path)
136152

137153

138154
if __name__ == '__main__':

0 commit comments

Comments (0)