Skip to content

Commit 8a521c0

Browse files
committed
Remove buggy get_test_program and refine c++ reader demo
1 parent ebe3b5e commit 8a521c0

File tree

3 files changed

+62
-159
lines changed

3 files changed

+62
-159
lines changed

python/paddle/fluid/io.py

Lines changed: 0 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -789,101 +789,3 @@ def get_parameter_value_by_name(name, executor, program=None):
789789
program = default_main_program()
790790
var = program.global_block().var(name)
791791
return get_parameter_value(var, executor)
792-
793-
794-
def get_test_program(filelist, program=None, startup_program=None):
795-
"""
796-
Transpile current train program to a program to read test dataset
797-
if the program is using reader ops like "open_files_op".
798-
"""
799-
800-
def _copy_reader_var_(block, var, new_name=None):
801-
if new_name == None:
802-
new_name = var.name
803-
new_var = block.create_var(
804-
name=str(new_name), type=core.VarDesc.VarType.READER)
805-
new_var.desc.set_shapes(var.desc.shapes())
806-
new_var.desc.set_dtypes(var.desc.dtypes())
807-
new_var.persistable = True
808-
return new_var
809-
810-
def _get_test_reader_name(train_reader_name):
811-
return train_reader_name + "_test"
812-
813-
def _is_reader_op(op):
814-
block = op.block
815-
if "Out" in op.output_names:
816-
reader_out = block.vars[op.output("Out")[0]]
817-
if reader_out.type == core.VarDesc.VarType.READER:
818-
return True
819-
return False
820-
821-
if program == None:
822-
program = default_main_program()
823-
if startup_program == None:
824-
startup_program = default_startup_program()
825-
startup_block = startup_program.global_block()
826-
827-
# 1. find out the original reader var name
828-
startup_reader_op_list = []
829-
830-
for op in startup_block.ops:
831-
if _is_reader_op(op):
832-
startup_reader_op_list.append(op)
833-
834-
if len(startup_reader_op_list) == 0:
835-
return program
836-
837-
root_reader_op = startup_reader_op_list[0]
838-
train_test_reader_map = {}
839-
# 2. add operators to startup to read open and read test data files
840-
for op in startup_reader_op_list:
841-
assert (len(op.output("Out")) == 1)
842-
train_reader_name = op.output("Out")[0]
843-
train_reader = startup_block.vars[train_reader_name]
844-
test_reader = _copy_reader_var_(
845-
startup_block,
846-
train_reader,
847-
new_name=_get_test_reader_name(train_reader_name))
848-
train_test_reader_map[train_reader.name] = test_reader
849-
850-
test_op_inputs = {}
851-
for name in op.input_names:
852-
train_arg_names = op.input(name)
853-
test_arg_vars = []
854-
for arg_name in train_arg_names:
855-
arg_var = train_test_reader_map[
856-
arg_name] if name == "UnderlyingReader" else startup_block.vars[
857-
arg_name]
858-
test_arg_vars.append(arg_var)
859-
test_op_inputs[name] = test_arg_vars
860-
861-
test_op = startup_block.append_op(
862-
type=op.type,
863-
inputs=test_op_inputs,
864-
outputs={'Out': [test_reader]},
865-
attrs=op.attrs)
866-
# set the root reader op's filelist attr for reading test files
867-
if op.type == root_reader_op.type:
868-
test_op.set_attr("file_names", filelist)
869-
if op.type == "create_multi_pass_reader":
870-
test_op.set_attr("pass_num", 1)
871-
872-
# 3. rename reader vars in inference program to different name
873-
# to avoid read from train data.
874-
main_block = program.global_block()
875-
for var in main_block.vars.values():
876-
if var.type == core.VarDesc.VarType.READER:
877-
main_block.rename_var(
878-
str(var.name), str(_get_test_reader_name(var.name)))
879-
880-
for op in main_block.ops:
881-
if op.type == root_reader_op.type:
882-
test_op.set_attr("file_names", filelist)
883-
if op.type == "create_multi_pass_reader":
884-
test_op.set_attr("pass_num", 1)
885-
886-
startup_program.sync_with_cpp()
887-
program.sync_with_cpp()
888-
889-
return program

python/paddle/fluid/tests/demo/text_classification/convert_data_to_recordio.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,12 @@ def load_vocab(filename):
3131

3232

3333
# load word dict with paddle inner function
34-
word_dict = load_vocab(sys.argv[1])
35-
word_dict["<unk>"] = len(word_dict)
34+
if len(sys.argv) > 1:
35+
word_dict = load_vocab(sys.argv[1])
36+
word_dict["<unk>"] = len(word_dict)
37+
else:
38+
word_dict = paddle.dataset.imdb.word_dict()
39+
3640
print "Dict dim = ", len(word_dict)
3741

3842
# input text data

python/paddle/fluid/tests/demo/text_classification/train.py

Lines changed: 56 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
TRAIN_FILES = ['train.recordio']
2020
TEST_FILES = ['test.recordio']
2121

22-
DICT_DIM = 89528
22+
DICT_DIM = 5147
2323

2424
# embedding dim
2525
emb_dim = 128
@@ -33,33 +33,24 @@
3333
# class num
3434
class_dim = 2
3535

36+
# epoch num
37+
epoch_num = 10
3638

37-
def network_cfg(is_train, pass_num=100):
38-
with fluid.unique_name.guard():
39-
train_file_obj = fluid.layers.open_files(
40-
filenames=TRAIN_FILES,
41-
pass_num=pass_num,
42-
shapes=[[-1, 1], [-1, 1]],
43-
lod_levels=[1, 0],
44-
dtypes=['int64', 'int64'],
45-
thread_num=1)
46-
47-
test_file_obj = fluid.layers.open_files(
48-
filenames=TEST_FILES,
49-
pass_num=1,
50-
shapes=[[-1, 1], [-1, 1]],
51-
lod_levels=[1, 0],
52-
dtypes=['int64', 'int64'],
53-
thread_num=1)
5439

55-
if is_train:
56-
file_obj = fluid.layers.shuffle(train_file_obj, buffer_size=1000)
57-
else:
58-
file_obj = test_file_obj
40+
def build_program(is_train):
41+
file_obj_handle = fluid.layers.io.open_files(
42+
filenames=TRAIN_FILES if is_train else TEST_FILES,
43+
shapes=[[-1, 1], [-1, 1]],
44+
lod_levels=[1, 0],
45+
dtypes=['int64', 'int64'],
46+
thread_num=1)
47+
if is_train:
48+
file_obj = fluid.layers.io.shuffle(file_obj_handle, buffer_size=1000)
49+
else:
50+
file_obj = file_obj_handle
51+
file_obj = fluid.layers.io.double_buffer(file_obj)
5952

60-
file_obj = fluid.layers.double_buffer(
61-
file_obj,
62-
name="train_double_buffer" if is_train else 'test_double_buffer')
53+
with fluid.unique_name.guard():
6354

6455
data, label = fluid.layers.read_file(file_obj)
6556

@@ -90,58 +81,64 @@ def network_cfg(is_train, pass_num=100):
9081

9182
if is_train:
9283
# SGD optimizer
93-
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=0.01)
84+
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=0.001)
9485
sgd_optimizer.minimize(avg_cost)
9586

96-
return {
97-
'loss': avg_cost,
98-
'log': [avg_cost, acc],
99-
'file': train_file_obj if is_train else test_file_obj
100-
}
87+
return {'loss': avg_cost, 'log': [avg_cost, acc], 'file': file_obj_handle}
10188

10289

10390
def main():
10491
train = fluid.Program()
10592
startup = fluid.Program()
93+
test = fluid.Program()
10694

10795
with fluid.program_guard(train, startup):
108-
train_args = network_cfg(is_train=True)
109-
110-
test = fluid.Program()
96+
train_args = build_program(is_train=True)
11197

112-
with fluid.program_guard(test, fluid.Program()):
113-
test_args = network_cfg(is_train=False)
98+
with fluid.program_guard(test, startup):
99+
test_args = build_program(is_train=False)
114100

101+
use_cuda = fluid.core.is_compiled_with_cuda()
115102
# startup
116-
place = fluid.CUDAPlace(0)
103+
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
117104
exe = fluid.Executor(place=place)
118105
exe.run(startup)
119106

120107
train_exe = fluid.ParallelExecutor(
121-
use_cuda=True, loss_name=train_args['loss'].name, main_program=train)
108+
use_cuda=use_cuda,
109+
loss_name=train_args['loss'].name,
110+
main_program=train)
111+
test_exe = fluid.ParallelExecutor(
112+
use_cuda=use_cuda, main_program=test, share_vars_from=train_exe)
122113

123114
fetch_var_list = [var.name for var in train_args['log']]
124-
for i in xrange(sys.maxint):
125-
result = map(numpy.array,
126-
train_exe.run(fetch_list=fetch_var_list
127-
if i % 1000 == 0 else []))
128-
if len(result) != 0:
129-
print 'Train: ', result
130-
131-
if i % 1000 == 0:
132-
test_exe = fluid.ParallelExecutor(
133-
use_cuda=True, main_program=test, share_vars_from=train_exe)
134-
loss = []
135-
acc = []
136-
try:
137-
while True:
138-
loss_np, acc_np = map(
139-
numpy.array, test_exe.run(fetch_list=fetch_var_list))
140-
loss.append(loss_np[0])
141-
acc.append(acc_np[0])
142-
except:
143-
test_args['file'].reset()
144-
print 'TEST: ', numpy.mean(loss), numpy.mean(acc)
115+
for epoch_id in range(epoch_num):
116+
# train
117+
try:
118+
batch_id = 0
119+
while True:
120+
result = map(numpy.array,
121+
train_exe.run(fetch_list=fetch_var_list
122+
if batch_id % 10 == 0 else []))
123+
if len(result) != 0:
124+
print 'Train loss: ', result
125+
batch_id += 1
126+
except fluid.core.EOFException:
127+
print 'End of epoch', epoch_id
128+
train_args['file'].reset()
129+
130+
# test
131+
loss = []
132+
acc = []
133+
try:
134+
while True:
135+
loss_np, acc_np = map(numpy.array,
136+
test_exe.run(fetch_list=fetch_var_list))
137+
loss.append(loss_np[0])
138+
acc.append(acc_np[0])
139+
except:
140+
test_args['file'].reset()
141+
print 'TEST: ', numpy.mean(loss), numpy.mean(acc)
145142

146143

147144
if __name__ == '__main__':

0 commit comments

Comments
 (0)