@@ -37,7 +37,8 @@ inline double GetCurrentMs() {
   return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
 }
 
-// return size of total words
+// Load the input word index data from file and save it into LoDTensor.
+// Return the number of words.
 size_t LoadData(std::vector<paddle::framework::LoDTensor>* out,
                 const std::string& filename) {
   size_t sz = 0;
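The new comment documents what LoadData does: read word indices from a text file, pack them into LoDTensors, and return the total word count. As a standalone sketch of just the reading step (not the PaddlePaddle implementation; the name LoadWordIds and the plain int64_t output are assumptions for illustration):

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

// Sketch only: read whitespace-separated word ids from `filename` into `out`
// and return how many were read; the real LoadData packs them into LoDTensors.
size_t LoadWordIds(const std::string& filename, std::vector<int64_t>* out) {
  std::ifstream fin(filename);
  int64_t id = 0;
  size_t count = 0;
  while (fin >> id) {
    out->push_back(id);
    ++count;
  }
  return count;
}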
@@ -67,6 +68,8 @@ size_t LoadData(std::vector<paddle::framework::LoDTensor>* out,
   return sz;
 }
 
+// Split the input data samples into small jobs, as balanced as possible,
+// according to the number of threads.
 void SplitData(
     const std::vector<paddle::framework::LoDTensor>& datasets,
     std::vector<std::vector<const paddle::framework::LoDTensor*>>* jobs,
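The comment added above states the intent of SplitData: divide the samples into per-thread jobs of nearly equal size. A minimal standalone sketch of such a balanced split (SplitEvenly and the plain index buckets are hypothetical, not the function in this diff):

#include <cstddef>
#include <vector>

// Sketch only: distribute `num_samples` indices over `num_threads` buckets so
// that bucket sizes differ by at most one (round-robin assignment).
std::vector<std::vector<size_t>> SplitEvenly(size_t num_samples,
                                             size_t num_threads) {
  std::vector<std::vector<size_t>> jobs(num_threads);
  for (size_t i = 0; i < num_samples; ++i) {
    jobs[i % num_threads].push_back(i);
  }
  return jobs;
}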
@@ -116,7 +119,8 @@ void ThreadRunInfer(
   for (size_t i = 0; i < inputs.size(); ++i) {
     feed_targets[feed_target_names[0]] = inputs[i];
     executor->Run(*copy_program, &sub_scope, &feed_targets, &fetch_targets,
-                  true, true, feed_holder_name, fetch_holder_name);
+                  true /*create_local_scope*/, true /*create_vars*/,
+                  feed_holder_name, fetch_holder_name);
   }
   auto stop_ms = GetCurrentMs();
   scope->DeleteScope(&sub_scope);
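This hunk only annotates the two boolean arguments of executor->Run so the call site is self-documenting. A self-contained illustration of the same idiom on a hypothetical function (Configure is invented for the example):

#include <iostream>

// Hypothetical helper used only to show the commented-argument idiom.
void Configure(bool verbose, bool cache_results) {
  std::cout << verbose << " " << cache_results << "\n";
}

int main() {
  // Mirrors executor->Run(..., true /*create_local_scope*/, true /*create_vars*/, ...):
  // the inline comment names each flag without changing behavior.
  Configure(true /*verbose*/, false /*cache_results*/);
  return 0;
}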
@@ -143,12 +147,13 @@ TEST(inference, nlp) {
   // 1. Define place, executor, scope
   auto place = paddle::platform::CPUPlace();
   auto executor = paddle::framework::Executor(place);
-  auto* scope = new paddle::framework::Scope();
+  std::unique_ptr<paddle::framework::Scope> scope(
+      new paddle::framework::Scope());
 
   // 2. Initialize the inference_program and load parameters
   std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
   inference_program =
-      InitProgram(&executor, scope, FLAGS_modelpath, model_combined);
+      InitProgram(&executor, scope.get(), FLAGS_modelpath, model_combined);
   if (FLAGS_use_mkldnn) {
     EnableMKLDNN(inference_program);
   }
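Here the test-owned Scope moves from a raw new/delete pair to std::unique_ptr, and APIs that still take a raw pointer receive scope.get(); the matching `delete scope;` is dropped at the end of the test. A minimal sketch of that ownership pattern with generic stand-ins (Resource and UseResource are assumptions, not Paddle APIs):

#include <memory>

struct Resource {};                          // stand-in for paddle::framework::Scope

void UseResource(Resource* r) { (void)r; }   // stand-in for APIs taking a raw pointer

int main() {
  std::unique_ptr<Resource> res(new Resource());
  UseResource(res.get());  // ownership stays with the unique_ptr
  return 0;                // no manual delete; the Resource is freed automatically
}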
@@ -166,9 +171,9 @@ TEST(inference, nlp) {
     SplitData(datasets, &jobs, FLAGS_num_threads);
     std::vector<std::unique_ptr<std::thread>> threads;
     for (int i = 0; i < FLAGS_num_threads; ++i) {
-      threads.emplace_back(new std::thread(ThreadRunInfer, i, &executor, scope,
-                                           std::ref(inference_program),
-                                           std::ref(jobs)));
+      threads.emplace_back(
+          new std::thread(ThreadRunInfer, i, &executor, scope.get(),
+                          std::ref(inference_program), std::ref(jobs)));
     }
     start_ms = GetCurrentMs();
     for (int i = 0; i < FLAGS_num_threads; ++i) {
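The multi-threaded branch spawns one std::thread per job list and joins them before reading the clock again. A self-contained sketch of that spawn/join pattern (Worker and the fixed thread count are assumptions; the real test passes ThreadRunInfer together with the executor, scope, program, and jobs):

#include <functional>
#include <memory>
#include <thread>
#include <vector>

// Hypothetical worker standing in for ThreadRunInfer.
void Worker(int tid, const std::vector<int>& jobs) {
  (void)tid;
  (void)jobs;
}

int main() {
  std::vector<int> jobs = {1, 2, 3};
  std::vector<std::unique_ptr<std::thread>> threads;
  for (int i = 0; i < 4; ++i) {
    threads.emplace_back(new std::thread(Worker, i, std::ref(jobs)));
  }
  for (auto& t : threads) t->join();  // wait for all workers before timing stops
  return 0;
}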
@@ -177,7 +182,7 @@ TEST(inference, nlp) {
     stop_ms = GetCurrentMs();
   } else {
     if (FLAGS_prepare_vars) {
-      executor.CreateVariables(*inference_program, scope, 0);
+      executor.CreateVariables(*inference_program, scope.get(), 0);
     }
     // always prepare context
     std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx;
@@ -201,17 +206,15 @@ TEST(inference, nlp) {
     start_ms = GetCurrentMs();
     for (size_t i = 0; i < datasets.size(); ++i) {
       feed_targets[feed_target_names[0]] = &(datasets[i]);
-      executor.RunPreparedContext(ctx.get(), scope, &feed_targets,
+      executor.RunPreparedContext(ctx.get(), scope.get(), &feed_targets,
                                   &fetch_targets, !FLAGS_prepare_vars);
     }
     stop_ms = GetCurrentMs();
     LOG(INFO) << "Tid: 0, process " << datasets.size()
               << " samples, avg time per sample: "
               << (stop_ms - start_ms) / datasets.size() << " ms";
   }
-
   LOG(INFO) << "Total inference time with " << FLAGS_num_threads
             << " threads : " << (stop_ms - start_ms) / 1000.0
             << " sec, QPS: " << datasets.size() / ((stop_ms - start_ms) / 1000);
-  delete scope;
 }
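The final log derives throughput from the two timestamps: elapsed milliseconds are converted to seconds, and QPS is the sample count divided by elapsed seconds. A tiny arithmetic sketch with assumed example numbers (not measurements from this test):

#include <iostream>

int main() {
  double start_ms = 0.0, stop_ms = 2500.0;  // assumed example timestamps
  double num_samples = 1000.0;              // assumed sample count
  double seconds = (stop_ms - start_ms) / 1000.0;
  std::cout << "avg ms/sample: " << (stop_ms - start_ms) / num_samples
            << ", QPS: " << num_samples / seconds << "\n";
  return 0;
}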