@@ -45,20 +45,23 @@ static void CreateTensorFromMessageType(framework::Variable *var,
   }
 }
 
-static void ParallelExecuteBlocks(const std::vector<size_t> &parallel_blkids,
-                                  framework::Executor *executor,
-                                  framework::ProgramDesc *program,
-                                  framework::Scope *scope) {
+static void ParallelExecuteBlocks(
+    const std::vector<size_t> &parallel_blkids, framework::Executor *executor,
+    const std::vector<std::shared_ptr<framework::ExecutorPrepareContext>>
+        &prepared,
+    framework::ProgramDesc *program, framework::Scope *scope) {
   std::vector<std::future<void>> fs;
   for (size_t idx : parallel_blkids) {
-    fs.push_back(framework::Async([&executor, &program, &scope, idx]() {
-      int run_block = idx;  // thread local
-      try {
-        executor->Run(*program, scope, run_block, false, false);
-      } catch (std::exception &e) {
-        LOG(ERROR) << "run sub program error " << e.what();
-      }
-    }));
+    fs.push_back(
+        framework::Async([&executor, &prepared, &program, &scope, idx]() {
+          int run_block = idx;  // thread local
+          try {
+            executor->RunPreparedContext(prepared[run_block].get(), scope,
+                                         false, false);
+          } catch (std::exception &e) {
+            LOG(ERROR) << "run sub program error " << e.what();
+          }
+        }));
   }
   for (size_t i = 0; i < fs.size(); ++i) fs[i].wait();
 }
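
The lambda fan-out above is a standard spawn-then-join pattern. A minimal
self-contained sketch of the same structure, with std::async assumed as a
stand-in for framework::Async (both hand back a std::future) and RunBlock as
a hypothetical stand-in for Executor::RunPreparedContext:

#include <exception>
#include <future>
#include <iostream>
#include <vector>

// Hypothetical stand-in for executor->RunPreparedContext(prepared[id].get(), ...).
void RunBlock(size_t block_id) {
  std::cout << "running block " << block_id << "\n";
}

int main() {
  std::vector<size_t> parallel_blkids = {2, 3, 4};
  std::vector<std::future<void>> fs;
  for (size_t idx : parallel_blkids) {
    // One task per block; exceptions are logged rather than propagated,
    // as in the diff above.
    fs.push_back(std::async(std::launch::async, [idx]() {
      try {
        RunBlock(idx);
      } catch (const std::exception &e) {
        std::cerr << "run sub program error " << e.what() << "\n";
      }
    }));
  }
  for (size_t i = 0; i < fs.size(); ++i) fs[i].wait();  // join all blocks
  return 0;
}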
@@ -101,6 +104,13 @@ class ListenAndServOp : public framework::OperatorBase {
                       "server program should have at least 2 blocks");
 
     framework::Executor executor(dev_place);
+    std::vector<int> block_list;
+    for (size_t blkid = 1; blkid < num_blocks; ++blkid)
+      block_list.push_back(blkid);
+    auto prepared = executor.Prepare(*program, block_list);
+    prepared.insert(
+        prepared.begin(),
+        std::shared_ptr<framework::ExecutorPrepareContext>(nullptr));
 
     // TODO(qiao) set proper fields for table lookup and update
     rpc_service_->SetExecutor(&executor);
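
The nullptr placeholder compensates for Prepare being called only for blocks
1 through num_blocks - 1: the returned vector would otherwise hold block 1's
context at index 0, block 2's at index 1, and so on. Inserting a null entry
at the front makes prepared[blkid] line up with blkid, which is how
ParallelExecuteBlocks indexes it. A self-contained sketch of that alignment,
with PreparedCtx as a hypothetical stand-in for
framework::ExecutorPrepareContext:

#include <cassert>
#include <memory>
#include <vector>

struct PreparedCtx {
  int block_id;  // which block this context was prepared for
};

int main() {
  const size_t num_blocks = 4;
  std::vector<std::shared_ptr<PreparedCtx>> prepared;
  // Mirrors the diff: only blocks 1..num_blocks-1 get prepared, so the
  // contexts start out shifted down by one index.
  for (size_t blkid = 1; blkid < num_blocks; ++blkid)
    prepared.push_back(
        std::make_shared<PreparedCtx>(PreparedCtx{static_cast<int>(blkid)}));
  // The null placeholder realigns vector indices with block ids.
  prepared.insert(prepared.begin(), nullptr);
  assert(prepared[2]->block_id == 2);
  return 0;
}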
@@ -160,14 +170,15 @@ class ListenAndServOp : public framework::OperatorBase {
       for (size_t blkid = 2; blkid < num_blocks; ++blkid) {
         if (program->Block(blkid).Parent() != last_parent_blkid) {
           for (size_t idx : parallel_blkids) VLOG(3) << idx;
-          ParallelExecuteBlocks(parallel_blkids, &executor, program,
+          ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program,
                                 &recv_scope);
           parallel_blkids.clear();
           last_parent_blkid = program->Block(blkid).Parent();
         }
         parallel_blkids.push_back(blkid);
       }
-      ParallelExecuteBlocks(parallel_blkids, &executor, program, &recv_scope);
+      ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program,
+                            &recv_scope);
 
       VLOG(3) << "run all blocks spent " << detail::GetTimestamp() - ts
               << "(ms)";
@@ -181,7 +192,8 @@ class ListenAndServOp : public framework::OperatorBase {
         var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
       }
       rpc_service_->SetCond(1);
-      // FIXME(typhoonzero): use another condition to sync wait clients get.
+      // NOTE: barrier request retries are not handled here; we may use a
+      // global barrier id to resolve this.
       rpc_service_->WaitClientGet(fan_in);
       sparse_vars.clear();
     }  // while(true)