@@ -126,6 +126,8 @@ class BspScheduleRecomp : public IBspScheduleEval<Graph_t> {
126126 vertex_idx getTotalAssignments () const ;
127127
128128 void mergeSupersteps ();
129+
130+ void cleanSchedule ();
129131};
130132
131133template <typename Graph_t>
@@ -324,4 +326,103 @@ void BspScheduleRecomp<Graph_t>::mergeSupersteps() {
324326 number_of_supersteps = current_step_idx;
325327}
326328
329+ // remove unneeded comm. schedule entries - these can happen in several algorithms
330+ template <typename Graph_t>
331+ void BspScheduleRecomp<Graph_t>::cleanSchedule()
332+ {
333+ // I. Data that is already present before it arrives
334+ std::vector<std::vector<std::multiset<unsigned >>> arrives_at (instance->numberOfVertices (),
335+ std::vector<std::multiset<unsigned >>(instance->numberOfProcessors ()));
336+ for (const auto &node : instance->getComputationalDag ().vertices ()) {
337+ for (const auto &proc_and_step : node_to_processor_and_supertep_assignment[node]) {
338+ arrives_at[node][proc_and_step.first ].insert (proc_and_step.second );
339+ }
340+ }
341+
342+ for (auto const &[key, val] : commSchedule) {
343+ arrives_at[std::get<0 >(key)][std::get<2 >(key)].insert (val);
344+ }
345+
346+ // - computation steps
347+ for (const auto &node : instance->getComputationalDag ().vertices ()) {
348+ for (unsigned index = 0 ; index < node_to_processor_and_supertep_assignment[node].size (); ) {
349+ const auto &proc_and_step = node_to_processor_and_supertep_assignment[node][index];
350+ if (*arrives_at[node][proc_and_step.first ].begin () < proc_and_step.second ) {
351+ node_to_processor_and_supertep_assignment[node][index] = node_to_processor_and_supertep_assignment[node].back ();
352+ node_to_processor_and_supertep_assignment[node].pop_back ();
353+ } else {
354+ ++index;
355+ }
356+ }
357+ }
358+
359+ // - communication steps
360+ std::vector<KeyTriple> toErase;
361+ for (auto const &[key, val] : commSchedule) {
362+ auto itr = arrives_at[std::get<0 >(key)][std::get<2 >(key)].begin ();
363+ if (*itr < val) {
364+ toErase.push_back (key);
365+ } else if (*itr == val && ++itr != arrives_at[std::get<0 >(key)][std::get<2 >(key)].end () && *itr == val) {
366+ toErase.push_back (key);
367+ arrives_at[std::get<0 >(key)][std::get<2 >(key)].erase (itr);
368+ }
369+ }
370+
371+ for (const KeyTriple &key : toErase) {
372+ commSchedule.erase (key);
373+ }
374+
375+ // II. Data that is not used after being computed/sent
376+ std::vector<std::vector<std::multiset<unsigned >>> used_at (instance->numberOfVertices (),
377+ std::vector<std::multiset<unsigned >>(instance->numberOfProcessors ()));
378+ for (const auto &node : instance->getComputationalDag ().vertices ()) {
379+ for (const auto &child : instance->getComputationalDag ().children (node)) {
380+ for (const auto &proc_and_step : node_to_processor_and_supertep_assignment[child]) {
381+ used_at[node][proc_and_step.first ].insert (proc_and_step.second );
382+ }
383+ }
384+ }
385+
386+ for (auto const &[key, val] : commSchedule) {
387+ used_at[std::get<0 >(key)][std::get<1 >(key)].insert (val);
388+ }
389+
390+ // - computation steps
391+ for (const auto &node : instance->getComputationalDag ().vertices ()) {
392+ for (unsigned index = 0 ; index < node_to_processor_and_supertep_assignment[node].size (); ) {
393+ const auto &proc_and_step = node_to_processor_and_supertep_assignment[node][index];
394+ if ((used_at[node][proc_and_step.first ].empty () || *used_at[node][proc_and_step.first ].rbegin () < proc_and_step.second )
395+ && index > 0 )
396+ {
397+ node_to_processor_and_supertep_assignment[node][index] = node_to_processor_and_supertep_assignment[node].back ();
398+ node_to_processor_and_supertep_assignment[node].pop_back ();
399+ } else {
400+ ++index;
401+ }
402+ }
403+ }
404+
405+ // - communication steps (need to visit cs entries in reverse superstep order here)
406+ std::vector<std::vector<KeyTriple>> entries (this ->number_of_supersteps );
407+ for (auto const &[key, val] : commSchedule) {
408+ entries[val].push_back (key);
409+ }
410+
411+ toErase.clear ();
412+ for (unsigned step = this ->number_of_supersteps - 1 ; step < this ->number_of_supersteps ; --step) {
413+ for (const KeyTriple &key : entries[step]) {
414+ if (used_at[std::get<0 >(key)][std::get<2 >(key)].empty ()
415+ || *used_at[std::get<0 >(key)][std::get<2 >(key)].rbegin () <= step) {
416+ toErase.push_back (key);
417+ auto itr = used_at[std::get<0 >(key)][std::get<1 >(key)].find (step);
418+ used_at[std::get<0 >(key)][std::get<1 >(key)].erase (itr);
419+ }
420+ }
421+ }
422+
423+ for (const KeyTriple &key : toErase) {
424+ commSchedule.erase (key);
425+ }
426+ }
427+
327428} // namespace osp
0 commit comments