@@ -475,24 +475,122 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
475475
// shrinkSchedule (override): renumbers the supersteps so that every superstep flagged in
// comm_phase_deleted receives the same new index as the closest preceding non-flagged superstep,
// then rewrites node_to_superstep_assignment, the values stored in commSchedule, and the
// superstep count accordingly.
// NOTE(review): this text is a rendered diff. Lines marked '-' are the removed implementation
// (it only advanced the new index past supersteps that had at least one commSchedule entry);
// lines marked '+' are the replacement described by the comments below.
476476 virtual void shrinkSchedule () override {
477477
478- std::vector<bool > comm_phase_empty (this ->number_of_supersteps , true );
// comm_phase_latest_dependency[s]: largest first-presence superstep (see getFirstPresence)
// among the data that superstep s relies on; every entry starts at 0.
478+ std::vector<unsigned > comm_phase_latest_dependency (this ->number_of_supersteps , 0 );
479+ std::vector<std::vector<unsigned > > first_at = getFirstPresence ();
480+
// A commSchedule entry scheduled in step `val` whose std::get<1>(key) processor is not the
// node's assigned processor contributes first_at[node][that processor] to step `val`.
// NOTE(review): presumably the key is (node, sending processor, receiving processor) - confirm
// against the KeyTriple definition.
479481 for (auto const &[key, val] : commSchedule)
480- comm_phase_empty[val] = false ;
482+ if (this ->assignedProcessor (std::get<0 >(key)) != std::get<1 >(key))
483+ comm_phase_latest_dependency[val] = std::max (comm_phase_latest_dependency[val], first_at[std::get<0 >(key)][std::get<1 >(key)]);
484+
485+
// For every DAG edge whose endpoints are assigned to different processors, the child's superstep
// depends on the first superstep at which the parent is present on the child's processor.
486+ for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag ().vertices ())
487+ for (const auto &child : BspSchedule<Graph_t>::instance->getComputationalDag ().children (node))
488+ if (this ->assignedProcessor (node) != this ->assignedProcessor (child))
489+ comm_phase_latest_dependency[this ->assignedSuperstep (child)] = std::max (comm_phase_latest_dependency[this ->assignedSuperstep (child)], first_at[node][this ->assignedProcessor (child)]);
490+
// Walk the supersteps from last to first. `step` is unsigned, so decrementing it past 0 wraps
// around and makes the `step < number_of_supersteps` condition fail, which ends the loop.
491+ std::vector<bool > comm_phase_deleted (this ->number_of_supersteps , false );
492+ for (unsigned step = this ->number_of_supersteps -1 ; step < this ->number_of_supersteps ; --step)
493+ {
// `limit` accumulates the maximum dependency of the steps visited in this run. Every step that
// is still strictly above `limit` is flagged and the walk moves one step down; the step that
// equals `limit` is left unflagged and the outer loop continues below it.
494+ unsigned limit = 0 ;
495+ while (step > limit)
496+ {
497+ limit = std::max (limit, comm_phase_latest_dependency[step]);
498+ if (step > limit)
499+ {
500+ comm_phase_deleted[step] = true ;
501+ --step;
502+ }
503+ }
504+ }
481505
482506 std::vector<unsigned > new_step_index (this ->number_of_supersteps );
483- unsigned current_index = 0 ;
// Starts at the maximum unsigned value so that the first increment wraps to 0. Step 0 is never
// flagged above (flagging requires step > limit >= 0), so that increment happens at step 0.
507+ unsigned current_index = std::numeric_limits< unsigned >:: max () ;
484508 for (unsigned step = 0 ; step < this ->number_of_supersteps ; ++step)
485509 {
486- new_step_index[step] = current_index;
487- if (!comm_phase_empty[step])
// A non-flagged step opens a new index; a flagged step reuses the current one.
510+ if (!comm_phase_deleted[step])
488511 current_index++;
512+
513+ new_step_index[step] = current_index;
489514 }
// Apply the renumbering to the node assignments and to the step stored for each comm entry.
490515 for (const auto & node : this ->instance ->vertices ())
491516 this ->node_to_superstep_assignment [node] = new_step_index[this ->node_to_superstep_assignment [node]];
492517 for (auto &[key, val] : commSchedule)
493518 val = new_step_index[val];
494519
495- this ->setNumberOfSupersteps (current_index);
// current_index is the last index handed out, so the count is one more (this wraps back to 0
// when there were no supersteps at all).
520+ this ->setNumberOfSupersteps (current_index+1 );
521+ }
522+
// getFirstPresence: builds and returns a table first_at[node][processor] with one row per vertex
// and one column per processor.
//  - Every cell starts at std::numeric_limits<unsigned>::max().
//  - The cell for a node's assigned processor is set to the node's assigned superstep.
//  - Each commSchedule entry lowers the cell [std::get<0>(key)][std::get<2>(key)] to val + 1
//    if that is smaller than the current value.
// Cells that are never written keep the max() sentinel.
523+ std::vector<std::vector<unsigned > > getFirstPresence () const {
524+
525+ std::vector<std::vector<unsigned > > first_at (BspSchedule<Graph_t>::instance->numberOfVertices (),
526+ std::vector<unsigned >(BspSchedule<Graph_t>::instance->numberOfProcessors (), std::numeric_limits<unsigned >::max ()));
527+
528+ for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag ().vertices ())
529+ first_at[node][this ->assignedProcessor (node)] = this ->assignedSuperstep (node);
530+
// NOTE(review): presumably std::get<2>(key) is the receiving processor and the data becomes
// usable one superstep after the step it is sent in (hence val + 1) - confirm.
531+ for (auto const &[key, val] : commSchedule)
532+ first_at[std::get<0 >(key)][std::get<2 >(key)] =
533+ std::min (first_at[std::get<0 >(key)][std::get<2 >(key)], val + 1 ); // TODO: replace by staleness after merge
534+
535+ return first_at;
536+ }
537+
538+ // remove unneeded comm. schedule entries - these can happen in ILPs, partial ILPs, etc.
// Runs two passes over commSchedule, each collecting keys in toErase and erasing them afterwards
// (so the map is never modified while it is being iterated):
//  1. entries whose data is already recorded for the std::get<2>(key) processor at an earlier
//     step, or that duplicate another record at the same step;
//  2. entries for which no use is recorded on the std::get<2>(key) processor after the entry's step.
539+ void cleanCommSchedule (){
540+
541+ // data that is already present before it arrives
// arrives_at[node][proc]: the node's assigned superstep on its own processor, plus the step
// `val` of every commSchedule entry, stored under the entry's std::get<2>(key) processor.
542+ std::vector<std::vector<std::multiset<unsigned > > > arrives_at (BspSchedule<Graph_t>::instance->numberOfVertices (),
543+ std::vector<std::multiset<unsigned > >(BspSchedule<Graph_t>::instance->numberOfProcessors ()));
544+ for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag ().vertices ())
545+ arrives_at[node][this ->assignedProcessor (node)].insert (this ->assignedSuperstep (node));
546+
547+ for (auto const &[key, val] : commSchedule)
548+ arrives_at[std::get<0 >(key)][std::get<2 >(key)].insert (val);
549+
550+ std::vector<KeyTriple> toErase;
551+ for (auto const &[key, val] : commSchedule)
552+ {
// The multiset is non-empty here because this entry's own `val` was inserted above, so
// dereferencing begin() is valid.
553+ auto itr = arrives_at[std::get<0 >(key)][std::get<2 >(key)].begin ();
// A strictly smaller recorded step exists: this entry is queued for removal.
554+ if (*itr < val)
555+ toErase.push_back (key);
// `val` is the smallest recorded step but appears at least twice: queue this entry and drop
// one copy from the multiset so the remaining record at that step is kept.
556+ else if (*itr == val && ++itr != arrives_at[std::get<0 >(key)][std::get<2 >(key)].end () && *itr == val)
557+ {
558+ toErase.push_back (key);
559+ arrives_at[std::get<0 >(key)][std::get<2 >(key)].erase (itr);
560+ }
561+ }
562+
563+ for (const KeyTriple& key : toErase)
564+ commSchedule.erase (key);
565+
566+ // data that is not used after being sent
// used_at[node][proc]: the supersteps of the node's children assigned to proc, plus the step
// `val` of every remaining commSchedule entry, stored under the entry's std::get<1>(key) processor.
567+ std::vector<std::vector<std::multiset<unsigned > > > used_at (BspSchedule<Graph_t>::instance->numberOfVertices (),
568+ std::vector<std::multiset<unsigned > >(BspSchedule<Graph_t>::instance->numberOfProcessors ()));
569+ for (const auto &node : BspSchedule<Graph_t>::instance->getComputationalDag ().vertices ())
570+ for (const auto &child : BspSchedule<Graph_t>::instance->getComputationalDag ().children (node))
571+ used_at[node][this ->assignedProcessor (child)].insert (this ->assignedSuperstep (child));
572+
573+ for (auto const &[key, val] : commSchedule)
574+ used_at[std::get<0 >(key)][std::get<1 >(key)].insert (val);
575+
576+ // (need to visit cs entries in reverse superstep order here)
// Bucket the remaining entries by the step they are scheduled in.
577+ std::vector<std::vector<KeyTriple> > entries (this ->number_of_supersteps );
578+ for (auto const &[key, val] : commSchedule)
579+ entries[val].push_back (key);
580+
581+ toErase.clear ();
// `step` is unsigned: decrementing past 0 wraps around and ends the loop via the `<` test.
582+ for (unsigned step = this ->number_of_supersteps -1 ; step < this ->number_of_supersteps ; --step)
583+ for (const KeyTriple& key : entries[step])
// Queue the entry if nothing is recorded as used on the std::get<2>(key) processor, or if the
// latest recorded use there is not after this step.
584+ if (used_at[std::get<0 >(key)][std::get<2 >(key)].empty () ||
585+ *used_at[std::get<0 >(key)][std::get<2 >(key)].rbegin () <= step)
586+ {
587+ toErase.push_back (key);
// Remove the use this entry itself recorded on the std::get<1>(key) processor, so entries in
// earlier steps are judged against the updated set. The find() succeeds because that value was
// inserted once per entry above and is removed at most once per entry here.
588+ auto itr = used_at[std::get<0 >(key)][std::get<1 >(key)].find (step);
589+ used_at[std::get<0 >(key)][std::get<1 >(key)].erase (itr);
590+ }
591+
592+ for (const KeyTriple& key : toErase)
593+ commSchedule.erase (key);
496594 }
497595};
498596
0 commit comments