@@ -67,6 +67,105 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
6767 std::vector<unsigned > node_to_processor_assignment;
6868 std::vector<unsigned > node_to_superstep_assignment;
6969
// Checks that the stored assignment is well-formed and respects the precedence
// constraints of the computational DAG, with a configurable cross-processor delay.
//
// Template parameter `staleness`: the number of supersteps that must separate a
// vertex from a child assigned to a different processor. A child on the same
// processor may share its parent's superstep.
//
// Returns false if
//   - either assignment vector's size differs from the number of vertices,
//   - some vertex has a superstep index >= number_of_supersteps,
//   - some vertex has a processor index >= instance->numberOfProcessors(), or
//   - some edge (v, target) has
//     superstep(v) + (same processor ? 0 : staleness) > superstep(target);
// returns true otherwise.
70+ template <unsigned staleness>
71+ inline bool satisfies_precedence_constraints_staleness () const {
72+
// Both assignment vectors must hold exactly one entry per vertex.
73+ if (node_to_processor_assignment.size () != instance->numberOfVertices () ||
74+ node_to_superstep_assignment.size () != instance->numberOfVertices ()) {
75+ return false ;
76+ }
77+
78+ for (const auto &v : instance->vertices ()) {
79+
// Range checks on the superstep and processor assigned to v.
80+ if (node_to_superstep_assignment[v] >= number_of_supersteps) {
81+ return false ;
82+ }
83+
84+ if (node_to_processor_assignment[v] >= instance->numberOfProcessors ()) {
85+ return false ;
86+ }
87+
88+ for (const auto &target : instance->getComputationalDag ().children (v)) {
89+
// Required gap in supersteps: 0 on the same processor, `staleness` across processors.
90+ const unsigned different_processors =
91+ (node_to_processor_assignment[v] == node_to_processor_assignment[target]) ? 0u : staleness;
92+
93+ if (node_to_superstep_assignment[v] + different_processors > node_to_superstep_assignment[target]) {
94+ return false ;
95+ }
96+ }
97+ }
98+
99+ return true ;
100+ }
101+
// Accumulates communication volumes into `send` and `rec`, both indexed as
// [processor][superstep].
//
// For every vertex and every processor that hosts at least one of its children
// (other than the vertex's own processor), the earliest superstep s among those
// children is determined. The amount
//   sendCosts(source processor, proc) * vertex_comm_weight(node)
// is then added to send[source processor][s - 1] and rec[proc][s - 1], i.e. the
// transfer is charged to the superstep immediately before the data is first needed.
//
// The function only adds to existing entries (+=); it neither resizes nor resets
// `rec` and `send`, so callers must pass tables that are already sized and initialised.
102+ void compute_lazy_communication_costs_helper (std::vector<std::vector<v_commw_t <Graph_t>>> & rec, std::vector<std::vector<v_commw_t <Graph_t>>> & send) const {
103+ for (const auto &node : instance->vertices ()) {
104+
// step_needed[p] = earliest superstep of a child of `node` placed on processor p;
// number_of_supersteps acts as the "no such child" sentinel.
105+ std::vector<unsigned > step_needed (instance->numberOfProcessors (), number_of_supersteps);
106+ for (const auto &target : instance->getComputationalDag ().children (node)) {
107+
108+ if (node_to_processor_assignment[node] != node_to_processor_assignment[target]) {
109+ step_needed[node_to_processor_assignment[target]] = std::min (
110+ step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]);
111+ }
112+ }
113+
114+ for (unsigned proc = 0 ; proc < instance->numberOfProcessors (); proc++) {
115+
116+ if (step_needed[proc] < number_of_supersteps) {
117+
// NOTE(review): step_needed[proc] is unsigned, so if a child on another processor is
// assigned superstep 0 the index `step_needed[proc] - 1` wraps around and is out of
// range. Schedules accepted by satisfies_precedence_constraints_staleness with
// staleness >= 1 cannot trigger this -- confirm callers only pass such schedules.
118+ send[node_to_processor_assignment[node]][step_needed[proc] - 1 ] +=
119+ instance->sendCosts (node_to_processor_assignment[node], proc) *
120+ instance->getComputationalDag ().vertex_comm_weight (node);
121+
122+ rec[proc][step_needed[proc] - 1 ] += instance->sendCosts (node_to_processor_assignment[node], proc) *
123+ instance->getComputationalDag ().vertex_comm_weight (node);
124+ }
125+ }
126+ }
127+ }
128+
// Computes the communication cost of each superstep from the given volume tables.
//
// `rec` and `send` are read as [processor][superstep]. For each superstep the
// largest send volume and the largest receive volume over all processors are
// found; the larger of the two is multiplied by instance->communicationCosts().
//
// Returns a vector with one entry per superstep (length number_of_supersteps).
// Synchronisation costs are not added here.
129+ std::vector<v_commw_t <Graph_t>> compute_max_comm_per_step_helper (const std::vector<std::vector<v_commw_t <Graph_t>>> & rec, const std::vector<std::vector<v_commw_t <Graph_t>>> & send) const {
130+ std::vector<v_commw_t <Graph_t>> max_comm_per_step (number_of_supersteps, 0 );
131+ for (unsigned step = 0 ; step < number_of_supersteps; step++) {
132+ v_commw_t <Graph_t> max_send = 0 ;
133+ v_commw_t <Graph_t> max_rec = 0 ;
134+
// Running maxima over all processors for this superstep.
135+ for (unsigned proc = 0 ; proc < instance->numberOfProcessors (); proc++) {
136+ if (max_send < send[proc][step])
137+ max_send = send[proc][step];
138+ if (max_rec < rec[proc][step])
139+ max_rec = rec[proc][step];
140+ }
141+ max_comm_per_step[step] = std::max (max_send, max_rec) * instance->communicationCosts ();
142+ }
143+ return max_comm_per_step;
144+ }
145+
// Computes, for each superstep, the largest amount of work assigned to any single
// processor in that superstep.
//
// The work of a (superstep, processor) pair is the sum of vertex_work_weight over
// all vertices assigned to it.
//
// Returns a vector with one entry per superstep (length number_of_supersteps).
146+ std::vector<v_workw_t <Graph_t>> compute_max_work_per_step_helper () const {
// work[step][proc]: note the index order is [superstep][processor] here.
147+ std::vector<std::vector<v_workw_t <Graph_t>>> work = std::vector<std::vector<v_workw_t <Graph_t>>>(
148+ number_of_supersteps, std::vector<v_workw_t <Graph_t>>(instance->numberOfProcessors (), 0 ));
149+ for (const auto &node : instance->vertices ()) {
150+ work[node_to_superstep_assignment[node]][node_to_processor_assignment[node]] +=
151+ instance->getComputationalDag ().vertex_work_weight (node);
152+ }
153+
154+ std::vector<v_workw_t <Graph_t>> max_work_per_step (number_of_supersteps, 0 );
155+ for (unsigned step = 0 ; step < number_of_supersteps; step++) {
156+ v_workw_t <Graph_t> max_work = 0 ;
157+ for (unsigned proc = 0 ; proc < instance->numberOfProcessors (); proc++) {
158+ if (max_work < work[step][proc]) {
159+ max_work = work[step][proc];
160+ }
161+ }
162+
163+ max_work_per_step[step] = max_work;
164+ }
165+
166+ return max_work_per_step;
167+ }
168+
70169 public:
71170
72171 BspSchedule () = delete ;
@@ -169,11 +268,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
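169268 * @note Returns nothing: recomputes number_of_supersteps as one more than the largest entry of node_to_superstep_assignment (0 if there are no vertices).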
170269 */
171270 void updateNumberOfSupersteps () {
172-
173271 number_of_supersteps = 0 ;
174-
175272 for (unsigned i = 0 ; i < instance->numberOfVertices (); ++i) {
176-
177273 if (node_to_superstep_assignment[i] >= number_of_supersteps) {
178274 number_of_supersteps = node_to_superstep_assignment[i] + 1 ;
179275 }
@@ -325,31 +421,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
325421 }
326422
// Total work cost of the schedule: the sum, over all supersteps, of the maximum
// work assigned to any single processor in that superstep.
// The removed ("-") lines computed this inline; the added ("+") lines obtain the
// per-superstep maxima from compute_max_work_per_step_helper() and sum them.
327423 virtual v_workw_t <Graph_t> computeWorkCosts () const override {
328-
329- std::vector<std::vector<v_workw_t <Graph_t>>> work = std::vector<std::vector<v_workw_t <Graph_t>>>(
330- number_of_supersteps, std::vector<v_workw_t <Graph_t>>(instance->numberOfProcessors (), 0 ));
331-
332- for (const auto &node : instance->vertices ()) {
333- work[node_to_superstep_assignment[node]][node_to_processor_assignment[node]] +=
334- instance->getComputationalDag ().vertex_work_weight (node);
335- }
336-
337- v_workw_t <Graph_t> total_costs = 0 ;
338- for (unsigned step = 0 ; step < number_of_supersteps; step++) {
339-
340- v_workw_t <Graph_t> max_work = 0 ;
341-
342- for (unsigned proc = 0 ; proc < instance->numberOfProcessors (); proc++) {
343-
344- if (max_work < work[step][proc]) {
345- max_work = work[step][proc];
346- }
347- }
348-
349- total_costs += max_work;
350- }
351-
352- return total_costs;
// NOTE(review): std::accumulate is declared in <numeric>; the include list is not
// visible in this hunk -- confirm the header is included.
// The explicit static_cast makes the accumulator type v_workw_t<Graph_t>.
424+ const std::vector<v_workw_t <Graph_t>> work_per_step = compute_max_work_per_step_helper ();
425+ return std::accumulate (work_per_step.begin (), work_per_step.end (), static_cast <v_workw_t <Graph_t>>(0 ));
353426 }
354427
355428 double compute_total_communication_costs () const {
@@ -453,27 +526,17 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
453526 }
454527 }
455528
529+ const std::vector<v_commw_t <Graph_t>> max_comm_per_step = compute_max_comm_per_step_helper (rec, send);
530+
456531 v_commw_t <Graph_t> costs = 0 ;
457532 for (unsigned step = 0 ; step < number_of_supersteps; step++) {
458- v_commw_t <Graph_t> max_send = 0 ;
459- v_commw_t <Graph_t> max_rec = 0 ;
460-
461- for (unsigned proc = 0 ; proc < instance->numberOfProcessors (); proc++) {
462- if (max_send < send[proc][step])
463- max_send = send[proc][step];
464- if (max_rec < rec[proc][step])
465- max_rec = rec[proc][step];
466- }
467-
468- const auto step_comm_cost = std::max (max_send, max_rec) * instance->communicationCosts ();
469-
533+ const auto step_comm_cost = max_comm_per_step[step];
470534 costs += step_comm_cost;
471535
472536 if (step_comm_cost > 0 ) {
473537 costs += instance->synchronisationCosts ();
474538 }
475539 }
476-
477540 return costs;
478541 }
479542
@@ -490,45 +553,12 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
490553 std::vector<std::vector<v_commw_t <Graph_t>>> send (instance->numberOfProcessors (),
491554 std::vector<v_commw_t <Graph_t>>(number_of_supersteps, 0 ));
492555
493- for (const auto &node : instance->vertices ()) {
494-
495- std::vector<unsigned > step_needed (instance->numberOfProcessors (), number_of_supersteps);
496- for (const auto &target : instance->getComputationalDag ().children (node)) {
497-
498- if (node_to_processor_assignment[node] != node_to_processor_assignment[target]) {
499- step_needed[node_to_processor_assignment[target]] = std::min (
500- step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]);
501- }
502- }
503-
504- for (unsigned proc = 0 ; proc < instance->numberOfProcessors (); proc++) {
505-
506- if (step_needed[proc] < number_of_supersteps) {
507-
508- send[node_to_processor_assignment[node]][step_needed[proc] - 1 ] +=
509- instance->sendCosts (node_to_processor_assignment[node], proc) *
510- instance->getComputationalDag ().vertex_comm_weight (node);
511-
512- rec[proc][step_needed[proc] - 1 ] += instance->sendCosts (node_to_processor_assignment[node], proc) *
513- instance->getComputationalDag ().vertex_comm_weight (node);
514- }
515- }
516- }
556+ compute_lazy_communication_costs_helper (rec, send);
557+ const std::vector<v_commw_t <Graph_t>> max_comm_per_step = compute_max_comm_per_step_helper (rec, send);
517558
518559 v_commw_t <Graph_t> costs = 0 ;
519560 for (unsigned step = 0 ; step < number_of_supersteps; step++) {
520- v_commw_t <Graph_t> max_send = 0 ;
521- v_commw_t <Graph_t> max_rec = 0 ;
522-
523- for (unsigned proc = 0 ; proc < instance->numberOfProcessors (); proc++) {
524- if (max_send < send[proc][step])
525- max_send = send[proc][step];
526- if (max_rec < rec[proc][step])
527- max_rec = rec[proc][step];
528- }
529-
530- const auto step_comm_cost = std::max (max_send, max_rec) * instance->communicationCosts ();
531-
561+ const auto step_comm_cost = max_comm_per_step[step];
532562 costs += step_comm_cost;
533563
534564 if (step_comm_cost > 0 ) {
@@ -550,38 +580,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
550580 *
551581 * @return True if the schedule satisfies the precedence constraints of the computational DAG, false otherwise.
552582 */
553- bool satisfiesPrecedenceConstraints () const {
554-
555- if (node_to_processor_assignment.size () != instance->numberOfVertices () ||
556- node_to_superstep_assignment.size () != instance->numberOfVertices ()) {
557- return false ;
558- }
559-
560- for (const auto &v : instance->vertices ()) {
561-
562- if (node_to_superstep_assignment[v] >= number_of_supersteps) {
563- return false ;
564- }
565-
566- if (node_to_processor_assignment[v] >= instance->numberOfProcessors ()) {
567- return false ;
568- }
569-
570- for (const auto &target : instance->getComputationalDag ().children (v)) {
571-
572- const unsigned different_processors =
573- (node_to_processor_assignment[v] == node_to_processor_assignment[target]) ? 0u : 1u ;
574-
575- if (node_to_superstep_assignment[v] + different_processors > node_to_superstep_assignment[target]) {
576- // std::cout << "This is not a valid scheduling (problems with nodes " << v << " and " << target <<
577- // ")."
578- // << std::endl; // todo should be removed
579- return false ;
580- }
581- }
582- }
583-
584- return true ;
// The added ("+") version is declared virtual and delegates to the templated check
// with staleness 1: a child on a different processor must be at least one superstep
// after its parent, matching the `0u : 1u` gap used by the removed inline code.
583+ virtual bool satisfiesPrecedenceConstraints () const {
584+ return satisfies_precedence_constraints_staleness<1 >();
585585 };
586586
587587 bool satisfiesNodeTypeConstraints () const {
0 commit comments