Skip to content

Commit d784f84

Browse files
committed
added maxBspSchedule, maxBspScheduleCS
added tests for maxbsp
1 parent 4d1fe34 commit d784f84

File tree

5 files changed

+534
-140
lines changed

5 files changed

+534
-140
lines changed

include/osp/bsp/model/BspSchedule.hpp

Lines changed: 109 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,105 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
6767
std::vector<unsigned> node_to_processor_assignment;
6868
std::vector<unsigned> node_to_superstep_assignment;
6969

70+
template<unsigned staleness>
71+
inline bool satisfies_precedence_constraints_staleness() const {
72+
73+
if (node_to_processor_assignment.size() != instance->numberOfVertices() ||
74+
node_to_superstep_assignment.size() != instance->numberOfVertices()) {
75+
return false;
76+
}
77+
78+
for (const auto &v : instance->vertices()) {
79+
80+
if (node_to_superstep_assignment[v] >= number_of_supersteps) {
81+
return false;
82+
}
83+
84+
if (node_to_processor_assignment[v] >= instance->numberOfProcessors()) {
85+
return false;
86+
}
87+
88+
for (const auto &target : instance->getComputationalDag().children(v)) {
89+
90+
const unsigned different_processors =
91+
(node_to_processor_assignment[v] == node_to_processor_assignment[target]) ? 0u : staleness;
92+
93+
if (node_to_superstep_assignment[v] + different_processors > node_to_superstep_assignment[target]) {
94+
return false;
95+
}
96+
}
97+
}
98+
99+
return true;
100+
}
101+
102+
/**
 * @brief Adds the send/receive volumes of a "lazy" communication pattern to the given tables.
 *
 * For every vertex and every processor other than the vertex's own that hosts at least one
 * of its children, the amount sendCosts(source_proc, dest_proc) * vertex_comm_weight(vertex)
 * is added once, in the superstep immediately before the earliest superstep in which such a
 * child is scheduled on that processor.
 *
 * @param rec  Table indexed as rec[processor][superstep]; received volume is added in place.
 * @param send Table indexed as send[processor][superstep]; sent volume is added in place.
 *
 * NOTE(review): the target superstep is computed as `earliest_step - 1` in unsigned arithmetic,
 * so a cross-processor child in superstep 0 would wrap the index. Presumably this is only
 * called on schedules that satisfy the precedence constraints - confirm against callers.
 */
void compute_lazy_communication_costs_helper(std::vector<std::vector<v_commw_t<Graph_t>>> & rec, std::vector<std::vector<v_commw_t<Graph_t>>> & send) const {
    const auto processor_count = instance->numberOfProcessors();
    const auto &dag = instance->getComputationalDag();

    for (const auto &node : instance->vertices()) {

        const unsigned source_proc = node_to_processor_assignment[node];

        // Earliest superstep in which each remote processor hosts a child of `node`;
        // the value number_of_supersteps marks "no child on that processor".
        std::vector<unsigned> earliest_use(processor_count, number_of_supersteps);
        for (const auto &child : dag.children(node)) {

            const unsigned child_proc = node_to_processor_assignment[child];
            if (child_proc == source_proc) {
                continue;
            }
            earliest_use[child_proc] = std::min(earliest_use[child_proc], node_to_superstep_assignment[child]);
        }

        for (unsigned dest_proc = 0; dest_proc < processor_count; dest_proc++) {

            if (earliest_use[dest_proc] >= number_of_supersteps) {
                continue;
            }

            // The value is communicated in the superstep right before it is first needed.
            const unsigned comm_step = earliest_use[dest_proc] - 1;
            const auto volume = instance->sendCosts(source_proc, dest_proc) * dag.vertex_comm_weight(node);

            send[source_proc][comm_step] += volume;
            rec[dest_proc][comm_step] += volume;
        }
    }
}
128+
129+
/**
 * @brief Derives the communication cost of every superstep from send/receive tables.
 *
 * For each superstep the largest entry over all processors is taken from `send` and from
 * `rec`; the larger of the two maxima is multiplied by instance->communicationCosts().
 *
 * @param rec  Table indexed as rec[processor][superstep].
 * @param send Table indexed as send[processor][superstep].
 * @return Vector with number_of_supersteps entries, one cost per superstep.
 */
std::vector<v_commw_t<Graph_t>> compute_max_comm_per_step_helper(const std::vector<std::vector<v_commw_t<Graph_t>>> & rec, const std::vector<std::vector<v_commw_t<Graph_t>>> & send) const {
    const auto processor_count = instance->numberOfProcessors();

    std::vector<v_commw_t<Graph_t>> cost_per_step(number_of_supersteps, 0);
    for (unsigned step = 0; step < number_of_supersteps; step++) {

        // Running maxima start at 0, so a superstep without traffic yields cost 0.
        v_commw_t<Graph_t> heaviest_send = 0;
        v_commw_t<Graph_t> heaviest_rec = 0;
        for (unsigned proc = 0; proc < processor_count; proc++) {
            heaviest_send = std::max(heaviest_send, send[proc][step]);
            heaviest_rec = std::max(heaviest_rec, rec[proc][step]);
        }

        cost_per_step[step] = std::max(heaviest_send, heaviest_rec) * instance->communicationCosts();
    }

    return cost_per_step;
}
145+
146+
/**
 * @brief Computes, for every superstep, the largest total work assigned to a single processor.
 *
 * The work weight of each vertex is accumulated into the (superstep, processor) cell given by
 * its assignment; the per-superstep result is the maximum cell of that superstep (0 if no
 * cell exceeds 0).
 *
 * @return Vector with number_of_supersteps entries, one maximum per superstep.
 */
std::vector<v_workw_t<Graph_t>> compute_max_work_per_step_helper() const {
    const auto processor_count = instance->numberOfProcessors();
    const auto &dag = instance->getComputationalDag();

    // load_table[step][proc] = summed work weight of the vertices scheduled there.
    std::vector<std::vector<v_workw_t<Graph_t>>> load_table(
        number_of_supersteps, std::vector<v_workw_t<Graph_t>>(processor_count, 0));
    for (const auto &node : instance->vertices()) {
        load_table[node_to_superstep_assignment[node]][node_to_processor_assignment[node]] +=
            dag.vertex_work_weight(node);
    }

    std::vector<v_workw_t<Graph_t>> max_work_per_step;
    max_work_per_step.reserve(number_of_supersteps);
    for (const auto &step_loads : load_table) {
        v_workw_t<Graph_t> heaviest = 0;
        for (const auto &load : step_loads) {
            heaviest = std::max(heaviest, load);
        }
        max_work_per_step.push_back(heaviest);
    }

    return max_work_per_step;
}
168+
70169
public:
71170

72171
BspSchedule() = delete;
@@ -169,11 +268,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
169268
* @note Returns nothing: number_of_supersteps is recomputed in place from node_to_superstep_assignment.
170269
*/
171270
void updateNumberOfSupersteps() {
172-
173271
number_of_supersteps = 0;
174-
175272
for (unsigned i = 0; i < instance->numberOfVertices(); ++i) {
176-
177273
if (node_to_superstep_assignment[i] >= number_of_supersteps) {
178274
number_of_supersteps = node_to_superstep_assignment[i] + 1;
179275
}
@@ -325,31 +421,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
325421
}
326422

327423
virtual v_workw_t<Graph_t> computeWorkCosts() const override {
328-
329-
std::vector<std::vector<v_workw_t<Graph_t>>> work = std::vector<std::vector<v_workw_t<Graph_t>>>(
330-
number_of_supersteps, std::vector<v_workw_t<Graph_t>>(instance->numberOfProcessors(), 0));
331-
332-
for (const auto &node : instance->vertices()) {
333-
work[node_to_superstep_assignment[node]][node_to_processor_assignment[node]] +=
334-
instance->getComputationalDag().vertex_work_weight(node);
335-
}
336-
337-
v_workw_t<Graph_t> total_costs = 0;
338-
for (unsigned step = 0; step < number_of_supersteps; step++) {
339-
340-
v_workw_t<Graph_t> max_work = 0;
341-
342-
for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
343-
344-
if (max_work < work[step][proc]) {
345-
max_work = work[step][proc];
346-
}
347-
}
348-
349-
total_costs += max_work;
350-
}
351-
352-
return total_costs;
424+
const std::vector<v_workw_t<Graph_t>> work_per_step = compute_max_work_per_step_helper();
425+
return std::accumulate(work_per_step.begin(), work_per_step.end(), static_cast<v_workw_t<Graph_t>>(0));
353426
}
354427

355428
double compute_total_communication_costs() const {
@@ -453,27 +526,17 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
453526
}
454527
}
455528

529+
const std::vector<v_commw_t<Graph_t>> max_comm_per_step = compute_max_comm_per_step_helper(rec, send);
530+
456531
v_commw_t<Graph_t> costs = 0;
457532
for (unsigned step = 0; step < number_of_supersteps; step++) {
458-
v_commw_t<Graph_t> max_send = 0;
459-
v_commw_t<Graph_t> max_rec = 0;
460-
461-
for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
462-
if (max_send < send[proc][step])
463-
max_send = send[proc][step];
464-
if (max_rec < rec[proc][step])
465-
max_rec = rec[proc][step];
466-
}
467-
468-
const auto step_comm_cost = std::max(max_send, max_rec) * instance->communicationCosts();
469-
533+
const auto step_comm_cost = max_comm_per_step[step];
470534
costs += step_comm_cost;
471535

472536
if (step_comm_cost > 0) {
473537
costs += instance->synchronisationCosts();
474538
}
475539
}
476-
477540
return costs;
478541
}
479542

@@ -490,45 +553,12 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
490553
std::vector<std::vector<v_commw_t<Graph_t>>> send(instance->numberOfProcessors(),
491554
std::vector<v_commw_t<Graph_t>>(number_of_supersteps, 0));
492555

493-
for (const auto &node : instance->vertices()) {
494-
495-
std::vector<unsigned> step_needed(instance->numberOfProcessors(), number_of_supersteps);
496-
for (const auto &target : instance->getComputationalDag().children(node)) {
497-
498-
if (node_to_processor_assignment[node] != node_to_processor_assignment[target]) {
499-
step_needed[node_to_processor_assignment[target]] = std::min(
500-
step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]);
501-
}
502-
}
503-
504-
for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
505-
506-
if (step_needed[proc] < number_of_supersteps) {
507-
508-
send[node_to_processor_assignment[node]][step_needed[proc] - 1] +=
509-
instance->sendCosts(node_to_processor_assignment[node], proc) *
510-
instance->getComputationalDag().vertex_comm_weight(node);
511-
512-
rec[proc][step_needed[proc] - 1] += instance->sendCosts(node_to_processor_assignment[node], proc) *
513-
instance->getComputationalDag().vertex_comm_weight(node);
514-
}
515-
}
516-
}
556+
compute_lazy_communication_costs_helper(rec, send);
557+
const std::vector<v_commw_t<Graph_t>> max_comm_per_step = compute_max_comm_per_step_helper(rec, send);
517558

518559
v_commw_t<Graph_t> costs = 0;
519560
for (unsigned step = 0; step < number_of_supersteps; step++) {
520-
v_commw_t<Graph_t> max_send = 0;
521-
v_commw_t<Graph_t> max_rec = 0;
522-
523-
for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
524-
if (max_send < send[proc][step])
525-
max_send = send[proc][step];
526-
if (max_rec < rec[proc][step])
527-
max_rec = rec[proc][step];
528-
}
529-
530-
const auto step_comm_cost = std::max(max_send, max_rec) * instance->communicationCosts();
531-
561+
const auto step_comm_cost = max_comm_per_step[step];
532562
costs += step_comm_cost;
533563

534564
if (step_comm_cost > 0) {
@@ -550,38 +580,8 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
550580
*
551581
* @return True if the schedule satisfies the precedence constraints of the computational DAG, false otherwise.
552582
*/
553-
bool satisfiesPrecedenceConstraints() const {
554-
555-
if (node_to_processor_assignment.size() != instance->numberOfVertices() ||
556-
node_to_superstep_assignment.size() != instance->numberOfVertices()) {
557-
return false;
558-
}
559-
560-
for (const auto &v : instance->vertices()) {
561-
562-
if (node_to_superstep_assignment[v] >= number_of_supersteps) {
563-
return false;
564-
}
565-
566-
if (node_to_processor_assignment[v] >= instance->numberOfProcessors()) {
567-
return false;
568-
}
569-
570-
for (const auto &target : instance->getComputationalDag().children(v)) {
571-
572-
const unsigned different_processors =
573-
(node_to_processor_assignment[v] == node_to_processor_assignment[target]) ? 0u : 1u;
574-
575-
if (node_to_superstep_assignment[v] + different_processors > node_to_superstep_assignment[target]) {
576-
// std::cout << "This is not a valid scheduling (problems with nodes " << v << " and " << target <<
577-
// ")."
578-
// << std::endl; // todo should be removed
579-
return false;
580-
}
581-
}
582-
}
583-
584-
return true;
583+
virtual bool satisfiesPrecedenceConstraints() const {
584+
return satisfies_precedence_constraints_staleness<1>();
585585
};
586586

587587
bool satisfiesNodeTypeConstraints() const {

include/osp/bsp/model/BspScheduleCS.hpp

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,19 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
6363
// contains entries: (vertex, from_proc, to_proc ) : step
6464
std::map<KeyTriple, unsigned> commSchedule;
6565

66+
protected:
67+
68+
/**
 * @brief Adds the volumes of the explicit communication schedule to the given tables.
 *
 * Every commSchedule entry (vertex, from_proc, to_proc) -> step contributes
 * sendCosts(from_proc, to_proc) * vertex_comm_weight(vertex) to send[from_proc][step]
 * and to rec[to_proc][step].
 *
 * @param rec  Table indexed as rec[processor][superstep]; received volume is added in place.
 * @param send Table indexed as send[processor][superstep]; sent volume is added in place.
 */
void compute_cs_communication_costs_helper(std::vector<std::vector<v_commw_t<Graph_t>>> & rec, std::vector<std::vector<v_commw_t<Graph_t>>> & send) const {
    const auto &dag = BspSchedule<Graph_t>::instance->getComputationalDag();

    for (auto const &[comm_key, comm_step] : commSchedule) {
        const auto &vertex = std::get<0>(comm_key);
        const auto &from_proc = std::get<1>(comm_key);
        const auto &to_proc = std::get<2>(comm_key);

        // The same amount is booked on the sending and on the receiving side.
        const auto volume =
            BspSchedule<Graph_t>::instance->sendCosts(from_proc, to_proc) * dag.vertex_comm_weight(vertex);

        send[from_proc][comm_step] += volume;
        rec[to_proc][comm_step] += volume;
    }
}
78+
6679
public:
6780
BspScheduleCS() = delete;
6881

@@ -232,45 +245,29 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
232245
v_commw_t<Graph_t> compute_cs_communication_costs() const {
233246

234247
std::vector<std::vector<v_commw_t<Graph_t>>> rec(
235-
BspSchedule<Graph_t>::number_of_supersteps,
236-
std::vector<v_commw_t<Graph_t>>(BspSchedule<Graph_t>::instance->numberOfProcessors(), 0));
248+
BspSchedule<Graph_t>::instance->numberOfProcessors(),
249+
std::vector<v_commw_t<Graph_t>>(BspSchedule<Graph_t>::number_of_supersteps, 0));
237250
std::vector<std::vector<v_commw_t<Graph_t>>> send(
238-
BspSchedule<Graph_t>::number_of_supersteps,
239-
std::vector<v_commw_t<Graph_t>>(BspSchedule<Graph_t>::instance->numberOfProcessors(), 0));
240-
241-
for (auto const &[key, val] : commSchedule) {
251+
BspSchedule<Graph_t>::instance->numberOfProcessors(),
252+
std::vector<v_commw_t<Graph_t>>(BspSchedule<Graph_t>::number_of_supersteps, 0));
242253

243-
send[val][std::get<1>(key)] +=
244-
BspSchedule<Graph_t>::instance->sendCosts(std::get<1>(key), std::get<2>(key)) *
245-
BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
246-
rec[val][std::get<2>(key)] +=
247-
BspSchedule<Graph_t>::instance->sendCosts(std::get<1>(key), std::get<2>(key)) *
248-
BspSchedule<Graph_t>::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key));
249-
}
254+
compute_cs_communication_costs_helper(rec, send);
255+
const std::vector<v_commw_t<Graph_t>> max_comm_per_step = this->compute_max_comm_per_step_helper(rec, send);
250256

251-
v_commw_t<Graph_t> comm_cost = 0;
252-
for (unsigned step = 0; step < BspSchedule<Graph_t>::number_of_supersteps; step++) {
257+
v_commw_t<Graph_t> costs = 0;
258+
for (unsigned step = 0; step < this->number_of_supersteps; step++) {
259+
const auto step_comm_cost = max_comm_per_step[step];
260+
costs += step_comm_cost;
253261

254-
v_commw_t<Graph_t> max_comm = 0;
255-
256-
for (unsigned proc = 0; proc < BspSchedule<Graph_t>::instance->numberOfProcessors(); proc++) {
257-
if (max_comm < send[step][proc])
258-
max_comm = send[step][proc];
259-
if (max_comm < rec[step][proc])
260-
max_comm = rec[step][proc];
261-
}
262-
263-
if (max_comm > 0) {
264-
comm_cost += BspSchedule<Graph_t>::instance->synchronisationCosts() +
265-
max_comm * BspSchedule<Graph_t>::instance->communicationCosts();
262+
if (step_comm_cost > 0) {
263+
costs += this->instance->synchronisationCosts();
266264
}
267265
}
268-
269-
return comm_cost;
266+
return costs;
270267
}
271268

272269
virtual v_workw_t<Graph_t> computeCosts() const override {
273-
return compute_cs_communication_costs() + BspSchedule<Graph_t>::computeWorkCosts();
270+
return compute_cs_communication_costs() + this->computeWorkCosts();
274271
}
275272

276273
void setAutoCommunicationSchedule() {

0 commit comments

Comments
 (0)