Skip to content

Commit f9ddfa7

Browse files
CoptFullScheduler for MaxBSP
1 parent 80a9ad6 commit f9ddfa7

File tree

4 files changed

+151
-91
lines changed

4 files changed

+151
-91
lines changed

include/osp/bsp/model/BspScheduleCS.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,8 @@ class BspScheduleCS : public BspSchedule<Graph_t> {
472472
}
473473
}
474474
}
475+
476+
/// Runtime query for the schedule flavour. This class always answers false;
/// MaxBspScheduleCS overrides the query to answer true.
virtual bool isMaxBsp() const
{
    return false;
}
475477
};
476478

477479
} // namespace osp

include/osp/bsp/model/MaxBspScheduleCS.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ class MaxBspScheduleCS : public BspScheduleCS<Graph_t> {
117117
}
118118
return costs;
119119
}
120+
121+
/// Overrides the base-class query so that code holding a BspScheduleCS
/// reference can detect a MaxBSP schedule; always answers true.
/// `override` already implies virtual dispatch, so the redundant `virtual`
/// keyword is omitted.
bool isMaxBsp() const override { return true; }
120122
};
121123

122124
} // namespace osp

include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp

Lines changed: 130 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ limitations under the License.
2424
#include "osp/bsp/model/BspSchedule.hpp"
2525
#include "osp/bsp/model/BspScheduleCS.hpp"
2626
#include "osp/bsp/model/BspScheduleRecomp.hpp"
27+
#include "osp/bsp/model/MaxBspSchedule.hpp"
28+
#include "osp/bsp/model/MaxBspScheduleCS.hpp"
2729
#include "osp/bsp/model/VectorSchedule.hpp"
2830
#include "osp/bsp/scheduler/Scheduler.hpp"
2931
#include "osp/auxiliary/io/DotFileWriter.hpp"
@@ -60,6 +62,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
6062

6163
private:
6264
bool allow_recomputation;
65+
bool is_max_bsp = false;
6366
bool use_memory_constraint;
6467
bool use_initial_schedule = false;
6568
const BspScheduleCS<Graph_t> *initial_schedule;
@@ -230,6 +233,15 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
230233

231234
const auto &instance = schedule.getInstance();
232235

236+
unsigned number_of_supersteps = 0;
237+
238+
for (unsigned step = 0; step < max_number_supersteps; step++) {
239+
240+
if (superstep_used_var[static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
241+
number_of_supersteps++;
242+
}
243+
}
244+
233245
for (const auto &node : instance.vertices()) {
234246

235247
for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) {
@@ -245,12 +257,15 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
245257
}
246258
}
247259

260+
if(is_max_bsp && number_of_supersteps>0) // can ignore last 2 comm phases in this case
261+
--number_of_supersteps;
262+
248263
for (const auto &node : instance.vertices()) {
249264

250265
for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
251266
for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) {
252267
if (p_from != p_to) {
253-
for (unsigned int step = 0; step < max_number_supersteps; step++) {
268+
for (unsigned int step = 0; step < number_of_supersteps-1; step++) {
254269
if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step]
255270
[static_cast<int>(node)]
256271
.Get(COPT_DBLINFO_VALUE) >= .99) {
@@ -284,7 +299,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
284299

285300
for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) {
286301

287-
for (unsigned step = 0; step < max_number_supersteps; step++) {
302+
for (unsigned step = 0; step < number_of_supersteps-1; step++) {
288303

289304
if (node_to_processor_superstep_var[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
290305
schedule.assignments(node).emplace_back(processor, step);
@@ -316,7 +331,23 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
316331
}
317332

318333

319-
void loadInitialSchedule(Model &model) {
334+
void loadInitialSchedule(Model &model, const BspInstance<Graph_t> &instance) {
335+
336+
// Reject a recomputation initial schedule that cannot seed this model: it
// must fit into max_number_supersteps and agree with the target instance on
// processor and vertex counts.
if (use_initial_schedule_recomp &&
    (max_number_supersteps < initial_schedule_recomp->numberOfSupersteps() ||
     instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() ||
     instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) {
    throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not "
                                "agree with those of the initial schedule's instance!");
}

// The plain initial schedule is only validated when no recomputation schedule
// was supplied. Logical && (instead of the former bitwise &) states that
// intent directly and short-circuits like the rest of the condition.
if (!use_initial_schedule_recomp && use_initial_schedule &&
    (max_number_supersteps < initial_schedule->numberOfSupersteps() ||
     instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() ||
     instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) {
    throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not "
                                "agree with those of the initial schedule's instance!");
}
320351

321352
const auto& DAG = use_initial_schedule_recomp ?
322353
initial_schedule_recomp->getInstance().getComputationalDag() :
@@ -362,6 +393,22 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
362393
computed[node].emplace(initial_schedule->assignedProcessor(node),initial_schedule->assignedSuperstep(node));
363394
}
364395

396+
std::vector<std::vector<unsigned> > first_at(DAG.num_vertices(), std::vector<unsigned>(num_processors, std::numeric_limits<unsigned>::max()));
397+
for (const auto &node : DAG.vertices())
398+
{
399+
if(use_initial_schedule_recomp)
400+
{
401+
for (const std::pair<unsigned, unsigned>& assignment : initial_schedule_recomp->assignments(node))
402+
first_at[node][assignment.first] = std::min(first_at[node][assignment.first], assignment.second);
403+
}
404+
else
405+
{
406+
first_at[node][initial_schedule->assignedProcessor(node)] = std::min(first_at[node][initial_schedule->assignedProcessor(node)],
407+
initial_schedule->assignedSuperstep(node) );
408+
}
409+
}
410+
411+
unsigned staleness = is_max_bsp ? 2 : 1;
365412
for (const auto &node : DAG.vertices()) {
366413

367414
for (unsigned p1 = 0; p1 < num_processors; p1++) {
@@ -380,6 +427,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
380427
comm_processor_to_processor_superstep_node_var[p1][p2][step]
381428
[static_cast<int>(node)],
382429
1);
430+
first_at[node][p2] = std::min(first_at[node][p2], step+staleness);
383431
} else {
384432
model.SetMipStart(
385433
comm_processor_to_processor_superstep_node_var[p1][p2][step]
@@ -390,12 +438,14 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
390438
}
391439
}
392440
}
393-
}
394441

395-
for(const std::pair<unsigned, unsigned>& proc_step : computed[node]){
396-
for(unsigned step = proc_step.second; step < max_number_supersteps; step++){
397-
model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc_step.first][proc_step.first][step]
398-
[static_cast<int>(node)], 1);
442+
// MIP-start hints for the self-communication variable [p1][p1][step] of `node`.
// first_at[node][p1] is the earliest superstep at which the initial schedule
// computes the node on p1 or a transfer to p1 has taken effect; it stays at
// the unsigned maximum when neither ever happens.
// The code this loop replaced set the hint to 1 for every superstep from the
// node's computation step onwards, so the hint is 0 before first_at and 1
// from first_at on. The previous version of this loop had the two values
// swapped, which also set 1 on every superstep for processors that never
// hold the node.
// NOTE(review): presumably this variable means "node is held on p1 during
// step" -- confirm against the presence constraints of the model.
for (unsigned step = 0; step < max_number_supersteps; step++) {
    const int held = (step >= first_at[node][p1]) ? 1 : 0;
    model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p1][step]
                                                                    [static_cast<int>(node)],
                      held);
}
400450
}
401451
}
@@ -425,8 +475,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
425475
{
426476
for (const auto &node : initial_schedule_recomp->getInstance().vertices()) {
427477
for (const std::pair<unsigned, unsigned>& assignment : initial_schedule_recomp->assignments(node)) {
428-
work[assignment.second][assignment.first] +=
429-
DAG.vertex_work_weight(node);
478+
work[assignment.second][assignment.first] += DAG.vertex_work_weight(node);
430479
}
431480
}
432481
}
@@ -611,8 +660,14 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
611660
if (step > 0) {
612661

613662
for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
614-
expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1]
663+
if(!is_max_bsp || p_from == processor){
664+
expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1]
615665
[static_cast<int>(node)];
666+
}
667+
else if(step > 1){
668+
expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 2]
669+
[static_cast<int>(node)];
670+
}
616671
}
617672
}
618673

@@ -703,10 +758,25 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
703758
*/
704759
Expr expr;
705760

706-
for (unsigned int step = 0; step < max_number_supersteps; step++) {
707-
expr += max_work_superstep_var[static_cast<int>(step)] +
708-
instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step)] +
709-
instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
761+
if(is_max_bsp)
{
    // MaxBSP objective: one auxiliary integer variable per superstep, bounded
    // below by that superstep's maximum work and (from the second superstep
    // on) by the communication cost of the preceding superstep.
    VarArray max_superstep_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_superstep");
    for (unsigned int step = 0; step < max_number_supersteps; step++) {
        model.AddConstr(max_superstep_var[static_cast<int>(step)] >= max_work_superstep_var[static_cast<int>(step)]);
        if(step > 0)
            model.AddConstr(max_superstep_var[static_cast<int>(step)] >= instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step-1)]);
        // The synchronisation term is part of the same sum. A stray ';' after
        // the first operand used to end the statement early, turning the
        // synchronisation term into a discarded expression that never reached
        // the objective.
        expr += max_superstep_var[static_cast<int>(step)] +
                instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
    }

}
773+
else
774+
{
775+
for (unsigned int step = 0; step < max_number_supersteps; step++) {
776+
expr += max_work_superstep_var[static_cast<int>(step)] +
777+
instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step)] +
778+
instance.synchronisationCosts() * superstep_used_var[static_cast<int>(step)];
779+
}
710780
}
711781

712782
model.SetObjective(expr - instance.synchronisationCosts(), COPT_MINIMIZE);
@@ -761,54 +831,35 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
761831
return status;
762832
}
763833
}
834+
/**
 * @brief Computes a schedule for a MaxBspSchedule by solving on a temporary
 *        MaxBspScheduleCS built from the same instance.
 *
 * The temporary is handed to computeScheduleCS(). Its result is moved into
 * @p schedule only when the solver reports OSP_SUCCESS or BEST_FOUND; for any
 * other status @p schedule is left untouched.
 *
 * @param schedule Schedule to fill; its instance defines the problem.
 * @return The status reported by computeScheduleCS(), unchanged.
 */
virtual RETURN_STATUS computeSchedule(MaxBspSchedule<Graph_t> &schedule) {

    MaxBspScheduleCS<Graph_t> cs_result(schedule.getInstance());
    const RETURN_STATUS outcome = computeScheduleCS(cs_result);

    const bool has_solution =
        (outcome == RETURN_STATUS::OSP_SUCCESS) || (outcome == RETURN_STATUS::BEST_FOUND);
    if (has_solution) {
        schedule = std::move(cs_result);
    }

    return outcome;
}
764845
virtual RETURN_STATUS computeScheduleCS(BspScheduleCS<Graph_t> &schedule) override {
765846

766847
auto &instance = schedule.getInstance();
767848

768-
if (use_initial_schedule &&
769-
(max_number_supersteps < initial_schedule->numberOfSupersteps() ||
770-
instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() ||
771-
instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) {
772-
throw std::invalid_argument("Invalid Argument while computeSchedule(instance): instance parameters do not "
773-
"agree with those of the initial schedule's instance!");
774-
}
849+
allow_recomputation = false;
850+
851+
is_max_bsp = schedule.isMaxBsp();
775852

776853
Envr env;
777854
Model model = env.CreateModel("bsp_schedule_cs");
778855

779856
setupVariablesConstraintsObjective(instance, model);
780857

781858
if (use_initial_schedule) {
782-
loadInitialSchedule(model);
783-
}
784-
785-
model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, Scheduler<Graph_t>::timeLimitSeconds);
786-
model.SetIntParam(COPT_INTPARAM_THREADS, 128);
787-
788-
model.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1);
789-
model.SetIntParam(COPT_INTPARAM_LPMETHOD, 1);
790-
model.SetIntParam(COPT_INTPARAM_ROUNDINGHEURLEVEL, 1);
791-
792-
model.SetIntParam(COPT_INTPARAM_SUBMIPHEURLEVEL, 1);
793-
// model.SetIntParam(COPT_INTPARAM_PRESOLVE, 1);
794-
// model.SetIntParam(COPT_INTPARAM_CUTLEVEL, 0);
795-
model.SetIntParam(COPT_INTPARAM_TREECUTLEVEL, 2);
796-
// model.SetIntParam(COPT_INTPARAM_DIVINGHEURLEVEL, 2);
797-
798-
if (write_solutions_found) {
799-
800-
WriteSolutionCallback solution_callback;
801-
solution_callback.comm_processor_to_processor_superstep_node_var_ptr =
802-
&comm_processor_to_processor_superstep_node_var;
803-
solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var;
804-
solution_callback.solution_file_prefix_cb = solution_file_prefix;
805-
solution_callback.write_solutions_path_cb = write_solutions_path;
806-
solution_callback.instance_ptr = &instance;
807-
808-
model.SetCallback(&solution_callback, COPT_CBCONTEXT_MIPSOL);
859+
loadInitialSchedule(model, instance);
809860
}
810861

811-
model.Solve();
862+
computeScheduleBase(schedule, model);
812863

813864
if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
814865

@@ -836,31 +887,43 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
836887

837888
allow_recomputation = true;
838889

839-
if (use_initial_schedule &&
840-
(max_number_supersteps < initial_schedule->numberOfSupersteps() ||
841-
schedule.getInstance().numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() ||
842-
schedule.getInstance().numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) {
843-
throw std::invalid_argument("Invalid Argument while computeScheduleRecomp: instance parameters do not "
844-
"agree with those of the initial schedule's instance!");
845-
}
846-
847-
if (use_initial_schedule_recomp &&
848-
(max_number_supersteps < initial_schedule_recomp->numberOfSupersteps() ||
849-
schedule.getInstance().numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() ||
850-
schedule.getInstance().numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) {
851-
throw std::invalid_argument("Invalid Argument while computeScheduleRecomp: instance parameters do not "
852-
"agree with those of the initial schedule's instance!");
853-
}
890+
is_max_bsp = false;
854891

855892
Envr env;
856893
Model model = env.CreateModel("bsp_schedule");
857894

858895
setupVariablesConstraintsObjective(schedule.getInstance(), model);
859896

860897
if (use_initial_schedule || use_initial_schedule_recomp) {
861-
loadInitialSchedule(model);
898+
loadInitialSchedule(model, schedule.getInstance());
862899
}
863900

901+
computeScheduleBase(schedule, model);
902+
903+
if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
904+
905+
constructBspScheduleRecompFromSolution(schedule, true);
906+
return RETURN_STATUS::OSP_SUCCESS;
907+
908+
} else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
909+
910+
return RETURN_STATUS::ERROR;
911+
912+
} else {
913+
914+
if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
915+
916+
constructBspScheduleRecompFromSolution(schedule, true);
917+
return RETURN_STATUS::BEST_FOUND;
918+
919+
} else {
920+
return RETURN_STATUS::TIMEOUT;
921+
}
922+
}
923+
};
924+
925+
virtual void computeScheduleBase(const BspScheduleRecomp<Graph_t> &schedule, Model &model) {
926+
864927
model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, Scheduler<Graph_t>::timeLimitSeconds);
865928
model.SetIntParam(COPT_INTPARAM_THREADS, 128);
866929

@@ -889,30 +952,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
889952
}
890953

891954
model.Solve();
892-
893-
allow_recomputation = false;
894-
895-
if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) {
896-
897-
constructBspScheduleRecompFromSolution(schedule, true);
898-
return RETURN_STATUS::OSP_SUCCESS;
899-
900-
} else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) {
901-
902-
return RETURN_STATUS::ERROR;
903-
904-
} else {
905-
906-
if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) {
907-
908-
constructBspScheduleRecompFromSolution(schedule, true);
909-
return RETURN_STATUS::BEST_FOUND;
910-
911-
} else {
912-
return RETURN_STATUS::TIMEOUT;
913-
}
914-
}
915-
};
955+
}
916956

917957

918958
/**

0 commit comments

Comments
 (0)