Skip to content

Commit 509dfe6

Browse files
committed
Manual graph creation for lb bounds update
1 parent 8596529 commit 509dfe6

File tree

8 files changed

+998
-97
lines changed

8 files changed

+998
-97
lines changed

cpp/src/mip/presolve/bounds_presolve.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ termination_criterion_t bound_presolve_t<i_t, f_t>::bound_update_loop(problem_t<
202202
}
203203
pb.handle_ptr->sync_stream();
204204
calculate_infeasible_redundant_constraints(pb);
205+
solve_iter = iter;
205206
206207
return criteria;
207208
}

cpp/src/mip/presolve/bounds_presolve.cuh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ class bound_presolve_t {
8686
i_t infeas_constraints_count = 0;
8787
i_t redund_constraints_count = 0;
8888
probing_cache_t<i_t, f_t> probing_cache;
89+
i_t solve_iter;
8990
};
9091

9192
} // namespace cuopt::linear_programming::detail

cpp/src/mip/presolve/load_balanced_bounds_presolve.cu

Lines changed: 133 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ void load_balanced_bounds_presolve_t<i_t, f_t>::setup(
245245
heavy_degree_cutoff,
246246
problem.cnst_bin_offsets,
247247
problem.offsets);
248+
RAFT_CHECK_CUDA(stream_heavy_cnst);
248249

249250
num_blocks_heavy_vars = create_heavy_item_block_segments(stream_heavy_vars,
250251
heavy_vars_vertex_ids,
@@ -253,49 +254,34 @@ void load_balanced_bounds_presolve_t<i_t, f_t>::setup(
253254
heavy_degree_cutoff,
254255
problem.vars_bin_offsets,
255256
problem.reverse_offsets);
257+
RAFT_CHECK_CUDA(stream_heavy_vars);
256258

257259
tmp_act.resize(2 * num_blocks_heavy_cnst, stream_heavy_cnst);
258260
tmp_bnd.resize(2 * num_blocks_heavy_vars, stream_heavy_vars);
259261

260-
std::tie(is_cnst_sub_warp_single_bin, cnst_sub_warp_count) = sub_warp_meta(
261-
streams.get_stream(), warp_cnst_offsets, warp_cnst_id_offsets, pb->cnst_bin_offsets, 4);
262+
std::tie(is_cnst_sub_warp_single_bin, cnst_sub_warp_count) =
263+
sub_warp_meta(stream, warp_cnst_offsets, warp_cnst_id_offsets, pb->cnst_bin_offsets, 4);
262264

263-
std::tie(is_vars_sub_warp_single_bin, vars_sub_warp_count) = sub_warp_meta(
264-
streams.get_stream(), warp_vars_offsets, warp_vars_id_offsets, pb->vars_bin_offsets, 4);
265+
std::tie(is_vars_sub_warp_single_bin, vars_sub_warp_count) =
266+
sub_warp_meta(stream, warp_vars_offsets, warp_vars_id_offsets, pb->vars_bin_offsets, 4);
265267

266-
stream.synchronize();
267-
streams.sync_all_issued();
268+
// stream.synchronize();
269+
RAFT_CHECK_CUDA(stream);
270+
streams.sync_test_all_issued();
268271

269272
if (!calc_slack_erase_inf_cnst_graph_created) {
270-
bool erase_inf_cnst = true;
271-
calc_slack_erase_inf_cnst_graph_created = build_graph(
272-
streams,
273-
handle_ptr,
274-
calc_slack_erase_inf_cnst_graph,
275-
calc_slack_erase_inf_cnst_exec,
276-
[erase_inf_cnst, this]() { this->calculate_activity_graph(erase_inf_cnst, true); },
277-
[erase_inf_cnst, this]() { this->calculate_activity_graph(erase_inf_cnst); });
273+
create_constraint_slack_graph(true);
274+
calc_slack_erase_inf_cnst_graph_created = true;
278275
}
279276

280277
if (!calc_slack_graph_created) {
281-
bool erase_inf_cnst = false;
282-
calc_slack_graph_created = build_graph(
283-
streams,
284-
handle_ptr,
285-
calc_slack_graph,
286-
calc_slack_exec,
287-
[erase_inf_cnst, this]() { this->calculate_activity_graph(erase_inf_cnst, true); },
288-
[erase_inf_cnst, this]() { this->calculate_activity_graph(erase_inf_cnst); });
278+
create_constraint_slack_graph(false);
279+
calc_slack_graph_created = true;
289280
}
290281

291282
if (!upd_bnd_graph_created) {
292-
upd_bnd_graph_created = build_graph(
293-
streams,
294-
handle_ptr,
295-
upd_bnd_graph,
296-
upd_bnd_exec,
297-
[this]() { this->calculate_bounds_update_graph(true); },
298-
[this]() { this->calculate_bounds_update_graph(); });
283+
create_bounds_update_graph();
284+
upd_bnd_graph_created = true;
299285
}
300286
}
301287

@@ -368,6 +354,119 @@ void load_balanced_bounds_presolve_t<i_t, f_t>::calculate_activity_graph(bool er
368354
dry_run);
369355
}
370356

357+
/**
 * @brief Manually builds the CUDA graph for the bounds-update step and instantiates it into
 *        `upd_bnd_exec`.
 *
 * The graph starts with a memcpy node that copies the device scalar `bounds_changed` into the
 * host member `h_bounds_changed`. The graph and that node are then handed to the heavy-vars,
 * per-block and sub-warp helper routines, which add their own nodes.
 *
 * The memcpy node records the address of this object's `h_bounds_changed` member, so the
 * instantiated graph is only valid while the object stays at the same address.
 *
 * NOTE(review): how the helpers order their nodes relative to the memcpy node is not visible
 * from this function — confirm the copy executes after the update kernels so the host observes
 * the final flag value, and that callers synchronize the launch stream before reading
 * `h_bounds_changed`.
 *
 * Every CUDA runtime call is checked with RAFT_CUDA_TRY; a failing call raises through that
 * macro.
 */
template <typename i_t, typename f_t>
void load_balanced_bounds_presolve_t<i_t, f_t>::create_bounds_update_graph()
{
  using f_t2 = typename type_2<f_t>::type;

  cudaGraph_t upd_graph;
  RAFT_CUDA_TRY(cudaGraphCreate(&upd_graph, 0));

  cudaGraphNode_t bounds_changed_node;
  {
    i_t* bounds_changed_ptr = bounds_changed.data();

    // Single-element device -> host copy described as a 1x1x1 3D transfer.
    cudaMemcpy3DParms memcpyParams = {0};
    memcpyParams.srcArray          = NULL;
    memcpyParams.srcPos            = make_cudaPos(0, 0, 0);
    memcpyParams.srcPtr            = make_cudaPitchedPtr(bounds_changed_ptr, sizeof(i_t), 1, 1);
    memcpyParams.dstArray          = NULL;
    memcpyParams.dstPos            = make_cudaPos(0, 0, 0);
    memcpyParams.dstPtr            = make_cudaPitchedPtr(&h_bounds_changed, sizeof(i_t), 1, 1);
    memcpyParams.extent            = make_cudaExtent(sizeof(i_t), 1, 1);
    memcpyParams.kind              = cudaMemcpyDeviceToHost;
    RAFT_CUDA_TRY(
      cudaGraphAddMemcpyNode(&bounds_changed_node, upd_graph, NULL, 0, &memcpyParams));
  }

  auto bounds_update_view = get_bounds_update_view(*pb);

  create_update_bounds_heavy_vars<i_t, f_t, f_t2, 640>(upd_graph,
                                                       bounds_changed_node,
                                                       bounds_update_view,
                                                       make_span_2(tmp_bnd),
                                                       heavy_vars_vertex_ids,
                                                       heavy_vars_pseudo_block_ids,
                                                       heavy_vars_block_segments,
                                                       pb->vars_bin_offsets,
                                                       heavy_degree_cutoff,
                                                       num_blocks_heavy_vars);
  RAFT_CUDA_TRY(cudaGetLastError());
  create_update_bounds_per_block<i_t, f_t, f_t2>(
    upd_graph, bounds_changed_node, bounds_update_view, pb->vars_bin_offsets, heavy_degree_cutoff);
  RAFT_CUDA_TRY(cudaGetLastError());
  create_update_bounds_sub_warp<i_t, f_t, f_t2>(upd_graph,
                                                bounds_changed_node,
                                                bounds_update_view,
                                                is_vars_sub_warp_single_bin,
                                                vars_sub_warp_count,
                                                warp_vars_offsets,
                                                warp_vars_id_offsets,
                                                pb->vars_bin_offsets);
  RAFT_CUDA_TRY(cudaGetLastError());

  // The DOT dump to a developer-specific absolute path was removed: on any other machine the
  // call fails and the subsequent error check would abort graph creation.
  RAFT_CUDA_TRY(cudaGraphInstantiate(&upd_bnd_exec, upd_graph, NULL, NULL, 0));

  // The executable graph is independent of its template; release the template so it does not
  // leak.
  RAFT_CUDA_TRY(cudaGraphDestroy(upd_graph));
}
409+
410+
/**
 * @brief Manually builds the CUDA graph for the constraint-slack (activity) step and
 *        instantiates it.
 *
 * The graph starts with a memcpy node that copies the host member `h_bounds_changed` into the
 * device scalar `bounds_changed`. The graph and that node are then handed to the heavy-cnst,
 * per-block and sub-warp activity helper routines, which add their own nodes.
 *
 * The memcpy node records the address of this object's `h_bounds_changed` member, so the
 * instantiated graph is only valid while the object stays at the same address.
 *
 * Every CUDA runtime call is checked with RAFT_CUDA_TRY; a failing call raises through that
 * macro.
 *
 * @param erase_inf_cnst Forwarded unchanged to the activity helpers. It also selects the
 *                       destination of the instantiated graph:
 *                       `calc_slack_erase_inf_cnst_exec` when true, `calc_slack_exec` otherwise.
 */
template <typename i_t, typename f_t>
void load_balanced_bounds_presolve_t<i_t, f_t>::create_constraint_slack_graph(bool erase_inf_cnst)
{
  using f_t2 = typename type_2<f_t>::type;

  cudaGraph_t cnst_slack_graph;
  RAFT_CUDA_TRY(cudaGraphCreate(&cnst_slack_graph, 0));

  cudaGraphNode_t set_bounds_changed_node;
  {
    // TODO : Investigate why memset node is not captured manually
    i_t* bounds_changed_ptr = bounds_changed.data();

    // Single-element host -> device copy described as a 1x1x1 3D transfer.
    cudaMemcpy3DParms memcpyParams = {0};
    memcpyParams.srcArray          = NULL;
    memcpyParams.srcPos            = make_cudaPos(0, 0, 0);
    memcpyParams.srcPtr            = make_cudaPitchedPtr(&h_bounds_changed, sizeof(i_t), 1, 1);
    memcpyParams.dstArray          = NULL;
    memcpyParams.dstPos            = make_cudaPos(0, 0, 0);
    memcpyParams.dstPtr            = make_cudaPitchedPtr(bounds_changed_ptr, sizeof(i_t), 1, 1);
    memcpyParams.extent            = make_cudaExtent(sizeof(i_t), 1, 1);
    memcpyParams.kind              = cudaMemcpyHostToDevice;
    RAFT_CUDA_TRY(cudaGraphAddMemcpyNode(
      &set_bounds_changed_node, cnst_slack_graph, NULL, 0, &memcpyParams));
  }

  auto activity_view = get_activity_view(*pb);

  create_activity_heavy_cnst<i_t, f_t, f_t2, 512>(cnst_slack_graph,
                                                  set_bounds_changed_node,
                                                  activity_view,
                                                  make_span_2(tmp_act),
                                                  heavy_cnst_vertex_ids,
                                                  heavy_cnst_pseudo_block_ids,
                                                  heavy_cnst_block_segments,
                                                  pb->cnst_bin_offsets,
                                                  heavy_degree_cutoff,
                                                  num_blocks_heavy_cnst,
                                                  erase_inf_cnst);
  RAFT_CUDA_TRY(cudaGetLastError());
  create_activity_per_block<i_t, f_t, f_t2>(cnst_slack_graph,
                                            set_bounds_changed_node,
                                            activity_view,
                                            pb->cnst_bin_offsets,
                                            heavy_degree_cutoff,
                                            erase_inf_cnst);
  RAFT_CUDA_TRY(cudaGetLastError());
  create_activity_sub_warp<i_t, f_t, f_t2>(cnst_slack_graph,
                                           set_bounds_changed_node,
                                           activity_view,
                                           is_cnst_sub_warp_single_bin,
                                           cnst_sub_warp_count,
                                           warp_cnst_offsets,
                                           warp_cnst_id_offsets,
                                           pb->cnst_bin_offsets,
                                           erase_inf_cnst);
  RAFT_CUDA_TRY(cudaGetLastError());

  // The DOT dump to a developer-specific absolute path was removed: it fails on any machine
  // where that directory does not exist and leaves an error behind for later checks.

  // Pick the executable-graph slot that matches the requested variant.
  auto* exec_slot = erase_inf_cnst ? &calc_slack_erase_inf_cnst_exec : &calc_slack_exec;
  RAFT_CUDA_TRY(cudaGraphInstantiate(exec_slot, cnst_slack_graph, NULL, NULL, 0));

  // The executable graph is independent of its template; release the template so it does not
  // leak.
  RAFT_CUDA_TRY(cudaGraphDestroy(cnst_slack_graph));
}
469+
371470
template <typename i_t, typename f_t>
372471
void load_balanced_bounds_presolve_t<i_t, f_t>::calculate_bounds_update_graph(bool dry_run)
373472
{
@@ -401,12 +500,13 @@ template <typename i_t, typename f_t>
401500
void load_balanced_bounds_presolve_t<i_t, f_t>::calculate_constraint_slack_iter(
402501
const raft::handle_t* handle_ptr)
403502
{
503+
// h_bounds_changed is copied to bounds_changed in calc_slack_exec
504+
h_bounds_changed = 0;
404505
{
405506
// writes nans to constraint activities that are infeasible
406507
//-> less expensive checks for update bounds step
407508
raft::common::nvtx::range scope("act_cuda_task_graph");
408509
cudaGraphLaunch(calc_slack_erase_inf_cnst_exec, handle_ptr->get_stream());
409-
handle_ptr->sync_stream();
410510
}
411511
infeas_cnst_slack_set_to_nan = true;
412512
RAFT_CHECK_CUDA(handle_ptr->get_stream());
@@ -416,6 +516,8 @@ template <typename i_t, typename f_t>
416516
void load_balanced_bounds_presolve_t<i_t, f_t>::calculate_constraint_slack(
417517
const raft::handle_t* handle_ptr)
418518
{
519+
// h_bounds_changed is copied to bounds_changed in calc_slack_exec
520+
h_bounds_changed = 0;
419521
{
420522
raft::common::nvtx::range scope("act_cuda_task_graph");
421523
cudaGraphLaunch(calc_slack_exec, handle_ptr->get_stream());
@@ -428,13 +530,10 @@ template <typename i_t, typename f_t>
428530
bool load_balanced_bounds_presolve_t<i_t, f_t>::update_bounds_from_slack(
429531
const raft::handle_t* handle_ptr)
430532
{
431-
i_t h_bounds_changed;
432-
bounds_changed.set_value_to_zero_async(handle_ptr->get_stream());
433-
533+
// bounds_changed is copied to h_bounds_changed in upd_bnd_exec
434534
{
435535
raft::common::nvtx::range scope("upd_cuda_task_graph");
436536
cudaGraphLaunch(upd_bnd_exec, handle_ptr->get_stream());
437-
h_bounds_changed = bounds_changed.value(handle_ptr->get_stream());
438537
}
439538
RAFT_CHECK_CUDA(handle_ptr->get_stream());
440539
constexpr i_t zero = 0;

cpp/src/mip/presolve/load_balanced_bounds_presolve.cuh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,8 @@ class load_balanced_bounds_presolve_t {
212212

213213
activity_view_t get_activity_view(const load_balanced_problem_t<i_t, f_t>& pb);
214214
bounds_update_view_t get_bounds_update_view(const load_balanced_problem_t<i_t, f_t>& pb);
215+
void create_bounds_update_graph();
216+
void create_constraint_slack_graph(bool erase_inf_cnst);
215217

216218
rmm::cuda_stream main_stream;
217219
rmm::cuda_stream act_stream;
@@ -221,6 +223,7 @@ class load_balanced_bounds_presolve_t {
221223
const load_balanced_problem_t<i_t, f_t>* pb;
222224

223225
rmm::device_scalar<i_t> bounds_changed;
226+
i_t h_bounds_changed;
224227

225228
rmm::device_uvector<f_t> cnst_slack;
226229
rmm::device_uvector<f_t> vars_bnd;

0 commit comments

Comments
 (0)