@@ -79,25 +79,10 @@ __global__ void lb_calc_act_heavy_kernel(i_t id_range_beg,
7979 activity_view_t view,
8080 raft::device_span<f_t2> tmp_cnst_act)
8181{
82- // if (pseudo_block_ids.size() <= blockIdx.x) {
83- // printf("oob pseudo_block_id %d %d\n", blockIdx.x, int(pseudo_block_ids.size()));
84- // }
85- // if (ids.size() <= blockIdx.x) {
86- // printf("oob ids\n");
87- // }
88- // if (tmp_cnst_act.size() <= blockIdx.x) {
89- // printf("oob tmp_cnst_act\n");
90- // }
9182 auto idx = ids[blockIdx .x ] + id_range_beg;
9283 auto pseudo_block_id = pseudo_block_ids[blockIdx .x ];
93- // if (view.offsets.size() <= idx) {
94- // printf("oob offset\n");
95- // }
96- // if (view.offsets.size() <= idx + 1) {
97- // printf("oob offset + 1\n");
98- // }
99- i_t item_off_beg = view.offsets [idx] + work_per_block * pseudo_block_id;
100- i_t item_off_end = min (item_off_beg + work_per_block, view.offsets [idx + 1 ]);
84+ i_t item_off_beg = view.offsets [idx] + work_per_block * pseudo_block_id;
85+ i_t item_off_end = min (item_off_beg + work_per_block, view.offsets [idx + 1 ]);
10186
10287 typedef cub::BlockReduce<f_t , BDIM> BlockReduce;
10388 __shared__ typename BlockReduce::TempStorage temp_storage;
@@ -658,77 +643,4 @@ __global__ void lb_upd_bnd_sub_warp_kernel(bounds_update_view_t view,
658643 }
659644}
660645
661- #if 0
// Disabled draft (enclosed in `#if 0`): updates the bounds of one variable using a
// block-wide reduction. It reads the variable's current bounds, computes candidate bounds
// with update_bounds(), reduces them with cub::BlockReduce (maximum over .x, minimum
// over .y), and has thread 0 store the result through write_updated_bounds().
// NOTE(review): `id_beg_seg` used below is neither a parameter nor a local of this
// function (the parameter is named `id_range_beg`) — confirm the intended name before
// re-enabling this code.
662- template <typename i_t, typename f_t, typename f_t2, i_t PSEUDO_BDIM, i_t BDIM, typename bounds_update_view_t>
663- __device__ void upd_bnd_block(i_t prior_blocks_in_seg,
664- i_t id_range_beg, bounds_update_view_t view)
665- {
666- //i_t idx = id_range_beg + blockIdx.x;
// The index advances by BDIM / PSEUDO_BDIM per prior block and by one for every
// PSEUDO_BDIM consecutive threads of this block.
667- i_t idx = id_beg_seg + prior_blocks_in_seg * (BDIM/PSEUDO_BDIM) + (threadIdx.x / PSEUDO_BDIM);
668- i_t var_idx = view.vars_reorg_ids[idx];
669- // x is lb, y is ub
670- auto old_bounds = view.vars_bnd[var_idx];
671- bool is_int = (view.vars_types[idx] == var_t::INTEGER);
// Range of items for this index: [offsets[idx], offsets[idx + 1]).
672- i_t item_off_beg = view.offsets[idx];
673- i_t item_off_end = view.offsets[idx + 1];
674-
// Reduction is declared over BDIM threads; temp_storage is shared by both Reduce calls below.
675- typedef cub::BlockReduce<f_t, BDIM> BlockReduce;
676- __shared__ typename BlockReduce::TempStorage temp_storage;
677-
678- // if it is a set variable then don't propagate the bound
679- // consider continuous vars as set if their bounds cross or equal
// NOTE(review): this return sits before the BlockReduce calls and the __syncthreads()
// below. Because idx is derived from threadIdx.x / PSEUDO_BDIM, threads of one block may
// take it non-uniformly when PSEUDO_BDIM < BDIM — confirm this is safe.
680- if (old_bounds.x + view.tolerances.integrality_tolerance >= old_bounds.y) { return; }
681- auto bounds =
682- update_bounds<i_t, f_t, f_t2, BDIM>(view, threadIdx.x, item_off_beg, item_off_end, old_bounds);
683-
// Largest candidate lower bound, then smallest candidate upper bound. The barrier
// separates the two reductions, which reuse the same temp_storage.
684- bounds.x = BlockReduce(temp_storage).Reduce(bounds.x, cuda::maximum());
685- __syncthreads();
686- bounds.y = BlockReduce(temp_storage).Reduce(bounds.y, cuda::minimum());
687-
// Only thread 0 writes the reduced bounds back for this variable.
688- if (threadIdx.x == 0) {
689- write_updated_bounds(&view.vars_bnd[var_idx], is_int, view, bounds, old_bounds);
690- }
691- }
692-
// Disabled draft (enclosed in `#if 0`): sub-warp dispatcher. It asks
// detect_range_sub_warp() for the id range and the number of threads assigned to each
// variable, then forwards to the upd_bnd_sub_warp overload whose fifth template argument
// equals that thread count.
// NOTE(review): the five-template-argument overload taking
// (id_warp_beg, id_range_end, view) is not defined in the visible part of the file.
693- template <typename i_t, typename f_t, typename f_t2, i_t BDIM, typename bounds_update_view_t>
694- __device__ void upd_bnd_sub_warp(bounds_update_view_t view,
695- raft::device_span<i_t> warp_vars_offsets,
696- raft::device_span<i_t> warp_vars_id_offsets)
697- {
// The three values below are filled in by detect_range_sub_warp through the pointers.
698- i_t id_warp_beg, id_range_end, threads_per_variable;
699- detect_range_sub_warp<i_t>(
700- &id_warp_beg, &id_range_end, &threads_per_variable, warp_vars_offsets, warp_vars_id_offsets);
701-
// Turn the runtime thread count into a compile-time template argument.
// Only 1, 2, 4, 8 and 16 are handled; any other value falls through and does nothing.
702- if (threads_per_variable == 1) {
703- upd_bnd_sub_warp<i_t, f_t, f_t2, BDIM, 1>(id_warp_beg, id_range_end, view);
704- } else if (threads_per_variable == 2) {
705- upd_bnd_sub_warp<i_t, f_t, f_t2, BDIM, 2>(id_warp_beg, id_range_end, view);
706- } else if (threads_per_variable == 4) {
707- upd_bnd_sub_warp<i_t, f_t, f_t2, BDIM, 4>(id_warp_beg, id_range_end, view);
708- } else if (threads_per_variable == 8) {
709- upd_bnd_sub_warp<i_t, f_t, f_t2, BDIM, 8>(id_warp_beg, id_range_end, view);
710- } else if (threads_per_variable == 16) {
711- upd_bnd_sub_warp<i_t, f_t, f_t2, BDIM, 16>(id_warp_beg, id_range_end, view);
712- }
713- }
714-
// Disabled draft (enclosed in `#if 0`): single kernel that picks a bound-update strategy
// from blockIdx.x. Blocks below sub_warp_blocks_end take the sub-warp path, the next two
// ranges (bounded by block_vars_offsets[1] and block_vars_offsets[2]) call upd_bnd_block
// with 64 and 256 as first template argument, and all remaining blocks call
// upd_bnd_heavy<512>.
// NOTE(review): `sub_warp_blocks_end`, `heavy_vars_beg_id`, `heavy_vars_vertex_ids`,
// `heavy_vars_pseudo_block_ids`, `heavy_degree_cutoff` and `tmp_bnd` are neither
// parameters nor locals of this kernel, and `block_vars_id_offsets` is never read in the
// body.
// NOTE(review): the upd_bnd_block calls pass four arguments with `view` first and two
// explicit template arguments, whereas the declaration above in this `#if 0` region
// takes (prior_blocks_in_seg, id_range_beg, view) — confirm the intended signature
// before re-enabling.
715- template <typename i_t, typename f_t, typename f_t2, i_t BDIM, typename bounds_update_view_t>
716- __global__ void lb_upd_bnd_kernel(bounds_update_view_t view,
717- raft::device_span<i_t> warp_vars_offsets,
718- raft::device_span<i_t> warp_vars_id_offsets,
719- raft::device_span<i_t> block_vars_offsets,
720- raft::device_span<i_t> block_vars_id_offsets)
721- {
722- if (blockIdx.x < sub_warp_blocks_end) {
723- upd_bnd_sub_warp(view, warp_vars_offsets, warp_vars_id_offsets);
724- } else if (blockIdx.x < block_vars_offsets[1]) {
// Second argument is the block's position relative to block_vars_offsets[0].
725- upd_bnd_block<64, BDIM>(view, blockIdx.x - block_vars_offsets[0], block_vars_offsets[0], block_vars_offsets[1]);
726- } else if (blockIdx.x < block_vars_offsets[2]) {
// Second argument is the block's position relative to block_vars_offsets[1].
727- upd_bnd_block<256, BDIM>(view, blockIdx.x - block_vars_offsets[1], block_vars_offsets[1], block_vars_offsets[2]);
728- } else {
729- upd_bnd_heavy<512>(heavy_vars_beg_id, heavy_vars_vertex_ids, heavy_vars_pseudo_block_ids, heavy_degree_cutoff, view, tmp_bnd);
730- }
731- }
732- #endif
733-
734646} // namespace cuopt::linear_programming::detail
0 commit comments