Skip to content

Commit 79be180

Browse files
[Feature](mlu-ops): Use __bang_div (#1292)
1 parent 64732b4 commit 79be180

File tree

3 files changed: 8 additions (+8) and 14 deletions (-14).

kernels/diff_iou_rotated_sort_vertices_forward/diff_iou_rotated_sort_vertices_forward_block.mlu

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,11 +54,7 @@ static __mlu_func__ void load(const T *addr_vertices, const bool *addr_mask,
5454
template <typename T>
5555
static __mlu_func__ void computeDiv(T *nram_dst, T *nram_src0, T *nram_src1,
5656
int deal_num) {
57-
#if (__BANG_ARCH__ != 372)
5857
__bang_div(nram_dst, nram_src0, nram_src1, deal_num);
59-
#else
60-
__cn_vector_div_f32_rn(deal_num, nram_dst, nram_src0, nram_src1);
61-
#endif
6258
}
6359

6460
template <typename T>

kernels/fft/common/chirpz_aux_block.mlu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ __mlu_global__ void MLUKernelChirpz(const int length, int n, int pad_n,
7373
__bang_square(nram_index, nram_index, num_deal);
7474
__bang_mul_scalar(nram_index, nram_index, M_PI, num_deal);
7575
__bang_write_value(nram_temp, num_deal, (float)n);
76-
__cn_vector_div_f32_rn(num_deal, nram_temp, nram_index, nram_temp);
76+
__bang_div(nram_temp, nram_index, nram_temp, num_deal);
7777
__bang_move(nram_index, nram_temp, num_deal * sizeof(float));
7878
__cn_vector_cos_f32(num_deal, nram_index, nram_index);
7979
__cn_vector_sin_f32(num_deal, nram_temp, nram_temp);
@@ -137,7 +137,7 @@ __mlu_global__ void MLUKernelSignal(const int length, int n, int pad_n,
137137
__bang_square(nram_index, nram_index, 2 * num_deal);
138138
__bang_mul_scalar(nram_index, nram_index, M_PI, 2 * num_deal);
139139
__bang_write_value(nram_temp, 2 * num_deal, (float)n);
140-
__cn_vector_div_f32_rn(2 * num_deal, nram_temp, nram_index, nram_temp);
140+
__bang_div(nram_temp, nram_index, nram_temp, 2 * num_deal);
141141
__bang_move(nram_index, nram_temp, 2 * num_deal * sizeof(float));
142142
__cn_vector_cos_f32(2 * num_deal, nram_index, nram_index);
143143
__cn_vector_sin_f32(2 * num_deal, nram_temp, nram_temp);

kernels/sparse_conv/get_indice_pairs/get_indice_pairs_utils.h

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -294,25 +294,23 @@ __mlu_func__ void genIndiceOutLast(int32_t *nram_output, int32_t *nram_input,
294294
o_w = output_space.o_w;
295295
int32_t o_hw = o_h * o_w, o_dhw = o_d * o_h * o_w;
296296
__bang_write_value((int32_t *)nram_aux + 4 * deal_num, deal_num, int(o_dhw));
297-
__cn_vector_div_s32(deal_num, (int32_t *)nram_aux, (int32_t *)nram_input,
298-
(int32_t *)nram_aux + 4 * deal_num);
297+
__bang_div((int32_t *)nram_aux, (int32_t *)nram_input,
298+
(int32_t *)nram_aux + 4 * deal_num, deal_num);
299299
__bang_mul_scalar((int32_t *)nram_output, (int32_t *)nram_aux, (int)o_dhw,
300300
deal_num);
301301
__bang_sub((int32_t *)nram_input, (int32_t *)nram_input, (int *)nram_output,
302302
deal_num);
303303
__bang_write_value((int32_t *)nram_aux + 4 * deal_num, deal_num, int(o_hw));
304-
__cn_vector_div_s32(deal_num, (int32_t *)nram_aux + deal_num,
305-
(int32_t *)nram_input,
306-
(int32_t *)nram_aux + 4 * deal_num);
304+
__bang_div((int32_t *)nram_aux + deal_num, (int32_t *)nram_input,
305+
(int32_t *)nram_aux + 4 * deal_num, deal_num);
307306
__bang_mul_scalar((int32_t *)nram_output, (int32_t *)nram_aux + deal_num,
308307
(int)o_hw, deal_num);
309308
__bang_sub((int32_t *)nram_input, (int32_t *)nram_input,
310309
(int32_t *)nram_output, deal_num);
311310

312311
__bang_write_value((int32_t *)nram_aux + 4 * deal_num, deal_num, int(o_w));
313-
__cn_vector_div_s32(deal_num, (int32_t *)nram_aux + 2 * deal_num,
314-
(int32_t *)nram_input,
315-
(int32_t *)nram_aux + 4 * deal_num);
312+
__bang_div((int32_t *)nram_aux + 2 * deal_num, (int32_t *)nram_input,
313+
(int32_t *)nram_aux + 4 * deal_num, deal_num);
316314
__bang_mul_scalar((int32_t *)nram_output, (int32_t *)nram_aux + 2 * deal_num,
317315
(int)o_w, deal_num);
318316
__bang_sub((int32_t *)nram_aux + 3 * deal_num, (int32_t *)nram_input,

0 commit comments

Comments
 (0)