File tree — 13 files changed, +121 -121 lines changed
src/awkward/_connect/cuda/cuda_kernels — 13 files changed, +121 -121 lines changed
Original file line number | Diff line number | Diff line change
@@ -59,17 +59,17 @@ awkward_ListOffsetArray_reduce_local_outoffsets_64_b(
5959 }
6060 __syncthreads ();
6161
62- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
63- int64_t val = 0 ;
64- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
65- val = temp[thread_id - stride];
62+ if (thread_id < lenparents) {
63+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
64+ int64_t val = 0 ;
65+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
66+ val = temp[thread_id - stride];
67+ }
68+ __syncthreads ();
69+ temp[thread_id] += val;
70+ __syncthreads ();
6671 }
67- __syncthreads ();
68- temp[thread_id] += val;
69- __syncthreads ();
70- }
7172
72- if (thread_id < lenparents) {
7373 int64_t parent = parents[thread_id];
7474 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
7575 atomicAdd (&scan_in_array[parent], temp[thread_id]);
Original file line number | Diff line number | Diff line change
@@ -59,19 +59,19 @@ awkward_reduce_argmax_b(
5959 }
6060 __syncthreads ();
6161
62- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
63- int64_t index = -1 ;
64- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
65- index = temp[thread_id - stride];
66- }
67- if (index != -1 && (temp[thread_id] == -1 || fromptr[index] > fromptr[temp[thread_id]] ||
68- (fromptr[index] == fromptr[temp[thread_id]] && index < temp[thread_id]))) {
69- temp[thread_id] = index;
62+ if (thread_id < lenparents) {
63+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
64+ int64_t index = -1 ;
65+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
66+ index = temp[thread_id - stride];
67+ }
68+ if (index != -1 && (temp[thread_id] == -1 || fromptr[index] > fromptr[temp[thread_id]] ||
69+ (fromptr[index] == fromptr[temp[thread_id]] && index < temp[thread_id]))) {
70+ temp[thread_id] = index;
71+ }
72+ __syncthreads ();
7073 }
71- __syncthreads ();
72- }
7374
74- if (thread_id < lenparents) {
7575 int64_t parent = parents[thread_id];
7676 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
7777 atomicExch (&atomic_toptr[parent], temp[thread_id]);
Original file line number | Diff line number | Diff line change
@@ -59,19 +59,19 @@ awkward_reduce_argmin_b(
5959 }
6060 __syncthreads ();
6161
62- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
63- int64_t index = -1 ;
64- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
65- index = temp[thread_id - stride];
66- }
67- if (index != -1 && (temp[thread_id] == -1 || fromptr[index] < fromptr[temp[thread_id]] ||
68- (fromptr[index] == fromptr[temp[thread_id]] && index < temp[thread_id]))) {
69- temp[thread_id] = index;
62+ if (thread_id < lenparents) {
63+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
64+ int64_t index = -1 ;
65+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
66+ index = temp[thread_id - stride];
67+ }
68+ if (index != -1 && (temp[thread_id] == -1 || fromptr[index] < fromptr[temp[thread_id]] ||
69+ (fromptr[index] == fromptr[temp[thread_id]] && index < temp[thread_id]))) {
70+ temp[thread_id] = index;
71+ }
72+ __syncthreads ();
7073 }
71- __syncthreads ();
72- }
7374
74- if (thread_id < lenparents) {
7575 int64_t parent = parents[thread_id];
7676 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
7777 atomicExch (&atomic_toptr[parent], temp[thread_id]);
Original file line number | Diff line number | Diff line change
@@ -52,17 +52,17 @@ awkward_reduce_count_64_b(
5252 }
5353 __syncthreads ();
5454
55- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
56- int64_t val = 0 ;
57- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
58- val = temp[thread_id - stride];
55+ if (thread_id < lenparents) {
56+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
57+ int64_t val = 0 ;
58+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
59+ val = temp[thread_id - stride];
60+ }
61+ __syncthreads ();
62+ temp[thread_id] += val;
63+ __syncthreads ();
5964 }
60- __syncthreads ();
61- temp[thread_id] += val;
62- __syncthreads ();
63- }
6465
65- if (thread_id < lenparents) {
6666 int64_t parent = parents[thread_id];
6767 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
6868 atomicAdd (&toptr[parent], temp[thread_id]);
Original file line number | Diff line number | Diff line change
@@ -54,17 +54,17 @@ awkward_reduce_countnonzero_b(
5454 }
5555 __syncthreads ();
5656
57- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
58- int64_t val = 0 ;
59- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
60- val = temp[thread_id - stride];
57+ if (thread_id < lenparents) {
58+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
59+ int64_t val = 0 ;
60+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
61+ val = temp[thread_id - stride];
62+ }
63+ __syncthreads ();
64+ temp[thread_id] += val;
65+ __syncthreads ();
6166 }
62- __syncthreads ();
63- temp[thread_id] += val;
64- __syncthreads ();
65- }
6667
67- if (thread_id < lenparents) {
6868 int64_t parent = parents[thread_id];
6969 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
7070 atomicAdd (&toptr[parent], temp[thread_id]);
Original file line number | Diff line number | Diff line change
@@ -55,18 +55,18 @@ awkward_reduce_max_b(
5555 }
5656 __syncthreads ();
5757
58- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
59- T val = identity;
58+ if (thread_id < lenparents) {
59+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
60+ T val = identity;
6061
61- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
62- val = temp[idx - stride];
62+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
63+ val = temp[idx - stride];
64+ }
65+ __syncthreads ();
66+ temp[thread_id] = val > temp[thread_id] ? val : temp[thread_id];
67+ __syncthreads ();
6368 }
64- __syncthreads ();
65- temp[thread_id] = val > temp[thread_id] ? val : temp[thread_id];
66- __syncthreads ();
67- }
6869
69- if (thread_id < lenparents) {
7070 int64_t parent = parents[thread_id];
7171 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
7272 atomicMax (&toptr[parent], temp[thread_id]);
Original file line number | Diff line number | Diff line change
@@ -56,17 +56,17 @@ awkward_reduce_min_b(
5656 }
5757 __syncthreads ();
5858
59- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
60- T val = identity;
61- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
62- val = temp[thread_id - stride];
59+ if (thread_id < lenparents) {
60+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
61+ T val = identity;
62+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
63+ val = temp[thread_id - stride];
64+ }
65+ __syncthreads ();
66+ temp[thread_id] = val < temp[thread_id] ? val : temp[thread_id];
67+ __syncthreads ();
6368 }
64- __syncthreads ();
65- temp[thread_id] = val < temp[thread_id] ? val : temp[thread_id];
66- __syncthreads ();
67- }
6869
69- if (thread_id < lenparents) {
7070 int64_t parent = parents[thread_id];
7171 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
7272 atomicMin (&toptr[parent], temp[thread_id]);
Original file line number | Diff line number | Diff line change
@@ -59,17 +59,17 @@ awkward_reduce_prod_b(
5959 }
6060 __syncthreads ();
6161
62- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
63- T val = 1 ;
64- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
65- val = temp[thread_id - stride];
62+ if (thread_id < lenparents) {
63+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
64+ T val = 1 ;
65+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
66+ val = temp[thread_id - stride];
67+ }
68+ __syncthreads ();
69+ temp[thread_id] *= val;
70+ __syncthreads ();
6671 }
67- __syncthreads ();
68- temp[thread_id] *= val;
69- __syncthreads ();
70- }
7172
72- if (thread_id < lenparents) {
7373 int64_t parent = parents[thread_id];
7474 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
7575 atomicMul (&atomic_toptr[parent], temp[thread_id]);
Original file line number | Diff line number | Diff line change
@@ -59,17 +59,17 @@ awkward_reduce_prod_bool_b(
5959 }
6060 __syncthreads ();
6161
62- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
63- T val = 1 ;
64- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
65- val = temp[thread_id - stride];
62+ if (thread_id < lenparents) {
63+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
64+ T val = 1 ;
65+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
66+ val = temp[thread_id - stride];
67+ }
68+ __syncthreads ();
69+ temp[thread_id] &= (val != 0 );
70+ __syncthreads ();
6671 }
67- __syncthreads ();
68- temp[thread_id] &= (val != 0 );
69- __syncthreads ();
70- }
7172
72- if (thread_id < lenparents) {
7373 int64_t parent = parents[thread_id];
7474 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
7575 atomicAnd (&atomic_toptr[parent], temp[thread_id]);
Original file line number | Diff line number | Diff line change
@@ -54,17 +54,17 @@ awkward_reduce_sum_b(
5454 }
5555 __syncthreads ();
5656
57- for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
58- T val = 0 ;
59- if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
60- val = temp[thread_id - stride];
57+ if (thread_id < lenparents) {
58+ for (int64_t stride = 1 ; stride < blockDim .x ; stride *= 2 ) {
59+ T val = 0 ;
60+ if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
61+ val = temp[thread_id - stride];
62+ }
63+ __syncthreads ();
64+ temp[thread_id] += val;
65+ __syncthreads ();
6166 }
62- __syncthreads ();
63- temp[thread_id] += val;
64- __syncthreads ();
65- }
6667
67- if (thread_id < lenparents) {
6868 int64_t parent = parents[thread_id];
6969 if (idx == blockDim .x - 1 || thread_id == lenparents - 1 || parents[thread_id] != parents[thread_id + 1 ]) {
7070 atomicAdd (&toptr[parent], temp[thread_id]);
You can’t perform that action at this time.
0 commit comments