We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4634272 commit 0e1a3e9 (Copy full SHA for 0e1a3e9)
aten/src/ATen/native/cuda/Reduce.cuh
@@ -1062,7 +1062,7 @@ ReduceConfig setReduceConfig(const TensorIterator& iter){
1062
// In such case, values in each loaded vector always correspond to different outputs.
1063
if (fastest_moving_stride == sizeof(scalar_t)) {
1064
#ifdef USE_ROCM
1065
- if (reduction_on_fastest_striding_dimension && dim0 > 128 && iter.num_reduce_dims() == 1) {
+ if (reduction_on_fastest_striding_dimension && dim0 >= 128 && iter.num_reduce_dims() == 1) {
1066
#else
1067
if (reduction_on_fastest_striding_dimension && dim0 > 128 && iter.num_reduce_dims() == 1 && vt0 >= input_vec_size) {
1068
#endif
0 commit comments