
Commit f57d706

Use double to reduce
1 parent f94fdea commit f57d706

1 file changed, +7 -7 lines changed


paddle/fluid/operators/layer_norm_op.cu

Lines changed: 7 additions & 7 deletions
@@ -67,27 +67,27 @@ template <typename T, int BlockDim>
 __global__ void LayerNormForward(const T *x, const T *scale, const T *bias,
                                  T *y, T *mean, T *var, float epsilon,
                                  int feature_size) {
-  using BlockReduce = cub::BlockReduce<PairForLayerNorm<T>, BlockDim>;
+  using BlockReduce = cub::BlockReduce<PairForLayerNorm<double>, BlockDim>;
   __shared__ typename BlockReduce::TempStorage temp_storage;
 
   int beg_idx = blockIdx.x * feature_size + threadIdx.x;
   int end_idx = (blockIdx.x + 1) * feature_size;
 
   // Step 1: Reduce to calculate mean and var
-  T mean_val = static_cast<T>(0);
-  T var_val = static_cast<T>(0);
+  double mean_val = 0;
+  double var_val = 0;
   for (int i = beg_idx; i < end_idx; i += BlockDim) {
     T tmp = x[i];
     mean_val += tmp;
     var_val += (tmp * tmp);
   }
   auto pair = BlockReduce(temp_storage)
-                  .Reduce(PairForLayerNorm<T>(mean_val, var_val),
-                          PairForLayerNormAddFunctor<T>());
+                  .Reduce(PairForLayerNorm<double>(mean_val, var_val),
+                          PairForLayerNormAddFunctor<double>());
   if (threadIdx.x == 0) {
     auto tmp = pair.first_ / feature_size;
-    mean[blockIdx.x] = tmp;
-    var[blockIdx.x] = pair.second_ / feature_size - tmp * tmp;
+    mean[blockIdx.x] = static_cast<T>(tmp);
+    var[blockIdx.x] = static_cast<T>(pair.second_ / feature_size - tmp * tmp);
   }
   __syncthreads();
   mean_val = mean[blockIdx.x];
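
The change switches the per-thread accumulators and the CUB block reduction in LayerNormForward from T to double, so the running sums for mean and variance keep their precision even when T is a low-precision type such as float; only the final per-row results are cast back to T. Below is a minimal host-side C++ sketch (not part of the commit; sizes and values are illustrative) showing why the E[x^2] - E[x]^2 computation used here benefits from double accumulators:

```cpp
// Sketch: accumulate mean/variance sums of many nearly-equal floats,
// once in float (old behavior) and once in double (new behavior).
#include <cstdio>
#include <vector>

int main() {
  const int n = 1 << 20;
  std::vector<float> x(n, 1000.1f);  // large, constant-valued inputs

  float fsum = 0.f, fsq = 0.f;       // float accumulators (old behavior)
  double dsum = 0.0, dsq = 0.0;      // double accumulators (new behavior)
  for (float v : x) {
    fsum += v;
    fsq += v * v;
    dsum += v;
    dsq += double(v) * v;
  }

  float fmean = fsum / n;
  double dmean = dsum / n;
  // The true variance is 0 since every element is identical.
  std::printf("float  var = %g\n", fsq / n - fmean * fmean);
  std::printf("double var = %g\n", dsq / n - dmean * dmean);
  return 0;
}
```

With float accumulators the computed variance drifts well away from the true value of zero (and may even come out negative), while the double accumulators keep it essentially at zero; the kernel change applies the same idea inside the block-wide PairForLayerNorm reduction.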
