We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c09c876, commit a7c0d33 (Copy full SHA for a7c0d33)
src/cpu/kernels.cc
@@ -489,9 +489,11 @@ namespace ctranslate2 {
489
const float variance = std::max(sum_squares / depth - mean * mean, 0.f);
490
const float rstd = 1.f / std::sqrt(variance + epsilon);
491
492
- for (dim_t j = 0; j < depth; j += weights_size) {
+ int inner_dim = depth / weights_size;
493
+ for (dim_t j = 0; j < inner_dim; j ++) {
494
for (dim_t k = 0; k < weights_size; k++) {
- y[j+k] = (x[j+k] - mean) * rstd * gamma[k] + beta[k];
495
+ int idx = k * inner_dim + j;
496
+ y[idx] = (x[idx] - mean) * rstd * gamma[k] + beta[k];
497
}
498
499
0 commit comments