Skip to content

Commit 9a19c6e

Browse files
committed
Fix bug in layernorm loop ordering.
1 parent 25ef63f commit 9a19c6e

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

src/cpu/kernels.cc

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -489,9 +489,11 @@ namespace ctranslate2 {
489 489
const float variance = std::max(sum_squares / depth - mean * mean, 0.f);
490 490
const float rstd = 1.f / std::sqrt(variance + epsilon);
491 491

492-
for (dim_t j = 0; j < depth; j += weights_size) {
492+
int inner_dim = depth / weights_size;
493+
for (dim_t j = 0; j < inner_dim; j ++) {
493 494
for (dim_t k = 0; k < weights_size; k++) {
494-
y[j+k] = (x[j+k] - mean) * rstd * gamma[k] + beta[k];
495+
int idx = k * inner_dim + j;
496+
y[idx] = (x[idx] - mean) * rstd * gamma[k] + beta[k];
495 497
}
496 498
}
497 499
}

0 commit comments

Comments
 (0)