Skip to content

Commit c5a816e

Browse files
Fixed quantized_layer_norm
Differential Revision: D60811160 Pull Request resolved: #4554
1 parent 37c4f97 commit c5a816e

File tree

2 files changed

+12
-14
lines changed

2 files changed

+12
-14
lines changed

backends/cadence/reference/operators/quantized_layer_norm.cpp

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ namespace native {
2525
template <typename T>
2626
void quantized_layer_norm_(
2727
const Tensor& input,
28-
float input_scale,
28+
double input_scale,
2929
int64_t input_zero_point,
3030
const Tensor& weight,
3131
const Tensor& bias,
@@ -39,23 +39,22 @@ void quantized_layer_norm_(
3939
const float* __restrict__ weight_data = weight.const_data_ptr<float>();
4040
const float* __restrict__ bias_data = bias.const_data_ptr<float>();
4141

42-
float output_inv_scale = XT_RECIP_S(output_scale);
42+
float output_inv_scale = 1.0f / output_scale;
4343

4444
size_t last_dim = input.size(input.dim() - 1);
4545
size_t leading_dims = getLeadingDims(input, input.dim() - 1);
4646

4747
// Visualize the input tensor as a set of 1d vectors, and compute the
4848
// layer_norm for each vector.
4949
for (size_t i = 0; i < leading_dims; ++i) {
50-
const T* __restrict__ x = in_data + i * last_dim;
51-
T* __restrict__ y = out_data + i * last_dim;
50+
const T* x = in_data + i * last_dim;
51+
T* y = out_data + i * last_dim;
5252

5353
// compute sum and squared sum. The fp32 sum can be approximated as:
5454
// (X_1 - in_zero_point) * in_scale + (X_2 - in_zero_point) * in_scale + ...
5555
// (X_N - in_zero_point) * in_scale.
5656
int32_t sum = 0;
5757
int32_t sq_sum = last_dim * input_zero_point * input_zero_point;
58-
#pragma simd
5958
for (size_t j = 0; j < last_dim; ++j) {
6059
int32_t val = x[j];
6160
sum += val;
@@ -64,19 +63,18 @@ void quantized_layer_norm_(
6463
sq_sum -= (2 * sum * input_zero_point);
6564
sum -= (last_dim * input_zero_point);
6665

67-
float mean = XT_DIV_S(XT_MUL_S(input_scale, sum), last_dim);
66+
float mean = (input_scale * sum) / last_dim;
6867
float variance =
69-
XT_DIV_S(
70-
XT_MUL_S(sq_sum, XT_MUL_S(input_scale, input_scale)), last_dim) -
71-
XT_MUL_S(mean, mean);
72-
float inv_std = XT_RECIP_S(XT_SQRT_S(XT_ADD_S(variance, (float)eps)));
68+
(sq_sum * input_scale * input_scale) / last_dim - mean * mean;
69+
float inv_std = 1.0f / std::sqrt(variance + eps);
7370

7471
// y = (x - mean) / std * kGamma + kBeta
75-
#pragma simd
76-
for (size_t j = 0; j < last_dim; ++j) {
72+
for (int j = 0; j < last_dim; ++j) {
73+
// y[j] = (x[j] - mean) / std * kGamma + kBeta;
7774
// Since X is quantized, we dequantize it, compute fp32 result, and
7875
// quantize the result to an int8/uint8 value.
7976
float val = kernels::dequantize<T>(x[j], input_scale, input_zero_point);
77+
8078
val = (val - mean) * inv_std * weight_data[j] + bias_data[j];
8179
y[j] = kernels::quantize<T>(val, output_inv_scale, output_zero_point);
8280
}

examples/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ The [`arm/`](./arm) directory contains scripts to help you run a PyTorch model o
6363

6464
You will find demos of [ExecuTorch QNN Backend](./qualcomm) in the [`qualcomm/`](./qualcomm) directory.
6565

66-
## Demo of ExecuTorch on Xtensa HiFi4 DSP
66+
## Demo of ExecuTorch on Cadence HiFi4 DSP
6767

68-
The [`xtensa/`](./xtensa) directory hosts a demo that showcases the process of exporting and executing a model on Xtensa Hifi4 DSP. You can utilize [this tutorial](../docs/source/build-run-xtensa.md) to guide you in configuring the demo and running it.
68+
The [`cadence/`](./cadence) directory hosts a demo that showcases the process of exporting and executing a model on Cadence HiFi4 DSP. You can utilize [this tutorial](../docs/source/build-run-xtensa.md) to guide you in configuring the demo and running it.
6969

7070
## Dependencies
7171

0 commit comments

Comments
 (0)