diff --git a/backends/cadence/aot/ops_registrations.py b/backends/cadence/aot/ops_registrations.py
index e73de6ab7ce..c69aaa8091f 100644
--- a/backends/cadence/aot/ops_registrations.py
+++ b/backends/cadence/aot/ops_registrations.py
@@ -164,7 +164,7 @@ def quantized_layer_norm_meta(
     output_scale: float,
     output_zero_point: int,
 ) -> torch.Tensor:
-    return input.new_empty(input.size(), dtype=torch.uint8)
+    return input.new_empty(input.size(), dtype=input.dtype)


 @register_fake("cadence::quantized_relu")
diff --git a/backends/cadence/reference/operators/quantized_matmul_out.cpp b/backends/cadence/reference/operators/quantized_matmul_out.cpp
index b0a9393cd01..5e1a49c0b5a 100644
--- a/backends/cadence/reference/operators/quantized_matmul_out.cpp
+++ b/backends/cadence/reference/operators/quantized_matmul_out.cpp
@@ -49,7 +49,7 @@ __attribute__((noinline)) void qmatmul(
           sum += (X[i * n + k] - X_zero_point) * (y[k * p + j] - Y_zero_point);
         }
       }
-      Z[i * p + j] = kernels::quantize(sum, Z_scale, Z_zero_point);
+      Z[i * p + j] = kernels::quantize(sum, Z_scale, Z_zero_point);
     }
   }
 }
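
A minimal, hypothetical sketch (plain PyTorch, not part of the diff; the tensor shape and values are illustrative) of the intent behind the ops_registrations.py change: the fake/meta kernel for cadence::quantized_layer_norm should report an output dtype that follows the input tensor rather than hard-coding torch.uint8, so tracing an int8 input no longer produces a mismatched uint8 fake output.

import torch

# Toy int8 "quantized" activation; shape and contents are made up for illustration.
x = torch.zeros(2, 8, dtype=torch.int8)

# Old meta-kernel behavior: output dtype was pinned to uint8 regardless of input.
old_fake_out = x.new_empty(x.size(), dtype=torch.uint8)

# New meta-kernel behavior: output dtype follows the input dtype.
new_fake_out = x.new_empty(x.size(), dtype=x.dtype)

print(old_fake_out.dtype)  # torch.uint8
print(new_fake_out.dtype)  # torch.int8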