
Commit 50055c5

[ARM] Fix undefined behaviour in bf16->float conversion
This was implementing the bf16->float conversion function using a left-shift of a signed integer, so for negative floating-point values a 1 was being shifted into the sign bit of the signed integer intermediate value. This is undefined behaviour, and was caught by UBSan.

The vector versions are code-generated via Neon builtin functions, so they probably don't have the same UB problem, but I've updated them anyway to be consistent.

Fixes #61983
1 parent 7120be4 commit 50055c5
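
For reference, a minimal stand-alone C sketch of the scalar pattern before and after the change; the helper names and the memcpy-based reinterpretation are illustrative only, not the code emitted into the generated arm_neon.h:

#include <stdint.h>
#include <string.h>

/* Before: the bf16 bits were reinterpreted as a signed 16-bit value,
 * sign-extended to int32_t, and shifted left. For a negative float the top
 * bit is 1, so the shift pushes a 1 into the sign bit of the signed
 * intermediate -- undefined behaviour in C, which UBSan reports. */
static float bf16_to_f32_old(uint16_t bits) {
  int32_t shifted = (int32_t)(int16_t)bits << 16; /* UB when bits >= 0x8000 */
  float f;
  memcpy(&f, &shifted, sizeof f);
  return f;
}

/* After: widen and shift in unsigned arithmetic, which is well defined for
 * every bit pattern, then reinterpret the result as float. */
static float bf16_to_f32_new(uint16_t bits) {
  uint32_t shifted = (uint32_t)bits << 16;
  float f;
  memcpy(&f, &shifted, sizeof f);
  return f;
}

Both helpers produce the same value for every input; only the old one can trip UBSan, because the signed left shift of a negative intermediate is undefined even though common ABIs happen to give the expected bit pattern.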

2 files changed: 13 additions & 13 deletions


clang/include/clang/Basic/arm_neon.td

Lines changed: 3 additions & 3 deletions
@@ -252,7 +252,7 @@ def OP_BFMLALT_LN
 
 def OP_VCVT_F32_BF16
     : Op<(bitcast "R",
-          (call "vshll_n", (bitcast "int16x4_t", $p0),
+          (call "vshll_n", (bitcast "uint16x4_t", $p0),
           (literal "int32_t", "16")))>;
 def OP_VCVT_F32_BF16_LO
     : Op<(call "vcvt_f32_bf16", (call "vget_low", $p0))>;
@@ -275,8 +275,8 @@ def OP_VCVT_BF16_F32_HI_A32
           (call "vget_low", $p0))>;
 
 def OP_CVT_F32_BF16
-    : Op<(bitcast "R", (op "<<", (cast "int32_t", (bitcast "int16_t", $p0)),
-                       (literal "int32_t", "16")))>;
+    : Op<(bitcast "R", (op "<<", (cast "uint32_t", (bitcast "uint16_t", $p0)),
+                       (literal "uint32_t", "16")))>;
 
 //===----------------------------------------------------------------------===//
 // Auxiliary Instructions
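
For intuition, the vector op above roughly corresponds to the ACLE intrinsics sketch below, assuming a target with the bf16 extension and the usual bf16 reinterpret intrinsics; the function name is made up for illustration, and the real arm_neon.h body is generated from the TableGen definition, not written by hand:

#include <arm_neon.h>

/* Sketch of what OP_VCVT_F32_BF16 now amounts to: reinterpret the bf16 lanes
 * as unsigned 16-bit integers, widen and shift left by 16 with vshll_n_u16,
 * then reinterpret the 32-bit lanes as floats. Illustrative only. */
static inline float32x4_t vcvt_f32_bf16_sketch(bfloat16x4_t a) {
  uint16x4_t bits = vreinterpret_u16_bf16(a); /* was a bitcast to int16x4_t */
  uint32x4_t wide = vshll_n_u16(bits, 16);    /* zero-extend and shift left */
  return vreinterpretq_f32_u32(wide);         /* bitcast "R" back to float32x4_t */
}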

clang/test/CodeGen/arm-bf16-convert-intrinsics.c

Lines changed: 10 additions & 10 deletions
@@ -29,7 +29,7 @@
 // CHECK-A64-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_808_I]], align 8
 // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I]], align 8
 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-A64-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I]], align 16
 // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I]], align 16
@@ -42,7 +42,7 @@
 // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_808_I]], align 8
 // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I]], align 8
 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I]], align 8
 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I]], align 8
@@ -64,7 +64,7 @@
 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8081_I]], ptr [[__REINT_808_I]], align 8
 // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_808_I]], align 8
 // CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
-// CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
 // CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 16)
 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I]], align 8
 // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[__REINT1_808_I]], align 8
@@ -82,7 +82,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
 // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
 // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 16
 // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 16
@@ -96,7 +96,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
 // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
@@ -137,7 +137,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8081_I_I]], ptr [[__REINT_808_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
+// CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
 // CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
@@ -155,7 +155,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
 // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
 // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 16
 // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 16
@@ -169,7 +169,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
 // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
@@ -210,7 +210,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
 // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8081_I_I]], ptr [[__REINT_808_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
+// CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
 // CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
 // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
@@ -427,7 +427,7 @@ bfloat16_t test_vcvth_bf16_f32(float32_t a) {
 // CHECK-NEXT: [[__REINT1_I:%.*]] = alloca i32, align 4
 // CHECK-NEXT: store bfloat [[A:%.*]], ptr [[__REINT_I]], align 2
 // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[__REINT_I]], align 2
-// CHECK-NEXT: [[CONV_I:%.*]] = sext i16 [[TMP0]] to i32
+// CHECK-NEXT: [[CONV_I:%.*]] = zext i16 [[TMP0]] to i32
 // CHECK-NEXT: [[SHL_I:%.*]] = shl i32 [[CONV_I]], 16
 // CHECK-NEXT: store i32 [[SHL_I]], ptr [[__REINT1_I]], align 4
 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__REINT1_I]], align 4
