2424
2525// CHECK-A64-LABEL: @test_vcvt_f32_bf16(
2626// CHECK-A64-NEXT: entry:
27- // CHECK-A64-NEXT: [[__REINT_836_I :%.*]] = alloca <4 x bfloat>, align 8
28- // CHECK-A64-NEXT: [[__REINT1_836_I :%.*]] = alloca <4 x i32>, align 16
29- // CHECK-A64-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_836_I ]], align 8
30- // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I ]], align 8
27+ // CHECK-A64-NEXT: [[__REINT_808_I :%.*]] = alloca <4 x bfloat>, align 8
28+ // CHECK-A64-NEXT: [[__REINT1_808_I :%.*]] = alloca <4 x i32>, align 16
29+ // CHECK-A64-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_808_I ]], align 8
30+ // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I ]], align 8
3131// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
32- // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
32+ // CHECK-A64-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
3333// CHECK-A64-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
34- // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I ]], align 16
35- // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I ]], align 16
34+ // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I ]], align 16
35+ // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I ]], align 16
3636// CHECK-A64-NEXT: ret <4 x float> [[TMP3]]
3737//
3838// CHECK-A32-HARDFP-LABEL: @test_vcvt_f32_bf16(
3939// CHECK-A32-HARDFP-NEXT: entry:
40- // CHECK-A32-HARDFP-NEXT: [[__REINT_836_I :%.*]] = alloca <4 x bfloat>, align 8
41- // CHECK-A32-HARDFP-NEXT: [[__REINT1_836_I :%.*]] = alloca <4 x i32>, align 8
42- // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_836_I ]], align 8
43- // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I ]], align 8
40+ // CHECK-A32-HARDFP-NEXT: [[__REINT_808_I :%.*]] = alloca <4 x bfloat>, align 8
41+ // CHECK-A32-HARDFP-NEXT: [[__REINT1_808_I :%.*]] = alloca <4 x i32>, align 8
42+ // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_808_I ]], align 8
43+ // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I ]], align 8
4444// CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
45- // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
45+ // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
4646// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
47- // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I ]], align 8
48- // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I ]], align 8
47+ // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I ]], align 8
48+ // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I ]], align 8
4949// CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]]
5050//
5151// CHECK-A32-SOFTFP-LABEL: @test_vcvt_f32_bf16(
5252// CHECK-A32-SOFTFP-NEXT: entry:
53- // CHECK-A32-SOFTFP-NEXT: [[__P0_836_I :%.*]] = alloca <4 x bfloat>, align 8
54- // CHECK-A32-SOFTFP-NEXT: [[__REINT_836_I :%.*]] = alloca <4 x bfloat>, align 8
55- // CHECK-A32-SOFTFP-NEXT: [[__REINT1_836_I :%.*]] = alloca <4 x i32>, align 8
53+ // CHECK-A32-SOFTFP-NEXT: [[__P0_808_I :%.*]] = alloca <4 x bfloat>, align 8
54+ // CHECK-A32-SOFTFP-NEXT: [[__REINT_808_I :%.*]] = alloca <4 x bfloat>, align 8
55+ // CHECK-A32-SOFTFP-NEXT: [[__REINT1_808_I :%.*]] = alloca <4 x i32>, align 8
5656// CHECK-A32-SOFTFP-NEXT: [[A:%.*]] = alloca <4 x bfloat>, align 8
5757// CHECK-A32-SOFTFP-NEXT: [[COERCE:%.*]] = alloca <4 x bfloat>, align 8
5858// CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[A_COERCE:%.*]], ptr [[A]], align 8
5959// CHECK-A32-SOFTFP-NEXT: [[A1:%.*]] = load <4 x bfloat>, ptr [[A]], align 8
6060// CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[A1]], ptr [[COERCE]], align 8
6161// CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE]], align 8
62- // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP0]], ptr [[__P0_836_I ]], align 8
63- // CHECK-A32-SOFTFP-NEXT: [[__P0_8361_I :%.*]] = load <4 x bfloat>, ptr [[__P0_836_I ]], align 8
64- // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8361_I ]], ptr [[__REINT_836_I ]], align 8
65- // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_836_I ]], align 8
62+ // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP0]], ptr [[__P0_808_I ]], align 8
63+ // CHECK-A32-SOFTFP-NEXT: [[__P0_8081_I :%.*]] = load <4 x bfloat>, ptr [[__P0_808_I ]], align 8
64+ // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8081_I ]], ptr [[__REINT_808_I ]], align 8
65+ // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_808_I ]], align 8
6666// CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
67- // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
67+ // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
6868// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 16)
69- // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I ]], align 8
70- // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[__REINT1_836_I ]], align 8
69+ // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I ]], align 8
70+ // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[__REINT1_808_I ]], align 8
7171// CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP4]]
7272//
7373float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a ) {
@@ -76,39 +76,39 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
7676
7777// CHECK-A64-LABEL: @test_vcvtq_low_f32_bf16(
7878// CHECK-A64-NEXT: entry:
79- // CHECK-A64-NEXT: [[__REINT_836_I_I :%.*]] = alloca <4 x bfloat>, align 8
80- // CHECK-A64-NEXT: [[__REINT1_836_I_I :%.*]] = alloca <4 x i32>, align 16
79+ // CHECK-A64-NEXT: [[__REINT_808_I_I :%.*]] = alloca <4 x bfloat>, align 8
80+ // CHECK-A64-NEXT: [[__REINT1_808_I_I :%.*]] = alloca <4 x i32>, align 16
8181// CHECK-A64-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
82- // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_836_I_I ]], align 8
83- // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I ]], align 8
82+ // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I ]], align 8
83+ // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I ]], align 8
8484// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
85- // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
85+ // CHECK-A64-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
8686// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
87- // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I ]], align 16
88- // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I ]], align 16
87+ // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I ]], align 16
88+ // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I ]], align 16
8989// CHECK-A64-NEXT: ret <4 x float> [[TMP3]]
9090//
9191// CHECK-A32-HARDFP-LABEL: @test_vcvtq_low_f32_bf16(
9292// CHECK-A32-HARDFP-NEXT: entry:
93- // CHECK-A32-HARDFP-NEXT: [[__REINT_836_I_I :%.*]] = alloca <4 x bfloat>, align 8
94- // CHECK-A32-HARDFP-NEXT: [[__REINT1_836_I_I :%.*]] = alloca <4 x i32>, align 8
93+ // CHECK-A32-HARDFP-NEXT: [[__REINT_808_I_I :%.*]] = alloca <4 x bfloat>, align 8
94+ // CHECK-A32-HARDFP-NEXT: [[__REINT1_808_I_I :%.*]] = alloca <4 x i32>, align 8
9595// CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
96- // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_836_I_I ]], align 8
97- // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I ]], align 8
96+ // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I ]], align 8
97+ // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I ]], align 8
9898// CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
99- // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
99+ // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
100100// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
101- // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I ]], align 8
102- // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I ]], align 8
101+ // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I ]], align 8
102+ // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I ]], align 8
103103// CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]]
104104//
105105// CHECK-A32-SOFTFP-LABEL: @test_vcvtq_low_f32_bf16(
106106// CHECK-A32-SOFTFP-NEXT: entry:
107107// CHECK-A32-SOFTFP-NEXT: [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8
108108// CHECK-A32-SOFTFP-NEXT: [[__P0_I2:%.*]] = alloca <8 x bfloat>, align 8
109- // CHECK-A32-SOFTFP-NEXT: [[__P0_836_I_I :%.*]] = alloca <4 x bfloat>, align 8
110- // CHECK-A32-SOFTFP-NEXT: [[__REINT_836_I_I :%.*]] = alloca <4 x bfloat>, align 8
111- // CHECK-A32-SOFTFP-NEXT: [[__REINT1_836_I_I :%.*]] = alloca <4 x i32>, align 8
109+ // CHECK-A32-SOFTFP-NEXT: [[__P0_808_I_I :%.*]] = alloca <4 x bfloat>, align 8
110+ // CHECK-A32-SOFTFP-NEXT: [[__REINT_808_I_I :%.*]] = alloca <4 x bfloat>, align 8
111+ // CHECK-A32-SOFTFP-NEXT: [[__REINT1_808_I_I :%.*]] = alloca <4 x i32>, align 8
112112// CHECK-A32-SOFTFP-NEXT: [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8
113113// CHECK-A32-SOFTFP-NEXT: [[COERCE_I:%.*]] = alloca <8 x bfloat>, align 8
114114// CHECK-A32-SOFTFP-NEXT: [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8
@@ -132,15 +132,15 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
132132// CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[COERCE2_I]], align 8
133133// CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP3]], ptr [[COERCE3_I]], align 8
134134// CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[COERCE3_I]], align 8
135- // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP4]], ptr [[__P0_836_I_I ]], align 8
136- // CHECK-A32-SOFTFP-NEXT: [[__P0_8361_I_I :%.*]] = load <4 x bfloat>, ptr [[__P0_836_I_I ]], align 8
137- // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8361_I_I ]], ptr [[__REINT_836_I_I ]], align 8
138- // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I ]], align 8
135+ // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP4]], ptr [[__P0_808_I_I ]], align 8
136+ // CHECK-A32-SOFTFP-NEXT: [[__P0_8081_I_I :%.*]] = load <4 x bfloat>, ptr [[__P0_808_I_I ]], align 8
137+ // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8081_I_I ]], ptr [[__REINT_808_I_I ]], align 8
138+ // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I ]], align 8
139139// CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
140- // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
140+ // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
141141// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
142- // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I ]], align 8
143- // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I ]], align 8
142+ // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I ]], align 8
143+ // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I ]], align 8
144144// CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP8]]
145145//
146146float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a ) {
@@ -149,39 +149,39 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
149149
150150// CHECK-A64-LABEL: @test_vcvtq_high_f32_bf16(
151151// CHECK-A64-NEXT: entry:
152- // CHECK-A64-NEXT: [[__REINT_836_I_I :%.*]] = alloca <4 x bfloat>, align 8
153- // CHECK-A64-NEXT: [[__REINT1_836_I_I :%.*]] = alloca <4 x i32>, align 16
152+ // CHECK-A64-NEXT: [[__REINT_808_I_I :%.*]] = alloca <4 x bfloat>, align 8
153+ // CHECK-A64-NEXT: [[__REINT1_808_I_I :%.*]] = alloca <4 x i32>, align 16
154154// CHECK-A64-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
155- // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_836_I_I ]], align 8
156- // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I ]], align 8
155+ // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I ]], align 8
156+ // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I ]], align 8
157157// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
158- // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
158+ // CHECK-A64-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
159159// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
160- // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I ]], align 16
161- // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I ]], align 16
160+ // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I ]], align 16
161+ // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I ]], align 16
162162// CHECK-A64-NEXT: ret <4 x float> [[TMP3]]
163163//
164164// CHECK-A32-HARDFP-LABEL: @test_vcvtq_high_f32_bf16(
165165// CHECK-A32-HARDFP-NEXT: entry:
166- // CHECK-A32-HARDFP-NEXT: [[__REINT_836_I_I :%.*]] = alloca <4 x bfloat>, align 8
167- // CHECK-A32-HARDFP-NEXT: [[__REINT1_836_I_I :%.*]] = alloca <4 x i32>, align 8
166+ // CHECK-A32-HARDFP-NEXT: [[__REINT_808_I_I :%.*]] = alloca <4 x bfloat>, align 8
167+ // CHECK-A32-HARDFP-NEXT: [[__REINT1_808_I_I :%.*]] = alloca <4 x i32>, align 8
168168// CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
169- // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_836_I_I ]], align 8
170- // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I ]], align 8
169+ // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I ]], align 8
170+ // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I ]], align 8
171171// CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
172- // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
172+ // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
173173// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
174- // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I ]], align 8
175- // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I ]], align 8
174+ // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I ]], align 8
175+ // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I ]], align 8
176176// CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]]
177177//
178178// CHECK-A32-SOFTFP-LABEL: @test_vcvtq_high_f32_bf16(
179179// CHECK-A32-SOFTFP-NEXT: entry:
180180// CHECK-A32-SOFTFP-NEXT: [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8
181181// CHECK-A32-SOFTFP-NEXT: [[__P0_I2:%.*]] = alloca <8 x bfloat>, align 8
182- // CHECK-A32-SOFTFP-NEXT: [[__P0_836_I_I :%.*]] = alloca <4 x bfloat>, align 8
183- // CHECK-A32-SOFTFP-NEXT: [[__REINT_836_I_I :%.*]] = alloca <4 x bfloat>, align 8
184- // CHECK-A32-SOFTFP-NEXT: [[__REINT1_836_I_I :%.*]] = alloca <4 x i32>, align 8
182+ // CHECK-A32-SOFTFP-NEXT: [[__P0_808_I_I :%.*]] = alloca <4 x bfloat>, align 8
183+ // CHECK-A32-SOFTFP-NEXT: [[__REINT_808_I_I :%.*]] = alloca <4 x bfloat>, align 8
184+ // CHECK-A32-SOFTFP-NEXT: [[__REINT1_808_I_I :%.*]] = alloca <4 x i32>, align 8
185185// CHECK-A32-SOFTFP-NEXT: [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8
186186// CHECK-A32-SOFTFP-NEXT: [[COERCE_I:%.*]] = alloca <8 x bfloat>, align 8
187187// CHECK-A32-SOFTFP-NEXT: [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8
@@ -205,15 +205,15 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
205205// CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[COERCE2_I]], align 8
206206// CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP3]], ptr [[COERCE3_I]], align 8
207207// CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[COERCE3_I]], align 8
208- // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP4]], ptr [[__P0_836_I_I ]], align 8
209- // CHECK-A32-SOFTFP-NEXT: [[__P0_8361_I_I :%.*]] = load <4 x bfloat>, ptr [[__P0_836_I_I ]], align 8
210- // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8361_I_I ]], ptr [[__REINT_836_I_I ]], align 8
211- // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I ]], align 8
208+ // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP4]], ptr [[__P0_808_I_I ]], align 8
209+ // CHECK-A32-SOFTFP-NEXT: [[__P0_8081_I_I :%.*]] = load <4 x bfloat>, ptr [[__P0_808_I_I ]], align 8
210+ // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8081_I_I ]], ptr [[__REINT_808_I_I ]], align 8
211+ // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I ]], align 8
212212// CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
213- // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
213+ // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
214214// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
215- // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I ]], align 8
216- // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I ]], align 8
215+ // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I ]], align 8
216+ // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I ]], align 8
217217// CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP8]]
218218//
219219float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a ) {
@@ -427,7 +427,7 @@ bfloat16_t test_vcvth_bf16_f32(float32_t a) {
427427// CHECK-NEXT: [[__REINT1_I:%.*]] = alloca i32, align 4
428428// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[__REINT_I]], align 2
429429// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[__REINT_I]], align 2
430- // CHECK-NEXT: [[CONV_I:%.*]] = sext i16 [[TMP0]] to i32
430+ // CHECK-NEXT: [[CONV_I:%.*]] = zext i16 [[TMP0]] to i32
431431// CHECK-NEXT: [[SHL_I:%.*]] = shl i32 [[CONV_I]], 16
432432// CHECK-NEXT: store i32 [[SHL_I]], ptr [[__REINT1_I]], align 4
433433// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__REINT1_I]], align 4
0 commit comments