@@ -111,8 +111,8 @@ float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
111111// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
112112// CHECK-NEXT: [[ENTRY:.*:]]
113113// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[A]], i32 0
114- // CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <1 x double> [[B]], i32 0
115- // CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE4]])
114+ // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x double> [[B]], i32 0
115+ // CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE3]])
116116// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[A]], double [[VMULXD_F64_I]], i32 0
117117// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
118118//
@@ -196,19 +196,13 @@ float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
196196// CHECK-LABEL: define dso_local <1 x double> @test_vfma_lane_f64(
197197// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
198198// CHECK-NEXT: [[ENTRY:.*:]]
199- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
200- // CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
201- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
202- // CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
203- // CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
204- // CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
205- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
206- // CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
207- // CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
208- // CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
209- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
210- // CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
211- // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
199+ // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
200+ // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to <8 x i8>
201+ // CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
202+ // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
203+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
204+ // CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
205+ // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
212206// CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
213207// CHECK-NEXT: ret <1 x double> [[FMLA2]]
214208//
@@ -219,20 +213,14 @@ float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
219213// CHECK-LABEL: define dso_local <1 x double> @test_vfms_lane_f64(
220214// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
221215// CHECK-NEXT: [[ENTRY:.*:]]
222- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
223- // CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
224216// CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
225- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
226- // CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
227- // CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
228- // CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
229- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
230- // CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
231- // CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
232- // CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
233- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
234- // CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
235- // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
217+ // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
218+ // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to <8 x i8>
219+ // CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
220+ // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
221+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
222+ // CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
223+ // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
236224// CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
237225// CHECK-NEXT: ret <1 x double> [[FMLA2]]
238226//
@@ -243,21 +231,16 @@ float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
243231// CHECK-LABEL: define dso_local <1 x double> @test_vfma_laneq_f64(
244232// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
245233// CHECK-NEXT: [[ENTRY:.*:]]
246- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
247- // CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
248- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
249- // CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
250- // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
251- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
252- // CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
253- // CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
254- // CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
255- // CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
256- // CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
257- // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
258- // CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
259- // CHECK-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
260- // CHECK-NEXT: ret <1 x double> [[TMP10]]
234+ // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
235+ // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to <8 x i8>
236+ // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
237+ // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
238+ // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
239+ // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
240+ // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
241+ // CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
242+ // CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
243+ // CHECK-NEXT: ret <1 x double> [[TMP7]]
261244//
262245float64x1_t test_vfma_laneq_f64 (float64x1_t a , float64x1_t b , float64x2_t v ) {
263246 return vfma_laneq_f64 (a , b , v , 0 );
@@ -266,22 +249,17 @@ float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
266249// CHECK-LABEL: define dso_local <1 x double> @test_vfms_laneq_f64(
267250// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
268251// CHECK-NEXT: [[ENTRY:.*:]]
269- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
270- // CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
271252// CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
272- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
273- // CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
274- // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
275- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
276- // CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
277- // CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
278- // CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
279- // CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
280- // CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
281- // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
282- // CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
283- // CHECK-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
284- // CHECK-NEXT: ret <1 x double> [[TMP10]]
253+ // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
254+ // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to <8 x i8>
255+ // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
256+ // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
257+ // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
258+ // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
259+ // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
260+ // CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
261+ // CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
262+ // CHECK-NEXT: ret <1 x double> [[TMP7]]
285263//
286264float64x1_t test_vfms_laneq_f64 (float64x1_t a , float64x1_t b , float64x2_t v ) {
287265 return vfms_laneq_f64 (a , b , v , 0 );
@@ -555,8 +533,8 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
555533// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
556534// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
557535// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
558- // CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
559- // CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE9]])
536+ // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
537+ // CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE8]])
560538// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
561539// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
562540//
0 commit comments