@@ -56,8 +56,8 @@ float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
 // CHECK-NEXT: [[ENTRY:.*:]]
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to double
 // CHECK-NEXT: [[TMP1:%.*]] = fmul double [[TMP0]], [[B]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to <1 x double>
-// CHECK-NEXT: ret <1 x double> [[TMP2]]
+// CHECK-NEXT: [[REF_TMP_I_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double [[TMP1]], i32 0
+// CHECK-NEXT: ret <1 x double> [[REF_TMP_I_0_VEC_INSERT]]
 //
 float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) {
   return vmul_n_f64(a, b);
@@ -111,8 +111,8 @@ float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
 // CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
 // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[A]], i32 0
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x double> [[B]], i32 0
-// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE3]])
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <1 x double> [[B]], i32 0
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE4]])
 // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[A]], double [[VMULXD_F64_I]], i32 0
 // CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
 //
@@ -196,13 +196,19 @@ float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
 // CHECK-LABEL: define dso_local <1 x double> @test_vfma_lane_f64(
 // CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
+// CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
 // CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
 // CHECK-NEXT: ret <1 x double> [[FMLA2]]
 //
@@ -213,14 +219,20 @@ float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
 // CHECK-LABEL: define dso_local <1 x double> @test_vfms_lane_f64(
 // CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
 // CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
+// CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
 // CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
 // CHECK-NEXT: ret <1 x double> [[FMLA2]]
 //
@@ -231,16 +243,21 @@ float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
 // CHECK-LABEL: define dso_local <1 x double> @test_vfma_laneq_f64(
 // CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-// CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
-// CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
-// CHECK-NEXT: ret <1 x double> [[TMP7]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+// CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP10]]
 //
 float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
   return vfma_laneq_f64(a, b, v, 0);
@@ -249,17 +266,22 @@ float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
 // CHECK-LABEL: define dso_local <1 x double> @test_vfms_laneq_f64(
 // CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
 // CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-// CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
-// CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
-// CHECK-NEXT: ret <1 x double> [[TMP7]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+// CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP10]]
 //
 float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
   return vfms_laneq_f64(a, b, v, 0);
@@ -530,12 +552,12 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
 // CHECK-LABEL: define dso_local <1 x double> @test_vmulx_lane_f64_0(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
-// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE8]])
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
+// CHECK-NEXT: [[__PROMOTE_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FD6304BC43AB5C2, i32 0
+// CHECK-NEXT: [[__PROMOTE2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FEE211E215AEEF3, i32 0
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], i32 0
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <1 x double> [[__PROMOTE2_SROA_0_0_VEC_INSERT]], i32 0
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE9]])
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], double [[VMULXD_F64_I]], i32 0
 // CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
 //
 float64x1_t test_vmulx_lane_f64_0() {
@@ -552,13 +574,13 @@ float64x1_t test_vmulx_lane_f64_0() {
 // CHECK-LABEL: define dso_local <1 x double> @test_vmulx_laneq_f64_2(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
-// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
+// CHECK-NEXT: [[__PROMOTE_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FD6304BC43AB5C2, i32 0
+// CHECK-NEXT: [[__PROMOTE2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FEE211E215AEEF3, i32 0
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], <1 x double> [[__PROMOTE2_SROA_0_0_VEC_INSERT]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], i32 0
 // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
 // CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
-// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], double [[VMULXD_F64_I]], i32 0
 // CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
 //
 float64x1_t test_vmulx_laneq_f64_2() {