Skip to content

Commit c81564c

Browse files
committed
arm changes
1 parent 76ea734 commit c81564c

File tree

2 files changed

+199
-152
lines changed

2 files changed

+199
-152
lines changed

clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem.c

Lines changed: 71 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
5656
// CHECK-NEXT: [[ENTRY:.*:]]
5757
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to double
5858
// CHECK-NEXT: [[TMP1:%.*]] = fmul double [[TMP0]], [[B]]
59-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to <1 x double>
60-
// CHECK-NEXT: ret <1 x double> [[TMP2]]
59+
// CHECK-NEXT: [[REF_TMP_I_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double [[TMP1]], i32 0
60+
// CHECK-NEXT: ret <1 x double> [[REF_TMP_I_0_VEC_INSERT]]
6161
//
6262
float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) {
6363
return vmul_n_f64(a, b);
@@ -111,8 +111,8 @@ float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
111111
// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
112112
// CHECK-NEXT: [[ENTRY:.*:]]
113113
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[A]], i32 0
114-
// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x double> [[B]], i32 0
115-
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE3]])
114+
// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <1 x double> [[B]], i32 0
115+
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE4]])
116116
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[A]], double [[VMULXD_F64_I]], i32 0
117117
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
118118
//
@@ -196,13 +196,19 @@ float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
196196
// CHECK-LABEL: define dso_local <1 x double> @test_vfma_lane_f64(
197197
// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
198198
// CHECK-NEXT: [[ENTRY:.*:]]
199-
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
200-
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to <8 x i8>
201-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
202-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
203-
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
204-
// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
205-
// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
199+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
200+
// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
201+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
202+
// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
203+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
204+
// CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
205+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
206+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
207+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
208+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
209+
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
210+
// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
211+
// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
206212
// CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
207213
// CHECK-NEXT: ret <1 x double> [[FMLA2]]
208214
//
@@ -213,14 +219,20 @@ float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
213219
// CHECK-LABEL: define dso_local <1 x double> @test_vfms_lane_f64(
214220
// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
215221
// CHECK-NEXT: [[ENTRY:.*:]]
222+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
223+
// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
216224
// CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
217-
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
218-
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to <8 x i8>
219-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
220-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
221-
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
222-
// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
223-
// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
225+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
226+
// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
227+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
228+
// CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
229+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
230+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
231+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
232+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
233+
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
234+
// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
235+
// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
224236
// CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
225237
// CHECK-NEXT: ret <1 x double> [[FMLA2]]
226238
//
@@ -231,16 +243,21 @@ float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
231243
// CHECK-LABEL: define dso_local <1 x double> @test_vfma_laneq_f64(
232244
// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
233245
// CHECK-NEXT: [[ENTRY:.*:]]
234-
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
235-
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to <8 x i8>
236-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
237-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
238-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
239-
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
240-
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
241-
// CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
242-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
243-
// CHECK-NEXT: ret <1 x double> [[TMP7]]
246+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
247+
// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
248+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
249+
// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
250+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
251+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
252+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
253+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
254+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
255+
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
256+
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
257+
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
258+
// CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
259+
// CHECK-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
260+
// CHECK-NEXT: ret <1 x double> [[TMP10]]
244261
//
245262
float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
246263
return vfma_laneq_f64(a, b, v, 0);
@@ -249,17 +266,22 @@ float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
249266
// CHECK-LABEL: define dso_local <1 x double> @test_vfms_laneq_f64(
250267
// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
251268
// CHECK-NEXT: [[ENTRY:.*:]]
269+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
270+
// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
252271
// CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
253-
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to <8 x i8>
254-
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to <8 x i8>
255-
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
256-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
257-
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
258-
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
259-
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
260-
// CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
261-
// CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
262-
// CHECK-NEXT: ret <1 x double> [[TMP7]]
272+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
273+
// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
274+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
275+
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
276+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
277+
// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
278+
// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
279+
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
280+
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
281+
// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
282+
// CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
283+
// CHECK-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
284+
// CHECK-NEXT: ret <1 x double> [[TMP10]]
263285
//
264286
float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
265287
return vfms_laneq_f64(a, b, v, 0);
@@ -530,12 +552,12 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
530552
// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_lane_f64_0(
531553
// CHECK-SAME: ) #[[ATTR0]] {
532554
// CHECK-NEXT: [[ENTRY:.*:]]
533-
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
534-
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
535-
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
536-
// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
537-
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE8]])
538-
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
555+
// CHECK-NEXT: [[__PROMOTE_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FD6304BC43AB5C2, i32 0
556+
// CHECK-NEXT: [[__PROMOTE2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FEE211E215AEEF3, i32 0
557+
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], i32 0
558+
// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <1 x double> [[__PROMOTE2_SROA_0_0_VEC_INSERT]], i32 0
559+
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE9]])
560+
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], double [[VMULXD_F64_I]], i32 0
539561
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
540562
//
541563
float64x1_t test_vmulx_lane_f64_0() {
@@ -552,13 +574,13 @@ float64x1_t test_vmulx_lane_f64_0() {
552574
// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_laneq_f64_2(
553575
// CHECK-SAME: ) #[[ATTR0]] {
554576
// CHECK-NEXT: [[ENTRY:.*:]]
555-
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
556-
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
557-
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
558-
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
577+
// CHECK-NEXT: [[__PROMOTE_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FD6304BC43AB5C2, i32 0
578+
// CHECK-NEXT: [[__PROMOTE2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x double> undef, double 0x3FEE211E215AEEF3, i32 0
579+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], <1 x double> [[__PROMOTE2_SROA_0_0_VEC_INSERT]], <2 x i32> <i32 0, i32 1>
580+
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], i32 0
559581
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
560582
// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
561-
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
583+
// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[__PROMOTE_SROA_0_0_VEC_INSERT]], double [[VMULXD_F64_I]], i32 0
562584
// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
563585
//
564586
float64x1_t test_vmulx_laneq_f64_2() {

0 commit comments

Comments
 (0)