@@ -25,9 +25,9 @@ define <4 x float> @shuf_fdiv_v4f32_yy(<4 x float> %x, <4 x float> %y, <4 x floa
2525define <4 x i32 > @shuf_add_v4i32_xx (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z ) {
2626; CHECK-LABEL: define <4 x i32> @shuf_add_v4i32_xx(
2727; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
28- ; CHECK-NEXT: [[TMP1 :%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 0>
29- ; CHECK-NEXT: [[R1 :%.*]] = shufflevector <4 x i32> [[Y ]], <4 x i32> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
30- ; CHECK-NEXT: [[R2:%.*]] = add <4 x i32> [[TMP1 ]], [[R1]]
28+ ; CHECK-NEXT: [[B0 :%.*]] = add <4 x i32> [[X]], [[Y]]
29+ ; CHECK-NEXT: [[B1 :%.*]] = add <4 x i32> [[X ]], [[Z]]
30+ ; CHECK-NEXT: [[R2:%.*]] = shufflevector <4 x i32> [[B0 ]], <4 x i32> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
3131; CHECK-NEXT: ret <4 x i32> [[R2]]
3232;
3333 %b0 = add <4 x i32 > %x , %y
@@ -36,15 +36,22 @@ define <4 x i32> @shuf_add_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
3636 ret <4 x i32 > %r
3737}
3838
39- ; For commutative instructions, common operand may be swapped.
39+ ; For commutative instructions, the common operand may be swapped (SSE: fmul is expensive, so the shuffles are folded ahead of a single fmul; AVX: fmul is cheap, so both fmuls are kept).
4040
4141define <4 x float > @shuf_fmul_v4f32_xx_swap (<4 x float > %x , <4 x float > %y , <4 x float > %z ) {
42- ; CHECK-LABEL: define <4 x float> @shuf_fmul_v4f32_xx_swap(
43- ; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
44- ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
45- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3>
46- ; CHECK-NEXT: [[R:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
47- ; CHECK-NEXT: ret <4 x float> [[R]]
42+ ; SSE-LABEL: define <4 x float> @shuf_fmul_v4f32_xx_swap(
43+ ; SSE-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
44+ ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
45+ ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3>
46+ ; SSE-NEXT: [[R:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
47+ ; SSE-NEXT: ret <4 x float> [[R]]
48+ ;
49+ ; AVX-LABEL: define <4 x float> @shuf_fmul_v4f32_xx_swap(
50+ ; AVX-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
51+ ; AVX-NEXT: [[B0:%.*]] = fmul <4 x float> [[X]], [[Y]]
52+ ; AVX-NEXT: [[B1:%.*]] = fmul <4 x float> [[Z]], [[X]]
53+ ; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
54+ ; AVX-NEXT: ret <4 x float> [[R]]
4855;
4956 %b0 = fmul <4 x float > %x , %y
5057 %b1 = fmul <4 x float > %z , %x
@@ -57,9 +64,9 @@ define <4 x float> @shuf_fmul_v4f32_xx_swap(<4 x float> %x, <4 x float> %y, <4 x
5764define <2 x i64 > @shuf_and_v2i64_yy_swap (<2 x i64 > %x , <2 x i64 > %y , <2 x i64 > %z ) {
5865; CHECK-LABEL: define <2 x i64> @shuf_and_v2i64_yy_swap(
5966; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
60- ; CHECK-NEXT: [[TMP1 :%.*]] = shufflevector <2 x i64> [[Y ]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
61- ; CHECK-NEXT: [[TMP2 :%.*]] = shufflevector <2 x i64> [[X ]], <2 x i64> [[Z]], <2 x i32> <i32 3, i32 0>
62- ; CHECK-NEXT: [[R:%.*]] = and <2 x i64> [[TMP1 ]], [[TMP2]]
67+ ; CHECK-NEXT: [[B0 :%.*]] = and <2 x i64> [[X ]], [[Y]]
68+ ; CHECK-NEXT: [[B1 :%.*]] = and <2 x i64> [[Y ]], [[Z]]
69+ ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[B0 ]], <2 x i64> [[B1]], <2 x i32> <i32 3, i32 0>
6370; CHECK-NEXT: ret <2 x i64> [[R]]
6471;
6572 %b0 = and <2 x i64 > %x , %y
@@ -84,15 +91,22 @@ define <4 x i32> @shuf_shl_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
8491 ret <4 x i32 > %r
8592}
8693
87- ; negative test - common operand, but not commutable
94+ ; Common operand, but shl is not commutable (SSE: vector shifts are expensive, so the shuffles are folded ahead of a single shl; AVX2: vector shifts are cheap, so both shls are kept).
8895
8996define <4 x i32 > @shuf_shl_v4i32_xx_swap (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z ) {
90- ; CHECK-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap(
91- ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
92- ; CHECK-NEXT: [[B0:%.*]] = shl <4 x i32> [[X]], [[Y]]
93- ; CHECK-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z]], [[X]]
94- ; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
95- ; CHECK-NEXT: ret <4 x i32> [[R1]]
97+ ; SSE-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap(
98+ ; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
99+ ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
100+ ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
101+ ; SSE-NEXT: [[R:%.*]] = shl <4 x i32> [[TMP1]], [[TMP2]]
102+ ; SSE-NEXT: ret <4 x i32> [[R]]
103+ ;
104+ ; AVX-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap(
105+ ; AVX-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
106+ ; AVX-NEXT: [[B0:%.*]] = shl <4 x i32> [[X]], [[Y]]
107+ ; AVX-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z]], [[X]]
108+ ; AVX-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
109+ ; AVX-NEXT: ret <4 x i32> [[R]]
96110;
97111 %b0 = shl <4 x i32 > %x , %y
98112 %b1 = shl <4 x i32 > %z , %x
@@ -116,7 +130,7 @@ define <2 x i64> @shuf_sub_add_v2i64_yy(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z
116130 ret <2 x i64 > %r
117131}
118132
119- ; negative test - type change via shuffle
133+ ; type change via shuffle
120134
121135define <8 x float > @shuf_fmul_v4f32_xx_type (<4 x float > %x , <4 x float > %y , <4 x float > %z ) {
122136; CHECK-LABEL: define <8 x float> @shuf_fmul_v4f32_xx_type(
@@ -168,14 +182,14 @@ define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
168182 ret <4 x i32 > %r
169183}
170184
171- ; negative test - must have matching operand
185+ ; No matching operand between the two binops (fdiv is not commutable); the shuffle is still folded ahead of a single fdiv.
172186
173187define <4 x float > @shuf_fdiv_v4f32_no_common_op (<4 x float > %x , <4 x float > %y , <4 x float > %z , <4 x float > %w ) {
174188; CHECK-LABEL: define <4 x float> @shuf_fdiv_v4f32_no_common_op(
175189; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x float> [[W:%.*]]) #[[ATTR0]] {
176- ; CHECK-NEXT: [[B0 :%.*]] = fdiv <4 x float> [[X]], [[Y]]
177- ; CHECK-NEXT: [[B1 :%.*]] = fdiv <4 x float> [[Z ]], [[W]]
178- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0 ]], <4 x float> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
190+ ; CHECK-NEXT: [[TMP1 :%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
191+ ; CHECK-NEXT: [[TMP2 :%.*]] = shufflevector <4 x float> [[Y ]], <4 x float> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
192+ ; CHECK-NEXT: [[R:%.*]] = fdiv <4 x float> [[TMP1 ]], [[TMP2]]
179193; CHECK-NEXT: ret <4 x float> [[R]]
180194;
181195 %b0 = fdiv <4 x float > %x , %y
@@ -216,6 +230,3 @@ define <4 x i32> @shuf_srem_v4i32_poison(<4 x i32> %a0, <4 x i32> %a1) {
216230 ret <4 x i32 > %r
217231}
218232
219- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
220- ; AVX: {{.*}}
221- ; SSE: {{.*}}
0 commit comments