Skip to content

Commit 4ca25c8

Browse files
committed
Add tests for vectors where the number of elements in Src is greater than in Dst
1 parent 61bc3e0 commit 4ca25c8

File tree

1 file changed

+77
-4
lines changed

1 file changed

+77
-4
lines changed

llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,19 @@ define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
5858
ret <4 x float> %r
5959
}
6060

61+
define <2 x float> @ext2_v4f32v2f32(<4 x float> %x, <2 x float> %y) {
62+
; CHECK-LABEL: @ext2_v4f32v2f32(
63+
; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]]
64+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 poison, i32 3>
65+
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
66+
; CHECK-NEXT: ret <2 x float> [[R]]
67+
;
68+
%e = extractelement <4 x float> %x, i32 3
69+
%n = fneg float %e
70+
%r = insertelement <2 x float> %y, float %n, i32 1
71+
ret <2 x float> %r
72+
}
73+
6174
; Eliminating extract/insert is still profitable. Flags propagate.
6275

6376
define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
@@ -85,6 +98,19 @@ define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
8598
ret <4 x double> %r
8699
}
87100

101+
define <2 x double> @ext1_v4f64v2f64(<4 x double> %x, <2 x double> %y) {
102+
; CHECK-LABEL: @ext1_v4f64v2f64(
103+
; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <4 x double> [[X:%.*]]
104+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <2 x i32> <i32 poison, i32 3>
105+
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
106+
; CHECK-NEXT: ret <2 x double> [[R]]
107+
;
108+
%e = extractelement <4 x double> %x, i32 3
109+
%n = fneg nsz double %e
110+
%r = insertelement <2 x double> %y, double %n, i32 1
111+
ret <2 x double> %r
112+
}
113+
88114
define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
89115
; CHECK-LABEL: @ext7_v8f32(
90116
; CHECK-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X:%.*]]
@@ -110,6 +136,19 @@ define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) {
110136
ret <8 x float> %r
111137
}
112138

139+
define <4 x float> @ext7_v8f32v4f32(<8 x float> %x, <4 x float> %y) {
140+
; CHECK-LABEL: @ext7_v8f32v4f32(
141+
; CHECK-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X:%.*]]
142+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 7>
143+
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
144+
; CHECK-NEXT: ret <4 x float> [[R]]
145+
;
146+
%e = extractelement <8 x float> %x, i32 7
147+
%n = fneg float %e
148+
%r = insertelement <4 x float> %y, float %n, i32 3
149+
ret <4 x float> %r
150+
}
151+
113152
; Same as above with an extra use of the extracted element.
114153

115154
define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
@@ -157,6 +196,29 @@ define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) {
157196
ret <8 x float> %r
158197
}
159198

199+
define <4 x float> @ext7_v8f32v4f32_use1(<8 x float> %x, <4 x float> %y) {
200+
; SSE-LABEL: @ext7_v8f32v4f32_use1(
201+
; SSE-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
202+
; SSE-NEXT: call void @use(float [[E]])
203+
; SSE-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X]]
204+
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 7>
205+
; SSE-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
206+
; SSE-NEXT: ret <4 x float> [[R]]
207+
;
208+
; AVX-LABEL: @ext7_v8f32v4f32_use1(
209+
; AVX-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
210+
; AVX-NEXT: call void @use(float [[E]])
211+
; AVX-NEXT: [[N:%.*]] = fneg float [[E]]
212+
; AVX-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 3
213+
; AVX-NEXT: ret <4 x float> [[R]]
214+
;
215+
%e = extractelement <8 x float> %x, i32 7
216+
call void @use(float %e)
217+
%n = fneg float %e
218+
%r = insertelement <4 x float> %y, float %n, i32 3
219+
ret <4 x float> %r
220+
}
221+
160222
; Negative test - the transform is likely not profitable if the fneg has another use.
161223

162224
define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
@@ -189,6 +251,21 @@ define <8 x float> @ext7_v4f32v8f32_use2(<4 x float> %x, <8 x float> %y) {
189251
ret <8 x float> %r
190252
}
191253

254+
define <4 x float> @ext7_v8f32v4f32_use2(<8 x float> %x, <4 x float> %y) {
255+
; CHECK-LABEL: @ext7_v8f32v4f32_use2(
256+
; CHECK-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
257+
; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
258+
; CHECK-NEXT: call void @use(float [[N]])
259+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 3
260+
; CHECK-NEXT: ret <4 x float> [[R]]
261+
;
262+
%e = extractelement <8 x float> %x, i32 7
263+
%n = fneg float %e
264+
call void @use(float %n)
265+
%r = insertelement <4 x float> %y, float %n, i32 3
266+
ret <4 x float> %r
267+
}
268+
192269
; Negative test - can't convert variable index to a shuffle.
193270

194271
define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %index) {
@@ -217,9 +294,6 @@ define <4 x double> @ext_index_var_v2f64v4f64(<2 x double> %x, <4 x double> %y,
217294
ret <4 x double> %r
218295
}
219296

220-
; Negative test - require same extract/insert index for simple shuffle.
221-
; TODO: We could handle this by adjusting the cost calculation.
222-
223297
define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
224298
; CHECK-LABEL: @ext1_v2f64_ins0(
225299
; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
@@ -232,7 +306,6 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
232306
ret <2 x double> %r
233307
}
234308

235-
; Negative test - extract from an index greater than the vector width of the destination
236309
define <2 x double> @ext3_v4f64v2f64(<4 x double> %x, <2 x double> %y) {
237310
; CHECK-LABEL: @ext3_v4f64v2f64(
238311
; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <4 x double> [[X:%.*]]

0 commit comments

Comments
 (0)