Skip to content

Commit 2b2130a

Browse files
committed
[x86] Add test for reduction
1 parent 72f99b7 commit 2b2130a

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1,+fast-hops | FileCheck %s --check-prefixes=SSE41
3+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-hops | FileCheck %s --check-prefixes=AVX2
4+
5+
; Test: a single unsigned-min reduction of %x whose scalar result has two uses.
; It is returned as field 0 of the result, and it is also splatted back to 16
; lanes and compared against %x. Lanes of %y where %x equals its minimum are
; kept; every other lane is replaced by i16 -1 (all ones, the largest unsigned
; value, so it never lowers the following unsigned min). Field 1 of the result
; is the unsigned-min reduction of that selected vector.
; NOTE(review): in the checks below the first reduction appears to be
; materialized twice (a pshufd/psrld/pminuw chain feeding the splat, plus a
; phminposuw feeding the scalar return) -- presumably this is the codegen the
; test is meant to pin down; confirm intent with the commit author.
define { i16, i16 } @test_reduce_v16i16_with_umin(<16 x i16> %x, <16 x i16> %y) {
6+
; SSE41-LABEL: test_reduce_v16i16_with_umin:
7+
; SSE41: # %bb.0:
8+
; SSE41-NEXT: movdqa %xmm0, %xmm4
9+
; SSE41-NEXT: pminuw %xmm1, %xmm4
10+
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
11+
; SSE41-NEXT: pminuw %xmm4, %xmm5
12+
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,1,1]
13+
; SSE41-NEXT: pminuw %xmm5, %xmm6
14+
; SSE41-NEXT: movdqa %xmm6, %xmm5
15+
; SSE41-NEXT: psrld $16, %xmm5
16+
; SSE41-NEXT: pminuw %xmm6, %xmm5
17+
; SSE41-NEXT: phminposuw %xmm4, %xmm4
18+
; SSE41-NEXT: movd %xmm4, %eax
19+
; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,0,0,0,4,5,6,7]
20+
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
21+
; SSE41-NEXT: pcmpeqw %xmm4, %xmm1
22+
; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
23+
; SSE41-NEXT: pxor %xmm5, %xmm1
24+
; SSE41-NEXT: por %xmm3, %xmm1
25+
; SSE41-NEXT: pcmpeqw %xmm4, %xmm0
26+
; SSE41-NEXT: pxor %xmm5, %xmm0
27+
; SSE41-NEXT: por %xmm2, %xmm0
28+
; SSE41-NEXT: pminuw %xmm1, %xmm0
29+
; SSE41-NEXT: phminposuw %xmm0, %xmm0
30+
; SSE41-NEXT: movd %xmm0, %edx
31+
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
32+
; SSE41-NEXT: # kill: def $dx killed $dx killed $edx
33+
; SSE41-NEXT: retq
34+
;
35+
; AVX2-LABEL: test_reduce_v16i16_with_umin:
36+
; AVX2: # %bb.0:
37+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
38+
; AVX2-NEXT: vpminuw %xmm2, %xmm0, %xmm2
39+
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
40+
; AVX2-NEXT: vpminuw %xmm3, %xmm2, %xmm3
41+
; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,1,1]
42+
; AVX2-NEXT: vpminuw %xmm4, %xmm3, %xmm3
43+
; AVX2-NEXT: vpsrld $16, %xmm3, %xmm4
44+
; AVX2-NEXT: vphminposuw %xmm2, %xmm2
45+
; AVX2-NEXT: vmovd %xmm2, %eax
46+
; AVX2-NEXT: vpminuw %xmm4, %xmm3, %xmm2
47+
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
48+
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
49+
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
50+
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
51+
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
52+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
53+
; AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
54+
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
55+
; AVX2-NEXT: vmovd %xmm0, %edx
56+
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
57+
; AVX2-NEXT: # kill: def $dx killed $dx killed $edx
58+
; AVX2-NEXT: vzeroupper
59+
; AVX2-NEXT: retq
60+
; Scalar unsigned minimum over all 16 lanes of %x (first use: returned below).
%min_x = tail call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %x)
61+
; Second use of the scalar: splat it to 16 lanes via a <1 x i16> temporary.
%min_x_vec = insertelement <1 x i16> poison, i16 %min_x, i64 0
62+
%min_x_splat = shufflevector <1 x i16> %min_x_vec, <1 x i16> poison, <16 x i32> zeroinitializer
63+
; Per-lane mask: true where the lane of %x equals the minimum.
%cmp = icmp eq <16 x i16> %x, %min_x_splat
64+
; Keep %y in the matching lanes; fill the rest with all-ones.
%select = select <16 x i1> %cmp, <16 x i16> %y, <16 x i16> splat (i16 -1)
65+
%select_min = tail call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %select)
66+
; Pack both scalars into the { i16, i16 } return value.
%ret_0 = insertvalue { i16, i16 } poison, i16 %min_x, 0
67+
%ret = insertvalue { i16, i16 } %ret_0, i16 %select_min, 1
68+
ret { i16, i16 } %ret
69+
}
70+
71+
; Test: same shape as the umin variant, but the first reduction is an integer
; add. The sum of all 16 lanes of %x has two uses: it is returned as field 0 of
; the result, and it is splatted back to 16 lanes and compared against %x.
; Lanes of %y where %x equals the sum are kept; every other lane is replaced by
; i16 -1 (all ones, the largest unsigned value, so it never lowers the
; following unsigned min). Field 1 of the result is the unsigned-min reduction
; of that selected vector.
; NOTE(review): in the checks below the add reduction appears to be
; materialized twice (a pshufd/paddw chain feeding the splat, plus a separate
; phaddw chain feeding the scalar return) -- presumably this is the codegen the
; test is meant to pin down; confirm intent with the commit author.
define { i16, i16 } @test_reduce_v16i16_with_add(<16 x i16> %x, <16 x i16> %y) {
72+
; SSE41-LABEL: test_reduce_v16i16_with_add:
73+
; SSE41: # %bb.0: # %start
74+
; SSE41-NEXT: movdqa %xmm0, %xmm4
75+
; SSE41-NEXT: paddw %xmm1, %xmm4
76+
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
77+
; SSE41-NEXT: paddw %xmm4, %xmm5
78+
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,1,1]
79+
; SSE41-NEXT: paddw %xmm5, %xmm4
80+
; SSE41-NEXT: phaddw %xmm4, %xmm4
81+
; SSE41-NEXT: movdqa %xmm1, %xmm5
82+
; SSE41-NEXT: phaddw %xmm0, %xmm5
83+
; SSE41-NEXT: phaddw %xmm5, %xmm5
84+
; SSE41-NEXT: phaddw %xmm5, %xmm5
85+
; SSE41-NEXT: phaddw %xmm5, %xmm5
86+
; SSE41-NEXT: movd %xmm5, %eax
87+
; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,0,0,4,5,6,7]
88+
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
89+
; SSE41-NEXT: pcmpeqw %xmm4, %xmm1
90+
; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
91+
; SSE41-NEXT: pxor %xmm5, %xmm1
92+
; SSE41-NEXT: por %xmm3, %xmm1
93+
; SSE41-NEXT: pcmpeqw %xmm4, %xmm0
94+
; SSE41-NEXT: pxor %xmm5, %xmm0
95+
; SSE41-NEXT: por %xmm2, %xmm0
96+
; SSE41-NEXT: pminuw %xmm1, %xmm0
97+
; SSE41-NEXT: phminposuw %xmm0, %xmm0
98+
; SSE41-NEXT: movd %xmm0, %edx
99+
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
100+
; SSE41-NEXT: # kill: def $dx killed $dx killed $edx
101+
; SSE41-NEXT: retq
102+
;
103+
; AVX2-LABEL: test_reduce_v16i16_with_add:
104+
; AVX2: # %bb.0: # %start
105+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
106+
; AVX2-NEXT: vpaddw %xmm2, %xmm0, %xmm3
107+
; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
108+
; AVX2-NEXT: vpaddw %xmm4, %xmm3, %xmm3
109+
; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,1,1]
110+
; AVX2-NEXT: vpaddw %xmm4, %xmm3, %xmm3
111+
; AVX2-NEXT: vphaddw %xmm3, %xmm3, %xmm3
112+
; AVX2-NEXT: vphaddw %xmm0, %xmm2, %xmm2
113+
; AVX2-NEXT: vphaddw %xmm2, %xmm2, %xmm2
114+
; AVX2-NEXT: vphaddw %xmm2, %xmm2, %xmm2
115+
; AVX2-NEXT: vphaddw %xmm2, %xmm2, %xmm2
116+
; AVX2-NEXT: vmovd %xmm2, %eax
117+
; AVX2-NEXT: vpbroadcastw %xmm3, %ymm2
118+
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
119+
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
120+
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
121+
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
122+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
123+
; AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
124+
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
125+
; AVX2-NEXT: vmovd %xmm0, %edx
126+
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
127+
; AVX2-NEXT: # kill: def $dx killed $dx killed $edx
128+
; AVX2-NEXT: vzeroupper
129+
; AVX2-NEXT: retq
130+
; Named entry block; the bb.0 check lines above carry a matching '# %start'
; suffix, so renaming this label would presumably require regenerating them.
start:
131+
; Scalar sum over all 16 lanes of %x (first use: returned below).
%sum_x = tail call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %x)
132+
; Second use of the scalar: splat it to 16 lanes via a <1 x i16> temporary.
%sum_x_vec = insertelement <1 x i16> poison, i16 %sum_x, i64 0
133+
%sum_x_splat = shufflevector <1 x i16> %sum_x_vec, <1 x i16> poison, <16 x i32> zeroinitializer
134+
; Per-lane mask: true where the lane of %x equals the sum.
%cmp = icmp eq <16 x i16> %x, %sum_x_splat
135+
; Keep %y in the matching lanes; fill the rest with all-ones.
%select = select <16 x i1> %cmp, <16 x i16> %y, <16 x i16> splat (i16 -1)
136+
%select_min = tail call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %select)
137+
; Pack both scalars into the { i16, i16 } return value.
%ret_0 = insertvalue { i16, i16 } poison, i16 %sum_x, 0
138+
%ret = insertvalue { i16, i16 } %ret_0, i16 %select_min, 1
139+
ret { i16, i16 } %ret
140+
}

0 commit comments

Comments
 (0)