@@ -5,6 +5,10 @@ declare half @llvm.minimum.f16(half, half)
55declare half @llvm.maximum.f16 (half , half )
66declare <8 x half > @llvm.minimum.v8f16 (<8 x half >, <8 x half >)
77declare <8 x half > @llvm.maximum.v8f16 (<8 x half >, <8 x half >)
; Declarations of the 16-lane and 32-lane half-precision variants of the
; llvm.minimum / llvm.maximum intrinsics, called by the v16f16 / v32f16 tests
; further down in this file.
8+ declare <16 x half > @llvm.minimum.v16f16 (<16 x half >, <16 x half >)
9+ declare <16 x half > @llvm.maximum.v16f16 (<16 x half >, <16 x half >)
10+ declare <32 x half > @llvm.minimum.v32f16 (<32 x half >, <32 x half >)
11+ declare <32 x half > @llvm.maximum.v32f16 (<32 x half >, <32 x half >)
812
913define half @test_fminimum (half %x , half %y ) {
1014; CHECK-LABEL: test_fminimum:
@@ -25,20 +29,10 @@ define half @test_fminimum(half %x, half %y) {
2529 ret half %z
2630}
2731
28- define <8 x half > @test_fminimum_scalarize (<8 x half > %x , <8 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
29- ; CHECK-LABEL: test_fminimum_scalarize :
32+ define <8 x half > @test_fminimum_v8f16 (<8 x half > %x , <8 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
33+ ; CHECK-LABEL: test_fminimum_v8f16 :
3034; CHECK: # %bb.0:
31- ; CHECK-NEXT: vcmpltph %xmm1, %xmm0, %k1
32- ; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
33- ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm3 = [32768,32768,32768,32768,32768,32768,32768,32768]
34- ; CHECK-NEXT: vpcmpeqw %xmm3, %xmm0, %k1
35- ; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
36- ; CHECK-NEXT: vpcmpeqw %xmm3, %xmm1, %k1
37- ; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
38- ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
39- ; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
40- ; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
41- ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
35+ ; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
4236; CHECK-NEXT: retq
4337 %r = call <8 x half > @llvm.minimum.v8f16 (<8 x half > %x , <8 x half > %y )
4438 ret <8 x half > %r
@@ -113,19 +107,10 @@ define half @test_fmaximum(half %x, half %y) {
113107 ret half %r
114108}
115109
116- define <8 x half > @test_fmaximum_scalarize (<8 x half > %x , <8 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
117- ; CHECK-LABEL: test_fmaximum_scalarize :
110+ define <8 x half > @test_fmaximum_v8f16 (<8 x half > %x , <8 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
111+ ; CHECK-LABEL: test_fmaximum_v8f16 :
118112; CHECK: # %bb.0:
119- ; CHECK-NEXT: vcmpltph %xmm0, %xmm1, %k1
120- ; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
121- ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
122- ; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
123- ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
124- ; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
125- ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
126- ; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
127- ; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
128- ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
113+ ; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
129114; CHECK-NEXT: retq
130115 %r = call <8 x half > @llvm.maximum.v8f16 (<8 x half > %x , <8 x half > %y )
131116 ret <8 x half > %r
@@ -186,3 +171,50 @@ define half @test_fmaximum_combine_cmps(half %x, half %y) {
186171 %2 = tail call half @llvm.maximum.f16 (half %x , half %1 )
187172 ret half %2
188173}
174+
; llvm.minimum on <16 x half> with both "no-nans-fp-math" and
; "no-signed-zeros-fp-math" set on the function. The CHECK lines expect the
; call to lower to a single vminph on ymm registers with no extra fixup code.
175+ define <16 x half > @test_fminimum_v16f16 (<16 x half > %x , <16 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
176+ ; CHECK-LABEL: test_fminimum_v16f16:
177+ ; CHECK: # %bb.0:
178+ ; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
179+ ; CHECK-NEXT: retq
180+ %r = call <16 x half > @llvm.minimum.v16f16 (<16 x half > %x , <16 x half > %y )
181+ ret <16 x half > %r
182+ }
183+
; llvm.maximum on <16 x half> with only "no-signed-zeros-fp-math" set, so NaN
; inputs still have to be handled. The CHECK lines expect a vmaxph followed by
; a NaN fixup: vcmpunordph compares %ymm0 with itself to build mask %k1, and
; the masked vmovdqu16 copies %ymm0 into the result for the lanes in that mask.
; The final vmovdqa moves the result into %ymm0.
184+ define <16 x half > @test_fmaximum_v16f16_nans (<16 x half > %x , <16 x half > %y ) "no-signed-zeros-fp-math" ="true" {
185+ ; CHECK-LABEL: test_fmaximum_v16f16_nans:
186+ ; CHECK: # %bb.0:
187+ ; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm1
188+ ; CHECK-NEXT: vcmpunordph %ymm0, %ymm0, %k1
189+ ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
190+ ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
191+ ; CHECK-NEXT: retq
192+ %r = call <16 x half > @llvm.maximum.v16f16 (<16 x half > %x , <16 x half > %y )
193+ ret <16 x half > %r
194+ }
195+
; llvm.minimum on <32 x half> with only "no-nans-fp-math" set, so signed zeros
; still have to be ordered correctly. The CHECK lines expect a mask %k1 to be
; derived from %zmm0 with vpmovw2m, the two inputs to be rearranged per lane
; under that mask (vpblendmw + masked vmovdqu16), and then a single vminph on
; zmm registers. No unordered-compare fixup is expected here.
196+ define <32 x half > @test_fminimum_v32f16_szero (<32 x half > %x , <32 x half > %y ) "no-nans-fp-math" ="true" {
197+ ; CHECK-LABEL: test_fminimum_v32f16_szero:
198+ ; CHECK: # %bb.0:
199+ ; CHECK-NEXT: vpmovw2m %zmm0, %k1
200+ ; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm2 {%k1}
201+ ; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
202+ ; CHECK-NEXT: vminph %zmm2, %zmm0, %zmm0
203+ ; CHECK-NEXT: retq
204+ %r = call <32 x half > @llvm.minimum.v32f16 (<32 x half > %x , <32 x half > %y )
205+ ret <32 x half > %r
206+ }
207+
; llvm.maximum on <32 x half> with no fast-math function attributes, so both
; signed zeros and NaNs have to be handled. The CHECK lines expect the
; mask-based operand rearrangement (vpmovw2m, vpblendmw, masked vmovdqu16)
; before vmaxph, followed by the NaN fixup (vcmpunordph of %zmm1 with itself
; and a masked vmovdqu16 of %zmm1 into the result).
208+ define <32 x half > @test_fmaximum_v32f16_nans_szero (<32 x half > %x , <32 x half > %y ) {
209+ ; CHECK-LABEL: test_fmaximum_v32f16_nans_szero:
210+ ; CHECK: # %bb.0:
211+ ; CHECK-NEXT: vpmovw2m %zmm0, %k1
212+ ; CHECK-NEXT: vpblendmw %zmm1, %zmm0, %zmm2 {%k1}
213+ ; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
214+ ; CHECK-NEXT: vmaxph %zmm2, %zmm1, %zmm0
215+ ; CHECK-NEXT: vcmpunordph %zmm1, %zmm1, %k1
216+ ; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
217+ ; CHECK-NEXT: retq
218+ %r = call <32 x half > @llvm.maximum.v32f16 (<32 x half > %x , <32 x half > %y )
219+ ret <32 x half > %r
220+ }
0 commit comments