Skip to content

Commit 13b5899

Browse files
authored
[SelectionDAGBuilder][X86] Don't form FMAXNUM for f16 vectors if FMAXNUM needs to be promoted. (#114943)
In #70357, I changed an isLegalOrCustom to isLegalOrCustomOrPromote in visitSelect to enable integer min/max to be formed when the operation was promoted. Unfortunately, this also affected floating point. For floating point, fmaxnum may require a libcall so we also need to check if the operation on the promoted type is legal or custom. Other changes to RISC-V have since made the original change untested so this patch restores the original isLegalOrCustom. Fixes #114520.
1 parent 5e75f29 commit 13b5899

File tree

2 files changed

+113
-1
lines changed

2 files changed

+113
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3775,7 +3775,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
37753775
}
37763776

37773777
if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
3778-
(TLI.isOperationLegalOrCustomOrPromote(Opc, VT) ||
3778+
(TLI.isOperationLegalOrCustom(Opc, VT) ||
37793779
(UseScalarMinMax &&
37803780
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
37813781
// If the underlying comparison instruction is used by any other

llvm/test/CodeGen/X86/pr114520.ll

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-none-unknown-elf -mattr=+avx512vl | FileCheck %s
3+
4+
define half @test1(half %x) {
5+
; CHECK-LABEL: test1:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vpextrw $0, %xmm0, %eax
8+
; CHECK-NEXT: vmovd %eax, %xmm0
9+
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
10+
; CHECK-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11+
; CHECK-NEXT: movl $64512, %ecx # imm = 0xFC00
12+
; CHECK-NEXT: cmoval %eax, %ecx
13+
; CHECK-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
14+
; CHECK-NEXT: retq
15+
entry:
16+
%cmp2 = fcmp ogt half %x, 0xHFC00
17+
%cond.v = select i1 %cmp2, half %x, half 0xHFC00
18+
ret half %cond.v
19+
}
20+
21+
define <8 x half> @test2(<8 x half> %x) {
22+
; CHECK-LABEL: test2:
23+
; CHECK: # %bb.0: # %entry
24+
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm2
25+
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0]
26+
; CHECK-NEXT: vucomiss %xmm1, %xmm2
27+
; CHECK-NEXT: seta %al
28+
; CHECK-NEXT: andl $1, %eax
29+
; CHECK-NEXT: kmovw %eax, %k0
30+
; CHECK-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[1,1,1,1,4,5,6,7]
31+
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
32+
; CHECK-NEXT: vucomiss %xmm1, %xmm2
33+
; CHECK-NEXT: seta %al
34+
; CHECK-NEXT: kmovw %eax, %k1
35+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
36+
; CHECK-NEXT: kshiftrw $14, %k1, %k1
37+
; CHECK-NEXT: korw %k1, %k0, %k0
38+
; CHECK-NEXT: movw $-5, %ax
39+
; CHECK-NEXT: kmovw %eax, %k1
40+
; CHECK-NEXT: kandw %k1, %k0, %k0
41+
; CHECK-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
42+
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
43+
; CHECK-NEXT: vucomiss %xmm1, %xmm2
44+
; CHECK-NEXT: seta %al
45+
; CHECK-NEXT: kmovw %eax, %k1
46+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
47+
; CHECK-NEXT: kshiftrw $13, %k1, %k1
48+
; CHECK-NEXT: korw %k1, %k0, %k0
49+
; CHECK-NEXT: movw $-9, %ax
50+
; CHECK-NEXT: kmovw %eax, %k1
51+
; CHECK-NEXT: kandw %k1, %k0, %k0
52+
; CHECK-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[3,3,3,3,4,5,6,7]
53+
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
54+
; CHECK-NEXT: vucomiss %xmm1, %xmm2
55+
; CHECK-NEXT: seta %al
56+
; CHECK-NEXT: kmovw %eax, %k1
57+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
58+
; CHECK-NEXT: kshiftrw $12, %k1, %k1
59+
; CHECK-NEXT: korw %k1, %k0, %k0
60+
; CHECK-NEXT: movw $-17, %ax
61+
; CHECK-NEXT: kmovw %eax, %k1
62+
; CHECK-NEXT: kandw %k1, %k0, %k0
63+
; CHECK-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
64+
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
65+
; CHECK-NEXT: vucomiss %xmm1, %xmm2
66+
; CHECK-NEXT: seta %al
67+
; CHECK-NEXT: kmovw %eax, %k1
68+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
69+
; CHECK-NEXT: kshiftrw $11, %k1, %k1
70+
; CHECK-NEXT: korw %k1, %k0, %k0
71+
; CHECK-NEXT: movw $-33, %ax
72+
; CHECK-NEXT: kmovw %eax, %k1
73+
; CHECK-NEXT: kandw %k1, %k0, %k0
74+
; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
75+
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
76+
; CHECK-NEXT: vucomiss %xmm1, %xmm2
77+
; CHECK-NEXT: seta %al
78+
; CHECK-NEXT: kmovw %eax, %k1
79+
; CHECK-NEXT: kshiftlw $15, %k1, %k1
80+
; CHECK-NEXT: kshiftrw $10, %k1, %k1
81+
; CHECK-NEXT: korw %k1, %k0, %k0
82+
; CHECK-NEXT: movw $-65, %ax
83+
; CHECK-NEXT: kmovw %eax, %k1
84+
; CHECK-NEXT: kandw %k1, %k0, %k0
85+
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
86+
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
87+
; CHECK-NEXT: vucomiss %xmm1, %xmm2
88+
; CHECK-NEXT: seta %al
89+
; CHECK-NEXT: kmovw %eax, %k1
90+
; CHECK-NEXT: kshiftlw $6, %k1, %k1
91+
; CHECK-NEXT: korw %k1, %k0, %k0
92+
; CHECK-NEXT: kshiftlw $9, %k0, %k0
93+
; CHECK-NEXT: kshiftrw $9, %k0, %k0
94+
; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
95+
; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
96+
; CHECK-NEXT: vucomiss %xmm1, %xmm2
97+
; CHECK-NEXT: seta %al
98+
; CHECK-NEXT: kmovw %eax, %k1
99+
; CHECK-NEXT: kshiftlw $7, %k1, %k1
100+
; CHECK-NEXT: korw %k1, %k0, %k1
101+
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf]
102+
; CHECK-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
103+
; CHECK-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
104+
; CHECK-NEXT: vpmovdw %ymm2, %xmm2
105+
; CHECK-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1))
106+
; CHECK-NEXT: vzeroupper
107+
; CHECK-NEXT: retq
108+
entry:
109+
%cmp2 = fcmp ogt <8 x half> %x, splat (half 0xHFC00)
110+
%cond.v = select <8 x i1> %cmp2, <8 x half> %x, <8 x half> splat (half 0xHFC00)
111+
ret <8 x half> %cond.v
112+
}

0 commit comments

Comments
 (0)