Skip to content

Commit 77d8972

Browse files
authored
[X86][GlobalISel] Support G_SELECT for x86_fp80 (#157377)
G_SELECT for x86_fp80 is required to implement G_FPTOUI and G_UITOFP. Legalize the predicate operand first to prevent the fp80 type from being clamped. Since every G_SELECT initially uses s8 for the predicate type, we were always clamping the input. To preserve this behavior, we add additional conditions for the legal types, such as Is64Bit and HasCMOV.
1 parent 062c533 commit 77d8972

File tree

5 files changed

+70
-46
lines changed

5 files changed

+70
-46
lines changed

llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,28 +1879,34 @@ bool X86InstructionSelector::selectSelect(MachineInstr &I,
18791879

18801880
unsigned OpCmp;
18811881
LLT Ty = MRI.getType(DstReg);
1882-
switch (Ty.getSizeInBits()) {
1883-
default:
1884-
return false;
1885-
case 8:
1886-
OpCmp = X86::CMOV_GR8;
1887-
break;
1888-
case 16:
1889-
OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
1890-
break;
1891-
case 32:
1892-
OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
1893-
break;
1894-
case 64:
1895-
assert(STI.is64Bit() && STI.canUseCMOV());
1896-
OpCmp = X86::CMOV64rr;
1897-
break;
1882+
if (Ty.getSizeInBits() == 80) {
1883+
BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::CMOVE_Fp80),
1884+
DstReg)
1885+
.addReg(Sel.getTrueReg())
1886+
.addReg(Sel.getFalseReg());
1887+
} else {
1888+
switch (Ty.getSizeInBits()) {
1889+
default:
1890+
return false;
1891+
case 8:
1892+
OpCmp = X86::CMOV_GR8;
1893+
break;
1894+
case 16:
1895+
OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
1896+
break;
1897+
case 32:
1898+
OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
1899+
break;
1900+
case 64:
1901+
assert(STI.is64Bit() && STI.canUseCMOV());
1902+
OpCmp = X86::CMOV64rr;
1903+
break;
1904+
}
1905+
BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
1906+
.addReg(Sel.getTrueReg())
1907+
.addReg(Sel.getFalseReg())
1908+
.addImm(X86::COND_E);
18981909
}
1899-
BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
1900-
.addReg(Sel.getTrueReg())
1901-
.addReg(Sel.getFalseReg())
1902-
.addImm(X86::COND_E);
1903-
19041910
const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
19051911
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
19061912
LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -575,10 +575,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
575575

576576
// todo: vectors and address spaces
577577
getActionDefinitionsBuilder(G_SELECT)
578-
.legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
578+
.legalFor({{s16, s32}, {s32, s32}, {p0, s32}})
579+
.legalFor(!HasCMOV, {{s8, s32}})
580+
.legalFor(Is64Bit, {{s64, s32}})
581+
.legalFor(UseX87, {{s80, s32}})
582+
.clampScalar(1, s32, s32)
579583
.widenScalarToNextPow2(0, /*Min=*/8)
580-
.clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
581-
.clampScalar(1, s32, s32);
584+
.clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar);
582585

583586
// memory intrinsics
584587
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ body: |
3737
; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
3838
; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
3939
; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
40+
; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
4041
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
4142
; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
42-
; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
4343
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV2]], [[UV4]]
4444
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV3]], [[UV5]]
4545
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -111,9 +111,9 @@ body: |
111111
; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
112112
; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
113113
; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
114+
; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
114115
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
115116
; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
116-
; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
117117
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV2]], [[UV4]]
118118
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV3]], [[UV5]]
119119
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)

llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ body: |
3333
; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
3434
; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
3535
; X86-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
36+
; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
3637
; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
3738
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
38-
; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
3939
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV]], [[UV2]]
4040
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV1]], [[UV3]]
4141
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -115,9 +115,9 @@ body: |
115115
; X64: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF
116116
; X64-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF
117117
; X64-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
118+
; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
118119
; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF1]](s8)
119120
; X64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF]](s8)
120-
; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
121121
; X64-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ZEXT]](s32), [[ANYEXT]], [[ANYEXT1]]
122122
; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16)
123123
; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)

llvm/test/CodeGen/X86/isel-select-fcmov.ll

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X86
3-
; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -global-isel -global-isel-abort=2 | FileCheck %s --check-prefix=X86-GISEL
3+
; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X86-GISEL
44
; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X86
55
; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X64
6-
; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefix=X64-GISEL
6+
; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X64-GISEL
77
; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X64
88

99
; Test that we can generate an fcmove, and also that it passes verification.
@@ -24,8 +24,12 @@ define x86_fp80 @cmove_cmp(x86_fp80 %a, x86_fp80 %b, i32 %c) {
2424
; X86-GISEL: # %bb.0:
2525
; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
2626
; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
27+
; X86-GISEL-NEXT: xorl %eax, %eax
2728
; X86-GISEL-NEXT: cmpl $0, {{[0-9]+}}(%esp)
28-
; X86-GISEL-NEXT: fadd %st(1), %st
29+
; X86-GISEL-NEXT: sete %al
30+
; X86-GISEL-NEXT: fadd %st, %st(1)
31+
; X86-GISEL-NEXT: andl $1, %eax
32+
; X86-GISEL-NEXT: testl %eax, %eax
2933
; X86-GISEL-NEXT: fxch %st(1)
3034
; X86-GISEL-NEXT: fcmove %st(1), %st
3135
; X86-GISEL-NEXT: fstp %st(1)
@@ -46,8 +50,12 @@ define x86_fp80 @cmove_cmp(x86_fp80 %a, x86_fp80 %b, i32 %c) {
4650
; X64-GISEL: # %bb.0:
4751
; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
4852
; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
49-
; X64-GISEL-NEXT: testl %edi, %edi
50-
; X64-GISEL-NEXT: fadd %st(1), %st
53+
; X64-GISEL-NEXT: xorl %eax, %eax
54+
; X64-GISEL-NEXT: cmpl $0, %edi
55+
; X64-GISEL-NEXT: sete %al
56+
; X64-GISEL-NEXT: fadd %st, %st(1)
57+
; X64-GISEL-NEXT: andl $1, %eax
58+
; X64-GISEL-NEXT: testl %eax, %eax
5159
; X64-GISEL-NEXT: fxch %st(1)
5260
; X64-GISEL-NEXT: fcmove %st(1), %st
5361
; X64-GISEL-NEXT: fstp %st(1)
@@ -74,10 +82,12 @@ define x86_fp80 @cmove_arg(x86_fp80 %a, x86_fp80 %b, i1 %test) {
7482
; X86-GISEL: # %bb.0:
7583
; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
7684
; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
77-
; X86-GISEL-NEXT: fadd %st(1), %st
78-
; X86-GISEL-NEXT: testb $1, {{[0-9]+}}(%esp)
85+
; X86-GISEL-NEXT: fadd %st, %st(1)
86+
; X86-GISEL-NEXT: movl $1, %eax
87+
; X86-GISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
88+
; X86-GISEL-NEXT: testl %eax, %eax
7989
; X86-GISEL-NEXT: fxch %st(1)
80-
; X86-GISEL-NEXT: fcmovne %st(1), %st
90+
; X86-GISEL-NEXT: fcmove %st(1), %st
8191
; X86-GISEL-NEXT: fstp %st(1)
8292
; X86-GISEL-NEXT: retl
8393
;
@@ -96,10 +106,11 @@ define x86_fp80 @cmove_arg(x86_fp80 %a, x86_fp80 %b, i1 %test) {
96106
; X64-GISEL: # %bb.0:
97107
; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
98108
; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
99-
; X64-GISEL-NEXT: fadd %st(1), %st
100-
; X64-GISEL-NEXT: testb $1, %dil
109+
; X64-GISEL-NEXT: fadd %st, %st(1)
110+
; X64-GISEL-NEXT: andl $1, %edi
111+
; X64-GISEL-NEXT: testl %edi, %edi
101112
; X64-GISEL-NEXT: fxch %st(1)
102-
; X64-GISEL-NEXT: fcmovne %st(1), %st
113+
; X64-GISEL-NEXT: fcmove %st(1), %st
103114
; X64-GISEL-NEXT: fstp %st(1)
104115
; X64-GISEL-NEXT: retq
105116
%add = fadd x86_fp80 %a, %b
@@ -122,13 +133,15 @@ define x86_fp80 @cmove_load(x86_fp80 %a, x86_fp80 %b, ptr %p) {
122133
;
123134
; X86-GISEL-LABEL: cmove_load:
124135
; X86-GISEL: # %bb.0:
125-
; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
126136
; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
127137
; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp)
128-
; X86-GISEL-NEXT: fadd %st(1), %st
129-
; X86-GISEL-NEXT: cmpb $0, (%eax)
138+
; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
139+
; X86-GISEL-NEXT: fadd %st, %st(1)
140+
; X86-GISEL-NEXT: movzbl (%eax), %eax
141+
; X86-GISEL-NEXT: andl $1, %eax
142+
; X86-GISEL-NEXT: testl %eax, %eax
130143
; X86-GISEL-NEXT: fxch %st(1)
131-
; X86-GISEL-NEXT: fcmovne %st(1), %st
144+
; X86-GISEL-NEXT: fcmove %st(1), %st
132145
; X86-GISEL-NEXT: fstp %st(1)
133146
; X86-GISEL-NEXT: retl
134147
;
@@ -147,10 +160,12 @@ define x86_fp80 @cmove_load(x86_fp80 %a, x86_fp80 %b, ptr %p) {
147160
; X64-GISEL: # %bb.0:
148161
; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
149162
; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
150-
; X64-GISEL-NEXT: fadd %st(1), %st
151-
; X64-GISEL-NEXT: cmpb $0, (%rdi)
163+
; X64-GISEL-NEXT: fadd %st, %st(1)
164+
; X64-GISEL-NEXT: movzbl (%rdi), %eax
165+
; X64-GISEL-NEXT: andl $1, %eax
166+
; X64-GISEL-NEXT: testl %eax, %eax
152167
; X64-GISEL-NEXT: fxch %st(1)
153-
; X64-GISEL-NEXT: fcmovne %st(1), %st
168+
; X64-GISEL-NEXT: fcmove %st(1), %st
154169
; X64-GISEL-NEXT: fstp %st(1)
155170
; X64-GISEL-NEXT: retq
156171
%test = load i1, ptr %p

0 commit comments

Comments
 (0)