Skip to content

Commit aa9ff19

Browse files
committed
[X86][GlobalISel] Support G_SELECT for x86_fp80
G_SELECT for x86_fp80 is required to implement G_FPTOUI and G_UITOFP. Legalize the predicate operand first to prevent clamping of the fp80 type. Because every G_SELECT initially uses s8 for the predicate type, the previous rules always clamped the input. To preserve that behavior, the legal types are now guarded by additional conditions, such as Is64Bit and HasCMOV.
1 parent 2b3d3fc commit aa9ff19

File tree

6 files changed

+80
-43
lines changed

6 files changed

+80
-43
lines changed

llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,28 +1877,34 @@ bool X86InstructionSelector::selectSelect(MachineInstr &I,
18771877

18781878
unsigned OpCmp;
18791879
LLT Ty = MRI.getType(DstReg);
1880-
switch (Ty.getSizeInBits()) {
1881-
default:
1882-
return false;
1883-
case 8:
1884-
OpCmp = X86::CMOV_GR8;
1885-
break;
1886-
case 16:
1887-
OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
1888-
break;
1889-
case 32:
1890-
OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
1891-
break;
1892-
case 64:
1893-
assert(STI.is64Bit() && STI.canUseCMOV());
1894-
OpCmp = X86::CMOV64rr;
1895-
break;
1880+
if (Ty.getSizeInBits() == 80) {
1881+
BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::CMOVE_Fp80),
1882+
DstReg)
1883+
.addReg(Sel.getTrueReg())
1884+
.addReg(Sel.getFalseReg());
1885+
} else {
1886+
switch (Ty.getSizeInBits()) {
1887+
default:
1888+
return false;
1889+
case 8:
1890+
OpCmp = X86::CMOV_GR8;
1891+
break;
1892+
case 16:
1893+
OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
1894+
break;
1895+
case 32:
1896+
OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
1897+
break;
1898+
case 64:
1899+
assert(STI.is64Bit() && STI.canUseCMOV());
1900+
OpCmp = X86::CMOV64rr;
1901+
break;
1902+
}
1903+
BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
1904+
.addReg(Sel.getTrueReg())
1905+
.addReg(Sel.getFalseReg())
1906+
.addImm(X86::COND_E);
18961907
}
1897-
BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
1898-
.addReg(Sel.getTrueReg())
1899-
.addReg(Sel.getFalseReg())
1900-
.addImm(X86::COND_E);
1901-
19021908
const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
19031909
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
19041910
LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -568,10 +568,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
568568

569569
// todo: vectors and address spaces
570570
getActionDefinitionsBuilder(G_SELECT)
571-
.legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
571+
.legalFor({{s16, s32}, {s32, s32}, {p0, s32}})
572+
.legalFor(!HasCMOV, {{s8, s32}})
573+
.legalFor(Is64Bit, {{s64, s32}})
574+
.legalFor(UseX87, {{s80, s32}})
575+
.clampScalar(1, s32, s32)
572576
.widenScalarToNextPow2(0, /*Min=*/8)
573-
.clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
574-
.clampScalar(1, s32, s32);
577+
.clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar);
575578

576579
// memory intrinsics
577580
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ body: |
2525
; X64-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]]
2626
; X64-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C]]
2727
; X64-NEXT: RET 0, implicit [[AND1]](s64)
28+
;
2829
; X86-LABEL: name: test_ctlz35
2930
; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx
3031
; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[COPY]](s64)
@@ -36,9 +37,9 @@ body: |
3637
; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
3738
; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
3839
; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
40+
; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
3941
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
4042
; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
41-
; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
4243
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV2]], [[UV4]]
4344
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV3]], [[UV5]]
4445
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -97,6 +98,7 @@ body: |
9798
; X64-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[DEF]](s64)
9899
; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTLZ]](s64)
99100
; X64-NEXT: RET 0, implicit [[COPY]](s64)
101+
;
100102
; X86-LABEL: name: test_ctlz64
101103
; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
102104
; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
@@ -106,9 +108,9 @@ body: |
106108
; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
107109
; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
108110
; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
111+
; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
109112
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
110113
; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
111-
; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
112114
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV2]], [[UV4]]
113115
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV3]], [[UV5]]
114116
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)

llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ body: |
3333
; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
3434
; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
3535
; X86-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
36+
; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
3637
; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
3738
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
38-
; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
3939
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV]], [[UV2]]
4040
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV1]], [[UV3]]
4141
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -115,9 +115,9 @@ body: |
115115
; X64: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF
116116
; X64-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF
117117
; X64-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
118+
; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
118119
; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF1]](s8)
119120
; X64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF]](s8)
120-
; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
121121
; X64-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ZEXT]](s32), [[ANYEXT]], [[ANYEXT1]]
122122
; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16)
123123
; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)

llvm/test/CodeGen/X86/fcmove.ll

Lines changed: 0 additions & 15 deletions
This file was deleted.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -verify-machineinstrs | FileCheck %s
3+
; RUN: llc < %s -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefix=GISEL
4+
; RUN: llc < %s -fast-isel=0 -global-isel=0 -verify-machineinstrs | FileCheck %s
5+
6+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
7+
target triple = "x86_64-unknown-unknown"
8+
9+
; Test that we can generate an fcmove, and also that it passes verification.
10+
11+
define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) {
12+
; CHECK-LABEL: cmove_f:
13+
; CHECK: # %bb.0:
14+
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
15+
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
16+
; CHECK-NEXT: testl %edi, %edi
17+
; CHECK-NEXT: fadd %st(1), %st
18+
; CHECK-NEXT: fxch %st(1)
19+
; CHECK-NEXT: fcmove %st(1), %st
20+
; CHECK-NEXT: fstp %st(1)
21+
; CHECK-NEXT: retq
22+
;
23+
; GISEL-LABEL: cmove_f:
24+
; GISEL: # %bb.0:
25+
; GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
26+
; GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
27+
; GISEL-NEXT: xorl %eax, %eax
28+
; GISEL-NEXT: cmpl $0, %edi
29+
; GISEL-NEXT: sete %al
30+
; GISEL-NEXT: fadd %st, %st(1)
31+
; GISEL-NEXT: andl $1, %eax
32+
; GISEL-NEXT: testl %eax, %eax
33+
; GISEL-NEXT: fxch %st(1)
34+
; GISEL-NEXT: fcmove %st(1), %st
35+
; GISEL-NEXT: fstp %st(1)
36+
; GISEL-NEXT: retq
37+
%test = icmp eq i32 %c, 0
38+
%add = fadd x86_fp80 %a, %b
39+
%ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
40+
ret x86_fp80 %ret
41+
}

0 commit comments

Comments
 (0)