diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 100f1ec027a66..53ec7125a6490 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -1879,28 +1879,34 @@ bool X86InstructionSelector::selectSelect(MachineInstr &I, unsigned OpCmp; LLT Ty = MRI.getType(DstReg); - switch (Ty.getSizeInBits()) { - default: - return false; - case 8: - OpCmp = X86::CMOV_GR8; - break; - case 16: - OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16; - break; - case 32: - OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32; - break; - case 64: - assert(STI.is64Bit() && STI.canUseCMOV()); - OpCmp = X86::CMOV64rr; - break; + if (Ty.getSizeInBits() == 80) { + BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::CMOVE_Fp80), + DstReg) + .addReg(Sel.getTrueReg()) + .addReg(Sel.getFalseReg()); + } else { + switch (Ty.getSizeInBits()) { + default: + return false; + case 8: + OpCmp = X86::CMOV_GR8; + break; + case 16: + OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16; + break; + case 32: + OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32; + break; + case 64: + assert(STI.is64Bit() && STI.canUseCMOV()); + OpCmp = X86::CMOV64rr; + break; + } + BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg) + .addReg(Sel.getTrueReg()) + .addReg(Sel.getFalseReg()) + .addImm(X86::COND_E); } - BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg) - .addReg(Sel.getTrueReg()) - .addReg(Sel.getFalseReg()) - .addImm(X86::COND_E); - const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI); if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n"); diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 28fa2cd0625c1..e792b1bce3c5c 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -575,10 +575,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, // todo: vectors and address spaces getActionDefinitionsBuilder(G_SELECT) - .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}}) + .legalFor({{s16, s32}, {s32, s32}, {p0, s32}}) + .legalFor(!HasCMOV, {{s8, s32}}) + .legalFor(Is64Bit, {{s64, s32}}) + .legalFor(UseX87, {{s80, s32}}) + .clampScalar(1, s32, s32) .widenScalarToNextPow2(0, /*Min=*/8) - .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar) - .clampScalar(1, s32, s32); + .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar); // memory intrinsics getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir index 470a30fd36b62..bd4e9a47d4882 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir @@ -37,9 +37,9 @@ body: | ; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]] ; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) + ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64) - ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV2]], [[UV4]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV3]], [[UV5]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) @@ -111,9 +111,9 @@ body: | ; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]] ; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) + ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64) - ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV2]], [[UV4]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV3]], [[UV5]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir index a7cbb35e3f74c..6ab424eeaa780 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir @@ -33,9 +33,9 @@ body: | ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF + ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) - ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV]], [[UV2]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV1]], [[UV3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) @@ -115,9 +115,9 @@ body: | ; X64: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF ; X64-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF + ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF1]](s8) ; X64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF]](s8) - ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X64-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ZEXT]](s32), [[ANYEXT]], [[ANYEXT1]] ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) diff --git a/llvm/test/CodeGen/X86/isel-select-fcmov.ll b/llvm/test/CodeGen/X86/isel-select-fcmov.ll index e5122ab802d12..cb441b860bb56 100644 --- a/llvm/test/CodeGen/X86/isel-select-fcmov.ll +++ b/llvm/test/CodeGen/X86/isel-select-fcmov.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -global-isel -global-isel-abort=2 | FileCheck %s --check-prefix=X86-GISEL +; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X86-GISEL ; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefix=X64-GISEL +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X64-GISEL ; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X64 ; Test that we can generate an fcmove, and also that it passes verification. @@ -24,8 +24,12 @@ define x86_fp80 @cmove_cmp(x86_fp80 %a, x86_fp80 %b, i32 %c) { ; X86-GISEL: # %bb.0: ; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) ; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: xorl %eax, %eax ; X86-GISEL-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-GISEL-NEXT: fadd %st(1), %st +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: fadd %st, %st(1) +; X86-GISEL-NEXT: andl $1, %eax +; X86-GISEL-NEXT: testl %eax, %eax ; X86-GISEL-NEXT: fxch %st(1) ; X86-GISEL-NEXT: fcmove %st(1), %st ; X86-GISEL-NEXT: fstp %st(1) @@ -46,8 +50,12 @@ define x86_fp80 @cmove_cmp(x86_fp80 %a, x86_fp80 %b, i32 %c) { ; X64-GISEL: # %bb.0: ; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) -; X64-GISEL-NEXT: testl %edi, %edi -; X64-GISEL-NEXT: fadd %st(1), %st +; X64-GISEL-NEXT: xorl %eax, %eax +; X64-GISEL-NEXT: cmpl $0, %edi +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: fadd %st, %st(1) +; X64-GISEL-NEXT: andl $1, %eax +; X64-GISEL-NEXT: testl %eax, %eax ; X64-GISEL-NEXT: fxch %st(1) ; X64-GISEL-NEXT: fcmove %st(1), %st ; X64-GISEL-NEXT: fstp %st(1) @@ -74,10 +82,12 @@ define x86_fp80 @cmove_arg(x86_fp80 %a, x86_fp80 %b, i1 %test) { ; X86-GISEL: # %bb.0: ; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) ; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) -; X86-GISEL-NEXT: fadd %st(1), %st -; X86-GISEL-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: fadd %st, %st(1) +; X86-GISEL-NEXT: movl $1, %eax +; X86-GISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: testl %eax, %eax ; X86-GISEL-NEXT: fxch %st(1) -; X86-GISEL-NEXT: fcmovne %st(1), %st +; X86-GISEL-NEXT: fcmove %st(1), %st ; X86-GISEL-NEXT: fstp %st(1) ; X86-GISEL-NEXT: retl ; @@ -96,10 +106,11 @@ define x86_fp80 @cmove_arg(x86_fp80 %a, x86_fp80 %b, i1 %test) { ; X64-GISEL: # %bb.0: ; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) -; X64-GISEL-NEXT: fadd %st(1), %st -; X64-GISEL-NEXT: testb $1, %dil +; X64-GISEL-NEXT: fadd %st, %st(1) +; X64-GISEL-NEXT: andl $1, %edi +; X64-GISEL-NEXT: testl %edi, %edi ; X64-GISEL-NEXT: fxch %st(1) -; X64-GISEL-NEXT: fcmovne %st(1), %st +; X64-GISEL-NEXT: fcmove %st(1), %st ; X64-GISEL-NEXT: fstp %st(1) ; X64-GISEL-NEXT: retq %add = fadd x86_fp80 %a, %b @@ -122,13 +133,15 @@ define x86_fp80 @cmove_load(x86_fp80 %a, x86_fp80 %b, ptr %p) { ; ; X86-GISEL-LABEL: cmove_load: ; X86-GISEL: # %bb.0: -; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) ; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) -; X86-GISEL-NEXT: fadd %st(1), %st -; X86-GISEL-NEXT: cmpb $0, (%eax) +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: fadd %st, %st(1) +; X86-GISEL-NEXT: movzbl (%eax), %eax +; X86-GISEL-NEXT: andl $1, %eax +; X86-GISEL-NEXT: testl %eax, %eax ; X86-GISEL-NEXT: fxch %st(1) -; X86-GISEL-NEXT: fcmovne %st(1), %st +; X86-GISEL-NEXT: fcmove %st(1), %st ; X86-GISEL-NEXT: fstp %st(1) ; X86-GISEL-NEXT: retl ; @@ -147,10 +160,12 @@ define x86_fp80 @cmove_load(x86_fp80 %a, x86_fp80 %b, ptr %p) { ; X64-GISEL: # %bb.0: ; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) -; X64-GISEL-NEXT: fadd %st(1), %st -; X64-GISEL-NEXT: cmpb $0, (%rdi) +; X64-GISEL-NEXT: fadd %st, %st(1) +; X64-GISEL-NEXT: movzbl (%rdi), %eax +; X64-GISEL-NEXT: andl $1, %eax +; X64-GISEL-NEXT: testl %eax, %eax ; X64-GISEL-NEXT: fxch %st(1) -; X64-GISEL-NEXT: fcmovne %st(1), %st +; X64-GISEL-NEXT: fcmove %st(1), %st ; X64-GISEL-NEXT: fstp %st(1) ; X64-GISEL-NEXT: retq %test = load i1, ptr %p