diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5fe7203c052d8..a53793bea4855 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5352,10 +5352,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   MachineInstr *MI = nullptr;
   MachineInstr *Sub = nullptr;
   MachineInstr *Movr0Inst = nullptr;
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> InstsToUpdate;
   bool NoSignFlag = false;
   bool ClearsOverflowFlag = false;
   bool ShouldUpdateCC = false;
   bool IsSwapped = false;
+  bool HasNF = Subtarget.hasNF();
   unsigned OpNo = 0;
   X86::CondCode NewCC = X86::COND_INVALID;
   int64_t ImmDelta = 0;
@@ -5441,6 +5443,16 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
           continue;
         }
 
+        // Try to replace non-NF with NF instructions.
+        if (HasNF && Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+          unsigned NewOp = X86::getNFVariant(Inst.getOpcode());
+          if (!NewOp)
+            return false;
+
+          InstsToUpdate.push_back(std::make_pair(&Inst, NewOp));
+          continue;
+        }
+
         // Cannot do anything for any other EFLAG changes.
         return false;
       }
@@ -5637,6 +5649,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
         return false;
   }
 
+  // Replace non-NF with NF instructions.
+  for (auto &Inst : InstsToUpdate) {
+    Inst.first->setDesc(get(Inst.second));
+    Inst.first->removeOperand(Inst.first->getNumOperands() - 1);
+  }
+
   // Make sure Sub instruction defines EFLAGS and mark the def live.
   MachineOperand *FlagDef =
       Sub->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index a64d7df11a4d0..8d104e5f3ced2 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+nf,+avx512f -verify-machineinstrs | FileCheck %s
 
 define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
 ; CHECK-LABEL: basic:
@@ -125,7 +125,7 @@ entry:
   ret void
 }
 
-define void @single_cmp(i32 %a, i32 %b, ptr %c, ptr %d) #2 {
+define void @single_cmp(i32 %a, i32 %b, ptr %c, ptr %d) {
 ; CHECK-LABEL: single_cmp:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: cmpl %esi, %edi
@@ -139,3 +139,22 @@ entry:
   tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %2, ptr %d, i32 2, <1 x i1> %1)
   ret void
 }
+
+define void @load_add_store(i32 %a, i32 %b, ptr %p) {
+; CHECK-LABEL: load_add_store:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: cfcmovnew (%rdx), %ax
+; CHECK-NEXT: {nf} incl %eax
+; CHECK-NEXT: cfcmovnew %ax, (%rdx)
+; CHECK-NEXT: retq
+entry:
+  %0 = icmp ne i32 %a, %b
+  %1 = insertelement <1 x i1> poison, i1 %0, i64 0
+  %2 = tail call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %p, i32 2, <1 x i1> %1, <1 x i16> poison)
+  %3 = extractelement <1 x i16> %2, i64 0
+  %4 = add i16 %3, 1
+  %5 = insertelement <1 x i16> poison, i16 %4, i64 0
+  tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %5, ptr %p, i32 2, <1 x i1> %1)
+  ret void
+}