Skip to content

Commit 3f0314d

Browse files
committed
[X86][APX] Try to replace NDD with NF instructions in optimizeCompareInstr
https://godbolt.org/z/rWYdqnjjx
1 parent 69527b0 commit 3f0314d

File tree

2 files changed

+38
-5
lines changed

2 files changed

+38
-5
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5352,10 +5352,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
53525352
MachineInstr *MI = nullptr;
53535353
MachineInstr *Sub = nullptr;
53545354
MachineInstr *Movr0Inst = nullptr;
5355+
SmallVector<MachineInstr *, 4> NDDInsts;
53555356
bool NoSignFlag = false;
53565357
bool ClearsOverflowFlag = false;
53575358
bool ShouldUpdateCC = false;
53585359
bool IsSwapped = false;
5360+
bool HasCF = Subtarget.hasNF();
53595361
unsigned OpNo = 0;
53605362
X86::CondCode NewCC = X86::COND_INVALID;
53615363
int64_t ImmDelta = 0;
@@ -5441,6 +5443,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
54415443
continue;
54425444
}
54435445

5446+
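// Try to replace NDD with NF instructions: record NDD instructions whose EFLAGS def is dead so they can be converted later instead of giving up.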
5447+
if (HasCF && X86II::hasNewDataDest(Inst.getDesc().TSFlags) &&
5448+
Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
5449+
NDDInsts.push_back(&Inst);
5450+
continue;
5451+
}
5452+
54445453
// Cannot do anything for any other EFLAG changes.
54455454
return false;
54465455
}
@@ -5637,6 +5646,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
56375646
return false;
56385647
}
56395648

5649+
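// Replace each recorded NDD instruction with its NF variant and drop its last operand (presumably the implicit EFLAGS def, which the NF form no longer needs).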
5650+
for (MachineInstr *NDD : NDDInsts) {
5651+
NDD->setDesc(get(X86::getNFVariant(NDD->getOpcode())));
5652+
NDD->removeOperand(NDD->getNumOperands() - 1);
5653+
}
5654+
56405655
// Make sure Sub instruction defines EFLAGS and mark the def live.
56415656
MachineOperand *FlagDef =
56425657
Sub->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);

llvm/test/CodeGen/X86/apx/cf.ll

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+avx512f -verify-machineinstrs | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+nf,+ndd,+avx512f -verify-machineinstrs | FileCheck %s
33

44
define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
55
; CHECK-LABEL: basic:
@@ -57,9 +57,8 @@ entry:
5757
define i64 @reduced_data_dependency(i64 %a, i64 %b, ptr %c) {
5858
; CHECK-LABEL: reduced_data_dependency:
5959
; CHECK: # %bb.0: # %entry
60-
; CHECK-NEXT: movq %rdi, %rcx
61-
; CHECK-NEXT: subq %rsi, %rcx
62-
; CHECK-NEXT: cfcmovnsq (%rdx), %rdi, %rax
60+
; CHECK-NEXT: subq %rsi, %rdi, %rax
61+
; CHECK-NEXT: cfcmovnsq (%rdx), %rdi, %rcx
6362
; CHECK-NEXT: addq %rcx, %rax
6463
; CHECK-NEXT: retq
6564
entry:
@@ -125,7 +124,7 @@ entry:
125124
ret void
126125
}
127126

128-
define void @single_cmp(i32 %a, i32 %b, ptr %c, ptr %d) #2 {
127+
define void @single_cmp(i32 %a, i32 %b, ptr %c, ptr %d) {
129128
; CHECK-LABEL: single_cmp:
130129
; CHECK: # %bb.0: # %entry
131130
; CHECK-NEXT: cmpl %esi, %edi
@@ -139,3 +138,22 @@ entry:
139138
tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %2, ptr %d, i32 2, <1 x i1> %1)
140139
ret void
141140
}
141+
142+
define void @load_add_store(i32 %a, i32 %b, ptr %p) {
143+
; CHECK-LABEL: load_add_store:
144+
; CHECK: # %bb.0: # %entry
145+
; CHECK-NEXT: cmpl %esi, %edi
146+
; CHECK-NEXT: cfcmovnew (%rdx), %ax
147+
; CHECK-NEXT: {nf} incw %ax
148+
; CHECK-NEXT: cfcmovnew %ax, (%rdx)
149+
; CHECK-NEXT: retq
150+
entry:
151+
%0 = icmp ne i32 %a, %b
152+
%1 = insertelement <1 x i1> poison, i1 %0, i64 0
153+
%2 = tail call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %p, i32 2, <1 x i1> %1, <1 x i16> poison)
154+
%3 = extractelement <1 x i16> %2, i64 0
155+
%4 = add i16 %3, 1
156+
%5 = insertelement <1 x i16> poison, i16 %4, i64 0
157+
tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %5, ptr %p, i32 2, <1 x i1> %1)
158+
ret void
159+
}

0 commit comments

Comments
 (0)