Skip to content

Commit 72ae8fe

Browse files
[EraVM] Optimize towards branch on zero in EraVMPostCodegenPrepare
This patch adds an optimization that transforms lshr/add/sub + cmp into lshr/add/sub + cmp eq/ne 0. This is beneficial since these instructions produce flags, and the cmp can be combined with them. This optimization is copied from CodeGenPrepare. PR: #618 Signed-off-by: Vladimir Radosavljevic <[email protected]>
1 parent d378903 commit 72ae8fe

File tree

2 files changed

+110
-13
lines changed

2 files changed

+110
-13
lines changed

llvm/lib/Target/EraVM/EraVMPostCodegenPrepare.cpp

Lines changed: 101 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
#include "llvm/ADT/STLExtras.h"
2222
#include "llvm/IR/IRBuilder.h"
2323
#include "llvm/IR/InstrTypes.h"
24+
#include "llvm/IR/PatternMatch.h"
2425

2526
using namespace llvm;
27+
using namespace llvm::PatternMatch;
2628

2729
#define DEBUG_TYPE "eravm-post-codegen-prepare"
2830
#define ERAVM_POST_CODEGEN_PREPARE_NAME \
@@ -129,12 +131,107 @@ static bool optimizeICmp(ICmpInst &Cmp) {
129131
return true;
130132
}
131133

134+
// This optimization tries to convert:
135+
// %c = icmp ult %x, imm
136+
// br %c, bla, blb
137+
// %tc = lshr %x, LogBase2(imm)
138+
// to
139+
// %tc = lshr %x, LogBase2(imm)
140+
// %c = icmp eq %tc, 0
141+
// br %c, bla, blb
142+
//
143+
// or
144+
//
145+
// %c = icmp eq/ne %x, imm
146+
// br %c, bla, blb
147+
// %tc = add/sub %x, -imm/imm
148+
// to
149+
// %tc = add/sub %x, -imm/imm
150+
// %c = icmp eq/ne %tc, 0
151+
// br %c, bla, blb
152+
//
153+
// It is beneficial to do this transformation since lshr/add/sub produce flags
154+
// and cmp eq/ne 0 can be combined with them.
155+
// This is the same implementation as in the CodeGenPrepare pass (function
156+
// optimizeBranch) with the fix from upstream to drop poison generating
157+
// flags (PR #90382, commit ab12bba). Since this optimization can
158+
// create opportunities to generate overflow intrinsics (in
159+
// CodeGenPrepare::combineToUAddWithOverflow and
160+
// CodeGenPrepare::combineToUSubWithOverflow functions, where the latter is not
161+
// enabled atm since TLI->shouldFormOverflowOp returns true only for add), we
162+
// are moving this optimization here to prevent that. Benchmarks have shown that
163+
// creating more overflow intrinsics is not beneficial.
164+
// TODO #625: When ticket is resolved, remove this function and use
165+
// preferZeroCompareBranch TLI hook.
166+
static bool optimizeBranch(BranchInst *Branch) {
167+
if (!Branch->isConditional())
168+
return false;
169+
170+
auto *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
171+
if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
172+
return false;
173+
174+
Value *X = Cmp->getOperand(0);
175+
APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
176+
177+
for (auto *U : X->users()) {
178+
auto *UI = dyn_cast<Instruction>(U);
179+
// A quick dominance check
180+
if (!UI ||
181+
(UI->getParent() != Branch->getParent() &&
182+
UI->getParent() != Branch->getSuccessor(0) &&
183+
UI->getParent() != Branch->getSuccessor(1)) ||
184+
(UI->getParent() != Branch->getParent() &&
185+
!UI->getParent()->getSinglePredecessor()))
186+
continue;
187+
188+
if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
189+
match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
190+
IRBuilder<> Builder(Branch);
191+
if (UI->getParent() != Branch->getParent())
192+
UI->moveBefore(Branch);
193+
UI->dropPoisonGeneratingFlags();
194+
Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
195+
ConstantInt::get(UI->getType(), 0));
196+
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
197+
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
198+
Cmp->replaceAllUsesWith(NewCmp);
199+
return true;
200+
}
201+
if (Cmp->isEquality() &&
202+
(match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
203+
match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
204+
IRBuilder<> Builder(Branch);
205+
if (UI->getParent() != Branch->getParent())
206+
UI->moveBefore(Branch);
207+
UI->dropPoisonGeneratingFlags();
208+
Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
209+
ConstantInt::get(UI->getType(), 0));
210+
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
211+
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
212+
Cmp->replaceAllUsesWith(NewCmp);
213+
return true;
214+
}
215+
}
216+
return false;
217+
}
218+
132219
bool EraVMPostCodegenPrepare::runOnFunction(Function &F) {
133220
bool Changed = false;
134-
for (auto &BB : F)
135-
for (auto &I : llvm::make_early_inc_range(BB))
136-
if (auto *Cmp = dyn_cast<ICmpInst>(&I))
137-
Changed |= optimizeICmp(*Cmp);
221+
for (auto &BB : F) {
222+
for (auto &I : llvm::make_early_inc_range(BB)) {
223+
switch (I.getOpcode()) {
224+
default:
225+
break;
226+
case Instruction::ICmp:
227+
Changed |= optimizeICmp(cast<ICmpInst>(I));
228+
break;
229+
case Instruction::Br:
230+
Changed |= optimizeBranch(cast<BranchInst>(&I));
231+
break;
232+
}
233+
}
234+
}
138235

139236
Changed |= rearrangeOverflowHandlingBranches(F);
140237
return Changed;

llvm/test/CodeGen/EraVM/branch-on-zero.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ target triple = "eravm"
77
define i256 @lshr(i256 %a) {
88
; CHECK-LABEL: lshr:
99
; CHECK: ; %bb.0: ; %entry
10-
; CHECK-NEXT: sub.s! 7, r1, r2
11-
; CHECK-NEXT: shr.s.gt 3, r1, r1
12-
; CHECK-NEXT: add.le r0, r0, r1
10+
; CHECK-NEXT: shr.s! 3, r1, r1
11+
; CHECK-NEXT: add.eq r0, r0, r1
12+
; CHECK-NEXT: ; %bb.1: ; %else
1313
; CHECK-NEXT: ret
1414
entry:
1515
%c = icmp ult i256 %a, 8
@@ -47,9 +47,9 @@ else:
4747
define i256 @add_overflow(i256 %a) {
4848
; CHECK-LABEL: add_overflow:
4949
; CHECK: ; %bb.0: ; %entry
50-
; CHECK-NEXT: sub.s! @CPI2_0[0], r1, r2
51-
; CHECK-NEXT: add.ne 1, r1, r1
50+
; CHECK-NEXT: add! 1, r1, r1
5251
; CHECK-NEXT: add.eq r0, r0, r1
52+
; CHECK-NEXT: ; %bb.1: ; %else
5353
; CHECK-NEXT: ret
5454
entry:
5555
%c = icmp eq i256 %a, -1
@@ -66,9 +66,9 @@ else:
6666
define i256 @add(i256 %a) {
6767
; CHECK-LABEL: add:
6868
; CHECK: ; %bb.0: ; %entry
69-
; CHECK-NEXT: sub.s! 10, r1, r2
70-
; CHECK-NEXT: sub.s.ne 10, r1, r1
69+
; CHECK-NEXT: sub.s! 10, r1, r1
7170
; CHECK-NEXT: add.eq r0, r0, r1
71+
; CHECK-NEXT: ; %bb.1: ; %else
7272
; CHECK-NEXT: ret
7373
entry:
7474
%c = icmp eq i256 %a, 10
@@ -104,9 +104,9 @@ else:
104104
define i256 @sub(i256 %a) {
105105
; CHECK-LABEL: sub:
106106
; CHECK: ; %bb.0: ; %entry
107-
; CHECK-NEXT: sub.s! 10, r1, r2
108-
; CHECK-NEXT: sub.s.ne 10, r1, r1
107+
; CHECK-NEXT: sub.s! 10, r1, r1
109108
; CHECK-NEXT: add.eq r0, r0, r1
109+
; CHECK-NEXT: ; %bb.1: ; %else
110110
; CHECK-NEXT: ret
111111
entry:
112112
%c = icmp eq i256 %a, 10

0 commit comments

Comments
 (0)