|
21 | 21 | #include "llvm/ADT/STLExtras.h" |
22 | 22 | #include "llvm/IR/IRBuilder.h" |
23 | 23 | #include "llvm/IR/InstrTypes.h" |
| 24 | +#include "llvm/IR/PatternMatch.h" |
24 | 25 |
|
25 | 26 | using namespace llvm; |
| 27 | +using namespace llvm::PatternMatch; |
26 | 28 |
|
27 | 29 | #define DEBUG_TYPE "eravm-post-codegen-prepare" |
28 | 30 | #define ERAVM_POST_CODEGEN_PREPARE_NAME \ |
@@ -129,12 +131,107 @@ static bool optimizeICmp(ICmpInst &Cmp) { |
129 | 131 | return true; |
130 | 132 | } |
131 | 133 |
|
| 134 | +// This optimization tries to convert: |
| 135 | +// %c = icmp ult %x, imm |
| 136 | +// br %c, bla, blb |
| 137 | +// %tc = lshr %x, LogBase2(imm) |
| 138 | +// to |
| 139 | +// %tc = lshr %x, LogBase2(imm) |
| 140 | +// %c = icmp eq %tc, 0 |
| 141 | +// br %c, bla, blb |
| 142 | +// |
| 143 | +// or |
| 144 | +// |
| 145 | +// %c = icmp eq/ne %x, imm |
| 146 | +// br %c, bla, blb |
| 147 | +// %tc = add/sub %x, -imm/imm |
| 148 | +// to |
| 149 | +// %tc = add/sub %x, -imm/imm |
| 150 | +// %c = icmp eq/ne %tc, 0 |
| 151 | +// br %c, bla, blb |
| 152 | +// |
| 153 | +// It is beneficial to do this transformation since lshr/add/sub produce flags |
| 154 | +// and cmp eq/ne 0 can be combined with them. |
| 155 | +// This is the same implementation as in the CodeGenPrepare pass (function |
| 156 | +// optimizeBranch) with the fix from upstream to drop poison generating |
| 157 | +// flags (PR #90382, commit ab12bba). Since this optimization can |
| 158 | +// create opportunities to generate overflow intrinsics (in |
| 159 | +// CodeGenPrepare::combineToUAddWithOverflow and |
| 160 | +// CodeGenPrepare::combineToUSubWithOverflow functions, where the latter is not |
| 161 | +// enabled atm since TLI->shouldFormOverflowOp returns true only for add), we |
| 162 | +// are moving this optimization here to prevent that. Benchmarks have shown that |
| 163 | +// creating more overflow intrinsics is not beneficial. |
| 164 | +// TODO #625: When ticket is resolved, remove this function and use |
| 165 | +// preferZeroCompareBranch TLI hook. |
| 166 | +static bool optimizeBranch(BranchInst *Branch) { |
| 167 | + if (!Branch->isConditional()) |
| 168 | + return false; |
| 169 | + |
| 170 | + auto *Cmp = dyn_cast<ICmpInst>(Branch->getCondition()); |
| 171 | + if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse()) |
| 172 | + return false; |
| 173 | + |
| 174 | + Value *X = Cmp->getOperand(0); |
| 175 | + APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue(); |
| 176 | + |
| 177 | + for (auto *U : X->users()) { |
| 178 | + auto *UI = dyn_cast<Instruction>(U); |
| 179 | + // A quick dominance check |
| 180 | + if (!UI || |
| 181 | + (UI->getParent() != Branch->getParent() && |
| 182 | + UI->getParent() != Branch->getSuccessor(0) && |
| 183 | + UI->getParent() != Branch->getSuccessor(1)) || |
| 184 | + (UI->getParent() != Branch->getParent() && |
| 185 | + !UI->getParent()->getSinglePredecessor())) |
| 186 | + continue; |
| 187 | + |
| 188 | + if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT && |
| 189 | + match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) { |
| 190 | + IRBuilder<> Builder(Branch); |
| 191 | + if (UI->getParent() != Branch->getParent()) |
| 192 | + UI->moveBefore(Branch); |
| 193 | + UI->dropPoisonGeneratingFlags(); |
| 194 | + Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI, |
| 195 | + ConstantInt::get(UI->getType(), 0)); |
| 196 | + LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); |
| 197 | + LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); |
| 198 | + Cmp->replaceAllUsesWith(NewCmp); |
| 199 | + return true; |
| 200 | + } |
| 201 | + if (Cmp->isEquality() && |
| 202 | + (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) || |
| 203 | + match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) { |
| 204 | + IRBuilder<> Builder(Branch); |
| 205 | + if (UI->getParent() != Branch->getParent()) |
| 206 | + UI->moveBefore(Branch); |
| 207 | + UI->dropPoisonGeneratingFlags(); |
| 208 | + Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI, |
| 209 | + ConstantInt::get(UI->getType(), 0)); |
| 210 | + LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); |
| 211 | + LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); |
| 212 | + Cmp->replaceAllUsesWith(NewCmp); |
| 213 | + return true; |
| 214 | + } |
| 215 | + } |
| 216 | + return false; |
| 217 | +} |
| 218 | + |
132 | 219 | bool EraVMPostCodegenPrepare::runOnFunction(Function &F) { |
133 | 220 | bool Changed = false; |
134 | | - for (auto &BB : F) |
135 | | - for (auto &I : llvm::make_early_inc_range(BB)) |
136 | | - if (auto *Cmp = dyn_cast<ICmpInst>(&I)) |
137 | | - Changed |= optimizeICmp(*Cmp); |
| 221 | + for (auto &BB : F) { |
| 222 | + for (auto &I : llvm::make_early_inc_range(BB)) { |
| 223 | + switch (I.getOpcode()) { |
| 224 | + default: |
| 225 | + break; |
| 226 | + case Instruction::ICmp: |
| 227 | + Changed |= optimizeICmp(cast<ICmpInst>(I)); |
| 228 | + break; |
| 229 | + case Instruction::Br: |
| 230 | + Changed |= optimizeBranch(cast<BranchInst>(&I)); |
| 231 | + break; |
| 232 | + } |
| 233 | + } |
| 234 | + } |
138 | 235 |
|
139 | 236 | Changed |= rearrangeOverflowHandlingBranches(F); |
140 | 237 | return Changed; |
|
0 commit comments