174174#include " llvm/IR/Function.h"
175175#include " llvm/IR/GetElementPtrTypeIterator.h"
176176#include " llvm/IR/IRBuilder.h"
177- #include " llvm/IR/InstIterator.h"
178177#include " llvm/IR/InstrTypes.h"
179178#include " llvm/IR/Instruction.h"
180179#include " llvm/IR/Instructions.h"
191190#include " llvm/Support/ErrorHandling.h"
192191#include " llvm/Support/raw_ostream.h"
193192#include " llvm/Transforms/Scalar.h"
194- #include " llvm/Transforms/Utils/BasicBlockUtils.h"
195193#include " llvm/Transforms/Utils/Local.h"
196194#include < cassert>
197195#include < cstdint>
200198using namespace llvm ;
201199using namespace llvm ::PatternMatch;
202200
203- #define DEBUG_TYPE " separate-offset-gep"
204-
205201static cl::opt<bool > DisableSeparateConstOffsetFromGEP (
206202 " disable-separate-const-offset-from-gep" , cl::init(false ),
207203 cl::desc(" Do not separate the constant offset from a GEP instruction" ),
@@ -492,42 +488,6 @@ class SeparateConstOffsetFromGEP {
492488 DenseMap<ExprKey, SmallVector<Instruction *, 2 >> DominatingSubs;
493489};
494490
495- // / A helper class that aims to convert xor operations into or operations when
496- // / their operands are disjoint and the result is used in a GEP's index. This
497- // / can then enable further GEP optimizations by effectively turning BaseVal |
498- // / Const into BaseVal + Const when they are disjoint, which
499- // / SeparateConstOffsetFromGEP can then process. This is a common pattern that
500- // / sets up a grid of memory accesses across a wave where each thread acesses
501- // / data at various offsets.
502- class XorToOrDisjointTransformer {
503- public:
504- XorToOrDisjointTransformer (Function &F, DominatorTree &DT,
505- const DataLayout &DL)
506- : F(F), DT(DT), DL(DL) {}
507-
508- bool run ();
509-
510- private:
511- Function &F;
512- DominatorTree &DT;
513- const DataLayout &DL;
514- // / Maps a common operand to all Xor instructions
515- using XorOpList = SmallVector<std::pair<BinaryOperator *, APInt>, 8 >;
516- using XorBaseValInst = DenseMap<Instruction *, XorOpList>;
517- XorBaseValInst XorGroups;
518-
519- // / Checks if the given value has at least one GetElementPtr user
520- static bool hasGEPUser (const Value *V);
521-
522- // / Helper function to check if BaseXor dominates all XORs in the group
523- bool dominatesAllXors (BinaryOperator *BaseXor, const XorOpList &XorsInGroup);
524-
525- // / Processes a group of XOR instructions that share the same non-constant
526- // / base operand. Returns true if this group's processing modified the
527- // / function.
528- bool processXorGroup (Instruction *OriginalBaseInst, XorOpList &XorsInGroup);
529- };
530-
531491} // end anonymous namespace
532492
533493char SeparateConstOffsetFromGEPLegacyPass::ID = 0 ;
@@ -1263,154 +1223,6 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
12631223 return true ;
12641224}
12651225
1266- // Helper function to check if an instruction has at least one GEP user
1267- bool XorToOrDisjointTransformer::hasGEPUser (const Value *V) {
1268- return llvm::any_of (V->users (), [](const User *U) {
1269- return isa<llvm::GetElementPtrInst>(U);
1270- });
1271- }
1272-
1273- bool XorToOrDisjointTransformer::dominatesAllXors (
1274- BinaryOperator *BaseXor, const XorOpList &XorsInGroup) {
1275- return llvm::all_of (XorsInGroup, [&](const auto &XorEntry) {
1276- BinaryOperator *XorInst = XorEntry.first ;
1277- // Do not evaluate the BaseXor, otherwise we end up cloning it.
1278- return XorInst == BaseXor || DT.dominates (BaseXor, XorInst);
1279- });
1280- }
1281-
1282- bool XorToOrDisjointTransformer::processXorGroup (Instruction *OriginalBaseInst,
1283- XorOpList &XorsInGroup) {
1284- bool Changed = false ;
1285- if (XorsInGroup.size () <= 1 )
1286- return false ;
1287-
1288- // Sort XorsInGroup by the constant offset value in increasing order.
1289- llvm::sort (XorsInGroup, [](const auto &A, const auto &B) {
1290- return A.second .slt (B.second );
1291- });
1292-
1293- // Dominance check
1294- // The "base" XOR for dominance purposes is the one with the smallest
1295- // constant.
1296- BinaryOperator *XorWithSmallConst = XorsInGroup[0 ].first ;
1297-
1298- if (!dominatesAllXors (XorWithSmallConst, XorsInGroup)) {
1299- LLVM_DEBUG (dbgs () << DEBUG_TYPE
1300- << " : Cloning and inserting XOR with smallest constant ("
1301- << *XorWithSmallConst
1302- << " ) as it does not dominate all other XORs"
1303- << " in function " << F.getName () << " \n " );
1304-
1305- BinaryOperator *ClonedXor =
1306- cast<BinaryOperator>(XorWithSmallConst->clone ());
1307- ClonedXor->setName (XorWithSmallConst->getName () + " .dom_clone" );
1308- ClonedXor->insertAfter (OriginalBaseInst);
1309- LLVM_DEBUG (dbgs () << " Cloned Inst: " << *ClonedXor << " \n " );
1310- Changed = true ;
1311- XorWithSmallConst = ClonedXor;
1312- }
1313-
1314- SmallVector<Instruction *, 8 > InstructionsToErase;
1315- const APInt SmallestConst =
1316- cast<ConstantInt>(XorWithSmallConst->getOperand (1 ))->getValue ();
1317-
1318- // Main transformation loop: Iterate over the original XORs in the sorted
1319- // group.
1320- for (const auto &XorEntry : XorsInGroup) {
1321- BinaryOperator *XorInst = XorEntry.first ; // Original XOR instruction
1322- const APInt ConstOffsetVal = XorEntry.second ;
1323-
1324- // Do not process the one with smallest constant as it is the base.
1325- if (XorInst == XorWithSmallConst)
1326- continue ;
1327-
1328- // Disjointness Check 1
1329- APInt NewConstVal = ConstOffsetVal - SmallestConst;
1330- if ((NewConstVal & SmallestConst) != 0 ) {
1331- LLVM_DEBUG (dbgs () << DEBUG_TYPE << " : Cannot transform XOR in function "
1332- << F.getName () << " :\n "
1333- << " New Const: " << NewConstVal
1334- << " Smallest Const: " << SmallestConst
1335- << " are not disjoint \n " );
1336- continue ;
1337- }
1338-
1339- // Disjointness Check 2
1340- if (MaskedValueIsZero (XorWithSmallConst, NewConstVal, SimplifyQuery (DL),
1341- 0 )) {
1342- LLVM_DEBUG (dbgs () << DEBUG_TYPE
1343- << " : Transforming XOR to OR (disjoint) in function "
1344- << F.getName () << " :\n "
1345- << " Xor: " << *XorInst << " \n "
1346- << " Base Val: " << *XorWithSmallConst << " \n "
1347- << " New Const: " << NewConstVal << " \n " );
1348-
1349- auto *NewOrInst = BinaryOperator::CreateDisjointOr (
1350- XorWithSmallConst,
1351- ConstantInt::get (OriginalBaseInst->getType (), NewConstVal),
1352- XorInst->getName () + " .or_disjoint" , XorInst->getIterator ());
1353-
1354- NewOrInst->copyMetadata (*XorInst);
1355- XorInst->replaceAllUsesWith (NewOrInst);
1356- LLVM_DEBUG (dbgs () << " New Inst: " << *NewOrInst << " \n " );
1357- InstructionsToErase.push_back (XorInst); // Mark original XOR for deletion
1358-
1359- Changed = true ;
1360- } else {
1361- LLVM_DEBUG (
1362- dbgs () << DEBUG_TYPE
1363- << " : Cannot transform XOR (not proven disjoint) in function "
1364- << F.getName () << " :\n "
1365- << " Xor: " << *XorInst << " \n "
1366- << " Base Val: " << *XorWithSmallConst << " \n "
1367- << " New Const: " << NewConstVal << " \n " );
1368- }
1369- }
1370-
1371- for (Instruction *I : InstructionsToErase)
1372- I->eraseFromParent ();
1373-
1374- return Changed;
1375- }
1376-
1377- // Try to transform XOR(A, B+C) in to XOR(A,C) + B where XOR(A,C) becomes
1378- // the base for memory operations. This transformation is true under the
1379- // following conditions
1380- // Check 1 - B and C are disjoint.
1381- // Check 2 - XOR(A,C) and B are disjoint.
1382- //
1383- // This transformation is beneficial particularly for GEPs because:
1384- // 1. OR operations often map better to addressing modes than XOR
1385- // 2. Disjoint OR operations preserve the semantics of the original XOR
1386- // 3. This can enable further optimizations in the GEP offset folding pipeline
1387- bool XorToOrDisjointTransformer::run () {
1388- bool Changed = false ;
1389-
1390- // Collect all candidate XORs
1391- for (Instruction &I : instructions (F)) {
1392- Instruction *Op0 = nullptr ;
1393- ConstantInt *C1 = nullptr ;
1394- BinaryOperator *MatchedXorOp = nullptr ;
1395-
1396- // Attempt to match the instruction 'I' as XOR operation.
1397- if (match (&I, m_CombineAnd (m_Xor (m_Instruction (Op0), m_ConstantInt (C1)),
1398- m_BinOp (MatchedXorOp))) &&
1399- hasGEPUser (MatchedXorOp))
1400- XorGroups[Op0].emplace_back (MatchedXorOp, C1->getValue ());
1401- }
1402-
1403- if (XorGroups.empty ())
1404- return false ;
1405-
1406- // Process each group of XORs
1407- for (auto &[OriginalBaseInst, XorsInGroup] : XorGroups)
1408- if (processXorGroup (OriginalBaseInst, XorsInGroup))
1409- Changed = true ;
1410-
1411- return Changed;
1412- }
1413-
14141226bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction (Function &F) {
14151227 if (skipFunction (F))
14161228 return false ;
@@ -1430,11 +1242,6 @@ bool SeparateConstOffsetFromGEP::run(Function &F) {
14301242
14311243 DL = &F.getDataLayout ();
14321244 bool Changed = false ;
1433-
1434- // Decompose xor in to "or disjoint" if possible.
1435- XorToOrDisjointTransformer XorTransformer (F, *DT, *DL);
1436- Changed |= XorTransformer.run ();
1437-
14381245 for (BasicBlock &B : F) {
14391246 if (!DT->isReachableFromEntry (&B))
14401247 continue ;
0 commit comments