-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[SeparateConstOffsetFromGEP] Decompose constant xor operand if possible #135788
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
e45f0aa
def3b3e
3492929
7ec5895
1d56b80
0f3c655
673dd1b
406a692
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -198,6 +198,8 @@ | |||||
| using namespace llvm; | ||||||
| using namespace llvm::PatternMatch; | ||||||
|
|
||||||
| #define DEBUG_TYPE "separate-offset-gep" | ||||||
|
|
||||||
| static cl::opt<bool> DisableSeparateConstOffsetFromGEP( | ||||||
| "disable-separate-const-offset-from-gep", cl::init(false), | ||||||
| cl::desc("Do not separate the constant offset from a GEP instruction"), | ||||||
|
|
@@ -484,6 +486,9 @@ class SeparateConstOffsetFromGEP { | |||||
|
|
||||||
| DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds; | ||||||
| DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs; | ||||||
|
|
||||||
| bool decomposeXor(Function &F); | ||||||
| Value *tryFoldXorToOrDisjoint(Instruction &I); | ||||||
| }; | ||||||
|
|
||||||
| } // end anonymous namespace | ||||||
|
|
@@ -1162,6 +1167,179 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { | |||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| bool SeparateConstOffsetFromGEP::decomposeXor(Function &F) { | ||||||
| bool FunctionChanged = false; | ||||||
| SmallVector<std::pair<Instruction *, Value *>, 16> ReplacementsToMake; | ||||||
|
|
||||||
| for (BasicBlock &BB : F) { | ||||||
| for (Instruction &I : BB) { | ||||||
| if (I.getOpcode() == Instruction::Xor) { | ||||||
| if (Value *Replacement = tryFoldXorToOrDisjoint(I)) { | ||||||
| ReplacementsToMake.push_back({&I, Replacement}); | ||||||
| FunctionChanged = true; | ||||||
|
||||||
| } | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if (!ReplacementsToMake.empty()) { | ||||||
| LLVM_DEBUG(dbgs() << "Applying " << ReplacementsToMake.size() | ||||||
| << " XOR->OR Disjoint replacements in " << F.getName() | ||||||
| << "\n"); | ||||||
| for (auto &Pair : ReplacementsToMake) | ||||||
| Pair.first->replaceAllUsesWith(Pair.second); | ||||||
|
|
||||||
| for (auto &Pair : ReplacementsToMake) | ||||||
| Pair.first->eraseFromParent(); | ||||||
| } | ||||||
|
|
||||||
| return FunctionChanged; | ||||||
| } | ||||||
|
|
||||||
| static llvm::Instruction *findClosestSequentialXor(Value *A, Instruction &I) { | ||||||
| llvm::Instruction *ClosestUser = nullptr; | ||||||
| for (llvm::User *User : A->users()) { | ||||||
| if (auto *UserInst = llvm::dyn_cast<llvm::Instruction>(User)) { | ||||||
| if (UserInst->getOpcode() != Instruction::Xor || UserInst == &I) | ||||||
| continue; | ||||||
| if (!ClosestUser) | ||||||
| ClosestUser = UserInst; | ||||||
| else { | ||||||
| // Compare instruction positions. | ||||||
| if (UserInst->comesBefore(ClosestUser)) { | ||||||
jmmartinez marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| ClosestUser = UserInst; | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| return ClosestUser; | ||||||
| } | ||||||
|
|
||||||
| /// Try to transform I = xor(A, C1) into or_disjoint(Y, C2) | ||||||
| /// where Y = xor(A, C0) is another existing instruction dominating I, | ||||||
| /// C2 = C1 - C0, and A is known to be disjoint with C2. | ||||||
|
||||||
| /// | ||||||
| /// This transformation is beneficial particularly for GEPs because: | ||||||
| /// 1. OR operations often map better to addressing modes than XOR | ||||||
| /// 2. Disjoint OR operations preserve the semantics of the original XOR | ||||||
| /// 3. This can enable further optimizations in the GEP offset folding pipeline | ||||||
| /// | ||||||
| /// @param I The XOR instruction being visited. | ||||||
| /// @return The replacement Value* if successful, nullptr otherwise. | ||||||
| Value *SeparateConstOffsetFromGEP::tryFoldXorToOrDisjoint(Instruction &I) { | ||||||
| assert(I.getOpcode() == Instruction::Xor && "Instruction must be XOR"); | ||||||
|
|
||||||
| // Check if I has at least one GEP user. | ||||||
| bool HasGepUser = false; | ||||||
| for (User *U : I.users()) { | ||||||
| if (isa<GetElementPtrInst>(U)) { | ||||||
| HasGepUser = true; | ||||||
| break; | ||||||
| } | ||||||
| } | ||||||
|
||||||
| // If no user is a GEP instruction, abort the transformation. | ||||||
| if (!HasGepUser) { | ||||||
| LLVM_DEBUG( | ||||||
| dbgs() << "SeparateConstOffsetFromGEP: Skipping XOR->OR DISJOINT for" | ||||||
| << I << " because it has no GEP users.\n"); | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| Value *Op0 = I.getOperand(0); | ||||||
| Value *Op1 = I.getOperand(1); | ||||||
| ConstantInt *C1 = dyn_cast<ConstantInt>(Op1); | ||||||
| Value *A = Op0; | ||||||
|
|
||||||
| // Bail out of there is not constant operand. | ||||||
| if (!C1) { | ||||||
| C1 = dyn_cast<ConstantInt>(Op0); | ||||||
| if (!C1) | ||||||
| return nullptr; | ||||||
| A = Op1; | ||||||
| } | ||||||
|
|
||||||
| if (isa<UndefValue>(A)) | ||||||
| return nullptr; | ||||||
|
|
||||||
| APInt C1_APInt = C1->getValue(); | ||||||
| unsigned BitWidth = C1_APInt.getBitWidth(); | ||||||
| Type *Ty = I.getType(); | ||||||
|
|
||||||
| // Find Dominating Y = xor A, C0 | ||||||
| Instruction *FoundUserInst = nullptr; | ||||||
| APInt C0_APInt; | ||||||
|
|
||||||
| // Find the closest XOR instruction using the same value. | ||||||
| Instruction *UserInst = findClosestSequentialXor(A, I); | ||||||
| if (!UserInst) { | ||||||
| LLVM_DEBUG( | ||||||
| dbgs() << "SeparateConstOffsetFromGEP: No dominating XOR found for" << I | ||||||
| << "\n"); | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| BinaryOperator *UserBO = cast<BinaryOperator>(UserInst); | ||||||
|
||||||
| Value *UserOp0 = UserBO->getOperand(0); | ||||||
| Value *UserOp1 = UserBO->getOperand(1); | ||||||
| ConstantInt *UserC = nullptr; | ||||||
| if (UserOp0 == A) | ||||||
| UserC = dyn_cast<ConstantInt>(UserOp1); | ||||||
| else if (UserOp1 == A) | ||||||
| UserC = dyn_cast<ConstantInt>(UserOp0); | ||||||
|
||||||
| else { | ||||||
| LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Found XOR" << *UserInst | ||||||
| << " doesn't use value " << *A << "\n"); | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| if (!UserC) { | ||||||
| LLVM_DEBUG( | ||||||
| dbgs() | ||||||
| << "SeparateConstOffsetFromGEP: Found XOR doesn't have constant operand" | ||||||
| << *UserInst << "\n"); | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| if (!DT->dominates(UserInst, &I)) { | ||||||
| LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Found XOR" << *UserInst | ||||||
| << " doesn't dominate " << I << "\n"); | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| FoundUserInst = UserInst; | ||||||
| C0_APInt = UserC->getValue(); | ||||||
|
|
||||||
| // Calculate C2 = C1 - C0. | ||||||
| APInt C2_APInt = C1_APInt - C0_APInt; | ||||||
|
|
||||||
| // Check Disjointness A & C2 == 0. | ||||||
| KnownBits KnownA(BitWidth); | ||||||
| computeKnownBits(A, KnownA, *DL, 0, nullptr, &I, DT); | ||||||
|
|
||||||
| if ((KnownA.One & C2_APInt) != 0) { | ||||||
| LLVM_DEBUG( | ||||||
| dbgs() << "SeparateConstOffsetFromGEP: Disjointness check failed for" | ||||||
| << I << "\n"); | ||||||
| return nullptr; | ||||||
| } | ||||||
|
|
||||||
| IRBuilder<> Builder(&I); | ||||||
| Builder.SetInsertPoint(&I); | ||||||
jmmartinez marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| Constant *C2_Const = ConstantInt::get(Ty, C2_APInt); | ||||||
| Twine Name = I.getName(); | ||||||
| Value *NewOr = BinaryOperator::CreateDisjointOr(FoundUserInst, C2_Const, Name, | ||||||
jmmartinez marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| I.getIterator()); | ||||||
| // Preserve metadata | ||||||
| if (Instruction *NewOrInst = dyn_cast<Instruction>(NewOr)) | ||||||
| NewOrInst->copyMetadata(I); | ||||||
|
||||||
|
|
||||||
| LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Replacing" << I | ||||||
| << " (used by GEP) with" << *NewOr << " based on" | ||||||
| << *FoundUserInst << "\n"); | ||||||
|
||||||
| << *FoundUserInst << "\n"); | |
| << *FoundUserInst << '\n'); |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to integrate this with the main function walk? I don't see why we need another walk.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will look at it. I can change the main function walk, but it may it more complex. decomposeXor modifies the IR in such a way that the rest of the pass does its things.
sgundapa marked this conversation as resolved.
Show resolved
Hide resolved
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,144 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=separate-const-offset-from-gep \ | ||
| ; RUN: -S < %s | FileCheck %s | ||
|
|
||
|
|
||
| ; Test with GEP user and known bits: Ensure the transformation occurs when the xor has a GEP user | ||
| define ptr @test_with_gep_user(ptr %ptr) { | ||
| ; CHECK-LABEL: define ptr @test_with_gep_user( | ||
| ; CHECK-SAME: ptr [[PTR:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[BASE:%.*]] = add i64 0, 0 | ||
| ; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 8 | ||
| ; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16 | ||
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]] | ||
| ; CHECK-NEXT: ret ptr [[GEP]] | ||
| ; | ||
| entry: | ||
| %base = add i64 0,0 | ||
| %xor1 = xor i64 %base, 8 | ||
| %xor2 = xor i64 %base, 24 ; Should be replaced with OR of %xor1 and 16 | ||
| %gep = getelementptr i8, ptr %ptr, i64 %xor2 | ||
| ret ptr %gep | ||
| } | ||
|
|
||
|
|
||
| ; Test with non-GEP user: Ensure the transformation does not occur | ||
| define i32 @test_with_non_gep_user(ptr %ptr) { | ||
| ; CHECK-LABEL: define i32 @test_with_non_gep_user( | ||
| ; CHECK-SAME: ptr [[PTR:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[BASE:%.*]] = add i32 0, 0 | ||
| ; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[BASE]], 8 | ||
| ; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[BASE]], 24 | ||
| ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[XOR2]], 5 | ||
| ; CHECK-NEXT: ret i32 [[ADD]] | ||
| ; | ||
| entry: | ||
| %base = add i32 0,0 | ||
| %xor1 = xor i32 %base, 8 | ||
| %xor2 = xor i32 %base, 24 | ||
| %add = add i32 %xor2, 5 | ||
| ret i32 %add | ||
| } | ||
|
|
||
| ; Test with non-constant operand: Ensure the transformation does not occur | ||
| define ptr @test_with_non_constant_operand(i64 %val, i64 %val2, ptr %ptr) { | ||
| ; CHECK-LABEL: define ptr @test_with_non_constant_operand( | ||
| ; CHECK-SAME: i64 [[VAL:%.*]], i64 [[VAL2:%.*]], ptr [[PTR:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[VAL]], [[VAL2]] | ||
| ; CHECK-NEXT: [[XOR2:%.*]] = xor i64 [[VAL]], 24 | ||
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR2]] | ||
| ; CHECK-NEXT: ret ptr [[GEP]] | ||
| ; | ||
| entry: | ||
| %xor1 = xor i64 %val, %val2 ; Non-constant operand | ||
| %xor2 = xor i64 %val, 24 | ||
| %gep = getelementptr i8, ptr %ptr, i64 %xor2 | ||
| ret ptr %gep | ||
| } | ||
|
|
||
| ; Test with unknown disjoint bits: Ensure the transformation does not occur | ||
| define ptr @test_with_unknown_disjoint_bits(i64 %base, ptr %ptr) { | ||
| ; CHECK-LABEL: define ptr @test_with_unknown_disjoint_bits( | ||
| ; CHECK-SAME: i64 [[BASE:%.*]], ptr [[PTR:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 8 | ||
| ; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16 | ||
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]] | ||
| ; CHECK-NEXT: ret ptr [[GEP]] | ||
| ; | ||
| entry: | ||
| %xor1 = xor i64 %base, 8 | ||
| %xor2 = xor i64 %base, 24 | ||
| %gep = getelementptr i8, ptr %ptr, i64 %xor2 | ||
| ret ptr %gep | ||
| } | ||
|
|
||
| ; Test with multiple xor operations in sequence | ||
| define ptr @test_multiple_xors(ptr %ptr) { | ||
| ; CHECK-LABEL: define ptr @test_multiple_xors( | ||
| ; CHECK-SAME: ptr [[PTR:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0 | ||
| ; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 8 | ||
| ; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16 | ||
| ; CHECK-NEXT: [[XOR32:%.*]] = or disjoint i64 [[XOR1]], 24 | ||
| ; CHECK-NEXT: [[XOR43:%.*]] = or disjoint i64 [[XOR1]], 64 | ||
| ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]] | ||
| ; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR32]] | ||
| ; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR43]] | ||
| ; CHECK-NEXT: ret ptr [[GEP4]] | ||
| ; | ||
| entry: | ||
| %base = add i64 2,0 | ||
| %xor1 = xor i64 %base, 8 | ||
| %xor2 = xor i64 %base, 24 ; Should be replaced with OR | ||
| %xor3 = xor i64 %base, 32 | ||
| %xor4 = xor i64 %base, 72 ; Should be replaced with OR | ||
| %gep2 = getelementptr i8, ptr %ptr, i64 %xor2 | ||
| %gep3 = getelementptr i8, ptr %ptr, i64 %xor3 | ||
| %gep4 = getelementptr i8, ptr %ptr, i64 %xor4 | ||
| ret ptr %gep4 | ||
| } | ||
|
|
||
|
|
||
| ; Test with operand order variations | ||
| define ptr @test_operand_order(ptr %ptr) { | ||
| ; CHECK-LABEL: define ptr @test_operand_order( | ||
| ; CHECK-SAME: ptr [[PTR:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0 | ||
| ; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 12 | ||
| ; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 12 | ||
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]] | ||
| ; CHECK-NEXT: ret ptr [[GEP]] | ||
| ; | ||
| entry: | ||
| %base = add i64 2,0 | ||
| %xor1 = xor i64 %base, 12 | ||
| %xor2 = xor i64 24, %base ; Operands reversed, should still be replaced | ||
jmmartinez marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| %gep = getelementptr i8, ptr %ptr, i64 %xor2 | ||
| ret ptr %gep | ||
| } | ||
|
|
||
|
|
||
| ; Test with multiple xor operations in sequence | ||
| define ptr @test_negative_offset(ptr %ptr) { | ||
| ; CHECK-LABEL: define ptr @test_negative_offset( | ||
| ; CHECK-SAME: ptr [[PTR:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0 | ||
| ; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 72 | ||
| ; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], -48 | ||
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]] | ||
| ; CHECK-NEXT: ret ptr [[GEP]] | ||
| ; | ||
| entry: | ||
| %base = add i64 2,0 | ||
| %xor1 = xor i64 %base, 72 | ||
| %xor2 = xor i64 %base, 24 ; Should be replaced with OR | ||
| %gep = getelementptr i8, ptr %ptr, i64 %xor2 | ||
| ret ptr %gep | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.