Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "separate-offset-gep"

static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
"disable-separate-const-offset-from-gep", cl::init(false),
cl::desc("Do not separate the constant offset from a GEP instruction"),
Expand Down Expand Up @@ -484,6 +486,9 @@ class SeparateConstOffsetFromGEP {

DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;

bool decomposeXor(Function &F);
Value *tryFoldXorToOrDisjoint(Instruction &I);
};

} // end anonymous namespace
Expand Down Expand Up @@ -1162,6 +1167,179 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
return true;
}

bool SeparateConstOffsetFromGEP::decomposeXor(Function &F) {
bool FunctionChanged = false;
SmallVector<std::pair<Instruction *, Value *>, 16> ReplacementsToMake;

for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (I.getOpcode() == Instruction::Xor) {
if (Value *Replacement = tryFoldXorToOrDisjoint(I)) {
ReplacementsToMake.push_back({&I, Replacement});
FunctionChanged = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need FunctionChanged, just check if !ReplacementsToMake.empty()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure.

}
}
}
}

if (!ReplacementsToMake.empty()) {
LLVM_DEBUG(dbgs() << "Applying " << ReplacementsToMake.size()
<< " XOR->OR Disjoint replacements in " << F.getName()
<< "\n");
for (auto &Pair : ReplacementsToMake)
Pair.first->replaceAllUsesWith(Pair.second);

for (auto &Pair : ReplacementsToMake)
Pair.first->eraseFromParent();
}

return FunctionChanged;
}

static llvm::Instruction *findClosestSequentialXor(Value *A, Instruction &I) {
llvm::Instruction *ClosestUser = nullptr;
for (llvm::User *User : A->users()) {
if (auto *UserInst = llvm::dyn_cast<llvm::Instruction>(User)) {
if (UserInst->getOpcode() != Instruction::Xor || UserInst == &I)
continue;
if (!ClosestUser)
ClosestUser = UserInst;
else {
// Compare instruction positions.
if (UserInst->comesBefore(ClosestUser)) {
ClosestUser = UserInst;
}
}
}
}
return ClosestUser;
}

/// Try to transform I = xor(A, C1) into or_disjoint(Y, C2)
/// where Y = xor(A, C0) is another existing instruction dominating I,
/// C2 = C1 - C0, and A is known to be disjoint with C2.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't core to what this pass is doing. This should have occurred in previous passes

///
/// This transformation is beneficial particularly for GEPs because:
/// 1. OR operations often map better to addressing modes than XOR
/// 2. Disjoint OR operations preserve the semantics of the original XOR
/// 3. This can enable further optimizations in the GEP offset folding pipeline
///
/// @param I The XOR instruction being visited.
/// @return The replacement Value* if successful, nullptr otherwise.
Value *SeparateConstOffsetFromGEP::tryFoldXorToOrDisjoint(Instruction &I) {
assert(I.getOpcode() == Instruction::Xor && "Instruction must be XOR");

// Check if I has at least one GEP user.
bool HasGepUser = false;
for (User *U : I.users()) {
if (isa<GetElementPtrInst>(U)) {
HasGepUser = true;
break;
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally shouldn't be looking at users but I also don't see why this cares

// If no user is a GEP instruction, abort the transformation.
if (!HasGepUser) {
LLVM_DEBUG(
dbgs() << "SeparateConstOffsetFromGEP: Skipping XOR->OR DISJOINT for"
<< I << " because it has no GEP users.\n");
return nullptr;
}

Value *Op0 = I.getOperand(0);
Value *Op1 = I.getOperand(1);
ConstantInt *C1 = dyn_cast<ConstantInt>(Op1);
Value *A = Op0;

// Bail out of there is not constant operand.
if (!C1) {
C1 = dyn_cast<ConstantInt>(Op0);
if (!C1)
return nullptr;
A = Op1;
}

if (isa<UndefValue>(A))
return nullptr;

APInt C1_APInt = C1->getValue();
unsigned BitWidth = C1_APInt.getBitWidth();
Type *Ty = I.getType();

// Find Dominating Y = xor A, C0
Instruction *FoundUserInst = nullptr;
APInt C0_APInt;

// Find the closest XOR instruction using the same value.
Instruction *UserInst = findClosestSequentialXor(A, I);
if (!UserInst) {
LLVM_DEBUG(
dbgs() << "SeparateConstOffsetFromGEP: No dominating XOR found for" << I
<< "\n");
return nullptr;
}

BinaryOperator *UserBO = cast<BinaryOperator>(UserInst);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add a nullptr check here before I use this instruction.

Value *UserOp0 = UserBO->getOperand(0);
Value *UserOp1 = UserBO->getOperand(1);
ConstantInt *UserC = nullptr;
if (UserOp0 == A)
UserC = dyn_cast<ConstantInt>(UserOp1);
else if (UserOp1 == A)
UserC = dyn_cast<ConstantInt>(UserOp0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't handle the vector case, use PatternMatch to make it easier

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will fix it.

else {
LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Found XOR" << *UserInst
<< " doesn't use value " << *A << "\n");
return nullptr;
}

if (!UserC) {
LLVM_DEBUG(
dbgs()
<< "SeparateConstOffsetFromGEP: Found XOR doesn't have constant operand"
<< *UserInst << "\n");
return nullptr;
}

if (!DT->dominates(UserInst, &I)) {
LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Found XOR" << *UserInst
<< " doesn't dominate " << I << "\n");
return nullptr;
}

FoundUserInst = UserInst;
C0_APInt = UserC->getValue();

// Calculate C2 = C1 - C0.
APInt C2_APInt = C1_APInt - C0_APInt;

// Check Disjointness A & C2 == 0.
KnownBits KnownA(BitWidth);
computeKnownBits(A, KnownA, *DL, 0, nullptr, &I, DT);

if ((KnownA.One & C2_APInt) != 0) {
LLVM_DEBUG(
dbgs() << "SeparateConstOffsetFromGEP: Disjointness check failed for"
<< I << "\n");
return nullptr;
}

IRBuilder<> Builder(&I);
Builder.SetInsertPoint(&I);
Constant *C2_Const = ConstantInt::get(Ty, C2_APInt);
Twine Name = I.getName();
Value *NewOr = BinaryOperator::CreateDisjointOr(FoundUserInst, C2_Const, Name,
I.getIterator());
// Preserve metadata
if (Instruction *NewOrInst = dyn_cast<Instruction>(NewOr))
NewOrInst->copyMetadata(I);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will add this to preserve the metadata

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need to manually copy the metadata, the IRBuilder will handle the debug-line info. Or ReplaceInstWithInst.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There could be more metadata than the debug lines, but it's not necessarily trivially correct to preserve it all


LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Replacing" << I
<< " (used by GEP) with" << *NewOr << " based on"
<< *FoundUserInst << "\n");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<< *FoundUserInst << "\n");
<< *FoundUserInst << '\n');


return NewOr;
}

bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
Expand All @@ -1181,6 +1359,10 @@ bool SeparateConstOffsetFromGEP::run(Function &F) {

DL = &F.getDataLayout();
bool Changed = false;

// Decompose xor in to "or disjoint" if possible.
Changed |= decomposeXor(F);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to integrate this with the main function walk? I don't see why we need another walk.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will look at it. I can change the main function walk, but it may it more complex. decomposeXor modifies the IR in such a way that the rest of the pass does its things.


for (BasicBlock &B : F) {
if (!DT->isReachableFromEntry(&B))
continue;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=separate-const-offset-from-gep \
; RUN: -S < %s | FileCheck %s


; Test with GEP user and known bits: Ensure the transformation occurs when the xor has a GEP user
define ptr @test_with_gep_user(ptr %ptr) {
; CHECK-LABEL: define ptr @test_with_gep_user(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[BASE:%.*]] = add i64 0, 0
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 8
; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
%base = add i64 0,0
%xor1 = xor i64 %base, 8
%xor2 = xor i64 %base, 24 ; Should be replaced with OR of %xor1 and 16
%gep = getelementptr i8, ptr %ptr, i64 %xor2
ret ptr %gep
}


; Test with non-GEP user: Ensure the transformation does not occur
define i32 @test_with_non_gep_user(ptr %ptr) {
; CHECK-LABEL: define i32 @test_with_non_gep_user(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[BASE:%.*]] = add i32 0, 0
; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[BASE]], 8
; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[BASE]], 24
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[XOR2]], 5
; CHECK-NEXT: ret i32 [[ADD]]
;
entry:
%base = add i32 0,0
%xor1 = xor i32 %base, 8
%xor2 = xor i32 %base, 24
%add = add i32 %xor2, 5
ret i32 %add
}

; Test with non-constant operand: Ensure the transformation does not occur
define ptr @test_with_non_constant_operand(i64 %val, i64 %val2, ptr %ptr) {
; CHECK-LABEL: define ptr @test_with_non_constant_operand(
; CHECK-SAME: i64 [[VAL:%.*]], i64 [[VAL2:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[VAL]], [[VAL2]]
; CHECK-NEXT: [[XOR2:%.*]] = xor i64 [[VAL]], 24
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR2]]
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
%xor1 = xor i64 %val, %val2 ; Non-constant operand
%xor2 = xor i64 %val, 24
%gep = getelementptr i8, ptr %ptr, i64 %xor2
ret ptr %gep
}

; Test with unknown disjoint bits: Ensure the transformation does not occur
define ptr @test_with_unknown_disjoint_bits(i64 %base, ptr %ptr) {
; CHECK-LABEL: define ptr @test_with_unknown_disjoint_bits(
; CHECK-SAME: i64 [[BASE:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 8
; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
%xor1 = xor i64 %base, 8
%xor2 = xor i64 %base, 24
%gep = getelementptr i8, ptr %ptr, i64 %xor2
ret ptr %gep
}

; Test with multiple xor operations in sequence
define ptr @test_multiple_xors(ptr %ptr) {
; CHECK-LABEL: define ptr @test_multiple_xors(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 8
; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16
; CHECK-NEXT: [[XOR32:%.*]] = or disjoint i64 [[XOR1]], 24
; CHECK-NEXT: [[XOR43:%.*]] = or disjoint i64 [[XOR1]], 64
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR32]]
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR43]]
; CHECK-NEXT: ret ptr [[GEP4]]
;
entry:
%base = add i64 2,0
%xor1 = xor i64 %base, 8
%xor2 = xor i64 %base, 24 ; Should be replaced with OR
%xor3 = xor i64 %base, 32
%xor4 = xor i64 %base, 72 ; Should be replaced with OR
%gep2 = getelementptr i8, ptr %ptr, i64 %xor2
%gep3 = getelementptr i8, ptr %ptr, i64 %xor3
%gep4 = getelementptr i8, ptr %ptr, i64 %xor4
ret ptr %gep4
}


; Test with operand order variations
define ptr @test_operand_order(ptr %ptr) {
; CHECK-LABEL: define ptr @test_operand_order(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 12
; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 12
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
%base = add i64 2,0
%xor1 = xor i64 %base, 12
%xor2 = xor i64 24, %base ; Operands reversed, should still be replaced
%gep = getelementptr i8, ptr %ptr, i64 %xor2
ret ptr %gep
}


; Test with multiple xor operations in sequence
define ptr @test_negative_offset(ptr %ptr) {
; CHECK-LABEL: define ptr @test_negative_offset(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 72
; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], -48
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
%base = add i64 2,0
%xor1 = xor i64 %base, 72
%xor2 = xor i64 %base, 24 ; Should be replaced with OR
%gep = getelementptr i8, ptr %ptr, i64 %xor2
ret ptr %gep
}
Loading