Skip to content

[Loads] Support dereference for non-constant offset #149551

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 10 additions & 14 deletions llvm/lib/Analysis/Loads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,29 +361,25 @@ bool llvm::isDereferenceableAndAlignedInLoop(
AccessSize = MaxPtrDiff;
AccessSizeSCEV = PtrDiff;
} else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
if (MinAdd->getNumOperands() != 2)
return false;
const auto *NewBase = dyn_cast<SCEVUnknown>(SE.getPointerBase(MinAdd));
auto *OffsetSCEV = SE.removePointerBase(MinAdd);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can move the removePointerBase() call below !NewBase.


const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0));
const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1));
if (!Offset || !NewBase)
if (!NewBase)
return false;

// The following code below assumes the offset is unsigned, but GEP
// offsets are treated as signed so we can end up with a signed value
// here too. For example, suppose the initial PHI value is (i8 255),
// the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
if (Offset->getAPInt().isNegative())
if (!SE.isKnownNonNegative(OffsetSCEV))
return false;

// For the moment, restrict ourselves to the case where the offset is a
// multiple of the requested alignment and the base is aligned.
// TODO: generalize if a case found which warrants
if (Offset->getAPInt().urem(Alignment.value()) != 0)
auto *OffsetSCEVTy = OffsetSCEV->getType();
if (!SE.getConstantMultiple(OffsetSCEV)
.urem(APInt(OffsetSCEVTy->getIntegerBitWidth(), Alignment.value()))
.isZero())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can be spelled more compactly as: if (SE.getMinTrailingZeros(OffsetSCEV) < Log2(Alignment))

return false;

AccessSize = MaxPtrDiff + Offset->getAPInt();
AccessSizeSCEV = SE.getAddExpr(PtrDiff, Offset);
AccessSize = MaxPtrDiff + SE.getUnsignedRangeMax(OffsetSCEV);
AccessSizeSCEV = SE.getAddExpr(PtrDiff, OffsetSCEV);
Base = NewBase->getValue();
} else
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,38 +562,19 @@ define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P,
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[IV_START]], [[N_VEC]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_START]], [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP8]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 1
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP14]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_CONTINUE2]]:
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP5]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
Expand All @@ -620,7 +601,6 @@ define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P,
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;

entry:
%a = getelementptr i8, ptr %P, i64 16
%cmp = icmp slt i64 %iv.start, %N
Expand Down