Skip to content

Commit 8b8c59c

Browse files
authored
[Loads] Check if Ptr can be freed between Assume and CtxI. (llvm#161255)
When using information from dereferenceable assumptions, we need to make sure that the memory is not freed between the assume and the specified context instruction. Instead of just checking canBeFreed, check if there are any calls that may free between the assume and the context instruction. Note that this also adjusts the context instruction to be the terminator in the loop predecessor, if there is one and it is a branch (to avoid things like invoke). PR: llvm#161255
1 parent fb17bc7 commit 8b8c59c

File tree

3 files changed

+43
-46
lines changed

3 files changed

+43
-46
lines changed

llvm/lib/Analysis/Loads.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,13 @@ static bool isDereferenceableAndAlignedPointerViaAssumption(
3737
function_ref<bool(const RetainedKnowledge &RK)> CheckSize,
3838
const DataLayout &DL, const Instruction *CtxI, AssumptionCache *AC,
3939
const DominatorTree *DT) {
40-
// Dereferenceable information from assumptions is only valid if the value
41-
// cannot be freed between the assumption and use. For now just use the
42-
// information for values that cannot be freed in the function.
43-
// TODO: More precisely check if the pointer can be freed between assumption
44-
// and use.
45-
if (!CtxI || Ptr->canBeFreed())
40+
if (!CtxI)
4641
return false;
4742
/// Look through assumes to see if both dereferencability and alignment can
4843
/// be proven by an assume if needed.
4944
RetainedKnowledge AlignRK;
5045
RetainedKnowledge DerefRK;
46+
bool PtrCanBeFreed = Ptr->canBeFreed();
5147
bool IsAligned = Ptr->getPointerAlignment(DL) >= Alignment;
5248
return getKnowledgeForValue(
5349
Ptr, {Attribute::Dereferenceable, Attribute::Alignment}, *AC,
@@ -56,7 +52,11 @@ static bool isDereferenceableAndAlignedPointerViaAssumption(
5652
return false;
5753
if (RK.AttrKind == Attribute::Alignment)
5854
AlignRK = std::max(AlignRK, RK);
59-
if (RK.AttrKind == Attribute::Dereferenceable)
55+
56+
// Dereferenceable information from assumptions is only valid if the
57+
// value cannot be freed between the assumption and use.
58+
if ((!PtrCanBeFreed || willNotFreeBetween(Assume, CtxI)) &&
59+
RK.AttrKind == Attribute::Dereferenceable)
6060
DerefRK = std::max(DerefRK, RK);
6161
IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
6262
if (IsAligned && DerefRK && CheckSize(DerefRK))
@@ -390,7 +390,11 @@ bool llvm::isDereferenceableAndAlignedInLoop(
390390
} else
391391
return false;
392392

393-
Instruction *HeaderFirstNonPHI = &*L->getHeader()->getFirstNonPHIIt();
393+
Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
394+
if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
395+
if (isa<BranchInst>(LoopPred->getTerminator()))
396+
CtxI = LoopPred->getTerminator();
397+
}
394398
return isDereferenceableAndAlignedPointerViaAssumption(
395399
Base, Alignment,
396400
[&SE, AccessSizeSCEV, &LoopGuards](const RetainedKnowledge &RK) {
@@ -399,9 +403,9 @@ bool llvm::isDereferenceableAndAlignedInLoop(
399403
SE.applyLoopGuards(AccessSizeSCEV, *LoopGuards),
400404
SE.applyLoopGuards(SE.getSCEV(RK.IRArgValue), *LoopGuards));
401405
},
402-
DL, HeaderFirstNonPHI, AC, &DT) ||
406+
DL, CtxI, AC, &DT) ||
403407
isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
404-
HeaderFirstNonPHI, AC, &DT);
408+
CtxI, AC, &DT);
405409
}
406410

407411
static bool suppressSpeculativeLoadForSanitizers(const Instruction &CtxI) {

llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,31 +1182,13 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p
11821182
; CHECK: [[VECTOR_PH]]:
11831183
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
11841184
; CHECK: [[VECTOR_BODY]]:
1185-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
1185+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1186+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
11861187
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
11871188
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
1188-
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD1]], zeroinitializer
1189-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
1190-
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
1191-
; CHECK: [[PRED_LOAD_IF]]:
1192-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
1193-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]]
1194-
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
1195-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
1196-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
1197-
; CHECK: [[PRED_LOAD_CONTINUE]]:
1198-
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
1199-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
1200-
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
1201-
; CHECK: [[PRED_LOAD_IF1]]:
1202-
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
1203-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]]
1204-
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
1205-
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1
1206-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
1207-
; CHECK: [[PRED_LOAD_CONTINUE2]]:
1208-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
1209-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
1189+
; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD1]], zeroinitializer
1190+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
1191+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD1]], <2 x i32> [[WIDE_LOAD2]]
12101192
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
12111193
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
12121194
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2

llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -504,24 +504,35 @@ exit:
504504
define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context(ptr noalias %p1, ptr noalias %p2) nosync {
505505
; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context(
506506
; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR1:[0-9]+]] {
507-
; CHECK-NEXT: [[ENTRY:.*]]:
507+
; CHECK-NEXT: [[ENTRY:.*:]]
508508
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ]
509509
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ]
510-
; CHECK-NEXT: br label %[[LOOP:.*]]
511-
; CHECK: [[LOOP]]:
512-
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 0, %[[ENTRY]] ]
510+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
511+
; CHECK: [[VECTOR_PH]]:
512+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
513+
; CHECK: [[VECTOR_BODY]]:
514+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ]
513515
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
514-
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
516+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[ARRAYIDX2]], align 1
515517
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]]
516-
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[TMP1]], align 1
517-
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
518-
; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
519-
; CHECK: [[LOOP_INC]]:
520-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], 1
521-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024
522-
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
518+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
519+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
520+
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
521+
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
522+
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
523+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
524+
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
525+
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
526+
; CHECK: [[MIDDLE_SPLIT]]:
527+
; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
528+
; CHECK: [[MIDDLE_BLOCK]]:
529+
; CHECK-NEXT: br label %[[LOOP_END:.*]]
530+
; CHECK: [[VECTOR_EARLY_EXIT]]:
531+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true)
532+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]]
533+
; CHECK-NEXT: br label %[[LOOP_END]]
523534
; CHECK: [[LOOP_END]]:
524-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX1]], %[[LOOP]] ], [ -1, %[[LOOP_INC]] ]
535+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
525536
; CHECK-NEXT: ret i64 [[RETVAL]]
526537
;
527538
entry:

0 commit comments

Comments
 (0)