From 6b014c5c66d79ef99e3a7b4ddd6f6be99c781627 Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Thu, 30 Jan 2025 16:32:04 +0100 Subject: [PATCH 1/4] [DSE] Update dereferenceable attributes when adjusting memintrinsic ptr Consider IR like this call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p, i8 0, i64 28, i1 false) store i32 1, ptr %p It has been optimized like this: %p2 = getelementptr inbounds i8, ptr %p, i64 4 call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p2, i8 0, i64 24, i1 false) store i32 1, ptr %p As the input IR doesn't guarantee that it is OK to deref 28 bytes starting at the adjusted pointer %p2 the transformation has been a bit flawed. With this patch we make sure to also adjust the size of any dereferenceable/dereferenceable_or_null attributes when doing the transform in tryToShorten (when adjusting the start pointer). So now we will get dereferenceable(24) in the example above. --- llvm/include/llvm/IR/InstrTypes.h | 5 ++++ .../Scalar/DeadStoreElimination.cpp | 18 +++++++++++ .../OverwriteStoreBegin.ll | 30 +++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 47ddc7555594c..f26aafbe3f0ef 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1559,6 +1559,11 @@ class CallBase : public Instruction { Attrs = Attrs.addDereferenceableParamAttr(getContext(), i, Bytes); } + /// adds the dereferenceable attribute to the list of attributes. + void addDereferenceableOrNullParamAttr(unsigned i, uint64_t Bytes) { + Attrs = Attrs.addDereferenceableOrNullParamAttr(getContext(), i, Bytes); + } + /// adds the dereferenceable attribute to the list of attributes. void addDereferenceableRetAttr(uint64_t Bytes) { Attrs = Attrs.addDereferenceableRetAttr(getContext(), Bytes); diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 13f3de07c3c44..c16134e3f0159 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -563,6 +563,23 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest, for_each(LinkedDVRAssigns, InsertAssignForOverlap); } +// Helper to trim or drop any dereferencable/dereferencable_or_null attributes +// for a given argument, based on the new access being restricted to derefence +// bytes in the range [Offset, Offset+Size). +static void trimDereferencableAttrs(AnyMemIntrinsic *Intrinsic, unsigned Arg, + uint64_t Offset, uint64_t Size) { + uint64_t End = Offset + Size; + if (Intrinsic->getParamDereferenceableBytes(Arg) >= End) + Intrinsic->addDereferenceableParamAttr(Arg, Size); + else + Intrinsic->removeParamAttr(Arg, Attribute::Dereferenceable); + if (Intrinsic->getParamDereferenceableOrNullBytes(Arg) >= End) + Intrinsic->addDereferenceableOrNullParamAttr(Arg, Size); + else + Intrinsic->removeParamAttr(Arg, Attribute::DereferenceableOrNull); +} + + static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd) { @@ -644,6 +661,7 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, DeadI->getIterator()); NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc()); DeadIntrinsic->setDest(NewDestGEP); + trimDereferencableAttrs(DeadIntrinsic, 0, ToRemoveSize, NewSize); } // Update attached dbg.assign intrinsics. Assume 8-bit byte. diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll index bc1756f6ca9d1..6c42d8e2c9833 100644 --- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll +++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll @@ -402,3 +402,33 @@ entry: store i64 1, ptr %p, align 1 ret void } + +; Verify that we adjust the dereferenceable attribute. +define void @dereferenceable(ptr nocapture %p) { +; CHECK-LABEL: @dereferenceable( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 dereferenceable(24) [[TMP0]], i8 0, i64 24, i1 false) +; CHECK-NEXT: store i32 1, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.memset.p0.i64(ptr dereferenceable(28) align 4 %p, i8 0, i64 28, i1 false) + store i32 1, ptr %p, align 4 + ret void +} + +; Verify that we adjust the dereferenceable_or_null attribute. +define void @dereferenceable_or_null(ptr nocapture %p) { +; CHECK-LABEL: @dereferenceable_or_null( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 dereferenceable_or_null(20) [[TMP0]], i8 0, i64 20, i1 false) +; CHECK-NEXT: store i64 1, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.memset.p0.i64(ptr dereferenceable_or_null(28) align 4 %p, i8 0, i64 28, i1 false) + store i64 1, ptr %p, align 4 + ret void +} From f6f70258b0d5377324f884a6e03ea75d5d8a2db2 Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Thu, 30 Jan 2025 16:41:20 +0100 Subject: [PATCH 2/4] fixup formatting --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index c16134e3f0159..50f964411aba8 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -579,7 +579,6 @@ static void trimDereferencableAttrs(AnyMemIntrinsic *Intrinsic, unsigned Arg, Intrinsic->removeParamAttr(Arg, Attribute::DereferenceableOrNull); } - static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd) { From 8b8ce48ff5538f56dcdc6236d17c99c73341dc0f Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Thu, 30 Jan 2025 23:25:54 +0100 Subject: [PATCH 3/4] Simplify the solution by just dropping the dereferenceable attrs when there is a risk that they would become incorrect if neither adjusting nor dropping them. --- llvm/include/llvm/IR/InstrTypes.h | 5 ----- .../Scalar/DeadStoreElimination.cpp | 22 +++++-------------- .../OverwriteStoreBegin.ll | 8 +++---- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index f26aafbe3f0ef..47ddc7555594c 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1559,11 +1559,6 @@ class CallBase : public Instruction { Attrs = Attrs.addDereferenceableParamAttr(getContext(), i, Bytes); } - /// adds the dereferenceable attribute to the list of attributes. - void addDereferenceableOrNullParamAttr(unsigned i, uint64_t Bytes) { - Attrs = Attrs.addDereferenceableOrNullParamAttr(getContext(), i, Bytes); - } - /// adds the dereferenceable attribute to the list of attributes. void addDereferenceableRetAttr(uint64_t Bytes) { Attrs = Attrs.addDereferenceableRetAttr(getContext(), Bytes); diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 50f964411aba8..4b02e9993dc17 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -563,22 +563,6 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest, for_each(LinkedDVRAssigns, InsertAssignForOverlap); } -// Helper to trim or drop any dereferencable/dereferencable_or_null attributes -// for a given argument, based on the new access being restricted to derefence -// bytes in the range [Offset, Offset+Size). -static void trimDereferencableAttrs(AnyMemIntrinsic *Intrinsic, unsigned Arg, - uint64_t Offset, uint64_t Size) { - uint64_t End = Offset + Size; - if (Intrinsic->getParamDereferenceableBytes(Arg) >= End) - Intrinsic->addDereferenceableParamAttr(Arg, Size); - else - Intrinsic->removeParamAttr(Arg, Attribute::Dereferenceable); - if (Intrinsic->getParamDereferenceableOrNullBytes(Arg) >= End) - Intrinsic->addDereferenceableOrNullParamAttr(Arg, Size); - else - Intrinsic->removeParamAttr(Arg, Attribute::DereferenceableOrNull); -} - static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd) { @@ -660,7 +644,11 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, DeadI->getIterator()); NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc()); DeadIntrinsic->setDest(NewDestGEP); - trimDereferencableAttrs(DeadIntrinsic, 0, ToRemoveSize, NewSize); + // Simply drop any dereferenceable attributes (memset with a constant length + // already implies dereferenceability by itself, so dropping is simpler than + // trying to adjust the dereferenceable size). + DeadIntrinsic->removeParamAttr(0, Attribute::Dereferenceable); + DeadIntrinsic->removeParamAttr(0, Attribute::DereferenceableOrNull); } // Update attached dbg.assign intrinsics. Assume 8-bit byte. diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll index 6c42d8e2c9833..43fbcfcc600c6 100644 --- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll +++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll @@ -403,12 +403,12 @@ entry: ret void } -; Verify that we adjust the dereferenceable attribute. +; Verify that we adjust/drop the dereferenceable attribute. define void @dereferenceable(ptr nocapture %p) { ; CHECK-LABEL: @dereferenceable( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 dereferenceable(24) [[TMP0]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: store i32 1, ptr [[P]], align 4 ; CHECK-NEXT: ret void ; @@ -418,12 +418,12 @@ entry: ret void } -; Verify that we adjust the dereferenceable_or_null attribute. +; Verify that we adjust/drop the dereferenceable_or_null attribute. define void @dereferenceable_or_null(ptr nocapture %p) { ; CHECK-LABEL: @dereferenceable_or_null( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 dereferenceable_or_null(20) [[TMP0]], i8 0, i64 20, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 20, i1 false) ; CHECK-NEXT: store i64 1, ptr [[P]], align 4 ; CHECK-NEXT: ret void ; From 0b58bf467a4eab8ab0c507e552982aa87463a1c6 Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Fri, 31 Jan 2025 16:06:56 +0100 Subject: [PATCH 4/4] Change the logic to drop all attributes that we do not explicitly handle. That should be more future proof as we automatically would drop any new attributes being added. --- .../Scalar/DeadStoreElimination.cpp | 44 ++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 4b02e9993dc17..deebec4f6ee81 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -50,6 +50,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" +#include "llvm/IR/AttributeMask.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRangeList.h" @@ -563,6 +564,43 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest, for_each(LinkedDVRAssigns, InsertAssignForOverlap); } +/// Update the attributes given that a memory access is updated (the +/// dereferenced pointer could be moved forward when shortening a +/// mem intrinsic). +static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo, + uint64_t PtrOffset) { + // Remember old attributes. + AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo); + + // Find attributes that should be kept, and remove the rest. + AttributeMask AttrsToRemove; + for (auto &Attr : OldAttrs) { + if (Attr.hasKindAsEnum()) { + switch (Attr.getKindAsEnum()) { + default: + break; + case Attribute::Alignment: + // Only keep alignment if PtrOffset satisfy the alignment. + if (isAligned(Attr.getAlignment().valueOrOne(), PtrOffset)) + continue; + break; + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + // We could reduce the size of these attributes according to + // PtrOffset. But we simply drop these for now. + break; + case Attribute::NonNull: + case Attribute::NoUndef: + continue; + } + } + AttrsToRemove.addAttribute(Attr); + } + + // Remove the attributes that should be dropped. + Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove); +} + static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd) { @@ -644,11 +682,7 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, DeadI->getIterator()); NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc()); DeadIntrinsic->setDest(NewDestGEP); - // Simply drop any dereferenceable attributes (memset with a constant length - // already implies dereferenceability by itself, so dropping is simpler than - // trying to adjust the dereferenceable size). - DeadIntrinsic->removeParamAttr(0, Attribute::Dereferenceable); - DeadIntrinsic->removeParamAttr(0, Attribute::DereferenceableOrNull); + adjustArgAttributes(DeadIntrinsic, 0, ToRemoveSize); } // Update attached dbg.assign intrinsics. Assume 8-bit byte.