-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[PreISelIntrinsicLowering] Reuse previously generated GlobalVariable for memset_pattern16 when possible #144677
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PreISelIntrinsicLowering] Reuse previously generated GlobalVariable for memset_pattern16 when possible #144677
Conversation
…for memset_pattern16 when possible As Constants are already uniquified, we can use a map to keep track of whether a GlobalVariable was produced for a given Constant or not. Repeated globals with the same value was one of the codegen differences noted in llvm#126736. This patch removes that diff, producing cleaner output.
|
@llvm/pr-subscribers-llvm-transforms Author: Alex Bradbury (asb) ChangesAs Constants are already uniquified, we can use a map to keep track of whether a GlobalVariable was produced for a given Constant or not. Repeated globals with the same value was one of the codegen differences noted in #126736. This patch removes that diff, producing cleaner output. Full diff: https://github.com/llvm/llvm-project/pull/144677.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 6f52b7cac1d46..1b05dd46252fb 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -68,7 +68,9 @@ struct PreISelIntrinsicLowering {
static bool shouldExpandMemIntrinsicWithSize(Value *Size,
const TargetTransformInfo &TTI);
- bool expandMemIntrinsicUses(Function &F) const;
+ bool
+ expandMemIntrinsicUses(Function &F,
+ DenseMap<Constant *, GlobalVariable *> &CMap) const;
bool lowerIntrinsics(Module &M) const;
};
@@ -287,7 +289,8 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
// TODO: Handle atomic memcpy and memcpy.inline
// TODO: Pass ScalarEvolution
-bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
+bool PreISelIntrinsicLowering::expandMemIntrinsicUses(
+ Function &F, DenseMap<Constant *, GlobalVariable *> &CMap) const {
Intrinsic::ID ID = F.getIntrinsicID();
bool Changed = false;
@@ -404,13 +407,20 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
// global.
assert(Memset->getRawDest()->getType()->getPointerAddressSpace() == 0 &&
"Should have skipped if non-zero AS");
- GlobalVariable *GV = new GlobalVariable(
- *M, PatternValue->getType(), /*isConstant=*/true,
- GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern");
- GV->setUnnamedAddr(
- GlobalValue::UnnamedAddr::Global); // Ok to merge these.
- // TODO: Consider relaxing alignment requirement.
- GV->setAlignment(Align(16));
+ GlobalVariable *GV;
+ auto It = CMap.find(PatternValue);
+ if (It != CMap.end()) {
+ GV = It->second;
+ } else {
+ GV = new GlobalVariable(
+ *M, PatternValue->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(
+ GlobalValue::UnnamedAddr::Global); // Ok to merge these.
+ // TODO: Consider relaxing alignment requirement.
+ GV->setAlignment(Align(16));
+ CMap[PatternValue] = GV;
+ }
Value *PatternPtr = GV;
Value *NumBytes = Builder.CreateMul(
TLI.getAsSizeT(DL.getTypeAllocSize(Memset->getValue()->getType()),
@@ -439,6 +449,8 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
}
bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
+ // Map unique constants to globals.
+ DenseMap<Constant *, GlobalVariable *> CMap;
bool Changed = false;
for (Function &F : M) {
switch (F.getIntrinsicID()) {
@@ -450,7 +462,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
case Intrinsic::memset:
case Intrinsic::memset_inline:
case Intrinsic::experimental_memset_pattern:
- Changed |= expandMemIntrinsicUses(F);
+ Changed |= expandMemIntrinsicUses(F, CMap);
break;
case Intrinsic::load_relative:
Changed |= lowerLoadRelative(F);
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 64cc4ba281635..aaca5a6c87b4f 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -4,16 +4,10 @@
;.
; CHECK: @G = global i32 5
; CHECK: @.memset_pattern = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
-; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
-; CHECK: @.memset_pattern.3 = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
-; CHECK: @.memset_pattern.4 = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
-; CHECK: @.memset_pattern.5 = private unnamed_addr constant [2 x i64] [i64 4614256656552045848, i64 4614256656552045848], align 16
-; CHECK: @.memset_pattern.6 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
-; CHECK: @.memset_pattern.7 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
-; CHECK: @.memset_pattern.8 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
-; CHECK: @.memset_pattern.9 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
-; CHECK: @.memset_pattern.10 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x i64] [i64 4614256656552045848, i64 4614256656552045848], align 16
+; CHECK: @.memset_pattern.3 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
+; CHECK: @.memset_pattern.4 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
;.
define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
@@ -36,7 +30,7 @@ define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.7, i64 16)
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 16)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -64,7 +58,7 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1_align_attr(
; CHECK-SAME: ptr align 16 [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.8, i64 16)
+; CHECK-NEXT: call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.4, i64 16)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr align(16) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -74,7 +68,7 @@ define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nou
define void @memset_pattern_i128_16(ptr %a) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_16(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.9, i64 256)
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 256)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 false)
@@ -85,7 +79,7 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 16, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.10, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 false)
@@ -115,7 +109,7 @@ define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i16_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 2, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.6, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 false)
@@ -126,7 +120,7 @@ define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i64_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 8, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0xaaaabbbbccccdddd, i64 %x, i1 false)
@@ -137,7 +131,7 @@ define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind {
; CHECK-LABEL: define void @memset_pattern_i64_128_tbaa(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 1024), !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 1024), !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0x400921fb54442d18, i64 128, i1 false), !tbaa !5
@@ -154,7 +148,7 @@ define void @memset_pattern_i64_narrow_idx(ptr %a, i32 %x) nounwind {
; CHECK-SAME: ptr [[A:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 8, [[TMP1]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 [[TMP2]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP2]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0xaaaabbbbccccdddd, i32 %x, i1 false)
@@ -166,7 +160,7 @@ define void @memset_pattern_i64_wide_idx(ptr %a, i128 %x) nounwind {
; CHECK-SAME: ptr [[A:%.*]], i128 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[X]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 8, [[TMP1]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP2]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP2]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0xaaaabbbbccccdddd, i128 %x, i1 false)
@@ -189,7 +183,7 @@ define void @memset_pattern_i64_x_fromptr(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i64_x_fromptr(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 8, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP2]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 [[TMP2]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, ptr @G, i64 %x, i1 false)
|
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…for memset_pattern16 when possible (llvm#144677) As Constants are already uniquified, we can use a map to keep track of whether a GlobalVariable was produced for a given Constant or not. Repeated globals with the same value was one of the codegen differences noted in llvm#126736. This patch removes that diff, producing cleaner output.
As Constants are already uniquified, we can use a map to keep track of whether a GlobalVariable was produced for a given Constant or not. Repeated globals with the same value was one of the codegen differences noted in #126736. This patch removes that diff, producing cleaner output.