Skip to content

Conversation

@asb
Copy link
Contributor

@asb asb commented Jun 18, 2025

As Constants are already uniquified, we can use a map to keep track of whether a GlobalVariable was produced for a given Constant or not. Repeated globals with the same value was one of the codegen differences noted in #126736. This patch removes that diff, producing cleaner output.

…for memset_pattern16 when possible

As Constants are already uniquified, we can use a map to keep track of
whether a GlobalVariable was produced for a given Constant or not.
Repeated globals with the same value was one of the codegen differences
noted in llvm#126736. This patch removes that diff, producing cleaner
output.
@llvmbot
Copy link
Member

llvmbot commented Jun 18, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Alex Bradbury (asb)

Changes

As Constants are already uniquified, we can use a map to keep track of whether a GlobalVariable was produced for a given Constant or not. Repeated globals with the same value was one of the codegen differences noted in #126736. This patch removes that diff, producing cleaner output.


Full diff: https://github.com/llvm/llvm-project/pull/144677.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp (+22-10)
  • (modified) llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll (+14-20)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 6f52b7cac1d46..1b05dd46252fb 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -68,7 +68,9 @@ struct PreISelIntrinsicLowering {
 
   static bool shouldExpandMemIntrinsicWithSize(Value *Size,
                                                const TargetTransformInfo &TTI);
-  bool expandMemIntrinsicUses(Function &F) const;
+  bool
+  expandMemIntrinsicUses(Function &F,
+                         DenseMap<Constant *, GlobalVariable *> &CMap) const;
   bool lowerIntrinsics(Module &M) const;
 };
 
@@ -287,7 +289,8 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
 
 // TODO: Handle atomic memcpy and memcpy.inline
 // TODO: Pass ScalarEvolution
-bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
+bool PreISelIntrinsicLowering::expandMemIntrinsicUses(
+    Function &F, DenseMap<Constant *, GlobalVariable *> &CMap) const {
   Intrinsic::ID ID = F.getIntrinsicID();
   bool Changed = false;
 
@@ -404,13 +407,20 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
       // global.
       assert(Memset->getRawDest()->getType()->getPointerAddressSpace() == 0 &&
              "Should have skipped if non-zero AS");
-      GlobalVariable *GV = new GlobalVariable(
-          *M, PatternValue->getType(), /*isConstant=*/true,
-          GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern");
-      GV->setUnnamedAddr(
-          GlobalValue::UnnamedAddr::Global); // Ok to merge these.
-      // TODO: Consider relaxing alignment requirement.
-      GV->setAlignment(Align(16));
+      GlobalVariable *GV;
+      auto It = CMap.find(PatternValue);
+      if (It != CMap.end()) {
+        GV = It->second;
+      } else {
+        GV = new GlobalVariable(
+            *M, PatternValue->getType(), /*isConstant=*/true,
+            GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern");
+        GV->setUnnamedAddr(
+            GlobalValue::UnnamedAddr::Global); // Ok to merge these.
+        // TODO: Consider relaxing alignment requirement.
+        GV->setAlignment(Align(16));
+        CMap[PatternValue] = GV;
+      }
       Value *PatternPtr = GV;
       Value *NumBytes = Builder.CreateMul(
           TLI.getAsSizeT(DL.getTypeAllocSize(Memset->getValue()->getType()),
@@ -439,6 +449,8 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
 }
 
 bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
+  // Map unique constants to globals.
+  DenseMap<Constant *, GlobalVariable *> CMap;
   bool Changed = false;
   for (Function &F : M) {
     switch (F.getIntrinsicID()) {
@@ -450,7 +462,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
     case Intrinsic::memset:
     case Intrinsic::memset_inline:
     case Intrinsic::experimental_memset_pattern:
-      Changed |= expandMemIntrinsicUses(F);
+      Changed |= expandMemIntrinsicUses(F, CMap);
       break;
     case Intrinsic::load_relative:
       Changed |= lowerLoadRelative(F);
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 64cc4ba281635..aaca5a6c87b4f 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -4,16 +4,10 @@
 ;.
 ; CHECK: @G = global i32 5
 ; CHECK: @.memset_pattern = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
-; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
-; CHECK: @.memset_pattern.3 = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
-; CHECK: @.memset_pattern.4 = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
-; CHECK: @.memset_pattern.5 = private unnamed_addr constant [2 x i64] [i64 4614256656552045848, i64 4614256656552045848], align 16
-; CHECK: @.memset_pattern.6 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
-; CHECK: @.memset_pattern.7 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
-; CHECK: @.memset_pattern.8 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
-; CHECK: @.memset_pattern.9 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
-; CHECK: @.memset_pattern.10 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x i64] [i64 4614256656552045848, i64 4614256656552045848], align 16
+; CHECK: @.memset_pattern.3 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
+; CHECK: @.memset_pattern.4 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
 ;.
 define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
@@ -36,7 +30,7 @@ define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
 define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_1(
 ; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.7, i64 16)
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 16)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -64,7 +58,7 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
 define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_1_align_attr(
 ; CHECK-SAME: ptr align 16 [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.8, i64 16)
+; CHECK-NEXT:    call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.4, i64 16)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr align(16) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -74,7 +68,7 @@ define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nou
 define void @memset_pattern_i128_16(ptr %a) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_16(
 ; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.9, i64 256)
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 256)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 false)
@@ -85,7 +79,7 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_x(
 ; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 16, [[X]]
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.10, i64 [[TMP1]])
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 false)
@@ -115,7 +109,7 @@ define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i16_x(
 ; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 2, [[X]]
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.6, i64 [[TMP1]])
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 false)
@@ -126,7 +120,7 @@ define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i64_x(
 ; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 8, [[X]]
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0xaaaabbbbccccdddd, i64 %x, i1 false)
@@ -137,7 +131,7 @@ define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
 define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i64_128_tbaa(
 ; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 1024), !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 1024), !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0x400921fb54442d18, i64 128, i1 false), !tbaa !5
@@ -154,7 +148,7 @@ define void @memset_pattern_i64_narrow_idx(ptr %a, i32 %x) nounwind {
 ; CHECK-SAME: ptr [[A:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[X]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 8, [[TMP1]]
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 [[TMP2]])
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP2]])
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0xaaaabbbbccccdddd, i32 %x, i1 false)
@@ -166,7 +160,7 @@ define void @memset_pattern_i64_wide_idx(ptr %a, i128 %x) nounwind {
 ; CHECK-SAME: ptr [[A:%.*]], i128 [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i128 [[X]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 8, [[TMP1]]
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP2]])
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP2]])
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0xaaaabbbbccccdddd, i128 %x, i1 false)
@@ -189,7 +183,7 @@ define void @memset_pattern_i64_x_fromptr(ptr %a, i64 %x) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i64_x_fromptr(
 ; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 8, [[X]]
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP2]])
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 [[TMP2]])
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, ptr @G, i64 %x, i1 false)

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@asb asb merged commit 9f7567d into llvm:main Jun 23, 2025
10 checks passed
anthonyhatran pushed a commit to anthonyhatran/llvm-project that referenced this pull request Jun 26, 2025
…for memset_pattern16 when possible (llvm#144677)

As Constants are already uniquified, we can use a map to keep track of
whether a GlobalVariable was produced for a given Constant or not.
Repeated globals with the same value was one of the codegen differences
noted in llvm#126736. This patch removes that diff, producing cleaner
output.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants