From fb58d52684282d50d3baab88a5d25a1a9b94111b Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Mon, 2 Dec 2024 01:20:33 -0800 Subject: [PATCH 1/5] structural hash for global var --- llvm/include/llvm/IR/StructuralHash.h | 3 + llvm/lib/CodeGen/MachineStableHash.cpp | 13 ++- llvm/lib/IR/StructuralHash.cpp | 51 +++++++++-- .../test/CodeGen/AArch64/cgdata-merge-gvar.ll | 91 +++++++++++++++++++ .../CodeGen/AArch64/cgdata-outline-gvar.ll | 52 +++++++++++ 5 files changed, 200 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 071575137ff57..514dd6f174b90 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -31,6 +31,9 @@ class Module; /// to true includes instruction and operand type information. stable_hash StructuralHash(const Function &F, bool DetailedHash = false); +/// Returns a hash of the global variable \p G. +stable_hash StructuralHash(const GlobalVariable &G); + /// Returns a hash of the module \p M by hashing all functions and global /// variables contained within. \param M The module to hash. 
\param DetailedHash /// Whether or not to encode additional information in the function hashes that diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index facda7a59e2f8..09a81cb318ecb 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -27,6 +27,8 @@ #include "llvm/CodeGen/Register.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/StructuralHash.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/ErrorHandling.h" @@ -97,9 +99,14 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) { ++StableHashBailingGlobalAddress; return 0; } - auto Name = GV->getName(); - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), - stable_hash_name(Name), MO.getOffset()); + stable_hash GVHash = 0; + if (auto *GVar = dyn_cast(GV)) + GVHash = StructuralHash(*GVar); + if (!GVHash) + GVHash = stable_hash_name(GV->getName()); + + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash, + MO.getOffset()); } case MachineOperand::MO_TargetIndex: { diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index ccc534a890419..de883f81a8e4a 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -46,7 +46,7 @@ class StructuralHashImpl { /// Assign a unique ID to each Value in the order they are first seen. 
DenseMap ValueToId; - stable_hash hashType(Type *ValueType) { + static stable_hash hashType(Type *ValueType) { SmallVector Hashes; Hashes.emplace_back(ValueType->getTypeID()); if (ValueType->isIntegerTy()) @@ -65,7 +65,7 @@ class StructuralHashImpl { } } - stable_hash hashAPInt(const APInt &I) { + static stable_hash hashAPInt(const APInt &I) { SmallVector Hashes; Hashes.emplace_back(I.getBitWidth()); auto RawVals = ArrayRef(I.getRawData(), I.getNumWords()); @@ -73,11 +73,36 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } - stable_hash hashAPFloat(const APFloat &F) { + static stable_hash hashAPFloat(const APFloat &F) { return hashAPInt(F.bitcastToAPInt()); } - stable_hash hashGlobalValue(const GlobalValue *GV) { + static stable_hash hashGlobalVariable(const GlobalVariable &GVar) { + if (!GVar.hasInitializer()) + return hashGlobalValue(&GVar); + + // Hash the contents of a string. + if (GVar.getName().starts_with(".str")) + return hashConstant(GVar.getInitializer()); + + // Hash structural contents of Objective-C metadata in specific sections. + // This can be extended to other metadata if needed. + static constexpr const char *SectionNames[] = { + "__cfstring", "__cstring", "__objc_classrefs", + "__objc_methname", "__objc_selrefs", + }; + if (GVar.hasSection()) { + StringRef SectionName = GVar.getSection(); + for (const char *Name : SectionNames) { + if (SectionName.contains(Name)) + return hashConstant(GVar.getInitializer()); + } + } + + return hashGlobalValue(&GVar); + } + + static stable_hash hashGlobalValue(const GlobalValue *GV) { if (!GV->hasName()) return 0; return stable_hash_name(GV->getName()); @@ -87,7 +112,7 @@ class StructuralHashImpl { // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here // we're interested in computing a hash rather than comparing two Constants. // Some of the logic is simplified, e.g, we don't expand GEPOperator. 
- stable_hash hashConstant(Constant *C) { + static stable_hash hashConstant(const Constant *C) { SmallVector<stable_hash> Hashes; Type *Ty = C->getType(); @@ -98,14 +123,21 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } + if (auto *GVar = dyn_cast<GlobalVariable>(C)) { + Hashes.emplace_back(hashGlobalVariable(*GVar)); + return stable_hash_combine(Hashes); + } + if (auto *G = dyn_cast<GlobalValue>(C)) { Hashes.emplace_back(hashGlobalValue(G)); return stable_hash_combine(Hashes); } if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) { - Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); - return stable_hash_combine(Hashes); + if (Seq->isString()) { + Hashes.emplace_back(stable_hash_name(Seq->getAsString())); + return stable_hash_combine(Hashes); + } } switch (C->getValueID()) { @@ -266,6 +298,7 @@ class StructuralHashImpl { Hashes.emplace_back(Hash); Hashes.emplace_back(GlobalHeaderHash); Hashes.emplace_back(GV.getValueType()->getTypeID()); + Hashes.emplace_back(hashGlobalVariable(GV)); // Update the combined hash in place. Hash = stable_hash_combine(Hashes); @@ -297,6 +330,10 @@ stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { return H.getHash(); } +stable_hash llvm::StructuralHash(const GlobalVariable &GVar) { + return StructuralHashImpl::hashGlobalVariable(GVar); +} + stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { StructuralHashImpl H(DetailedHash); H.update(M); diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll new file mode 100644 index 0000000000000..f1f5209abe350 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll @@ -0,0 +1,91 @@ +; This test verifies that global variables are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. +; Now they become identical functions that can be merged without creating a paramter. 
+ +; RUN: rm -rf %t && split-file %s %t + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/string.ll | FileCheck %s +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/ns-const.ll | FileCheck %s +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/objc-ref.ll | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm + +;--- string.ll + +@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 + +declare noundef i32 @goo(ptr noundef) + +define i32 @f1() { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f2() { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +;--- ns-const.ll + +%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 } +@__CFConstantStringClassReference = external global [0 x i32] +@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 +@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8 + +@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 +@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8 + +declare noundef i32 @hoo(ptr noundef) + +define i32 @f1() { +entry: + %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_) + %add = sub nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f2() { +entry: + %call = tail call i32 @hoo(ptr noundef nonnull 
@_unnamed_cfstring_.2) + %add = sub nsw i32 %call, 1 + ret i32 %add +} + +;--- objc-ref.ll + +%struct._class_t = type { ptr, ptr, ptr, ptr, ptr } + +@"OBJC_CLASS_$_MyClass" = external global %struct._class_t +@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 +@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 + +@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 + +@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 +@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 + +define i32 @f1() { +entry: + %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8 + %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8 + %call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) + ret i32 %call +} + +declare ptr @objc_msgSend(ptr, ptr, ...) 
+ +define i32 @f2() { +entry: + %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8 + %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8 + %call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) + ret i32 %call +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll new file mode 100644 index 0000000000000..447928dfa0724 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll @@ -0,0 +1,52 @@ +; This test verifies that global variables are hashed based on their initial contents, +; allowing them to be outlined even if they appear different due to their names. + +; RUN: split-file %s %t + +; Check if the outlined function is created locally. +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -aarch64-enable-collect-loh=false -filetype=obj %t/local-two.ll -o %t_write_base +; RUN: llvm-objdump -d %t_write_base | FileCheck %s + +; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base + +; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll. +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false -append-content-hash-outlined-name=false -filetype=obj %t/local-one.ll -o %t_read_base +; RUN: llvm-objdump -d %t_read_base | FileCheck %s + +; The names of globals `.str` and `.str.4` are different, but their initial contents are identical. +; The outlined function now starts with a reference to that global ("hello\00"). 
+; CHECK: _OUTLINED_FUNCTION_{{.*}}: +; CHECK-NEXT: adrp x1 +; CHECK-NEXT: add x1, x1 +; CHECK-NEXT: mov w2 +; CHECK-NEXT: mov w3 +; CHECK-NEXT: mov w4 +; CHECK-NEXT: b + +;--- local-two.ll +@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1 +@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1 + +declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) +define i32 @f1() minsize { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str, i32 1, i32 2, i32 3) + ret i32 %call +} +define i32 @f2() minsize { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str, i32 1, i32 2, i32 3) + ret i32 %call +} + +;--- local-one.ll +@.str.3 = private unnamed_addr constant [3 x i8] c"f3\00", align 1 +@.str.4 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 + +declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) +define i32 @f1() minsize { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3, ptr noundef nonnull @.str.4, i32 1, i32 2, i32 3) + ret i32 %call +} From b08aab824b5215a5c3e2fccac2eed7b33451d187 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Mon, 2 Dec 2024 18:53:50 -0800 Subject: [PATCH 2/5] Address comments from mingmingl-llvm --- llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll index f1f5209abe350..9e867d37f6586 100644 --- a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll @@ -1,6 +1,6 @@ ; This test verifies that global variables are hashed based on their initial contents, ; allowing them to be merged even if they appear different due to their names. 
-; Now they become identical functions that can be merged without creating a paramter. +; Now they become identical functions that can be merged without creating a parameter. ; RUN: rm -rf %t && split-file %s %t From 58f729aa9729248b13a47db21b0d670bec6f03e4 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Mon, 2 Dec 2024 22:32:09 -0800 Subject: [PATCH 3/5] Address comments from ellishg --- llvm/lib/IR/StructuralHash.cpp | 11 ++- .../AArch64/cgdata-merge-gvar-nsconst.ll | 32 +++++++ .../CodeGen/AArch64/cgdata-merge-gvar-objc.ll | 40 ++++++++ .../AArch64/cgdata-merge-gvar-string.ll | 46 ++++++++++ .../test/CodeGen/AArch64/cgdata-merge-gvar.ll | 91 ------------------- .../CodeGen/AArch64/cgdata-outline-gvar.ll | 54 ++++++----- 6 files changed, 158 insertions(+), 116 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll delete mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index de883f81a8e4a..56b925626d845 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -82,8 +82,12 @@ class StructuralHashImpl { return hashGlobalValue(&GVar); // Hash the contents of a string. - if (GVar.getName().starts_with(".str")) - return hashConstant(GVar.getInitializer()); + if (GVar.getName().starts_with(".str")) { + auto *C = GVar.getInitializer(); + if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) + if (Seq->isString()) + return stable_hash_name(Seq->getAsString()); + } // Hash structural contents of Objective-C metadata in specific sections. // This can be extended to other metadata if needed. 
@@ -93,10 +97,9 @@ class StructuralHashImpl { }; if (GVar.hasSection()) { StringRef SectionName = GVar.getSection(); - for (const char *Name : SectionNames) { + for (const char *Name : SectionNames) if (SectionName.contains(Name)) return hashConstant(GVar.getInitializer()); - } } return hashGlobalValue(&GVar); diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll new file mode 100644 index 0000000000000..490a778f69e26 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll @@ -0,0 +1,32 @@ +; This test verifies that global variables (ns constant) are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. +; Now they become identical functions that can be merged without creating a parameter + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm + +%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 } +@__CFConstantStringClassReference = external global [0 x i32] +@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 +@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8 + +@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 +@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8 + +declare i32 @hoo(ptr noundef) + +define i32 @f1() { +entry: + %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_) + %add = sub nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f2() { +entry: + %call = tail call i32 @hoo(ptr 
noundef nonnull @_unnamed_cfstring_.2) + %add = sub nsw i32 %call, 1 + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll new file mode 100644 index 0000000000000..e0d28721f2afb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll @@ -0,0 +1,40 @@ + +; This test verifies that global variables (objc metadata) are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. +; Now they become identical functions that can be merged without creating a parameter + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm + + +%struct._class_t = type { ptr, ptr, ptr, ptr, ptr } + +@"OBJC_CLASS_$_MyClass" = external global %struct._class_t +@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 +@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 + +@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 + +@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 +@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 + +declare ptr @objc_msgSend(ptr, ptr, ...) 
+ +define i32 @f1() { +entry: + %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8 + %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8 + %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) + ret i32 %call +} + +define i32 @f2() { +entry: + %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8 + %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8 + %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) + ret i32 %call +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll new file mode 100644 index 0000000000000..1e67425f0b847 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll @@ -0,0 +1,46 @@ +; This test verifies that global variables (string) are hashed based on their initial contents, +; allowing them to be merged even if they appear different due to their names. +; Now they become identical functions that can be merged without creating a parameter. + +; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s + +; CHECK: _f1.Tgm +; CHECK: _f2.Tgm +; CHECK-NOT: _f3.Tgm +; CHECK-NOT: _f4.Tgm + +; The initial contents of `.str` and `.str.1` are identical, but not with those of `.str.2` and `.str.3`. 
+@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@.str.2 = private unnamed_addr constant [6 x i8] c"diff2\00", align 1 +@.str.3 = private unnamed_addr constant [6 x i8] c"diff3\00", align 1 + +declare i32 @goo(ptr noundef) + +define i32 @f1() { +entry: + %call = tail call i32 @goo(ptr noundef nonnull @.str) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f2() { +entry: + %call = tail call i32 @goo(ptr noundef nonnull @.str.1) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f3() { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2) + %add = sub nsw i32 %call, 1 + ret i32 %add +} + +define i32 @f4() { +entry: + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3) + %add = sub nsw i32 %call, 1 + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll deleted file mode 100644 index 9e867d37f6586..0000000000000 --- a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll +++ /dev/null @@ -1,91 +0,0 @@ -; This test verifies that global variables are hashed based on their initial contents, -; allowing them to be merged even if they appear different due to their names. -; Now they become identical functions that can be merged without creating a parameter. 
- -; RUN: rm -rf %t && split-file %s %t - -; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/string.ll | FileCheck %s -; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/ns-const.ll | FileCheck %s -; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/objc-ref.ll | FileCheck %s - -; CHECK: _f1.Tgm -; CHECK: _f2.Tgm - -;--- string.ll - -@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 -@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 - -declare noundef i32 @goo(ptr noundef) - -define i32 @f1() { -entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str) - %add = add nsw i32 %call, 1 - ret i32 %add -} - -define i32 @f2() { -entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1) - %add = add nsw i32 %call, 1 - ret i32 %add -} - -;--- ns-const.ll - -%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 } -@__CFConstantStringClassReference = external global [0 x i32] -@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 -@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8 - -@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1 -@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8 - -declare noundef i32 @hoo(ptr noundef) - -define i32 @f1() { -entry: - %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_) - %add = sub nsw i32 %call, 1 - ret i32 %add -} - -define i32 @f2() { -entry: - %call = tail call i32 @hoo(ptr noundef nonnull 
@_unnamed_cfstring_.2) - %add = sub nsw i32 %call, 1 - ret i32 %add -} - -;--- objc-ref.ll - -%struct._class_t = type { ptr, ptr, ptr, ptr, ptr } - -@"OBJC_CLASS_$_MyClass" = external global %struct._class_t -@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 -@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8 - -@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 -@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1 - -@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 -@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8 - -define i32 @f1() { -entry: - %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8 - %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8 - %call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) - ret i32 %call -} - -declare ptr @objc_msgSend(ptr, ptr, ...) 
- -define i32 @f2() { -entry: - %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8 - %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8 - %call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1) - ret i32 %call -} diff --git a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll index 447928dfa0724..63ba1d491f9c7 100644 --- a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll +++ b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll @@ -3,50 +3,62 @@ ; RUN: split-file %s %t -; Check if the outlined function is created locally. -; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -aarch64-enable-collect-loh=false -filetype=obj %t/local-two.ll -o %t_write_base -; RUN: llvm-objdump -d %t_write_base | FileCheck %s +; The outlined function is created locally. +; Note that `.str.3` is commonly used in both `f1()` and `f2()`. +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ +; RUN: %t/local-two.ll -o - | FileCheck %s --check-prefix=WRITE +; WRITE-LABEL: _OUTLINED_FUNCTION_{{.*}}: +; WRITE: adrp x1, l_.str.3 +; WRITE-NEXT: add x1, x1, l_.str.3 +; WRITE-NEXT: mov w2 +; WRITE-NEXT: mov w3 +; WRITE-NEXT: mov w4 +; WRITE-NEXT: b + +; Create an object file and merge it into the cgdata. +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \ +; RUN: -filetype=obj %t/local-two.ll -o %t_write_base ; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base ; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll. 
-; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false -append-content-hash-outlined-name=false -filetype=obj %t/local-one.ll -o %t_read_base -; RUN: llvm-objdump -d %t_read_base | FileCheck %s - -; The names of globals `.str` and `.str.4` are different, but their initial contents are identical. -; The outlined function now starts with a reference to that global ("hello\00"). -; CHECK: _OUTLINED_FUNCTION_{{.*}}: -; CHECK-NEXT: adrp x1 -; CHECK-NEXT: add x1, x1 -; CHECK-NEXT: mov w2 -; CHECK-NEXT: mov w3 -; CHECK-NEXT: mov w4 -; CHECK-NEXT: b +; Note that the hash of `.str.5` in local-one.ll matches that of `.str.3` in an outlined tree in the cgdata. + +; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false \ +; RUN: %t/local-one.ll -o - | FileCheck %s --check-prefix=READ + +; READ-LABEL: _OUTLINED_FUNCTION_{{.*}}: +; READ: adrp x1, l_.str.5 +; READ-NEXT: add x1, x1, l_.str.5 +; READ-NEXT: mov w2 +; READ-NEXT: mov w3 +; READ-NEXT: mov w4 +; READ-NEXT: b ;--- local-two.ll -@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 @.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1 @.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1 +@.str.3 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) define i32 @f1() minsize { entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str, i32 1, i32 2, i32 3) + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3) ret i32 %call } define i32 @f2() minsize { entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str, i32 1, i32 2, i32 3) + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef 
nonnull @.str.3, i32 1, i32 2, i32 3) ret i32 %call } ;--- local-one.ll -@.str.3 = private unnamed_addr constant [3 x i8] c"f3\00", align 1 -@.str.4 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 +@.str.4 = private unnamed_addr constant [3 x i8] c"f3\00", align 1 +@.str.5 = private unnamed_addr constant [6 x i8] c"hello\00", align 1 declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32) define i32 @f1() minsize { entry: - %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3, ptr noundef nonnull @.str.4, i32 1, i32 2, i32 3) + %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.4, ptr noundef nonnull @.str.5, i32 1, i32 2, i32 3) ret i32 %call } From d8d6cb70b65097751ac43c80c931b1995a6d8142 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Mon, 2 Dec 2024 22:34:12 -0800 Subject: [PATCH 4/5] Address comments from nocchijiang --- llvm/lib/CodeGen/MachineStableHash.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index 09a81cb318ecb..5ab589acee413 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -95,15 +95,16 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) { return 0; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); - if (!GV->hasName()) { - ++StableHashBailingGlobalAddress; - return 0; - } stable_hash GVHash = 0; if (auto *GVar = dyn_cast(GV)) GVHash = StructuralHash(*GVar); - if (!GVHash) + if (!GVHash) { + if (!GV->hasName()) { + ++StableHashBailingGlobalAddress; + return 0; + } GVHash = stable_hash_name(GV->getName()); + } return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash, MO.getOffset()); From c00f96376a35a7784a1c3a7b3443e2808dec79ae Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Tue, 3 Dec 2024 09:39:14 -0800 Subject: [PATCH 5/5] remove spaces --- 
llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll index e0d28721f2afb..0073114941501 100644 --- a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll +++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll @@ -1,4 +1,3 @@ - ; This test verifies that global variables (objc metadata) are hashed based on their initial contents, ; allowing them to be merged even if they appear different due to their names. ; Now they become identical functions that can be merged without creating a parameter @@ -8,7 +7,6 @@ ; CHECK: _f1.Tgm ; CHECK: _f2.Tgm - %struct._class_t = type { ptr, ptr, ptr, ptr, ptr } @"OBJC_CLASS_$_MyClass" = external global %struct._class_t