Skip to content

Commit d0e9890

Browse files
authored
[lld][MachO] Tail merge strings (llvm#161262)
Add the flag `--tail-merge-strings` to enable tail merging of cstrings. For example, if we have strings `mystring\0` and `ring\0`, we could place `mystring\0` at address `0x1000` and `ring\0` at address `0x1004` and have them share the same underlying data.

It turns out that many ObjC method names can be tail merged. For example, `error:` and `doFoo:error:`. On a large iOS binary, we saw nearly a 15% size improvement in the `__TEXT,__objc_methname` section and negligible impact on link time.

```
$ bloaty --domain=vm merged.o.stripped -- base.o.stripped
     VM SIZE
 --------------
   +95% +5.85Ki    [__TEXT]
  -2.4%  -239Ki    __TEXT,__cstring
 -14.5%  -710Ki    __TEXT,__objc_methname
  -1.0%  -944Ki    TOTAL
```

Tail merging for MachO was originally removed in llvm@7c269db. The previous implementation used `StringTableBuilder`, but that was removed in llvm@4308f03 to ensure deduplicated strings are aligned correctly. This implementation ensures that tail merged strings are also aligned correctly.

Special thanks to nocchijiang for pointing this out in llvm#158720 (comment).

Depends on llvm#161253.
1 parent 3add28b commit d0e9890

File tree

8 files changed

+351
-3
lines changed

8 files changed

+351
-3
lines changed

lld/MachO/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ struct Configuration {
223223
bool warnThinArchiveMissingMembers;
224224
bool disableVerify;
225225
bool separateCstringLiteralSections;
226+
bool tailMergeStrings;
226227

227228
bool callGraphProfileSort = false;
228229
llvm::StringRef printSymbolOrder;

lld/MachO/Driver.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1986,6 +1986,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
19861986
config->separateCstringLiteralSections =
19871987
args.hasFlag(OPT_separate_cstring_literal_sections,
19881988
OPT_no_separate_cstring_literal_sections, false);
1989+
config->tailMergeStrings =
1990+
args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false);
19891991

19901992
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
19911993
// Throw an error only if --call-graph-profile-sort is explicitly specified

lld/MachO/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,10 @@ defm separate_cstring_literal_sections
10911091
"Emit all cstring literals into the __cstring section. As a special "
10921092
"case, the __objc_methname section will still be emitted. (default)">,
10931093
Group<grp_rare>;
1094+
defm tail_merge_strings
1095+
: BB<"tail-merge-strings", "Enable string tail merging",
1096+
"Disable string tail merging to improve link-time performance">,
1097+
Group<grp_rare>;
10941098

10951099
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
10961100

lld/MachO/SyntheticSections.cpp

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1746,6 +1746,8 @@ void CStringSection::finalizeContents() {
17461746
void DeduplicatedCStringSection::finalizeContents() {
17471747
// Find the largest alignment required for each string.
17481748
DenseMap<CachedHashStringRef, Align> strToAlignment;
1749+
// Used for tail merging only
1750+
std::vector<CachedHashStringRef> deduplicatedStrs;
17491751
for (const CStringInputSection *isec : inputs) {
17501752
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
17511753
if (!piece.live)
@@ -1754,27 +1756,79 @@ void DeduplicatedCStringSection::finalizeContents() {
17541756
assert(isec->align != 0);
17551757
auto align = getStringPieceAlignment(isec, piece);
17561758
auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
1759+
if (config->tailMergeStrings && wasInserted)
1760+
deduplicatedStrs.push_back(s);
17571761
if (!wasInserted && it->second < align)
17581762
it->second = align;
17591763
}
17601764
}
17611765

1766+
// Like lexicographical sort, except we read strings in reverse and take the
1767+
// longest string first
1768+
// TODO: We could improve performance by implementing our own sort that avoids
1769+
// comparing characters we know to be the same. See
1770+
// StringTableBuilder::multikeySort() for details
1771+
llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) {
1772+
for (const auto &[leftChar, rightChar] :
1773+
llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) {
1774+
if (leftChar == rightChar)
1775+
continue;
1776+
return leftChar < rightChar;
1777+
}
1778+
return left.size() > right.size();
1779+
});
1780+
std::optional<CachedHashStringRef> mergeCandidate;
1781+
DenseMap<CachedHashStringRef, std::pair<CachedHashStringRef, uint64_t>>
1782+
tailMergeMap;
1783+
for (auto &s : deduplicatedStrs) {
1784+
if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) {
1785+
mergeCandidate = s;
1786+
continue;
1787+
}
1788+
uint64_t tailMergeOffset = mergeCandidate->size() - s.size();
1789+
// TODO: If the tail offset is incompatible with this string's alignment, we
1790+
// might be able to find another superstring with a compatible tail offset.
1791+
// The difficulty is how to do this efficiently
1792+
const auto &align = strToAlignment.at(s);
1793+
if (!isAligned(align, tailMergeOffset))
1794+
continue;
1795+
auto &mergeCandidateAlign = strToAlignment[*mergeCandidate];
1796+
if (align > mergeCandidateAlign)
1797+
mergeCandidateAlign = align;
1798+
tailMergeMap.try_emplace(s, *mergeCandidate, tailMergeOffset);
1799+
}
1800+
17621801
// Sort the strings for performance and compression size win, and then
17631802
// assign an offset for each string and save it to the corresponding
17641803
// StringPieces for easy access.
17651804
for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
17661805
auto &piece = isec->pieces[i];
17671806
auto s = isec->getCachedHashStringRef(i);
1807+
// Any string can be tail merged with itself with an offset of zero
1808+
uint64_t tailMergeOffset = 0;
1809+
auto mergeIt =
1810+
config->tailMergeStrings ? tailMergeMap.find(s) : tailMergeMap.end();
1811+
if (mergeIt != tailMergeMap.end()) {
1812+
auto &[superString, offset] = mergeIt->second;
1813+
// s can be tail merged with superString. Do not lay out s. Instead lay out
1814+
// superString if we haven't already
1815+
assert(superString.val().ends_with(s.val()));
1816+
s = superString;
1817+
tailMergeOffset = offset;
1818+
}
17681819
auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
17691820
if (wasInserted) {
17701821
// Avoid computing the offset until we are sure we will need to
17711822
uint64_t offset = alignTo(size, strToAlignment.at(s));
17721823
it->second = offset;
17731824
size = offset + s.size() + 1; // account for null terminator
17741825
}
1775-
// If the string was already in stringOffsetMap, it is a duplicate and we
1776-
// only need to assign the offset.
1777-
piece.outSecOff = it->second;
1826+
piece.outSecOff = it->second + tailMergeOffset;
1827+
if (mergeIt != tailMergeMap.end()) {
1828+
auto &tailMergedString = mergeIt->first;
1829+
stringOffsetMap[tailMergedString] = piece.outSecOff;
1830+
assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff));
1831+
}
17781832
}
17791833
for (CStringInputSection *isec : inputs)
17801834
isec->isFinal = true;

lld/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ MachO Improvements
4646

4747
* ``--separate-cstring-literal-sections`` emits cstring literal sections into sections defined by their section name.
4848
(`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_)
49+
* ``--tail-merge-strings`` enables tail merging of cstring literals.
50+
(`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_)
4951

5052
WebAssembly Improvements
5153
------------------------
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
; REQUIRES: aarch64
2+
; RUN: rm -rf %t && split-file %s %t
3+
4+
; Test that ObjC method names are tail merged and
5+
; ObjCSelRefsHelper::makeSelRef() still works correctly
6+
7+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
8+
; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/a.o -o %t/a
9+
; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/a | FileCheck %s --implicit-check-not=error
10+
11+
; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings %t/a.o -o %t/nomerge
12+
; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/nomerge | FileCheck %s --check-prefixes=CHECK,NOMERGE --implicit-check-not=error
13+
14+
; CHECK: withBar:error:
15+
; NOMERGE: error:
16+
17+
;--- a.mm
18+
__attribute__((objc_root_class))
19+
@interface Foo
20+
- (void)withBar:(int)bar error:(int)error;
21+
- (void)error:(int)error;
22+
@end
23+
24+
@implementation Foo
25+
- (void)withBar:(int)bar error:(int)error {}
26+
- (void)error:(int)error {}
27+
@end
28+
29+
void *_objc_empty_cache;
30+
void *_objc_empty_vtable;
31+
;--- gen
32+
clang -Oz -target arm64-apple-darwin a.mm -S -o -
33+
;--- a.s
34+
.build_version macos, 11, 0
35+
.section __TEXT,__text,regular,pure_instructions
36+
.p2align 2 ; -- Begin function -[Foo withBar:error:]
37+
"-[Foo withBar:error:]": ; @"\01-[Foo withBar:error:]"
38+
.cfi_startproc
39+
; %bb.0:
40+
ret
41+
.cfi_endproc
42+
; -- End function
43+
.p2align 2 ; -- Begin function -[Foo error:]
44+
"-[Foo error:]": ; @"\01-[Foo error:]"
45+
.cfi_startproc
46+
; %bb.0:
47+
ret
48+
.cfi_endproc
49+
; -- End function
50+
.globl __objc_empty_vtable ; @_objc_empty_vtable
51+
.zerofill __DATA,__common,__objc_empty_vtable,8,3
52+
.section __DATA,__objc_data
53+
.globl _OBJC_CLASS_$_Foo ; @"OBJC_CLASS_$_Foo"
54+
.p2align 3, 0x0
55+
_OBJC_CLASS_$_Foo:
56+
.quad _OBJC_METACLASS_$_Foo
57+
.quad 0
58+
.quad __objc_empty_cache
59+
.quad __objc_empty_vtable
60+
.quad __OBJC_CLASS_RO_$_Foo
61+
62+
.globl _OBJC_METACLASS_$_Foo ; @"OBJC_METACLASS_$_Foo"
63+
.p2align 3, 0x0
64+
_OBJC_METACLASS_$_Foo:
65+
.quad _OBJC_METACLASS_$_Foo
66+
.quad _OBJC_CLASS_$_Foo
67+
.quad __objc_empty_cache
68+
.quad __objc_empty_vtable
69+
.quad __OBJC_METACLASS_RO_$_Foo
70+
71+
.section __TEXT,__objc_classname,cstring_literals
72+
l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_
73+
.asciz "Foo"
74+
75+
.section __DATA,__objc_const
76+
.p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_Foo"
77+
__OBJC_METACLASS_RO_$_Foo:
78+
.long 3 ; 0x3
79+
.long 40 ; 0x28
80+
.long 40 ; 0x28
81+
.space 4
82+
.quad 0
83+
.quad l_OBJC_CLASS_NAME_
84+
.quad 0
85+
.quad 0
86+
.quad 0
87+
.quad 0
88+
.quad 0
89+
90+
.section __TEXT,__objc_methname,cstring_literals
91+
l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_
92+
.asciz "withBar:error:"
93+
94+
.section __TEXT,__objc_methtype,cstring_literals
95+
l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_
96+
.asciz "v24@0:8i16i20"
97+
98+
.section __TEXT,__objc_methname,cstring_literals
99+
l_OBJC_METH_VAR_NAME_.1: ; @OBJC_METH_VAR_NAME_.1
100+
.asciz "error:"
101+
102+
.section __TEXT,__objc_methtype,cstring_literals
103+
l_OBJC_METH_VAR_TYPE_.2: ; @OBJC_METH_VAR_TYPE_.2
104+
.asciz "v20@0:8i16"
105+
106+
.section __DATA,__objc_const
107+
.p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_Foo"
108+
__OBJC_$_INSTANCE_METHODS_Foo:
109+
.long 24 ; 0x18
110+
.long 2 ; 0x2
111+
.quad l_OBJC_METH_VAR_NAME_
112+
.quad l_OBJC_METH_VAR_TYPE_
113+
.quad "-[Foo withBar:error:]"
114+
.quad l_OBJC_METH_VAR_NAME_.1
115+
.quad l_OBJC_METH_VAR_TYPE_.2
116+
.quad "-[Foo error:]"
117+
118+
.p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_Foo"
119+
__OBJC_CLASS_RO_$_Foo:
120+
.long 2 ; 0x2
121+
.long 0 ; 0x0
122+
.long 0 ; 0x0
123+
.space 4
124+
.quad 0
125+
.quad l_OBJC_CLASS_NAME_
126+
.quad __OBJC_$_INSTANCE_METHODS_Foo
127+
.quad 0
128+
.quad 0
129+
.quad 0
130+
.quad 0
131+
132+
.globl __objc_empty_cache ; @_objc_empty_cache
133+
.zerofill __DATA,__common,__objc_empty_cache,8,3
134+
.section __DATA,__objc_classlist,regular,no_dead_strip
135+
.p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$"
136+
l_OBJC_LABEL_CLASS_$:
137+
.quad _OBJC_CLASS_$_Foo
138+
139+
.section __DATA,__objc_imageinfo,regular,no_dead_strip
140+
L_OBJC_IMAGE_INFO:
141+
.long 0
142+
.long 64
143+
144+
.subsections_via_symbols

lld/test/MachO/cstring-tailmerge.s

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
; REQUIRES: aarch64
2+
; RUN: rm -rf %t && split-file %s %t
3+
4+
; RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s
5+
; RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s
6+
; RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s
7+
8+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/first.s -o %t/first.o
9+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-1.s -o %t/align-1.o
10+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-2.s -o %t/align-2.o
11+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-4.s -o %t/align-4.o
12+
13+
; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1
14+
; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1
15+
16+
; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2
17+
; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2
18+
19+
; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4
20+
; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4
21+
22+
; CHECK: Contents of (__TEXT,__cstring) section
23+
; CHECK: [[#%.16x,START:]] get awkward offset{{$}}
24+
25+
; ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}}
26+
; ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}}
27+
28+
; ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}}
29+
; ALIGN2: [[#%.16x,START+20+16]] longstr{{$}}
30+
; ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}}
31+
; ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}}
32+
33+
; ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}}
34+
; ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}}
35+
; ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}}
36+
; ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}}
37+
; ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}}
38+
39+
; CHECK: SYMBOL TABLE:
40+
41+
; ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr
42+
; ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr
43+
; ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr
44+
; ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr
45+
; ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str
46+
47+
; ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
48+
; ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr
49+
; ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr
50+
; ALIGN2: [[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr
51+
; ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str
52+
53+
; ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
54+
; ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr
55+
; ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr
56+
; ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr
57+
; ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str
58+
59+
;--- first.s
60+
.cstring
61+
.p2align 2
62+
.asciz "get awkward offset" ; length = 19
63+
64+
;--- align.s.template
65+
.cstring
66+
67+
.p2align <ALIGN>
68+
_myotherlongstr:
69+
.asciz "myotherlongstr" ; length = 15
70+
71+
.p2align <ALIGN>
72+
_otherlongstr:
73+
.asciz "otherlongstr" ; length = 13, tail offset = 2
74+
75+
.p2align <ALIGN>
76+
_longstr:
77+
.asciz "longstr" ; length = 8, tail offset = 7
78+
79+
.p2align <ALIGN>
80+
_otherstr:
81+
.asciz "otherstr" ; length = 9
82+
83+
.p2align <ALIGN>
84+
_str:
85+
.asciz "str" ; length = 4, tail offset = 5

0 commit comments

Comments
 (0)