Skip to content

Commit d0e9890

Browse files
authored
[lld][MachO] Tail merge strings (#161262)
Add the flag `--tail-merge-strings` to enable tail merging of cstrings. For example, if we have strings `mystring\0` and `ring\0`, we could place `mystring\0` at address `0x1000` and `ring\0` at address `0x1004` and have them share the same underlying data.

It turns out that many ObjC method names can be tail merged. For example, `error:` and `doFoo:error:`. On a large iOS binary, we saw nearly a 15% size improvement in the `__TEXT,__objc_methname` section and negligible impact on link time.

```
$ bloaty --domain=vm merged.o.stripped -- base.o.stripped
     VM SIZE
 --------------
   +95% +5.85Ki    [__TEXT]
  -2.4%  -239Ki    __TEXT,__cstring
 -14.5%  -710Ki    __TEXT,__objc_methname
  -1.0%  -944Ki    TOTAL
```

Tail merging for MachO was originally removed in 7c269db. The previous implementation used `StringTableBuilder`, but that was removed in 4308f03 to ensure deduplicated strings are aligned correctly. This implementation ensures that tail merged strings are also aligned correctly.

Special thanks to nocchijiang for pointing this out in #158720 (comment).

Depends on #161253.
1 parent 3add28b commit d0e9890

File tree

8 files changed

+351
-3
lines changed

8 files changed

+351
-3
lines changed

lld/MachO/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ struct Configuration {
223223
bool warnThinArchiveMissingMembers;
224224
bool disableVerify;
225225
bool separateCstringLiteralSections;
226+
bool tailMergeStrings;
226227

227228
bool callGraphProfileSort = false;
228229
llvm::StringRef printSymbolOrder;

lld/MachO/Driver.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1986,6 +1986,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
19861986
config->separateCstringLiteralSections =
19871987
args.hasFlag(OPT_separate_cstring_literal_sections,
19881988
OPT_no_separate_cstring_literal_sections, false);
1989+
config->tailMergeStrings =
1990+
args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false);
19891991

19901992
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
19911993
// Throw an error only if --call-graph-profile-sort is explicitly specified

lld/MachO/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,10 @@ defm separate_cstring_literal_sections
10911091
"Emit all cstring literals into the __cstring section. As a special "
10921092
"case, the __objc_methname section will still be emitted. (default)">,
10931093
Group<grp_rare>;
1094+
defm tail_merge_strings
1095+
: BB<"tail-merge-strings", "Enable string tail merging",
1096+
"Disable string tail merging to improve link-time performance">,
1097+
Group<grp_rare>;
10941098

10951099
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
10961100

lld/MachO/SyntheticSections.cpp

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1746,6 +1746,8 @@ void CStringSection::finalizeContents() {
17461746
void DeduplicatedCStringSection::finalizeContents() {
17471747
// Find the largest alignment required for each string.
17481748
DenseMap<CachedHashStringRef, Align> strToAlignment;
1749+
// Used for tail merging only
1750+
std::vector<CachedHashStringRef> deduplicatedStrs;
17491751
for (const CStringInputSection *isec : inputs) {
17501752
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
17511753
if (!piece.live)
@@ -1754,27 +1756,79 @@ void DeduplicatedCStringSection::finalizeContents() {
17541756
assert(isec->align != 0);
17551757
auto align = getStringPieceAlignment(isec, piece);
17561758
auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
1759+
if (config->tailMergeStrings && wasInserted)
1760+
deduplicatedStrs.push_back(s);
17571761
if (!wasInserted && it->second < align)
17581762
it->second = align;
17591763
}
17601764
}
17611765

1766+
// Like lexicographical sort, except we read strings in reverse and take the
1767+
// longest string first
1768+
// TODO: We could improve performance by implementing our own sort that avoids
1769+
// comparing characters we know to be the same. See
1770+
// StringTableBuilder::multikeySort() for details
1771+
llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) {
1772+
for (const auto &[leftChar, rightChar] :
1773+
llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) {
1774+
if (leftChar == rightChar)
1775+
continue;
1776+
return leftChar < rightChar;
1777+
}
1778+
return left.size() > right.size();
1779+
});
1780+
std::optional<CachedHashStringRef> mergeCandidate;
1781+
DenseMap<CachedHashStringRef, std::pair<CachedHashStringRef, uint64_t>>
1782+
tailMergeMap;
1783+
for (auto &s : deduplicatedStrs) {
1784+
if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) {
1785+
mergeCandidate = s;
1786+
continue;
1787+
}
1788+
uint64_t tailMergeOffset = mergeCandidate->size() - s.size();
1789+
// TODO: If the tail offset is incompatible with this string's alignment, we
1790+
// might be able to find another superstring with a compatible tail offset.
1791+
// The difficulty is how to do this efficiently
1792+
const auto &align = strToAlignment.at(s);
1793+
if (!isAligned(align, tailMergeOffset))
1794+
continue;
1795+
auto &mergeCandidateAlign = strToAlignment[*mergeCandidate];
1796+
if (align > mergeCandidateAlign)
1797+
mergeCandidateAlign = align;
1798+
tailMergeMap.try_emplace(s, *mergeCandidate, tailMergeOffset);
1799+
}
1800+
17621801
// Sort the strings for performance and compression size win, and then
17631802
// assign an offset for each string and save it to the corresponding
17641803
// StringPieces for easy access.
17651804
for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
17661805
auto &piece = isec->pieces[i];
17671806
auto s = isec->getCachedHashStringRef(i);
1807+
// Any string can be tail merged with itself with an offset of zero
1808+
uint64_t tailMergeOffset = 0;
1809+
auto mergeIt =
1810+
config->tailMergeStrings ? tailMergeMap.find(s) : tailMergeMap.end();
1811+
if (mergeIt != tailMergeMap.end()) {
1812+
auto &[superString, offset] = mergeIt->second;
1813+
// s can be tail merged with superString. Do not lay out s. Instead lay out
1814+
// superString if we haven't already
1815+
assert(superString.val().ends_with(s.val()));
1816+
s = superString;
1817+
tailMergeOffset = offset;
1818+
}
17681819
auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
17691820
if (wasInserted) {
17701821
// Avoid computing the offset until we are sure we will need to
17711822
uint64_t offset = alignTo(size, strToAlignment.at(s));
17721823
it->second = offset;
17731824
size = offset + s.size() + 1; // account for null terminator
17741825
}
1775-
// If the string was already in stringOffsetMap, it is a duplicate and we
1776-
// only need to assign the offset.
1777-
piece.outSecOff = it->second;
1826+
piece.outSecOff = it->second + tailMergeOffset;
1827+
if (mergeIt != tailMergeMap.end()) {
1828+
auto &tailMergedString = mergeIt->first;
1829+
stringOffsetMap[tailMergedString] = piece.outSecOff;
1830+
assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff));
1831+
}
17781832
}
17791833
for (CStringInputSection *isec : inputs)
17801834
isec->isFinal = true;

lld/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ MachO Improvements
4646

4747
* ``--separate-cstring-literal-sections`` emits cstring literal sections into sections defined by their section name.
4848
(`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_)
49+
* ``--tail-merge-strings`` enables tail merging of cstring literals.
50+
(`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_)
4951

5052
WebAssembly Improvements
5153
------------------------
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
; REQUIRES: aarch64
2+
; RUN: rm -rf %t && split-file %s %t
3+
4+
; Test that ObjC method names are tail merged and
5+
; ObjCSelRefsHelper::makeSelRef() still works correctly
6+
7+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
8+
; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/a.o -o %t/a
9+
; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/a | FileCheck %s --implicit-check-not=error
10+
11+
; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings %t/a.o -o %t/nomerge
12+
; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/nomerge | FileCheck %s --check-prefixes=CHECK,NOMERGE --implicit-check-not=error
13+
14+
; CHECK: withBar:error:
15+
; NOMERGE: error:
16+
17+
;--- a.mm
18+
__attribute__((objc_root_class))
19+
@interface Foo
20+
- (void)withBar:(int)bar error:(int)error;
21+
- (void)error:(int)error;
22+
@end
23+
24+
@implementation Foo
25+
- (void)withBar:(int)bar error:(int)error {}
26+
- (void)error:(int)error {}
27+
@end
28+
29+
void *_objc_empty_cache;
30+
void *_objc_empty_vtable;
31+
;--- gen
32+
clang -Oz -target arm64-apple-darwin a.mm -S -o -
33+
;--- a.s
34+
.build_version macos, 11, 0
35+
.section __TEXT,__text,regular,pure_instructions
36+
.p2align 2 ; -- Begin function -[Foo withBar:error:]
37+
"-[Foo withBar:error:]": ; @"\01-[Foo withBar:error:]"
38+
.cfi_startproc
39+
; %bb.0:
40+
ret
41+
.cfi_endproc
42+
; -- End function
43+
.p2align 2 ; -- Begin function -[Foo error:]
44+
"-[Foo error:]": ; @"\01-[Foo error:]"
45+
.cfi_startproc
46+
; %bb.0:
47+
ret
48+
.cfi_endproc
49+
; -- End function
50+
.globl __objc_empty_vtable ; @_objc_empty_vtable
51+
.zerofill __DATA,__common,__objc_empty_vtable,8,3
52+
.section __DATA,__objc_data
53+
.globl _OBJC_CLASS_$_Foo ; @"OBJC_CLASS_$_Foo"
54+
.p2align 3, 0x0
55+
_OBJC_CLASS_$_Foo:
56+
.quad _OBJC_METACLASS_$_Foo
57+
.quad 0
58+
.quad __objc_empty_cache
59+
.quad __objc_empty_vtable
60+
.quad __OBJC_CLASS_RO_$_Foo
61+
62+
.globl _OBJC_METACLASS_$_Foo ; @"OBJC_METACLASS_$_Foo"
63+
.p2align 3, 0x0
64+
_OBJC_METACLASS_$_Foo:
65+
.quad _OBJC_METACLASS_$_Foo
66+
.quad _OBJC_CLASS_$_Foo
67+
.quad __objc_empty_cache
68+
.quad __objc_empty_vtable
69+
.quad __OBJC_METACLASS_RO_$_Foo
70+
71+
.section __TEXT,__objc_classname,cstring_literals
72+
l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_
73+
.asciz "Foo"
74+
75+
.section __DATA,__objc_const
76+
.p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_Foo"
77+
__OBJC_METACLASS_RO_$_Foo:
78+
.long 3 ; 0x3
79+
.long 40 ; 0x28
80+
.long 40 ; 0x28
81+
.space 4
82+
.quad 0
83+
.quad l_OBJC_CLASS_NAME_
84+
.quad 0
85+
.quad 0
86+
.quad 0
87+
.quad 0
88+
.quad 0
89+
90+
.section __TEXT,__objc_methname,cstring_literals
91+
l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_
92+
.asciz "withBar:error:"
93+
94+
.section __TEXT,__objc_methtype,cstring_literals
95+
l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_
96+
.asciz "v24@0:8i16i20"
97+
98+
.section __TEXT,__objc_methname,cstring_literals
99+
l_OBJC_METH_VAR_NAME_.1: ; @OBJC_METH_VAR_NAME_.1
100+
.asciz "error:"
101+
102+
.section __TEXT,__objc_methtype,cstring_literals
103+
l_OBJC_METH_VAR_TYPE_.2: ; @OBJC_METH_VAR_TYPE_.2
104+
.asciz "v20@0:8i16"
105+
106+
.section __DATA,__objc_const
107+
.p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_Foo"
108+
__OBJC_$_INSTANCE_METHODS_Foo:
109+
.long 24 ; 0x18
110+
.long 2 ; 0x2
111+
.quad l_OBJC_METH_VAR_NAME_
112+
.quad l_OBJC_METH_VAR_TYPE_
113+
.quad "-[Foo withBar:error:]"
114+
.quad l_OBJC_METH_VAR_NAME_.1
115+
.quad l_OBJC_METH_VAR_TYPE_.2
116+
.quad "-[Foo error:]"
117+
118+
.p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_Foo"
119+
__OBJC_CLASS_RO_$_Foo:
120+
.long 2 ; 0x2
121+
.long 0 ; 0x0
122+
.long 0 ; 0x0
123+
.space 4
124+
.quad 0
125+
.quad l_OBJC_CLASS_NAME_
126+
.quad __OBJC_$_INSTANCE_METHODS_Foo
127+
.quad 0
128+
.quad 0
129+
.quad 0
130+
.quad 0
131+
132+
.globl __objc_empty_cache ; @_objc_empty_cache
133+
.zerofill __DATA,__common,__objc_empty_cache,8,3
134+
.section __DATA,__objc_classlist,regular,no_dead_strip
135+
.p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$"
136+
l_OBJC_LABEL_CLASS_$:
137+
.quad _OBJC_CLASS_$_Foo
138+
139+
.section __DATA,__objc_imageinfo,regular,no_dead_strip
140+
L_OBJC_IMAGE_INFO:
141+
.long 0
142+
.long 64
143+
144+
.subsections_via_symbols

lld/test/MachO/cstring-tailmerge.s

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
; REQUIRES: aarch64
2+
; RUN: rm -rf %t && split-file %s %t
3+
4+
; RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s
5+
; RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s
6+
; RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s
7+
8+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/first.s -o %t/first.o
9+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-1.s -o %t/align-1.o
10+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-2.s -o %t/align-2.o
11+
; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-4.s -o %t/align-4.o
12+
13+
; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1
14+
; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1
15+
16+
; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2
17+
; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2
18+
19+
; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4
20+
; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4
21+
22+
; CHECK: Contents of (__TEXT,__cstring) section
23+
; CHECK: [[#%.16x,START:]] get awkward offset{{$}}
24+
25+
; ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}}
26+
; ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}}
27+
28+
; ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}}
29+
; ALIGN2: [[#%.16x,START+20+16]] longstr{{$}}
30+
; ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}}
31+
; ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}}
32+
33+
; ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}}
34+
; ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}}
35+
; ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}}
36+
; ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}}
37+
; ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}}
38+
39+
; CHECK: SYMBOL TABLE:
40+
41+
; ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr
42+
; ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr
43+
; ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr
44+
; ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr
45+
; ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str
46+
47+
; ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
48+
; ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr
49+
; ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr
50+
; ALIGN2: [[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr
51+
; ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str
52+
53+
; ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
54+
; ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr
55+
; ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr
56+
; ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr
57+
; ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str
58+
59+
;--- first.s
60+
.cstring
61+
.p2align 2
62+
.asciz "get awkward offset" ; length = 19
63+
64+
;--- align.s.template
65+
.cstring
66+
67+
.p2align <ALIGN>
68+
_myotherlongstr:
69+
.asciz "myotherlongstr" ; length = 15
70+
71+
.p2align <ALIGN>
72+
_otherlongstr:
73+
.asciz "otherlongstr" ; length = 13, tail offset = 2
74+
75+
.p2align <ALIGN>
76+
_longstr:
77+
.asciz "longstr" ; length = 8, tail offset = 7
78+
79+
.p2align <ALIGN>
80+
_otherstr:
81+
.asciz "otherstr" ; length = 9
82+
83+
.p2align <ALIGN>
84+
_str:
85+
.asciz "str" ; length = 4, tail offset = 5

0 commit comments

Comments
 (0)