Skip to content
59 changes: 58 additions & 1 deletion lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1746,6 +1746,7 @@ void CStringSection::finalizeContents() {
void DeduplicatedCStringSection::finalizeContents() {
// Find the largest alignment required for each string.
DenseMap<CachedHashStringRef, Align> strToAlignment;
std::vector<CachedHashStringRef> deduplicatedStrs;
for (const CStringInputSection *isec : inputs) {
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
Expand All @@ -1754,17 +1755,57 @@ void DeduplicatedCStringSection::finalizeContents() {
assert(isec->align != 0);
auto align = getStringPieceAlignment(isec, piece);
auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
if (wasInserted)
deduplicatedStrs.push_back(s);
if (!wasInserted && it->second < align)
it->second = align;
}
}

// Like lexigraphical sort, except we read strings in reverse and take the
// longest string first
// TODO: We could improve performance by implementing our own sort that avoids
// comparing characters we know to be the same. See
// StringTableBuilder::multikeySort() for details
llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) {
for (const auto &[leftChar, rightChar] :
llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) {
if (leftChar == rightChar)
continue;
return leftChar < rightChar;
}
return left.size() > right.size();
});
std::optional<CachedHashStringRef> mergeCandidate;
DenseMap<CachedHashStringRef, std::pair<CachedHashStringRef, uint64_t>>
tailMergeMap;
for (auto &s : deduplicatedStrs) {
if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) {
mergeCandidate = s;
continue;
}
uint64_t tailOffset = mergeCandidate->size() - s.size();
// TODO: If the tail offset is incompatible with this string's alignment, we
// might be able to find another superstring with a compatible tail offset.
// The difficulty is how to do this efficiently
const auto &align = strToAlignment.at(s);
if (!isAligned(align, tailOffset))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was a bit confused initially. Because the alignment of superstring will be promoted by the following code, here we can just check if the tailOffset is aligned and bail-out the failing case early.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The isAligned(align, tailOffset) check assumes that the superstring is laid out at address zero. If this check fails, it's impossible to tail merge with this candidate correctly.

But if the check passes, then we also need to make sure the superstring does not have a weaker alignment than the substring.

Suppose the superstring is at address 0x1004 and the substring could tail merge at address 0x1004 + 8, but requires an alignment of 8. That would break alignment, so instead we need to force the superstring to have alignment 8 so it is laid out at address 0x1008.

continue;
auto &mergeCandidateAlign = strToAlignment[*mergeCandidate];
if (align > mergeCandidateAlign)
mergeCandidateAlign = align;
tailMergeMap.try_emplace(s, *mergeCandidate, tailOffset);
}

// Sort the strings for performance and compression size win, and then
// assign an offset for each string and save it to the corresponding
// StringPieces for easy access.
for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
auto &piece = isec->pieces[i];
auto s = isec->getCachedHashStringRef(i);
// Skip tail merged strings until their superstring offsets are resolved
if (tailMergeMap.count(s))
continue;
auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
if (wasInserted) {
// Avoid computing the offset until we are sure we will need to
Expand All @@ -1776,8 +1817,24 @@ void DeduplicatedCStringSection::finalizeContents() {
// only need to assign the offset.
piece.outSecOff = it->second;
}
for (CStringInputSection *isec : inputs)
for (CStringInputSection *isec : inputs) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some comment on this loop? It appears the previous loop layouts the substrings while the loop resolves the substring offsets.

for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we've already skipped this for tailMergeMap in the prior loop, we can drop this check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true. If the string is in tailMergeMap then it will be live. However, we can skip the lookup if the string is dead, so this is more for performance than correctness.

continue;
auto s = isec->getCachedHashStringRef(i);
auto it = tailMergeMap.find(s);
if (it == tailMergeMap.end())
continue;
const auto &[superString, tailOffset] = it->second;
assert(superString.val().ends_with(s.val()));
assert(!tailMergeMap.count(superString));
auto &outSecOff = stringOffsetMap[s];
outSecOff = stringOffsetMap.at(superString) + tailOffset;
piece.outSecOff = outSecOff;
assert(isAligned(strToAlignment.at(s), piece.outSecOff));
}
isec->isFinal = true;
}
}

void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
Expand Down
2 changes: 2 additions & 0 deletions lld/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ MachO Improvements

* ``--separate-cstring-literal-sections`` emits cstring literal sections into sections defined by their section name.
(`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_)
* cstrings sections are now tail merged.
(`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_)

WebAssembly Improvements
------------------------
Expand Down
3 changes: 1 addition & 2 deletions lld/test/MachO/cstring-dedup.s
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
# RUN: llvm-objdump --macho --section="__DATA,ptrs" --syms %t/test | FileCheck %s
# RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER

## Make sure we only have 3 deduplicated strings in __cstring.
## Make sure we only have 2 deduplicated strings in __cstring.
# STR: Contents of (__TEXT,__cstring) section
# STR: {{[[:xdigit:]]+}} foo
# STR: {{[[:xdigit:]]+}} barbaz
# STR: {{[[:xdigit:]]+}} {{$}}

## Make sure both symbol and section relocations point to the right thing.
# CHECK: Contents of (__DATA,ptrs) section
Expand Down
85 changes: 85 additions & 0 deletions lld/test/MachO/cstring-tailmerge.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t

# RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s
# RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s
# RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s

# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-1.s -o %t/align-1.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-2.s -o %t/align-2.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4.s -o %t/align-4.o

# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-1.o -o %t/align-1
# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1

# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-2.o -o %t/align-2
# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2

# RUN: %lld -dylib --deduplicate-strings %t/first.o %t/align-4.o -o %t/align-4
# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4

# CHECK: Contents of (__TEXT,__cstring) section
# CHECK: [[#%.16x,START:]] get awkward offset{{$}}

# ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}}
# ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}}

# ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}}
# ALIGN2: [[#%.16x,START+20+16]] longstr{{$}}
# ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}}
# ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}}

# ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}}
# ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}}
# ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}}
# ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}}
# ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}}

# CHECK: SYMBOL TABLE:

# ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr
# ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr
# ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr
# ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr
# ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str

# ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
# ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr
# ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr
# ALIGN2: [[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr
# ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str

# ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
# ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr
# ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr
# ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr
# ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str

#--- first.s
.cstring
.p2align 2
.asciz "get awkward offset" # length = 19

#--- align.s.template
.cstring

.p2align <ALIGN>
_myotherlongstr:
.asciz "myotherlongstr" # length = 15

.p2align <ALIGN>
_otherlongstr:
.asciz "otherlongstr" # length = 13, tail offset = 2

.p2align <ALIGN>
_longstr:
.asciz "longstr" # length = 8, tail offset = 7

.p2align <ALIGN>
_otherstr:
.asciz "otherstr" # length = 9

.p2align <ALIGN>
_str:
.asciz "str" # length = 4, tail offset = 5