Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 65 additions & 62 deletions lld/MachO/SectionPriorities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/xxhash.h"

#include <numeric>

Expand Down Expand Up @@ -246,33 +247,45 @@ DenseMap<const InputSection *, int> CallGraphSort::run() {
return orderMap;
}

std::optional<int>
macho::PriorityBuilder::getSymbolOrCStringPriority(const StringRef key,
InputFile *f) {
// Records `priority` for this entry. An empty `objectFile` means the order
// file line was not qualified with an object file, so the priority applies to
// any object file; otherwise it is stored for that specific object file.
void macho::PriorityBuilder::SymbolPriorityEntry::setPriority(
    int priority, StringRef objectFile) {
  if (objectFile.empty()) {
    // Unqualified entry: keep the smallest priority seen so far.
    anyObjectFile = std::min(anyObjectFile, priority);
    return;
  }
  // Qualified entry: try_emplace leaves an already-recorded priority for this
  // object file untouched, so the first one recorded is kept.
  objectFiles.try_emplace(objectFile, priority);
}

auto it = priorities.find(key);
if (it == priorities.end())
return std::nullopt;
const SymbolPriorityEntry &entry = it->second;
int macho::PriorityBuilder::SymbolPriorityEntry::getPriority(
const InputFile *f) const {
if (!f)
return entry.anyObjectFile;
return anyObjectFile;
// We don't use toString(InputFile *) here because it returns the full path
// for object files, and we only want the basename.
StringRef filename;
if (f->archiveName.empty())
filename = path::filename(f->getName());
else
filename = saver().save(path::filename(f->archiveName) + "(" +
path::filename(f->getName()) + ")");
return std::min(entry.objectFiles.lookup(filename), entry.anyObjectFile);
StringRef basename = path::filename(f->getName());
StringRef filename =
f->archiveName.empty()
? basename
: saver().save(path::filename(f->archiveName) + "(" + basename + ")");
return std::min(objectFiles.lookup(filename), anyObjectFile);
}

std::optional<int>
macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
macho::PriorityBuilder::getCStringPriority(uint32_t hash,
const InputFile *f) const {
auto it = cStringPriorities.find(hash);
if (it == cStringPriorities.end())
return std::nullopt;
return it->second.getPriority(f);
}

std::optional<int>
macho::PriorityBuilder::getSymbolPriority(const Defined *sym) const {
if (sym->isAbsolute())
return std::nullopt;
return getSymbolOrCStringPriority(utils::getRootSymbol(sym->getName()),
sym->isec()->getFile());
auto it = priorities.find(utils::getRootSymbol(sym->getName()));
if (it == priorities.end())
return std::nullopt;
return it->second.getPriority(sym->isec()->getFile());
}

void macho::PriorityBuilder::extractCallGraphProfile() {
Expand Down Expand Up @@ -307,7 +320,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
int prio = std::numeric_limits<int>::min();
MemoryBufferRef mbref = *buffer;
for (StringRef line : args::getLines(mbref)) {
StringRef objectFile, symbolOrCStrHash;
StringRef objectFile;
line = line.take_until([](char c) { return c == '#'; }); // ignore comments
line = line.ltrim();

Expand Down Expand Up @@ -338,23 +351,14 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
}

// The rest of the line is either <symbol name> or
// CStringEntryPrefix<cstring hash>
// cStringEntryPrefix<cstring hash>
line = line.trim();
if (line.starts_with(CStringEntryPrefix)) {
StringRef possibleHash = line.drop_front(CStringEntryPrefix.size());
if (line.consume_front(cStringEntryPrefix)) {
uint32_t hash = 0;
if (to_integer(possibleHash, hash))
symbolOrCStrHash = possibleHash;
if (to_integer(line, hash))
cStringPriorities[hash].setPriority(prio, objectFile);
} else
symbolOrCStrHash = utils::getRootSymbol(line);

if (!symbolOrCStrHash.empty()) {
SymbolPriorityEntry &entry = priorities[symbolOrCStrHash];
if (!objectFile.empty())
entry.objectFiles.insert(std::make_pair(objectFile, prio));
else
entry.anyObjectFile = std::min(entry.anyObjectFile, prio);
}
priorities[utils::getRootSymbol(line)].setPriority(prio, objectFile);

++prio;
}
Expand Down Expand Up @@ -405,40 +409,39 @@ macho::PriorityBuilder::buildInputSectionPriorities() {
return sectionPriorities;
}

std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities(
ArrayRef<CStringInputSection *> inputs) {
// Split the input strings into hot and cold sets.
// Order hot set based on -order_file_cstring for performance improvement;
// TODO: Order cold set of cstrings for compression via BP.
std::vector<std::pair<int, StringPiecePair>>
hotStringPrioritiesAndStringPieces;
std::vector<StringPiecePair> coldStringPieces;
std::vector<StringPiecePair> orderedStringPieces;

void macho::PriorityBuilder::forEachStringPiece(
ArrayRef<CStringInputSection *> inputs,
std::function<void(CStringInputSection &, StringPiece &, size_t)> f,
bool forceInputOrder, bool computeHash) const {
std::vector<std::tuple<int, CStringInputSection *, size_t>> orderedPieces;
std::vector<std::pair<CStringInputSection *, size_t>> unorderedPieces;
for (CStringInputSection *isec : inputs) {
for (const auto &[stringPieceIdx, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
continue;

std::optional<int> priority = getSymbolOrCStringPriority(
std::to_string(piece.hash), isec->getFile());
if (!priority)
coldStringPieces.emplace_back(isec, stringPieceIdx);
// Process pieces in input order if we have no cstrings in our orderfile
if (forceInputOrder || cStringPriorities.empty()) {
f(*isec, piece, stringPieceIdx);
continue;
}
uint32_t hash =
computeHash
? (xxh3_64bits(isec->getStringRef(stringPieceIdx)) & 0x7fffffff)
: piece.hash;
if (auto priority = getCStringPriority(hash, isec->getFile()))
orderedPieces.emplace_back(*priority, isec, stringPieceIdx);
else
hotStringPrioritiesAndStringPieces.emplace_back(
*priority, std::make_pair(isec, stringPieceIdx));
unorderedPieces.emplace_back(isec, stringPieceIdx);
}
}

// Order hot set for perf
llvm::stable_sort(hotStringPrioritiesAndStringPieces);
for (auto &[priority, stringPiecePair] : hotStringPrioritiesAndStringPieces)
orderedStringPieces.push_back(stringPiecePair);

// TODO: Order cold set for compression

orderedStringPieces.insert(orderedStringPieces.end(),
coldStringPieces.begin(), coldStringPieces.end());

return orderedStringPieces;
if (orderedPieces.empty() && unorderedPieces.empty())
return;
llvm::stable_sort(orderedPieces, [](const auto &left, const auto &right) {
return std::get<0>(left) < std::get<0>(right);
});
for (auto &[priority, isec, pieceIdx] : orderedPieces)
f(*isec, isec->pieces[pieceIdx], pieceIdx);
// TODO: Add option to order the remaining cstrings for compression
for (auto &[isec, pieceIdx] : unorderedPieces)
f(*isec, isec->pieces[pieceIdx], pieceIdx);
}
28 changes: 19 additions & 9 deletions lld/MachO/SectionPriorities.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
namespace lld::macho {

using SectionPair = std::pair<const InputSection *, const InputSection *>;
using StringPiecePair = std::pair<CStringInputSection *, size_t>;

class PriorityBuilder {
public:
Expand All @@ -29,7 +28,7 @@ class PriorityBuilder {
//
// An order file has one entry per line, in the following format:
//
// <cpu>:<object file>:[<symbol name> | CStringEntryPrefix <cstring hash>]
// <cpu>:<object file>:[<symbol name> | cStringEntryPrefix <cstring hash>]
//
// <cpu> and <object file> are optional.
// If not specified, then that entry tries to match either,
Expand All @@ -42,7 +41,7 @@ class PriorityBuilder {
// lowest-ordered entry (the one nearest to the front of the list.)
//
// or 2) any cstring literal with the given hash, if the entry has the
// CStringEntryPrefix prefix defined below in the file. <cstring hash> is the
// cStringEntryPrefix prefix defined below in the file. <cstring hash> is the
// hash of cstring literal content.
//
// Cstring literals are not symbolized, we can't identify them by name
Expand All @@ -54,6 +53,16 @@ class PriorityBuilder {
// The file can also have line comments that start with '#'.
void parseOrderFile(StringRef path);

/// Call \p f for each string piece in \p inputs. If there are any cstring
/// literals in the orderfile (and \p forceInputOrder is false) then string
/// pieces are ordered by the orderfile. \p computeHash must be set when
/// \p deduplicateLiterals is false because then the string piece hash is not
/// set.
void forEachStringPiece(
ArrayRef<CStringInputSection *> inputs,
std::function<void(CStringInputSection &, StringPiece &, size_t)> f,
bool forceInputOrder = false, bool computeHash = false) const;

// Returns layout priorities for some or all input sections. Sections are laid
// out in decreasing order; that is, a higher priority section will be closer
// to the beginning of its output section.
Expand All @@ -66,8 +75,6 @@ class PriorityBuilder {
// Each section gets assigned the priority of the highest-priority symbol it
// contains.
llvm::DenseMap<const InputSection *, int> buildInputSectionPriorities();
std::vector<StringPiecePair>
buildCStringPriorities(ArrayRef<CStringInputSection *>);

private:
// The symbol with the smallest priority should be ordered first in the output
Expand All @@ -78,13 +85,16 @@ class PriorityBuilder {
int anyObjectFile = 0;
// The priority given to a matching symbol from a particular object file.
llvm::DenseMap<llvm::StringRef, int> objectFiles;
void setPriority(int priority, StringRef objectFile);
int getPriority(const InputFile *f) const;
};
const llvm::StringRef CStringEntryPrefix = "CSTR;";
const llvm::StringRef cStringEntryPrefix = "CSTR;";

std::optional<int> getSymbolPriority(const Defined *sym);
std::optional<int> getSymbolOrCStringPriority(const StringRef key,
InputFile *f);
std::optional<int> getSymbolPriority(const Defined *sym) const;
std::optional<int> getCStringPriority(uint32_t hash,
const InputFile *f) const;
llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
llvm::DenseMap<int32_t, SymbolPriorityEntry> cStringPriorities;
llvm::MapVector<SectionPair, uint64_t> callGraphProfile;
};

Expand Down
62 changes: 30 additions & 32 deletions lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1721,47 +1721,44 @@ void CStringSection::writeTo(uint8_t *buf) const {
// and don't need this alignment. They will be emitted at some arbitrary address
// `A`, but ld64 will treat them as being 16-byte aligned with an offset of
// `16 % A`.
static Align getStringPieceAlignment(const CStringInputSection *isec,
static Align getStringPieceAlignment(const CStringInputSection &isec,
const StringPiece &piece) {
return llvm::Align(1ULL << llvm::countr_zero(isec->align | piece.inSecOff));
return llvm::Align(1ULL << llvm::countr_zero(isec.align | piece.inSecOff));
}

void CStringSection::finalizeContents() {
size = 0;
// TODO: Call buildCStringPriorities() to support cstring ordering when
// deduplication is off, although this may negatively impact build
// performance.
for (CStringInputSection *isec : inputs) {
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
continue;
piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece));
StringRef string = isec->getStringRef(i);
size = piece.outSecOff + string.size() + 1; // account for null terminator
}
priorityBuilder.forEachStringPiece(
inputs,
[&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) {
piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece));
StringRef string = isec.getStringRef(pieceIdx);
size =
piece.outSecOff + string.size() + 1; // account for null terminator
},
/*forceInputOrder=*/false, /*computeHash=*/true);
for (CStringInputSection *isec : inputs)
isec->isFinal = true;
}
}

void DeduplicatedCStringSection::finalizeContents() {
// Find the largest alignment required for each string.
DenseMap<CachedHashStringRef, Align> strToAlignment;
// Used for tail merging only
std::vector<CachedHashStringRef> deduplicatedStrs;
for (const CStringInputSection *isec : inputs) {
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
continue;
auto s = isec->getCachedHashStringRef(i);
assert(isec->align != 0);
auto align = getStringPieceAlignment(isec, piece);
auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
if (config->tailMergeStrings && wasInserted)
deduplicatedStrs.push_back(s);
if (!wasInserted && it->second < align)
it->second = align;
}
}
priorityBuilder.forEachStringPiece(
inputs,
[&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) {
auto s = isec.getCachedHashStringRef(pieceIdx);
assert(isec.align != 0);
auto align = getStringPieceAlignment(isec, piece);
auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
if (config->tailMergeStrings && wasInserted)
deduplicatedStrs.push_back(s);
if (!wasInserted && it->second < align)
it->second = align;
},
/*forceInputOrder=*/true);

// Like lexicographical sort, except we read strings in reverse and take the
// longest string first
Expand Down Expand Up @@ -1801,9 +1798,10 @@ void DeduplicatedCStringSection::finalizeContents() {
// Sort the strings for performance and compression size win, and then
// assign an offset for each string and save it to the corresponding
// StringPieces for easy access.
for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
auto &piece = isec->pieces[i];
auto s = isec->getCachedHashStringRef(i);
priorityBuilder.forEachStringPiece(inputs, [&](CStringInputSection &isec,
StringPiece &piece,
size_t pieceIdx) {
auto s = isec.getCachedHashStringRef(pieceIdx);
// Any string can be tail merged with itself with an offset of zero
uint64_t tailMergeOffset = 0;
auto mergeIt =
Expand All @@ -1829,7 +1827,7 @@ void DeduplicatedCStringSection::finalizeContents() {
stringOffsetMap[tailMergedString] = piece.outSecOff;
assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff));
}
}
});
for (CStringInputSection *isec : inputs)
isec->isFinal = true;
}
Expand Down
Loading