Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lld/MachO/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ struct Configuration {
bool callGraphProfileSort = false;
llvm::StringRef printSymbolOrder;

llvm::StringRef cStringOrderFilePath;
llvm::StringRef irpgoProfilePath;
bool bpStartupFunctionSort = false;
bool bpCompressionSortStartupFunctions = false;
Expand Down
21 changes: 12 additions & 9 deletions lld/MachO/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,15 +337,15 @@ static InputFile *addFile(StringRef path, LoadType loadType,
for (const object::Archive::Child &c : file->getArchive().children(e)) {
StringRef reason;
switch (loadType) {
case LoadType::LCLinkerOption:
reason = "LC_LINKER_OPTION";
break;
case LoadType::CommandLineForce:
reason = "-force_load";
break;
case LoadType::CommandLine:
reason = "-all_load";
break;
case LoadType::LCLinkerOption:
reason = "LC_LINKER_OPTION";
break;
case LoadType::CommandLineForce:
reason = "-force_load";
break;
case LoadType::CommandLine:
reason = "-all_load";
break;
}
if (Error e = file->fetch(c, reason)) {
if (config->warnThinArchiveMissingMembers)
Expand Down Expand Up @@ -2178,6 +2178,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
StringRef orderFile = args.getLastArgValue(OPT_order_file);
if (!orderFile.empty())
priorityBuilder.parseOrderFile(orderFile);
config->cStringOrderFilePath = args.getLastArgValue(OPT_order_file_cstring);
if (!config->cStringOrderFilePath.empty())
priorityBuilder.parseOrderFileCString(config->cStringOrderFilePath);

referenceStubBinder();

Expand Down
4 changes: 4 additions & 0 deletions lld/MachO/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ def order_file : Separate<["-"], "order_file">,
MetaVarName<"<file>">,
HelpText<"Layout functions and data according to specification in <file>">,
Group<grp_opts>;
// Takes the path of a cstring order file. The driver stores the value in
// config->cStringOrderFilePath and, when it is non-empty, hands it to
// PriorityBuilder::parseOrderFileCString.
def order_file_cstring : Separate<["-"], "order_file_cstring">,
MetaVarName<"<file>">,
HelpText<"Layout cstrings according to specification in <file>">,
Group<grp_opts>;
def no_order_inits : Flag<["-"], "no_order_inits">,
HelpText<"Disable default reordering of initializer and terminator functions">,
Flags<[HelpHidden]>,
Expand Down
71 changes: 71 additions & 0 deletions lld/MachO/SectionPriorities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,3 +388,74 @@ macho::PriorityBuilder::buildInputSectionPriorities() {

return sectionPriorities;
}

void macho::PriorityBuilder::parseOrderFileCString(StringRef path) {
std::optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer) {
error("Could not read cstring order file at " + path);
return;
}
MemoryBufferRef mbref = *buffer;
int priority = std::numeric_limits<int>::min();
for (StringRef line : args::getLines(mbref)) {
if (line.empty())
continue;
uint32_t hash = 0;
if (!to_integer(line, hash))
continue;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to just silently ignore these errors? Is it valid to just continue parsing once we've encountered such a scenario?
Could this ever be a hex value?

Copy link
Contributor Author

@SharonXSharon SharonXSharon May 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I guess we don't want to exit just because we encounter an invalid line?
The hash is indeed a hex value; to_integer should return true when parsing a hex number. The hash we are using is the existing hash that lld uses for cstring literal dedup,
in

uint32_t hash = xxh3_64bits(str) & 0x7fffffff;

auto it = cStringPriorities.find(hash);
if (it == cStringPriorities.end())
cStringPriorities[hash] = ++priority;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This performs the lookup twice, which could be a noticeable performance hit. Instead:

auto [it, inserted] = cStringPriorities.try_emplace(hash, 0);
// If actually inserted, update with the new priority
if (inserted)
  it->second = ++priority;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alx32 if we want to use try_emplace() we can do this change if we are ok with setting the priority first and then incrementing

Suggested change
auto it = cStringPriorities.find(hash);
if (it == cStringPriorities.end())
cStringPriorities[hash] = ++priority;
auto [it, wasInserted] = cStringPriorities.try_emplace(hash, priority);
if (wasInserted)
++priority;

else
assert(it->second <= priority);
}
}

std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities(
ArrayRef<CStringInputSection *> inputs) {
std::vector<StringPiecePair> orderedStringPieces;
if (config->cStringOrderFilePath.empty()) {
for (CStringInputSection *isec : inputs) {
for (const auto &[stringPieceIdx, piece] :
llvm::enumerate(isec->pieces)) {
if (!piece.live)
continue;
orderedStringPieces.emplace_back(isec, stringPieceIdx);
}
}
return orderedStringPieces;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If cStringOrderFilePath is empty, then cStringPriorities will be empty. So I think you can just remove this code entirely.

Suggested change
std::vector<StringPiecePair> orderedStringPieces;
if (config->cStringOrderFilePath.empty()) {
for (CStringInputSection *isec : inputs) {
for (const auto &[stringPieceIdx, piece] :
llvm::enumerate(isec->pieces)) {
if (!piece.live)
continue;
orderedStringPieces.emplace_back(isec, stringPieceIdx);
}
}
return orderedStringPieces;
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I was thinking that adding this early return would avoid the unnecessary checks of cStringPriorities when cStringOrderFilePath is empty, but perhaps those operations aren't really expensive?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DenseMap::find() returns pretty quickly if it's empty. I don't think it would impact performance

template <typename LookupKeyT> BucketT *doFind(const LookupKeyT &Val) {
BucketT *BucketsPtr = getBuckets();
const unsigned NumBuckets = getNumBuckets();
if (NumBuckets == 0)
return nullptr;


// Split the input strings into hot and cold sets.
// Order the hot set based on -order_file_cstring for performance improvement.
// TODO: Order cold set of cstrings for compression via BP.
std::vector<std::pair<int, StringPiecePair>>
hotStringPrioritiesAndStringPieces;
std::vector<StringPiecePair> coldStringPieces;

for (CStringInputSection *isec : inputs) {
for (const auto &[stringPieceIdx, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
continue;

auto it = cStringPriorities.find(piece.hash);
if (it != cStringPriorities.end())
hotStringPrioritiesAndStringPieces.emplace_back(
it->second, std::make_pair(isec, stringPieceIdx));
else
coldStringPieces.emplace_back(isec, stringPieceIdx);
}
}

// Order hot set for perf
llvm::stable_sort(hotStringPrioritiesAndStringPieces);
for (auto &[priority, stringPiecePair] : hotStringPrioritiesAndStringPieces)
orderedStringPieces.push_back(stringPiecePair);

// TODO: Order cold set for compression

orderedStringPieces.insert(orderedStringPieces.end(),
coldStringPieces.begin(), coldStringPieces.end());

return orderedStringPieces;
}
20 changes: 20 additions & 0 deletions lld/MachO/SectionPriorities.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
namespace lld::macho {

using SectionPair = std::pair<const InputSection *, const InputSection *>;
using StringPiecePair = std::pair<CStringInputSection *, size_t>;

class PriorityBuilder {
public:
Expand Down Expand Up @@ -55,6 +56,23 @@ class PriorityBuilder {
// contains.
llvm::DenseMap<const InputSection *, int> buildInputSectionPriorities();

// Reads the cstring order file at `path` into cStringPriorities.
// A cstring order file has one entry per line, in the following format:
//
// <hash of cstring literal content>
//
// Empty lines and lines that do not parse as an integer are skipped. Hashes
// that appear earlier in the file receive a smaller priority value; when a
// hash is listed more than once, its first occurrence determines the
// priority. An error is reported if the file cannot be read.
//
// Cstring literals are not symbolized, so we can't identify them by name.
// However, cstrings are deduplicated, hence unique, so we use the hash of
// the content of cstring literals to identify them and assign priority to it.
// We use the same hash as used in StringPiece, i.e. 31 bit:
// xxh3_64bits(string) & 0x7fffffff
//
// Additionally, given they are deduplicated and unique, we don't need to know
// which object file they are from.
void parseOrderFileCString(StringRef path);
// Returns the live string pieces of the given sections, each as a
// (section, piece index) pair, in the order they should be laid out.
// Pieces whose hash has an entry in cStringPriorities come first, sorted by
// ascending priority; all remaining live pieces follow in input order.
// When no cstring order file was given, every live piece is returned in
// input order.
std::vector<StringPiecePair>
buildCStringPriorities(ArrayRef<CStringInputSection *>);

private:
// The symbol with the smallest priority should be ordered first in the output
// section (modulo input section contiguity constraints).
Expand All @@ -68,6 +86,8 @@ class PriorityBuilder {

std::optional<int> getSymbolPriority(const Defined *sym);
llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
/// A map from cstring literal hashes to priorities
llvm::DenseMap<uint32_t, int> cStringPriorities;
llvm::MapVector<SectionPair, uint64_t> callGraphProfile;
};

Expand Down
58 changes: 29 additions & 29 deletions lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "MachOStructs.h"
#include "ObjC.h"
#include "OutputSegment.h"
#include "SectionPriorities.h"
#include "SymbolTable.h"
#include "Symbols.h"

Expand Down Expand Up @@ -1766,26 +1767,25 @@ void DeduplicatedCStringSection::finalizeContents() {
}
}

// Assign an offset for each string and save it to the corresponding
// Sort the strings for performance and compression size win, and then
// assign an offset for each string and save it to the corresponding
// StringPieces for easy access.
for (CStringInputSection *isec : inputs) {
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
continue;
auto s = isec->getCachedHashStringRef(i);
auto it = stringOffsetMap.find(s);
assert(it != stringOffsetMap.end());
StringOffset &offsetInfo = it->second;
if (offsetInfo.outSecOff == UINT64_MAX) {
offsetInfo.outSecOff =
alignToPowerOf2(size, 1ULL << offsetInfo.trailingZeros);
size =
offsetInfo.outSecOff + s.size() + 1; // account for null terminator
}
piece.outSecOff = offsetInfo.outSecOff;
for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
auto &piece = isec->pieces[i];
auto s = isec->getCachedHashStringRef(i);
auto it = stringOffsetMap.find(s);
assert(it != stringOffsetMap.end());
lld::macho::DeduplicatedCStringSection::StringOffset &offsetInfo =
it->second;
if (offsetInfo.outSecOff == UINT64_MAX) {
offsetInfo.outSecOff =
alignToPowerOf2(size, 1ULL << offsetInfo.trailingZeros);
size = offsetInfo.outSecOff + s.size() + 1; // account for null terminator
}
isec->isFinal = true;
piece.outSecOff = offsetInfo.outSecOff;
}
for (CStringInputSection *isec : inputs)
isec->isFinal = true;
}

void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
Expand Down Expand Up @@ -1908,18 +1908,18 @@ ObjCImageInfoSection::parseImageInfo(const InputFile *file) {

static std::string swiftVersionString(uint8_t version) {
switch (version) {
case 1:
return "1.0";
case 2:
return "1.1";
case 3:
return "2.0";
case 4:
return "3.0";
case 5:
return "4.0";
default:
return ("0x" + Twine::utohexstr(version)).str();
case 1:
return "1.0";
case 2:
return "1.1";
case 3:
return "2.0";
case 4:
return "3.0";
case 5:
return "4.0";
default:
return ("0x" + Twine::utohexstr(version)).str();
}
}

Expand Down
Loading