Skip to content

Commit 36bed4d

Browse files
johnno1962jh7370
andauthored
[lld][MachO] Follow-up to use madvise() for threaded file page-in. (#157917)
Further to #147134 (comment), switch to use the madvise() api to page in mmap'd files and 1) All new code compiled in #if LLVM_ENABLE_THREADS is set so it can be seen where the changes were from this PR. 2) The new PR moves to use madvise() instead of the ad-hoc page referencing code I wrote which should avoid SIGSEGVs if the buffer is deallocated. 3) A new property SerialBackgroundQueue().stopAllWork to be used to stop background workers when there is no further call for them. Usually the background "page-in" threads have completed first but it seems with this troublesome test this is not always the case and buffers stored in the static input file cache are being deallocated while being referenced. --------- Co-authored-by: James Henderson <[email protected]>
1 parent 2bef14c commit 36bed4d

File tree

4 files changed

+37
-18
lines changed

4 files changed

+37
-18
lines changed

lld/MachO/Driver.cpp

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "llvm/Object/Archive.h"
4242
#include "llvm/Option/ArgList.h"
4343
#include "llvm/Support/CommandLine.h"
44+
#include "llvm/Support/Debug.h"
4445
#include "llvm/Support/FileSystem.h"
4546
#include "llvm/Support/Parallel.h"
4647
#include "llvm/Support/Path.h"
@@ -53,6 +54,10 @@
5354
#include "llvm/TextAPI/Architecture.h"
5455
#include "llvm/TextAPI/PackedVersion.h"
5556

57+
#if !_WIN32
58+
#include <sys/mman.h>
59+
#endif
60+
5661
using namespace llvm;
5762
using namespace llvm::MachO;
5863
using namespace llvm::object;
@@ -292,12 +297,13 @@ struct DeferredFile {
292297
using DeferredFiles = std::vector<DeferredFile>;
293298

294299
#if LLVM_ENABLE_THREADS
295-
class SerialBackgroundQueue {
300+
class SerialBackgroundWorkQueue {
296301
std::deque<std::function<void()>> queue;
297302
std::thread *running;
298303
std::mutex mutex;
299304

300305
public:
306+
std::atomic_bool stopAllWork = false;
301307
void queueWork(std::function<void()> work) {
302308
mutex.lock();
303309
if (running && queue.empty()) {
@@ -312,7 +318,7 @@ class SerialBackgroundQueue {
312318
queue.emplace_back(std::move(work));
313319
if (!running)
314320
running = new std::thread([&]() {
315-
while (true) {
321+
while (!stopAllWork) {
316322
mutex.lock();
317323
if (queue.empty()) {
318324
mutex.unlock();
@@ -331,6 +337,8 @@ class SerialBackgroundQueue {
331337
}
332338
};
333339

340+
static SerialBackgroundWorkQueue pageInQueue;
341+
334342
// Most input files have been mapped but not yet paged in.
335343
// This code forces the page-ins on multiple threads so
336344
// the process is not stalled waiting on disk buffer i/o.
@@ -339,51 +347,61 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
339347
static const size_t largeArchive = 10 * 1024 * 1024;
340348
#ifndef NDEBUG
341349
using namespace std::chrono;
342-
std::atomic_int numDeferedFilesTouched = 0;
343350
static std::atomic_uint64_t totalBytes = 0;
351+
std::atomic_int numDeferedFilesAdvised = 0;
344352
auto t0 = high_resolution_clock::now();
345353
#endif
346354

347355
auto preloadDeferredFile = [&](const DeferredFile &deferredFile) {
348356
const StringRef &buff = deferredFile.buffer.getBuffer();
349357
if (buff.size() > largeArchive)
350358
return;
359+
351360
#ifndef NDEBUG
352361
totalBytes += buff.size();
353-
numDeferedFilesTouched += 1;
362+
numDeferedFilesAdvised += 1;
354363
#endif
355-
364+
#if _WIN32
356365
// Reference all file's mmap'd pages to load them into memory.
357-
for (const char *page = buff.data(), *end = page + buff.size(); page < end;
358-
page += pageSize) {
366+
for (const char *page = buff.data(), *end = page + buff.size();
367+
page < end && !pageInQueue.stopAllWork; page += pageSize) {
359368
[[maybe_unused]] volatile char t = *page;
360369
(void)t;
361370
}
371+
#else
372+
#define DEBUG_TYPE "lld-madvise"
373+
auto aligned =
374+
llvm::alignDown(reinterpret_cast<uintptr_t>(buff.data()), pageSize);
375+
if (madvise((void *)aligned, buff.size(), MADV_WILLNEED) < 0)
376+
LLVM_DEBUG(llvm::dbgs() << "madvise error: " << strerror(errno) << "\n");
377+
#undef DEBUG_TYPE
378+
#endif
362379
};
380+
363381
{ // Create scope for waiting for the taskGroup
364382
std::atomic_size_t index = 0;
365383
llvm::parallel::TaskGroup taskGroup;
366384
for (int w = 0; w < config->readWorkers; w++)
367385
taskGroup.spawn([&index, &preloadDeferredFile, &deferred]() {
368-
while (true) {
386+
while (!pageInQueue.stopAllWork) {
369387
size_t localIndex = index.fetch_add(1);
370388
if (localIndex >= deferred.size())
371389
break;
372390
preloadDeferredFile(deferred[localIndex]);
373391
}
374392
});
375393
}
394+
376395
#ifndef NDEBUG
377396
auto dt = high_resolution_clock::now() - t0;
378397
if (Process::GetEnv("LLD_MULTI_THREAD_PAGE"))
379398
llvm::dbgs() << "multiThreadedPageIn " << totalBytes << "/"
380-
<< numDeferedFilesTouched << "/" << deferred.size() << "/"
399+
<< numDeferedFilesAdvised << "/" << deferred.size() << "/"
381400
<< duration_cast<milliseconds>(dt).count() / 1000. << "\n";
382401
#endif
383402
}
384403

385404
static void multiThreadedPageIn(const DeferredFiles &deferred) {
386-
static SerialBackgroundQueue pageInQueue;
387405
pageInQueue.queueWork([=]() {
388406
DeferredFiles files = deferred;
389407
multiThreadedPageInBackground(files);
@@ -489,7 +507,7 @@ static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
489507
continue;
490508
}
491509

492-
if (archiveContents)
510+
if (config->readWorkers && archiveContents)
493511
archiveContents->push_back({path, isLazy, *mb});
494512
if (!hasObjCSection(*mb))
495513
continue;
@@ -1447,6 +1465,8 @@ static void createFiles(const InputArgList &args) {
14471465
multiThreadedPageIn(archiveContents);
14481466
for (auto *archive : archives)
14491467
archive->addLazySymbols();
1468+
1469+
pageInQueue.stopAllWork = true;
14501470
}
14511471
#endif
14521472
}
@@ -1845,8 +1865,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
18451865
"'");
18461866
config->readWorkers = workers;
18471867
#else
1848-
error(arg->getSpelling() +
1849-
": option unavailable because lld was not built with thread support");
1868+
warn(arg->getSpelling() +
1869+
": option unavailable because lld was not built with thread support");
18501870
#endif
18511871
}
18521872
if (auto *arg = args.getLastArg(OPT_threads_eq)) {

lld/MachO/InputFiles.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,8 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
217217
if (entry != cachedReads.end())
218218
return entry->second;
219219

220-
ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
220+
ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
221+
MemoryBuffer::getFile(path, false, /*RequiresNullTerminator=*/false);
221222
if (std::error_code ec = mbOrErr.getError()) {
222223
error("cannot open " + path + ": " + ec.message());
223224
return std::nullopt;

lld/test/MachO/read-workers.s

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
# REQUIRES: x86 && thread_support
2-
## Sometimes fails, particularly in an ASAN build, do not run until
3-
## https://github.com/llvm/llvm-project/pull/157917 addresses the cause.
4-
# UNSUPPORTED: target={{.*}}
52
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
63

74
## A non-negative integer is allowed.

llvm/lib/Object/Archive.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,8 @@ Expected<StringRef> Archive::Child::getBuffer() const {
582582
if (!FullNameOrErr)
583583
return FullNameOrErr.takeError();
584584
const std::string &FullName = *FullNameOrErr;
585-
ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
585+
ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
586+
MemoryBuffer::getFile(FullName, false, /*RequiresNullTerminator=*/false);
586587
if (std::error_code EC = Buf.getError())
587588
return errorCodeToError(EC);
588589
Parent->ThinBuffers.push_back(std::move(*Buf));

0 commit comments

Comments
 (0)