Skip to content

Commit 6db410e

Browse files
committed
Use mmap for stdin when possible in getSTDIN
Enable memory-mapping (mmap) for stdin when input is redirected (e.g., ./prog < file). This can improve performance when processing large files, as tools like llvm-strings iterate over the entire input buffer. Also refactored LLLexer::getNextChar to avoid relying on MemoryBuffer for null termination checks, which ensures relevant test cases continue to pass.
1 parent 05a3f76 commit 6db410e

File tree

5 files changed

+42
-29
lines changed

5 files changed

+42
-29
lines changed

llvm/include/llvm/Support/MemoryBuffer.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ class LLVM_ABI MemoryBuffer {
9797
/// least the specified alignment.
9898
static ErrorOr<std::unique_ptr<MemoryBuffer>>
9999
getFile(const Twine &Filename, bool IsText = false,
100-
bool RequiresNullTerminator = true, bool IsVolatile = false,
100+
bool RequiresNullTerminator = false, bool IsVolatile = false,
101101
std::optional<Align> Alignment = std::nullopt);
102102

103103
/// Read all of the specified file into a MemoryBuffer as a stream
@@ -125,31 +125,32 @@ class LLVM_ABI MemoryBuffer {
125125
/// least the specified alignment.
126126
static ErrorOr<std::unique_ptr<MemoryBuffer>>
127127
getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
128-
bool RequiresNullTerminator = true, bool IsVolatile = false,
128+
bool RequiresNullTerminator = false, bool IsVolatile = false,
129129
std::optional<Align> Alignment = std::nullopt);
130130

131131
/// Open the specified memory range as a MemoryBuffer. Note that InputData
132132
/// must be null terminated if RequiresNullTerminator is true.
133133
static std::unique_ptr<MemoryBuffer>
134134
getMemBuffer(StringRef InputData, StringRef BufferName = "",
135-
bool RequiresNullTerminator = true);
135+
bool RequiresNullTerminator = false);
136136

137137
static std::unique_ptr<MemoryBuffer>
138-
getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator = true);
138+
getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator = false);
139139

140140
/// Open the specified memory range as a MemoryBuffer, copying the contents
141141
/// and taking ownership of it. InputData does not have to be null terminated.
142142
static std::unique_ptr<MemoryBuffer>
143143
getMemBufferCopy(StringRef InputData, const Twine &BufferName = "");
144144

145145
/// Read all of stdin into a file buffer, and return it.
146-
static ErrorOr<std::unique_ptr<MemoryBuffer>> getSTDIN();
146+
static ErrorOr<std::unique_ptr<MemoryBuffer>>
147+
getSTDIN(bool RequiresNullTerminator = false);
147148

148149
/// Open the specified file as a MemoryBuffer, or open stdin if the Filename
149150
/// is "-".
150151
static ErrorOr<std::unique_ptr<MemoryBuffer>>
151152
getFileOrSTDIN(const Twine &Filename, bool IsText = false,
152-
bool RequiresNullTerminator = true,
153+
bool RequiresNullTerminator = false,
153154
std::optional<Align> Alignment = std::nullopt);
154155

155156
/// Map a subrange of the specified file as a MemoryBuffer.

llvm/lib/AsmParser/LLLexer.cpp

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -175,19 +175,10 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
175175
}
176176

177177
int LLLexer::getNextChar() {
178-
char CurChar = *CurPtr++;
179-
switch (CurChar) {
180-
default: return (unsigned char)CurChar;
181-
case 0:
182-
// A nul character in the stream is either the end of the current buffer or
183-
// a random nul in the file. Disambiguate that here.
184-
if (CurPtr-1 != CurBuf.end())
185-
return 0; // Just whitespace.
186-
187-
// Otherwise, return end of file.
188-
--CurPtr; // Another call to lex will return EOF again.
178+
if (CurPtr == CurBuf.end())
189179
return EOF;
190-
}
180+
181+
return *CurPtr++;
191182
}
192183

193184
lltok::Kind LLLexer::LexToken() {

llvm/lib/Support/MemoryBuffer.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ MemoryBuffer::getFileOrSTDIN(const Twine &Filename, bool IsText,
165165
StringRef NameRef = Filename.toStringRef(NameBuf);
166166

167167
if (NameRef == "-")
168-
return getSTDIN();
168+
return getSTDIN(RequiresNullTerminator);
169169
return getFile(Filename, IsText, RequiresNullTerminator,
170170
/*IsVolatile=*/false, Alignment);
171171
}
@@ -372,7 +372,7 @@ static bool shouldUseMmap(sys::fs::file_t FD,
372372

373373
// We don't use mmap for small files because this can severely fragment our
374374
// address space.
375-
if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
375+
if (MapSize < 64 * (size_t)PageSize || MapSize < (unsigned)PageSize)
376376
return false;
377377

378378
if (!RequiresNullTerminator)
@@ -567,12 +567,33 @@ ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getOpenFileSlice(
567567
IsVolatile, Alignment);
568568
}
569569

570-
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
570+
ErrorOr<std::unique_ptr<MemoryBuffer>>
571+
MemoryBuffer::getSTDIN(bool RequiresNullTerminator) {
571572
// Read in all of the data from stdin, we cannot mmap stdin.
572-
//
573-
// FIXME: That isn't necessarily true, we should try to mmap stdin and
574-
// fallback if it fails.
575573
sys::ChangeStdinMode(sys::fs::OF_Text);
574+
std::error_code EC;
575+
sys::fs::file_type Type;
576+
sys::fs::file_status Status;
577+
EC = sys::fs::status(sys::fs::getStdinHandle(), Status);
578+
if (EC)
579+
return EC;
580+
581+
Type = Status.type();
582+
// If the FD is regular file or block file,
583+
// we try to create a mmap buffer first.
584+
// If failed, rollback to read and copy.
585+
if ((Type == sys::fs::file_type::regular_file ||
586+
Type == sys::fs::file_type::block_file) &&
587+
shouldUseMmap(sys::fs::getStdinHandle(), Status.getSize(),
588+
Status.getSize(), 0, RequiresNullTerminator,
589+
sys::Process::getPageSizeEstimate(), false)) {
590+
std::unique_ptr<MemoryBuffer> Result(
591+
new (NamedBufferAlloc("<stdin>")) MemoryBufferMMapFile<MemoryBuffer>(
592+
RequiresNullTerminator, sys::fs::getStdinHandle(), Status.getSize(),
593+
0, EC));
594+
if (!EC && (!RequiresNullTerminator || *Result->getBufferEnd() == '\0'))
595+
return std::move(Result);
596+
}
576597

577598
return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
578599
}

llvm/unittests/AsmParser/AsmParserTest.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ TEST(AsmParserTest, NonNullTerminatedInput) {
3939
LLVMContext Ctx;
4040
StringRef Source = "; Empty module \n\1\2";
4141
SMDiagnostic Error;
42-
std::unique_ptr<Module> Mod;
43-
EXPECT_DEATH(Mod = parseAssemblyString(Source.substr(0, Source.size() - 2),
44-
Error, Ctx),
45-
"Buffer is not null terminated!");
42+
std::unique_ptr<Module> Mod =
43+
parseAssemblyString(Source.substr(0, Source.size() - 2), Error, Ctx);
44+
EXPECT_TRUE(Mod != nullptr);
45+
EXPECT_TRUE(Error.getMessage().empty());
4646
}
4747

4848
#endif

llvm/unittests/Support/MemoryBufferTest.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ TEST_F(MemoryBufferTest, mmapVolatileNoNull) {
417417
raw_fd_ostream OF(FD, true);
418418
// Create a file large enough to mmap. 4 pages should be enough.
419419
unsigned PageSize = sys::Process::getPageSizeEstimate();
420-
unsigned FileWrites = (PageSize * 4) / 8;
420+
unsigned FileWrites = (PageSize * 64);
421421
for (unsigned i = 0; i < FileWrites; ++i)
422422
OF << "01234567";
423423
OF.close();

0 commit comments

Comments
 (0)