Skip to content

Commit 3cab63b

Browse files
committed
[Support] Use mmap for stdin when possible in getSTDIN
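Enable memory-mapping (mmap) for stdin when input is redirected from a regular file or block device (e.g., ./prog < file), falling back to reading the stream into a buffer if the mapping cannot be used. This can improve performance when processing large files, as tools like llvm-strings iterate over the entire input buffer. Also refactor the getNextChar implementations in LLLexer, TGLexer, the PDLL Lexer, and FormatLexer to detect end of input by comparing against the end of the buffer rather than relying on MemoryBuffer's null terminator, which ensures relevant test cases continue to pass.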
1 parent 05a3f76 commit 3cab63b

File tree

7 files changed

+52
-56
lines changed

7 files changed

+52
-56
lines changed

llvm/include/llvm/Support/MemoryBuffer.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ class LLVM_ABI MemoryBuffer {
9797
/// least the specified alignment.
9898
static ErrorOr<std::unique_ptr<MemoryBuffer>>
9999
getFile(const Twine &Filename, bool IsText = false,
100-
bool RequiresNullTerminator = true, bool IsVolatile = false,
100+
bool RequiresNullTerminator = false, bool IsVolatile = false,
101101
std::optional<Align> Alignment = std::nullopt);
102102

103103
/// Read all of the specified file into a MemoryBuffer as a stream
@@ -125,31 +125,32 @@ class LLVM_ABI MemoryBuffer {
125125
/// least the specified alignment.
126126
static ErrorOr<std::unique_ptr<MemoryBuffer>>
127127
getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
128-
bool RequiresNullTerminator = true, bool IsVolatile = false,
128+
bool RequiresNullTerminator = false, bool IsVolatile = false,
129129
std::optional<Align> Alignment = std::nullopt);
130130

131131
/// Open the specified memory range as a MemoryBuffer. Note that InputData
132132
/// must be null terminated if RequiresNullTerminator is true.
133133
static std::unique_ptr<MemoryBuffer>
134134
getMemBuffer(StringRef InputData, StringRef BufferName = "",
135-
bool RequiresNullTerminator = true);
135+
bool RequiresNullTerminator = false);
136136

137137
static std::unique_ptr<MemoryBuffer>
138-
getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator = true);
138+
getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator = false);
139139

140140
/// Open the specified memory range as a MemoryBuffer, copying the contents
141141
/// and taking ownership of it. InputData does not have to be null terminated.
142142
static std::unique_ptr<MemoryBuffer>
143143
getMemBufferCopy(StringRef InputData, const Twine &BufferName = "");
144144

145145
/// Read all of stdin into a file buffer, and return it.
146-
static ErrorOr<std::unique_ptr<MemoryBuffer>> getSTDIN();
146+
static ErrorOr<std::unique_ptr<MemoryBuffer>>
147+
getSTDIN(bool RequiresNullTerminator = false);
147148

148149
/// Open the specified file as a MemoryBuffer, or open stdin if the Filename
149150
/// is "-".
150151
static ErrorOr<std::unique_ptr<MemoryBuffer>>
151152
getFileOrSTDIN(const Twine &Filename, bool IsText = false,
152-
bool RequiresNullTerminator = true,
153+
bool RequiresNullTerminator = false,
153154
std::optional<Align> Alignment = std::nullopt);
154155

155156
/// Map a subrange of the specified file as a MemoryBuffer.

llvm/lib/AsmParser/LLLexer.cpp

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -175,19 +175,10 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
175175
}
176176

177177
int LLLexer::getNextChar() {
178-
char CurChar = *CurPtr++;
179-
switch (CurChar) {
180-
default: return (unsigned char)CurChar;
181-
case 0:
182-
// A nul character in the stream is either the end of the current buffer or
183-
// a random nul in the file. Disambiguate that here.
184-
if (CurPtr-1 != CurBuf.end())
185-
return 0; // Just whitespace.
186-
187-
// Otherwise, return end of file.
188-
--CurPtr; // Another call to lex will return EOF again.
178+
if (CurPtr == CurBuf.end())
189179
return EOF;
190-
}
180+
181+
return *CurPtr++;
191182
}
192183

193184
lltok::Kind LLLexer::LexToken() {

llvm/lib/Support/MemoryBuffer.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ MemoryBuffer::getFileOrSTDIN(const Twine &Filename, bool IsText,
165165
StringRef NameRef = Filename.toStringRef(NameBuf);
166166

167167
if (NameRef == "-")
168-
return getSTDIN();
168+
return getSTDIN(RequiresNullTerminator);
169169
return getFile(Filename, IsText, RequiresNullTerminator,
170170
/*IsVolatile=*/false, Alignment);
171171
}
@@ -567,12 +567,33 @@ ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getOpenFileSlice(
567567
IsVolatile, Alignment);
568568
}
569569

570-
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
570+
ErrorOr<std::unique_ptr<MemoryBuffer>>
571+
MemoryBuffer::getSTDIN(bool RequiresNullTerminator) {
571572
// Read in all of the data from stdin, we cannot mmap stdin.
572-
//
573-
// FIXME: That isn't necessarily true, we should try to mmap stdin and
574-
// fallback if it fails.
575573
sys::ChangeStdinMode(sys::fs::OF_Text);
574+
std::error_code EC;
575+
sys::fs::file_type Type;
576+
sys::fs::file_status Status;
577+
EC = sys::fs::status(sys::fs::getStdinHandle(), Status);
578+
if (EC)
579+
return EC;
580+
581+
Type = Status.type();
582+
// If the FD is regular file or block file,
583+
// we try to create a mmap buffer first.
584+
// If failed, rollback to read and copy.
585+
if ((Type == sys::fs::file_type::regular_file ||
586+
Type == sys::fs::file_type::block_file) &&
587+
shouldUseMmap(sys::fs::getStdinHandle(), Status.getSize(),
588+
Status.getSize(), 0, RequiresNullTerminator,
589+
sys::Process::getPageSizeEstimate(), false)) {
590+
std::unique_ptr<MemoryBuffer> Result(
591+
new (NamedBufferAlloc("<stdin>")) MemoryBufferMMapFile<MemoryBuffer>(
592+
RequiresNullTerminator, sys::fs::getStdinHandle(), Status.getSize(),
593+
0, EC));
594+
if (!EC && (!RequiresNullTerminator || *Result->getBufferEnd() == '\0'))
595+
return std::move(Result);
596+
}
576597

577598
return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
578599
}

llvm/lib/TableGen/TGLexer.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -138,18 +138,14 @@ bool TGLexer::processEOF() {
138138
}
139139

140140
int TGLexer::getNextChar() {
141+
if (CurPtr == CurBuf.end())
142+
return EOF;
141143
char CurChar = *CurPtr++;
142144
switch (CurChar) {
143145
default:
144146
return (unsigned char)CurChar;
145147

146148
case 0: {
147-
// A NUL character in the stream is either the end of the current buffer or
148-
// a spurious NUL in the file. Disambiguate that here.
149-
if (CurPtr - 1 == CurBuf.end()) {
150-
--CurPtr; // Arrange for another call to return EOF again.
151-
return EOF;
152-
}
153149
PrintError(getLoc(),
154150
"NUL character is invalid in source; treated as space");
155151
return ' ';
@@ -160,7 +156,8 @@ int TGLexer::getNextChar() {
160156
// Handle the newline character by ignoring it and incrementing the line
161157
// count. However, be careful about 'dos style' files with \n\r in them.
162158
// Only treat a \n\r or \r\n as a single line.
163-
if ((*CurPtr == '\n' || (*CurPtr == '\r')) && *CurPtr != CurChar)
159+
if (CurPtr != CurBuf.end() && (*CurPtr == '\n' || (*CurPtr == '\r')) &&
160+
*CurPtr != CurChar)
164161
++CurPtr; // Eat the two char newline sequence.
165162
return '\n';
166163
}

llvm/unittests/AsmParser/AsmParserTest.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ TEST(AsmParserTest, NonNullTerminatedInput) {
3939
LLVMContext Ctx;
4040
StringRef Source = "; Empty module \n\1\2";
4141
SMDiagnostic Error;
42-
std::unique_ptr<Module> Mod;
43-
EXPECT_DEATH(Mod = parseAssemblyString(Source.substr(0, Source.size() - 2),
44-
Error, Ctx),
45-
"Buffer is not null terminated!");
42+
std::unique_ptr<Module> Mod =
43+
parseAssemblyString(Source.substr(0, Source.size() - 2), Error, Ctx);
44+
EXPECT_TRUE(Mod != nullptr);
45+
EXPECT_TRUE(Error.getMessage().empty());
4646
}
4747

4848
#endif

mlir/lib/Tools/PDLL/Parser/Lexer.cpp

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -132,26 +132,19 @@ Token Lexer::emitError(const char *loc, const Twine &msg) {
132132
}
133133

134134
int Lexer::getNextChar() {
135+
if (curPtr == curBuffer.end())
136+
return EOF;
135137
char curChar = *curPtr++;
136138
switch (curChar) {
137139
default:
138140
return static_cast<unsigned char>(curChar);
139-
case 0: {
140-
// A nul character in the stream is either the end of the current buffer
141-
// or a random nul in the file. Disambiguate that here.
142-
if (curPtr - 1 != curBuffer.end())
143-
return 0;
144-
145-
// Otherwise, return end of file.
146-
--curPtr;
147-
return EOF;
148-
}
149141
case '\n':
150142
case '\r':
151143
// Handle the newline character by ignoring it and incrementing the line
152144
// count. However, be careful about 'dos style' files with \n\r in them.
153145
// Only treat a \n\r or \r\n as a single line.
154-
if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar)
146+
if (curPtr != curBuffer.end() && (*curPtr == '\n' || (*curPtr == '\r')) &&
147+
*curPtr != curChar)
155148
++curPtr;
156149
return '\n';
157150
}

mlir/tools/mlir-tblgen/FormatGen.cpp

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,26 +53,19 @@ FormatToken FormatLexer::emitErrorAndNote(SMLoc loc, const Twine &msg,
5353
}
5454

5555
int FormatLexer::getNextChar() {
56+
if (curPtr == curBuffer.end())
57+
return EOF;
5658
char curChar = *curPtr++;
5759
switch (curChar) {
5860
default:
5961
return (unsigned char)curChar;
60-
case 0: {
61-
// A nul character in the stream is either the end of the current buffer or
62-
// a random nul in the file. Disambiguate that here.
63-
if (curPtr - 1 != curBuffer.end())
64-
return 0;
65-
66-
// Otherwise, return end of file.
67-
--curPtr;
68-
return EOF;
69-
}
7062
case '\n':
7163
case '\r':
7264
// Handle the newline character by ignoring it and incrementing the line
7365
// count. However, be careful about 'dos style' files with \n\r in them.
7466
// Only treat a \n\r or \r\n as a single line.
75-
if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar)
67+
if (curPtr != curBuffer.end() && (*curPtr == '\n' || (*curPtr == '\r')) &&
68+
*curPtr != curChar)
7669
++curPtr;
7770
return '\n';
7871
}

0 commit comments

Comments
 (0)