From ecf591c76631957bea7f8d62191d4b99fe8fc4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Tue, 29 Jul 2025 14:41:18 +0000 Subject: [PATCH 01/33] Update CurLineNum anc CurColNum in sync with movement in text --- llvm/include/llvm/AsmParser/LLLexer.h | 2 + llvm/lib/AsmParser/LLLexer.cpp | 97 +++++++++++++++++---------- 2 files changed, 63 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index 501a7aefccd7f..5f6e32a4bf5e1 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -94,6 +94,8 @@ namespace llvm { lltok::Kind LexToken(); int getNextChar(); + const char *skipNChars(unsigned N); + void advancePositionTo(const char *Ptr); void SkipLineComment(); bool SkipCComment(); lltok::Kind ReadString(lltok::Kind kind); diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 520c6a00a9c07..7cefd4f6b4935 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -190,6 +190,23 @@ int LLLexer::getNextChar() { } } +const char *LLLexer::skipNChars(unsigned N) { + while (N--) + getNextChar(); + return CurPtr; +} + +void LLLexer::advancePositionTo(const char *Ptr) { + while (CurPtr != Ptr) { + // FIXME: Assumes that if moving back, we stay in that line + if (CurPtr > Ptr) { + --CurPtr; + --CurColNum; + } else + getNextChar(); + } +} + lltok::Kind LLLexer::LexToken() { while (true) { TokStart = CurPtr; @@ -216,12 +233,12 @@ lltok::Kind LLLexer::LexToken() { case '"': return LexQuote(); case '.': if (const char *Ptr = isLabelTail(CurPtr)) { - CurPtr = Ptr; + advancePositionTo(Ptr); StrVal.assign(TokStart, CurPtr-1); return lltok::LabelStr; } if (CurPtr[0] == '.' 
&& CurPtr[1] == '.') { - CurPtr += 2; + skipNChars(2); return lltok::dotdotdot; } return lltok::Error; @@ -299,14 +316,14 @@ lltok::Kind LLLexer::LexAt() { lltok::Kind LLLexer::LexDollar() { if (const char *Ptr = isLabelTail(TokStart)) { - CurPtr = Ptr; + advancePositionTo(Ptr); StrVal.assign(TokStart, CurPtr - 1); return lltok::LabelStr; } // Handle DollarStringConstant: $\"[^\"]*\" if (CurPtr[0] == '"') { - ++CurPtr; + getNextChar(); while (true) { int CurChar = getNextChar(); @@ -358,11 +375,11 @@ bool LLLexer::ReadVarName() { if (isalpha(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') { - ++CurPtr; + getNextChar(); while (isalnum(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') - ++CurPtr; + getNextChar(); StrVal.assign(NameStart, CurPtr); return true; @@ -376,7 +393,8 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { if (!isdigit(static_cast(CurPtr[0]))) return lltok::Error; - for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) + for (getNextChar(); isdigit(static_cast(CurPtr[0])); + getNextChar()) /*empty*/; uint64_t Val = atoull(TokStart + 1, CurPtr); @@ -389,7 +407,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { // Handle StringConstant: \"[^\"]*\" if (CurPtr[0] == '"') { - ++CurPtr; + getNextChar(); while (true) { int CurChar = getNextChar(); @@ -435,7 +453,7 @@ lltok::Kind LLLexer::LexQuote() { return kind; if (CurPtr[0] == ':') { - ++CurPtr; + getNextChar(); if (StringRef(StrVal).contains(0)) { LexError("NUL character is not allowed in names"); kind = lltok::Error; @@ -455,11 +473,11 @@ lltok::Kind LLLexer::LexExclaim() { if (isalpha(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' 
|| CurPtr[0] == '_' || CurPtr[0] == '\\') { - ++CurPtr; + getNextChar(); while (isalnum(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') - ++CurPtr; + getNextChar(); StrVal.assign(TokStart+1, CurPtr); // Skip ! UnEscapeLexed(StrVal); @@ -495,7 +513,7 @@ lltok::Kind LLLexer::LexIdentifier() { const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar; const char *KeywordEnd = nullptr; - for (; isLabelChar(*CurPtr); ++CurPtr) { + for (; isLabelChar(*CurPtr); getNextChar()) { // If we decide this is an integer, remember the end of the sequence. if (!IntEnd && !isdigit(static_cast(*CurPtr))) IntEnd = CurPtr; @@ -507,7 +525,8 @@ lltok::Kind LLLexer::LexIdentifier() { // If we stopped due to a colon, unless we were directed to ignore it, // this really is a label. if (!IgnoreColonInIdentifiers && *CurPtr == ':') { - StrVal.assign(StartChar-1, CurPtr++); + StrVal.assign(StartChar - 1, CurPtr); + getNextChar(); return lltok::LabelStr; } @@ -515,7 +534,7 @@ lltok::Kind LLLexer::LexIdentifier() { // return it. if (!IntEnd) IntEnd = CurPtr; if (IntEnd != StartChar) { - CurPtr = IntEnd; + advancePositionTo(IntEnd); uint64_t NumBits = atoull(StartChar, CurPtr); if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { @@ -528,7 +547,7 @@ lltok::Kind LLLexer::LexIdentifier() { // Otherwise, this was a letter sequence. See which keyword this is. if (!KeywordEnd) KeywordEnd = CurPtr; - CurPtr = KeywordEnd; + advancePositionTo(KeywordEnd); --StartChar; StringRef Keyword(StartChar, CurPtr - StartChar); @@ -1042,7 +1061,7 @@ lltok::Kind LLLexer::LexIdentifier() { StringRef HexStr(TokStart + 3, len); if (!all_of(HexStr, isxdigit)) { // Bad token, return it as an error. 
- CurPtr = TokStart+3; + advancePositionTo(TokStart + 3); return lltok::Error; } APInt Tmp(bits, HexStr, 16); @@ -1055,12 +1074,12 @@ lltok::Kind LLLexer::LexIdentifier() { // If this is "cc1234", return this as just "cc". if (TokStart[0] == 'c' && TokStart[1] == 'c') { - CurPtr = TokStart+2; + advancePositionTo(TokStart + 2); return lltok::kw_cc; } // Finally, if this isn't known, return an error. - CurPtr = TokStart+1; + advancePositionTo(TokStart + 1); return lltok::Error; } @@ -1073,24 +1092,25 @@ lltok::Kind LLLexer::LexIdentifier() { /// HexHalfConstant 0xH[0-9A-Fa-f]+ /// HexBFloatConstant 0xR[0-9A-Fa-f]+ lltok::Kind LLLexer::Lex0x() { - CurPtr = TokStart + 2; + advancePositionTo(TokStart + 2); char Kind; if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' || CurPtr[0] == 'R') { - Kind = *CurPtr++; + Kind = *CurPtr; + getNextChar(); } else { Kind = 'J'; } if (!isxdigit(static_cast(CurPtr[0]))) { // Bad token, return it as an error. - CurPtr = TokStart+1; + advancePositionTo(TokStart + 1); return lltok::Error; } while (isxdigit(static_cast(CurPtr[0]))) - ++CurPtr; + getNextChar(); if (Kind == 'J') { // HexFPConstant - Floating point constant represented in IEEE format as a @@ -1147,7 +1167,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // Okay, this is not a number after the -, it's probably a label. if (const char *End = isLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); - CurPtr = End; + advancePositionTo(End); return lltok::LabelStr; } @@ -1157,13 +1177,13 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // At this point, it is either a label, int or fp constant. // Skip digits, we have at least one. - for (; isdigit(static_cast(CurPtr[0])); ++CurPtr) + for (; isdigit(static_cast(CurPtr[0])); getNextChar()) /*empty*/; // Check if this is a fully-numeric label: if (isdigit(TokStart[0]) && CurPtr[0] == ':') { uint64_t Val = atoull(TokStart, CurPtr); - ++CurPtr; // Skip the colon. + getNextChar(); // Skip the colon. 
if ((unsigned)Val != Val) LexError("invalid value number (too large)"); UIntVal = unsigned(Val); @@ -1174,7 +1194,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { if (const char *End = isLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); - CurPtr = End; + advancePositionTo(End); return lltok::LabelStr; } } @@ -1188,17 +1208,19 @@ lltok::Kind LLLexer::LexDigitOrNegative() { return lltok::APSInt; } - ++CurPtr; + getNextChar(); // Skip over [0-9]*([eE][-+]?[0-9]+)? - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { - CurPtr += 2; - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + skipNChars(2); + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); } } @@ -1216,26 +1238,29 @@ lltok::Kind LLLexer::LexPositive() { return lltok::Error; // Skip digits. - for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) + for (getNextChar(); isdigit(static_cast(CurPtr[0])); + getNextChar()) /*empty*/; // At this point, we need a '.'. if (CurPtr[0] != '.') { - CurPtr = TokStart+1; + advancePositionTo(TokStart + 1); return lltok::Error; } - ++CurPtr; + getNextChar(); // Skip over [0-9]*([eE][-+]?[0-9]+)? 
- while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { - CurPtr += 2; - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + skipNChars(2); + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); } } From 06926e9fa28e10abbec7803c0cd8c196ea09daf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Mon, 4 Aug 2025 09:36:51 +0000 Subject: [PATCH 02/33] Remove remains from cherry pick from LSP branch --- llvm/lib/AsmParser/LLLexer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 7cefd4f6b4935..db4079975ad40 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -198,10 +198,8 @@ const char *LLLexer::skipNChars(unsigned N) { void LLLexer::advancePositionTo(const char *Ptr) { while (CurPtr != Ptr) { - // FIXME: Assumes that if moving back, we stay in that line if (CurPtr > Ptr) { --CurPtr; - --CurColNum; } else getNextChar(); } From 1fdf13c326ba2f7889b27de5789c0339190fa0f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Mon, 4 Aug 2025 09:49:32 +0000 Subject: [PATCH 03/33] Make isLabelTail more safe and rename it to better show what it does --- llvm/include/llvm/AsmParser/LLLexer.h | 3 ++ llvm/lib/AsmParser/LLLexer.cpp | 43 +++++++++++---------------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index 5f6e32a4bf5e1..d0d6f72c197da 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -93,6 +93,9 @@ namespace llvm { private: lltok::Kind LexToken(); + // Return closes pointer after `Ptr` that is an end of a label. + // Returns nullptr if `Ptr` doesn't point into a label. 
+ const char *getLabelTail(const char *Ptr); int getNextChar(); const char *skipNChars(unsigned N); void advancePositionTo(const char *Ptr); diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index db4079975ad40..bbd6b690a97c0 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -155,15 +155,6 @@ static bool isLabelChar(char C) { C == '.' || C == '_'; } -/// isLabelTail - Return true if this pointer points to a valid end of a label. -static const char *isLabelTail(const char *CurPtr) { - while (true) { - if (CurPtr[0] == ':') return CurPtr+1; - if (!isLabelChar(CurPtr[0])) return nullptr; - ++CurPtr; - } -} - //===----------------------------------------------------------------------===// // Lexer definition. //===----------------------------------------------------------------------===// @@ -174,20 +165,22 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err, CurPtr = CurBuf.begin(); } +/// getLabelTail - Return true if this pointer points to a valid end of a label. +const char *LLLexer::getLabelTail(const char *Ptr) { + while (Ptr != CurBuf.end()) { + if (Ptr[0] == ':') + return Ptr + 1; + if (!isLabelChar(Ptr[0])) + return nullptr; + ++Ptr; + } + return nullptr; +} + int LLLexer::getNextChar() { - char CurChar = *CurPtr++; - switch (CurChar) { - default: return (unsigned char)CurChar; - case 0: - // A nul character in the stream is either the end of the current buffer or - // a random nul in the file. Disambiguate that here. - if (CurPtr-1 != CurBuf.end()) - return 0; // Just whitespace. - - // Otherwise, return end of file. - --CurPtr; // Another call to lex will return EOF again. 
+ if (CurPtr == CurBuf.end()) return EOF; - } + return *CurPtr++; } const char *LLLexer::skipNChars(unsigned N) { @@ -230,7 +223,7 @@ lltok::Kind LLLexer::LexToken() { case '%': return LexPercent(); case '"': return LexQuote(); case '.': - if (const char *Ptr = isLabelTail(CurPtr)) { + if (const char *Ptr = getLabelTail(CurPtr)) { advancePositionTo(Ptr); StrVal.assign(TokStart, CurPtr-1); return lltok::LabelStr; @@ -313,7 +306,7 @@ lltok::Kind LLLexer::LexAt() { } lltok::Kind LLLexer::LexDollar() { - if (const char *Ptr = isLabelTail(TokStart)) { + if (const char *Ptr = getLabelTail(TokStart)) { advancePositionTo(Ptr); StrVal.assign(TokStart, CurPtr - 1); return lltok::LabelStr; @@ -1163,7 +1156,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { if (!isdigit(static_cast(TokStart[0])) && !isdigit(static_cast(CurPtr[0]))) { // Okay, this is not a number after the -, it's probably a label. - if (const char *End = isLabelTail(CurPtr)) { + if (const char *End = getLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); advancePositionTo(End); return lltok::LabelStr; @@ -1190,7 +1183,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // Check to see if this really is a string label, e.g. "-1:". 
if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { - if (const char *End = isLabelTail(CurPtr)) { + if (const char *End = getLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); advancePositionTo(End); return lltok::LabelStr; From 2772cd8f679124daed8bae737451dcf54b637694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Mon, 4 Aug 2025 09:51:38 +0000 Subject: [PATCH 04/33] Remove dangling comment --- llvm/lib/AsmParser/LLLexer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index bbd6b690a97c0..578ac851e38c5 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -165,7 +165,6 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err, CurPtr = CurBuf.begin(); } -/// getLabelTail - Return true if this pointer points to a valid end of a label. const char *LLLexer::getLabelTail(const char *Ptr) { while (Ptr != CurBuf.end()) { if (Ptr[0] == ':') From b05d11ac2ee3f8e14020a0fa4d2e7c602bb3d77b Mon Sep 17 00:00:00 2001 From: Bertik23 <39457484+Bertik23@users.noreply.github.com> Date: Tue, 12 Aug 2025 12:23:23 +0200 Subject: [PATCH 05/33] Fix typo Co-authored-by: Nikita Popov --- llvm/include/llvm/AsmParser/LLLexer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index d0d6f72c197da..beb88a8c73305 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -93,7 +93,7 @@ namespace llvm { private: lltok::Kind LexToken(); - // Return closes pointer after `Ptr` that is an end of a label. + // Return closest pointer after `Ptr` that is an end of a label. // Returns nullptr if `Ptr` doesn't point into a label. 
const char *getLabelTail(const char *Ptr); int getNextChar(); From 458599b36cfdc15d7d79c7c7ff0fe770c4d0685d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Thu, 28 Aug 2025 07:57:04 +0000 Subject: [PATCH 06/33] Add location tracking to IR parser --- .../include/llvm/AsmParser/AsmParserContext.h | 53 +++++++ llvm/include/llvm/AsmParser/LLLexer.h | 35 ++++- llvm/include/llvm/AsmParser/LLParser.h | 9 +- llvm/include/llvm/AsmParser/Parser.h | 16 ++- llvm/include/llvm/IR/Value.h | 32 +++++ llvm/include/llvm/IRReader/IRReader.h | 17 +-- llvm/lib/AsmParser/AsmParserContext.cpp | 91 ++++++++++++ llvm/lib/AsmParser/CMakeLists.txt | 1 + llvm/lib/AsmParser/LLLexer.cpp | 131 +++++++++++++----- llvm/lib/AsmParser/LLParser.cpp | 27 +++- llvm/lib/AsmParser/Parser.cpp | 31 +++-- llvm/lib/IRReader/IRReader.cpp | 13 +- llvm/unittests/AsmParser/AsmParserTest.cpp | 60 ++++++++ 13 files changed, 440 insertions(+), 76 deletions(-) create mode 100644 llvm/include/llvm/AsmParser/AsmParserContext.h create mode 100644 llvm/lib/AsmParser/AsmParserContext.cpp diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h new file mode 100644 index 0000000000000..bc4d93ef727ef --- /dev/null +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -0,0 +1,53 @@ +//===-- AsmParserContext.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ASMPARSER_ASMPARSER_STATE_H +#define LLVM_ASMPARSER_ASMPARSER_STATE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Value.h" +#include + +namespace llvm { + +/// Registry of file location information for LLVM IR constructs +/// +/// This class provides access to the file location information +/// for various LLVM IR constructs. Currently, it supports Function, +/// BasicBlock and Instruction locations. +/// +/// When available, it can answer queries about what is at a given +/// file location, as well as where in a file a given IR construct +/// is. +/// +/// This information is optionally emitted by the LLParser while +/// it reads LLVM textual IR. +class AsmParserContext { +public: + std::optional getFunctionLocation(const Function *) const; + std::optional getBlockLocation(const BasicBlock *) const; + std::optional getInstructionLocation(const Instruction *) const; + std::optional getFunctionAtLocation(const FileLocRange &) const; + std::optional getFunctionAtLocation(const FileLoc &) const; + std::optional getBlockAtLocation(const FileLocRange &) const; + std::optional getBlockAtLocation(const FileLoc &) const; + std::optional + getInstructionAtLocation(const FileLocRange &) const; + std::optional getInstructionAtLocation(const FileLoc &) const; + bool addFunctionLocation(Function *, const FileLocRange &); + bool addBlockLocation(BasicBlock *, const FileLocRange &); + bool addInstructionLocation(Instruction *, const FileLocRange &); + +private: + DenseMap Functions; + DenseMap Blocks; + DenseMap Instructions; +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index 501a7aefccd7f..3d0e28ea9f5bc 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -29,6 +29,20 @@ namespace llvm { 
const char *CurPtr; StringRef CurBuf; + // The line number at `CurPtr-1`, zero-indexed + unsigned CurLineNum = 0; + // The column number at `CurPtr-1`, zero-indexed + unsigned CurColNum = -1; + // The line number of the start of the current token, zero-indexed + unsigned CurTokLineNum = 0; + // The column number of the start of the current token, zero-indexed + unsigned CurTokColNum = 0; + // The line number of the end of the current token, zero-indexed + unsigned PrevTokEndLineNum = -1; + // The column number of the end (exclusive) of the current token, + // zero-indexed + unsigned PrevTokEndColNum = -1; + enum class ErrorPriority { None, // No error message present. Parser, // Errors issued by parser. @@ -62,9 +76,7 @@ namespace llvm { explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &, LLVMContext &C); - lltok::Kind Lex() { - return CurKind = LexToken(); - } + lltok::Kind Lex() { return CurKind = LexToken(); } typedef SMLoc LocTy; LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); } @@ -79,6 +91,21 @@ namespace llvm { IgnoreColonInIdentifiers = val; } + // Get the current line number, zero-indexed + unsigned getLineNum() { return CurLineNum; } + // Get the current column number, zero-indexed + unsigned getColNum() { return CurColNum; } + // Get the line number of the start of the current token, zero-indexed + unsigned getTokLineNum() { return CurTokLineNum; } + // Get the column number of the start of the current token, zero-indexed + unsigned getTokColNum() { return CurTokColNum; } + // Get the line number of the end of the previous token, zero-indexed, + // exclusive + unsigned getPrevTokEndLineNum() { return PrevTokEndLineNum; } + // Get the column number of the end of the previous token, zero-indexed, + // exclusive + unsigned getPrevTokEndColNum() { return PrevTokEndColNum; } + // This returns true as a convenience for the parser functions that return // true on error. 
bool ParseError(LocTy ErrorLoc, const Twine &Msg) { @@ -94,6 +121,8 @@ namespace llvm { lltok::Kind LexToken(); int getNextChar(); + const char *skipNChars(unsigned N); + void advancePositionTo(const char *Ptr); void SkipLineComment(); bool SkipCComment(); lltok::Kind ReadString(lltok::Kind kind); diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index c01de4a289a69..02460e5e52203 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -13,6 +13,7 @@ #ifndef LLVM_ASMPARSER_LLPARSER_H #define LLVM_ASMPARSER_LLPARSER_H +#include "AsmParserContext.h" #include "LLLexer.h" #include "llvm/ADT/StringMap.h" #include "llvm/AsmParser/NumberedValues.h" @@ -177,6 +178,9 @@ namespace llvm { // Map of module ID to path. std::map ModuleIdMap; + /// Keeps track of source locations for Values, BasicBlocks, and Functions + AsmParserContext *ParserContext; + /// Only the llvm-as tool may set this to false to bypass /// UpgradeDebuginfo so it can generate broken bitcode. 
bool UpgradeDebugInfo; @@ -189,10 +193,11 @@ namespace llvm { public: LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *M, ModuleSummaryIndex *Index, LLVMContext &Context, - SlotMapping *Slots = nullptr) + SlotMapping *Slots = nullptr, + AsmParserContext *ParserContext = nullptr) : Context(Context), OPLex(F, SM, Err, Context), Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots), - BlockAddressPFS(nullptr) {} + BlockAddressPFS(nullptr), ParserContext(ParserContext) {} bool Run( bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { diff --git a/llvm/include/llvm/AsmParser/Parser.h b/llvm/include/llvm/AsmParser/Parser.h index c900b79665404..22b0881d92b53 100644 --- a/llvm/include/llvm/AsmParser/Parser.h +++ b/llvm/include/llvm/AsmParser/Parser.h @@ -15,6 +15,7 @@ #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/AsmParser/AsmParserContext.h" #include "llvm/Support/Compiler.h" #include #include @@ -62,7 +63,8 @@ parseAssemblyFile(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, /// parsing. LLVM_ABI std::unique_ptr parseAssemblyString(StringRef AsmString, SMDiagnostic &Err, - LLVMContext &Context, SlotMapping *Slots = nullptr); + LLVMContext &Context, SlotMapping *Slots = nullptr, + AsmParserContext *ParserContext = nullptr); /// Holds the Module and ModuleSummaryIndex returned by the interfaces /// that parse both. @@ -128,9 +130,9 @@ parseSummaryIndexAssemblyString(StringRef AsmString, SMDiagnostic &Err); LLVM_ABI std::unique_ptr parseAssembly( MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots = nullptr, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { - return std::nullopt; - }); + DataLayoutCallbackTy DataLayoutCallback = + [](StringRef, StringRef) { return std::nullopt; }, + AsmParserContext *ParserContext = nullptr); /// Parse LLVM Assembly including the summary index from a MemoryBuffer. 
/// @@ -169,9 +171,9 @@ parseSummaryIndexAssembly(MemoryBufferRef F, SMDiagnostic &Err); LLVM_ABI bool parseAssemblyInto( MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err, SlotMapping *Slots = nullptr, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) { - return std::nullopt; - }); + DataLayoutCallbackTy DataLayoutCallback = + [](StringRef, StringRef) { return std::nullopt; }, + AsmParserContext *ParserContext = nullptr); /// Parse a type and a constant value in the given string. /// diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index 04d0391c04098..9e27797d151e2 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -55,6 +55,38 @@ class User; using ValueName = StringMapEntry; +struct FileLoc { + unsigned Line; + unsigned Col; + + bool operator<=(const FileLoc &RHS) const { + return Line < RHS.Line || (Line == RHS.Line && Col <= RHS.Col); + } + + bool operator<(const FileLoc &RHS) const { + return Line < RHS.Line || (Line == RHS.Line && Col < RHS.Col); + } + + FileLoc(unsigned L, unsigned C) : Line(L), Col(C) {} +}; + +struct FileLocRange { + FileLoc Start; + FileLoc End; + + FileLocRange() : Start(0, 0), End(0, 0) {} + + FileLocRange(FileLoc S, FileLoc E) : Start(S), End(E) { + assert(Start <= End); + } + + bool contains(FileLoc L) const { return Start <= L && L <= End; } + + bool contains(FileLocRange LR) const { + return contains(LR.Start) && contains(LR.End); + } +}; + //===----------------------------------------------------------------------===// // Value Class //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IRReader/IRReader.h b/llvm/include/llvm/IRReader/IRReader.h index 790140f19934e..00cf12d342ae0 100644 --- a/llvm/include/llvm/IRReader/IRReader.h +++ b/llvm/include/llvm/IRReader/IRReader.h @@ -15,6 +15,7 @@ #define LLVM_IRREADER_IRREADER_H #include "llvm/ADT/StringRef.h" +#include 
"llvm/AsmParser/AsmParserContext.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Support/Compiler.h" #include @@ -50,19 +51,19 @@ getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, /// for it. Otherwise, attempt to parse it as LLVM Assembly and return /// a Module for it. /// \param DataLayoutCallback Override datalayout in the llvm assembly. -LLVM_ABI std::unique_ptr parseIR(MemoryBufferRef Buffer, - SMDiagnostic &Err, - LLVMContext &Context, - ParserCallbacks Callbacks = {}); +LLVM_ABI std::unique_ptr +parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, LLVMContext &Context, + ParserCallbacks Callbacks = {}, + AsmParserContext *ParserContext = nullptr); /// If the given file holds a bitcode image, return a Module for it. /// Otherwise, attempt to parse it as LLVM Assembly and return a Module /// for it. /// \param DataLayoutCallback Override datalayout in the llvm assembly. -LLVM_ABI std::unique_ptr parseIRFile(StringRef Filename, - SMDiagnostic &Err, - LLVMContext &Context, - ParserCallbacks Callbacks = {}); +LLVM_ABI std::unique_ptr +parseIRFile(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, + ParserCallbacks Callbacks = {}, + AsmParserContext *ParserContext = nullptr); } #endif diff --git a/llvm/lib/AsmParser/AsmParserContext.cpp b/llvm/lib/AsmParser/AsmParserContext.cpp new file mode 100644 index 0000000000000..f5e3d83f5d346 --- /dev/null +++ b/llvm/lib/AsmParser/AsmParserContext.cpp @@ -0,0 +1,91 @@ +//===-- AsmParserContext.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/AsmParser/AsmParserContext.h" + +namespace llvm { + +std::optional +AsmParserContext::getFunctionLocation(const Function *F) const { + if (!Functions.contains(F)) + return std::nullopt; + return Functions.at(F); +} + +std::optional +AsmParserContext::getBlockLocation(const BasicBlock *BB) const { + if (!Blocks.contains(BB)) + return std::nullopt; + return Blocks.at(BB); +} + +std::optional +AsmParserContext::getInstructionLocation(const Instruction *I) const { + if (!Instructions.contains(I)) + return std::nullopt; + return Instructions.at(I); +} + +std::optional +AsmParserContext::getFunctionAtLocation(const FileLocRange &Query) const { + for (auto &[F, Loc] : Functions) { + if (Loc.contains(Query)) + return F; + } + return std::nullopt; +} + +std::optional +AsmParserContext::getFunctionAtLocation(const FileLoc &Query) const { + return getFunctionAtLocation(FileLocRange(Query, Query)); +} + +std::optional +AsmParserContext::getBlockAtLocation(const FileLocRange &Query) const { + for (auto &[BB, Loc] : Blocks) { + if (Loc.contains(Query)) + return BB; + } + return std::nullopt; +} + +std::optional +AsmParserContext::getBlockAtLocation(const FileLoc &Query) const { + return getBlockAtLocation(FileLocRange(Query, Query)); +} + +std::optional +AsmParserContext::getInstructionAtLocation(const FileLocRange &Query) const { + for (auto &[I, Loc] : Instructions) { + if (Loc.contains(Query)) + return I; + } + return std::nullopt; +} + +std::optional +AsmParserContext::getInstructionAtLocation(const FileLoc &Query) const { + return getInstructionAtLocation(FileLocRange(Query, Query)); +} + +bool AsmParserContext::addFunctionLocation(Function *F, + const FileLocRange &Loc) { + return Functions.insert({F, Loc}).second; +} + +bool AsmParserContext::addBlockLocation(BasicBlock *BB, + const FileLocRange &Loc) { + 
return Blocks.insert({BB, Loc}).second; +} + +bool AsmParserContext::addInstructionLocation(Instruction *I, + const FileLocRange &Loc) { + return Instructions.insert({I, Loc}).second; +} + +} // namespace llvm diff --git a/llvm/lib/AsmParser/CMakeLists.txt b/llvm/lib/AsmParser/CMakeLists.txt index 20d0c50a029ca..dcfcc06f093a7 100644 --- a/llvm/lib/AsmParser/CMakeLists.txt +++ b/llvm/lib/AsmParser/CMakeLists.txt @@ -1,5 +1,6 @@ # AsmParser add_llvm_component_library(LLVMAsmParser + AsmParserContext.cpp LLLexer.cpp LLParser.cpp Parser.cpp diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 3d5bd6155536e..a209de05c39db 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -176,6 +176,14 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err, int LLLexer::getNextChar() { char CurChar = *CurPtr++; + // Increment line number if this is the first character after a newline + // CurPtr points to the char after CurChar, so two positions before that + if ((CurPtr - 2) >= CurBuf.begin() && *(CurPtr - 2) == '\n') { + CurLineNum++; + CurColNum = 0; + } else + CurColNum++; + switch (CurChar) { default: return (unsigned char)CurChar; case 0: @@ -190,11 +198,52 @@ int LLLexer::getNextChar() { } } +const char *LLLexer::skipNChars(unsigned N) { + while (N--) + getNextChar(); + return CurPtr; +} + +void LLLexer::advancePositionTo(const char *Ptr) { + bool RecalculateColumn = false; + while (CurPtr != Ptr) { + if (CurPtr > Ptr) { + --CurPtr; + --CurColNum; + // Since CurPtr is one char ahead of the stored position, chech if the + // previous char is not a newline + if (CurPtr != CurBuf.begin() && *(CurPtr - 1) == '\n') { + --CurLineNum; + RecalculateColumn = true; + } + } else + getNextChar(); + } + if (RecalculateColumn) { + CurColNum = 0; + // Count the number of chars to the previous newline or start of buffer + for (const char *Ptr = CurPtr; Ptr != CurBuf.begin() && *(Ptr - 1) != '\n'; + --Ptr, 
++CurColNum) + ; + } +} + lltok::Kind LLLexer::LexToken() { + // Set token end to next location, since the end is + // exclusive + if (CurPtr != CurBuf.begin() && *(CurPtr - 1) == '\n') { + PrevTokEndLineNum = CurLineNum + 1; + PrevTokEndColNum = 0; + } else { + PrevTokEndLineNum = CurLineNum; + PrevTokEndColNum = CurColNum + 1; + } while (true) { TokStart = CurPtr; - int CurChar = getNextChar(); + CurTokColNum = CurColNum; + CurTokLineNum = CurLineNum; + switch (CurChar) { default: // Handle letters: [a-zA-Z_] @@ -216,12 +265,12 @@ lltok::Kind LLLexer::LexToken() { case '"': return LexQuote(); case '.': if (const char *Ptr = isLabelTail(CurPtr)) { - CurPtr = Ptr; + advancePositionTo(Ptr); StrVal.assign(TokStart, CurPtr-1); return lltok::LabelStr; } if (CurPtr[0] == '.' && CurPtr[1] == '.') { - CurPtr += 2; + skipNChars(2); return lltok::dotdotdot; } return lltok::Error; @@ -299,14 +348,14 @@ lltok::Kind LLLexer::LexAt() { lltok::Kind LLLexer::LexDollar() { if (const char *Ptr = isLabelTail(TokStart)) { - CurPtr = Ptr; + advancePositionTo(Ptr); StrVal.assign(TokStart, CurPtr - 1); return lltok::LabelStr; } // Handle DollarStringConstant: $\"[^\"]*\" if (CurPtr[0] == '"') { - ++CurPtr; + getNextChar(); while (true) { int CurChar = getNextChar(); @@ -358,11 +407,11 @@ bool LLLexer::ReadVarName() { if (isalpha(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') { - ++CurPtr; + getNextChar(); while (isalnum(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' 
|| CurPtr[0] == '_') - ++CurPtr; + getNextChar(); StrVal.assign(NameStart, CurPtr); return true; @@ -376,7 +425,8 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { if (!isdigit(static_cast(CurPtr[0]))) return lltok::Error; - for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) + for (getNextChar(); isdigit(static_cast(CurPtr[0])); + getNextChar()) /*empty*/; uint64_t Val = atoull(TokStart + 1, CurPtr); @@ -389,7 +439,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { // Handle StringConstant: \"[^\"]*\" if (CurPtr[0] == '"') { - ++CurPtr; + getNextChar(); while (true) { int CurChar = getNextChar(); @@ -435,7 +485,7 @@ lltok::Kind LLLexer::LexQuote() { return kind; if (CurPtr[0] == ':') { - ++CurPtr; + getNextChar(); if (StringRef(StrVal).contains(0)) { LexError("NUL character is not allowed in names"); kind = lltok::Error; @@ -455,11 +505,11 @@ lltok::Kind LLLexer::LexExclaim() { if (isalpha(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') { - ++CurPtr; + getNextChar(); while (isalnum(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') - ++CurPtr; + getNextChar(); StrVal.assign(TokStart+1, CurPtr); // Skip ! UnEscapeLexed(StrVal); @@ -495,7 +545,7 @@ lltok::Kind LLLexer::LexIdentifier() { const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar; const char *KeywordEnd = nullptr; - for (; isLabelChar(*CurPtr); ++CurPtr) { + for (; isLabelChar(*CurPtr); getNextChar()) { // If we decide this is an integer, remember the end of the sequence. if (!IntEnd && !isdigit(static_cast(*CurPtr))) IntEnd = CurPtr; @@ -507,7 +557,8 @@ lltok::Kind LLLexer::LexIdentifier() { // If we stopped due to a colon, unless we were directed to ignore it, // this really is a label. 
if (!IgnoreColonInIdentifiers && *CurPtr == ':') { - StrVal.assign(StartChar-1, CurPtr++); + StrVal.assign(StartChar - 1, CurPtr); + getNextChar(); return lltok::LabelStr; } @@ -515,7 +566,7 @@ lltok::Kind LLLexer::LexIdentifier() { // return it. if (!IntEnd) IntEnd = CurPtr; if (IntEnd != StartChar) { - CurPtr = IntEnd; + advancePositionTo(IntEnd); uint64_t NumBits = atoull(StartChar, CurPtr); if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { @@ -528,7 +579,7 @@ lltok::Kind LLLexer::LexIdentifier() { // Otherwise, this was a letter sequence. See which keyword this is. if (!KeywordEnd) KeywordEnd = CurPtr; - CurPtr = KeywordEnd; + advancePositionTo(KeywordEnd); --StartChar; StringRef Keyword(StartChar, CurPtr - StartChar); @@ -1043,7 +1094,7 @@ lltok::Kind LLLexer::LexIdentifier() { StringRef HexStr(TokStart + 3, len); if (!all_of(HexStr, isxdigit)) { // Bad token, return it as an error. - CurPtr = TokStart+3; + advancePositionTo(TokStart + 3); return lltok::Error; } APInt Tmp(bits, HexStr, 16); @@ -1056,12 +1107,12 @@ lltok::Kind LLLexer::LexIdentifier() { // If this is "cc1234", return this as just "cc". if (TokStart[0] == 'c' && TokStart[1] == 'c') { - CurPtr = TokStart+2; + advancePositionTo(TokStart + 2); return lltok::kw_cc; } // Finally, if this isn't known, return an error. - CurPtr = TokStart+1; + advancePositionTo(TokStart + 1); return lltok::Error; } @@ -1074,24 +1125,25 @@ lltok::Kind LLLexer::LexIdentifier() { /// HexHalfConstant 0xH[0-9A-Fa-f]+ /// HexBFloatConstant 0xR[0-9A-Fa-f]+ lltok::Kind LLLexer::Lex0x() { - CurPtr = TokStart + 2; + advancePositionTo(TokStart + 2); char Kind; if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' || CurPtr[0] == 'R') { - Kind = *CurPtr++; + Kind = *CurPtr; + getNextChar(); } else { Kind = 'J'; } if (!isxdigit(static_cast(CurPtr[0]))) { // Bad token, return it as an error. 
- CurPtr = TokStart+1; + advancePositionTo(TokStart + 1); return lltok::Error; } while (isxdigit(static_cast(CurPtr[0]))) - ++CurPtr; + getNextChar(); if (Kind == 'J') { // HexFPConstant - Floating point constant represented in IEEE format as a @@ -1148,7 +1200,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // Okay, this is not a number after the -, it's probably a label. if (const char *End = isLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); - CurPtr = End; + advancePositionTo(End); return lltok::LabelStr; } @@ -1158,13 +1210,13 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // At this point, it is either a label, int or fp constant. // Skip digits, we have at least one. - for (; isdigit(static_cast(CurPtr[0])); ++CurPtr) + for (; isdigit(static_cast(CurPtr[0])); getNextChar()) /*empty*/; // Check if this is a fully-numeric label: if (isdigit(TokStart[0]) && CurPtr[0] == ':') { uint64_t Val = atoull(TokStart, CurPtr); - ++CurPtr; // Skip the colon. + getNextChar(); // Skip the colon. if ((unsigned)Val != Val) LexError("invalid value number (too large)"); UIntVal = unsigned(Val); @@ -1175,7 +1227,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { if (const char *End = isLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); - CurPtr = End; + advancePositionTo(End); return lltok::LabelStr; } } @@ -1189,17 +1241,19 @@ lltok::Kind LLLexer::LexDigitOrNegative() { return lltok::APSInt; } - ++CurPtr; + getNextChar(); // Skip over [0-9]*([eE][-+]?[0-9]+)? 
- while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { - CurPtr += 2; - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + skipNChars(2); + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); } } @@ -1217,26 +1271,29 @@ lltok::Kind LLLexer::LexPositive() { return lltok::Error; // Skip digits. - for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) + for (getNextChar(); isdigit(static_cast(CurPtr[0])); + getNextChar()) /*empty*/; // At this point, we need a '.'. if (CurPtr[0] != '.') { - CurPtr = TokStart+1; + advancePositionTo(TokStart + 1); return lltok::Error; } - ++CurPtr; + getNextChar(); // Skip over [0-9]*([eE][-+]?[0-9]+)? - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { - CurPtr += 2; - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + skipNChars(2); + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); } } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 1bc2906f63b07..03fe1097f8612 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -740,14 +740,22 @@ bool LLParser::parseDeclare() { /// ::= 'define' FunctionHeader (!dbg !56)* '{' ... 
bool LLParser::parseDefine() { assert(Lex.getKind() == lltok::kw_define); + FileLoc FunctionStart(Lex.getTokLineNum(), Lex.getTokColNum()); Lex.Lex(); Function *F; unsigned FunctionNumber = -1; SmallVector UnnamedArgNums; - return parseFunctionHeader(F, true, FunctionNumber, UnnamedArgNums) || - parseOptionalFunctionMetadata(*F) || - parseFunctionBody(*F, FunctionNumber, UnnamedArgNums); + bool RetValue = + parseFunctionHeader(F, true, FunctionNumber, UnnamedArgNums) || + parseOptionalFunctionMetadata(*F) || + parseFunctionBody(*F, FunctionNumber, UnnamedArgNums); + if (ParserContext) + ParserContext->addFunctionLocation( + F, FileLocRange(FunctionStart, {Lex.getPrevTokEndLineNum(), + Lex.getPrevTokEndColNum()})); + + return RetValue; } /// parseGlobalType @@ -6951,6 +6959,8 @@ bool LLParser::parseFunctionBody(Function &Fn, unsigned FunctionNumber, /// parseBasicBlock /// ::= (LabelStr|LabelID)? Instruction* bool LLParser::parseBasicBlock(PerFunctionState &PFS) { + FileLoc BBStart(Lex.getTokLineNum(), Lex.getTokColNum()); + // If this basic block starts out with a name, remember it. std::string Name; int NameID = -1; @@ -6992,6 +7002,7 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { TrailingDbgRecord.emplace_back(DR, DeleteDbgRecord); } + FileLoc InstStart(Lex.getTokLineNum(), Lex.getTokColNum()); // This instruction may have three possibilities for a name: a) none // specified, b) name specified "%foo =", c) number specified: "%4 =". 
LocTy NameLoc = Lex.getLoc(); @@ -7041,8 +7052,18 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { for (DbgRecordPtr &DR : TrailingDbgRecord) BB->insertDbgRecordBefore(DR.release(), Inst->getIterator()); TrailingDbgRecord.clear(); + if (ParserContext) { + ParserContext->addInstructionLocation( + Inst, FileLocRange(InstStart, {Lex.getPrevTokEndLineNum(), + Lex.getPrevTokEndColNum()})); + } } while (!Inst->isTerminator()); + if (ParserContext) + ParserContext->addBlockLocation( + BB, FileLocRange(BBStart, {Lex.getPrevTokEndLineNum(), + Lex.getPrevTokEndColNum()})); + assert(TrailingDbgRecord.empty() && "All debug values should have been attached to an instruction."); diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp index 07fdce981b084..c5346d0977314 100644 --- a/llvm/lib/AsmParser/Parser.cpp +++ b/llvm/lib/AsmParser/Parser.cpp @@ -24,33 +24,38 @@ using namespace llvm; static bool parseAssemblyInto(MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err, SlotMapping *Slots, bool UpgradeDebugInfo, - DataLayoutCallbackTy DataLayoutCallback) { + DataLayoutCallbackTy DataLayoutCallback, + AsmParserContext *ParserContext = nullptr) { SourceMgr SM; std::unique_ptr Buf = MemoryBuffer::getMemBuffer(F); SM.AddNewSourceBuffer(std::move(Buf), SMLoc()); std::optional OptContext; return LLParser(F.getBuffer(), SM, Err, M, Index, - M ? M->getContext() : OptContext.emplace(), Slots) + M ? 
M->getContext() : OptContext.emplace(), Slots, + ParserContext) .Run(UpgradeDebugInfo, DataLayoutCallback); } bool llvm::parseAssemblyInto(MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err, SlotMapping *Slots, - DataLayoutCallbackTy DataLayoutCallback) { + DataLayoutCallbackTy DataLayoutCallback, + AsmParserContext *ParserContext) { return ::parseAssemblyInto(F, M, Index, Err, Slots, - /*UpgradeDebugInfo*/ true, DataLayoutCallback); + /*UpgradeDebugInfo*/ true, DataLayoutCallback, + ParserContext); } std::unique_ptr llvm::parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, - SlotMapping *Slots, - DataLayoutCallbackTy DataLayoutCallback) { + SlotMapping *Slots, DataLayoutCallbackTy DataLayoutCallback, + AsmParserContext *ParserContext) { std::unique_ptr M = std::make_unique(F.getBufferIdentifier(), Context); - if (parseAssemblyInto(F, M.get(), nullptr, Err, Slots, DataLayoutCallback)) + if (parseAssemblyInto(F, M.get(), nullptr, Err, Slots, DataLayoutCallback, + ParserContext)) return nullptr; return M; @@ -133,12 +138,14 @@ ParsedModuleAndIndex llvm::parseAssemblyFileWithIndexNoUpgradeDebugInfo( DataLayoutCallback); } -std::unique_ptr llvm::parseAssemblyString(StringRef AsmString, - SMDiagnostic &Err, - LLVMContext &Context, - SlotMapping *Slots) { +std::unique_ptr +llvm::parseAssemblyString(StringRef AsmString, SMDiagnostic &Err, + LLVMContext &Context, SlotMapping *Slots, + AsmParserContext *ParserContext) { MemoryBufferRef F(AsmString, ""); - return parseAssembly(F, Err, Context, Slots); + return parseAssembly( + F, Err, Context, Slots, [](StringRef, StringRef) { return std::nullopt; }, + ParserContext); } static bool parseSummaryIndexAssemblyInto(MemoryBufferRef F, diff --git a/llvm/lib/IRReader/IRReader.cpp b/llvm/lib/IRReader/IRReader.cpp index a7e7deee8aa91..c16871f081d1d 100644 --- a/llvm/lib/IRReader/IRReader.cpp +++ b/llvm/lib/IRReader/IRReader.cpp @@ -8,6 +8,7 @@ #include "llvm/IRReader/IRReader.h" #include 
"llvm-c/IRReader.h" +#include "llvm/AsmParser/AsmParserContext.h" #include "llvm/AsmParser/Parser.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/LLVMContext.h" @@ -68,7 +69,8 @@ std::unique_ptr llvm::getLazyIRFileModule(StringRef Filename, std::unique_ptr llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, LLVMContext &Context, - ParserCallbacks Callbacks) { + ParserCallbacks Callbacks, + llvm::AsmParserContext *ParserContext) { NamedRegionTimer T(TimeIRParsingName, TimeIRParsingDescription, TimeIRParsingGroupName, TimeIRParsingGroupDescription, TimePassesIsEnabled); @@ -88,12 +90,14 @@ std::unique_ptr llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, return parseAssembly(Buffer, Err, Context, nullptr, Callbacks.DataLayout.value_or( - [](StringRef, StringRef) { return std::nullopt; })); + [](StringRef, StringRef) { return std::nullopt; }), + ParserContext); } std::unique_ptr llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, - ParserCallbacks Callbacks) { + ParserCallbacks Callbacks, + AsmParserContext *ParserContext) { ErrorOr> FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true); if (std::error_code EC = FileOrErr.getError()) { @@ -102,7 +106,8 @@ std::unique_ptr llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err, return nullptr; } - return parseIR(FileOrErr.get()->getMemBufferRef(), Err, Context, Callbacks); + return parseIR(FileOrErr.get()->getMemBufferRef(), Err, Context, Callbacks, + ParserContext); } //===----------------------------------------------------------------------===// diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp index ce226705068af..49f59696709f2 100644 --- a/llvm/unittests/AsmParser/AsmParserTest.cpp +++ b/llvm/unittests/AsmParser/AsmParserTest.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" 
+#include "llvm/AsmParser/AsmParserContext.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/IR/Constants.h" @@ -14,6 +16,8 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" @@ -479,4 +483,60 @@ TEST(AsmParserTest, DIExpressionBodyAtBeginningWithSlotMappingParsing) { ASSERT_EQ(Mapping.MetadataNodes.size(), 0u); } +#define ASSERT_EQ_LOC(Loc1, Loc2) \ + do { \ + bool AreLocsEqual = Loc1.contains(Loc2) && Loc2.contains(Loc1); \ + if (!AreLocsEqual) { \ + dbgs() << #Loc1 " location: " << Loc1.Start.Line << ":" \ + << Loc1.Start.Col << " - " << Loc1.End.Line << ":" \ + << Loc1.End.Col << "\n"; \ + dbgs() << #Loc2 " location: " << Loc2.Start.Line << ":" \ + << Loc2.Start.Col << " - " << Loc2.End.Line << ":" \ + << Loc2.End.Col << "\n"; \ + } \ + ASSERT_TRUE(AreLocsEqual); \ + } while (false) + +TEST(AsmParserTest, ParserObjectLocations) { + // Expected to fail with function location starting one character later, needs + // a fix + StringRef Source = "define i32 @main() {\n" + "entry:\n" + " %a = add i32 1, 2\n" + " ret i32 %a\n" + "}\n"; + LLVMContext Ctx; + SMDiagnostic Error; + SlotMapping Mapping; + AsmParserContext ParserContext; + auto Mod = parseAssemblyString(Source, Error, Ctx, &Mapping, &ParserContext); + + auto *MainFn = Mod->getFunction("main"); + ASSERT_TRUE(MainFn != nullptr); + + auto MaybeMainLoc = ParserContext.getFunctionLocation(MainFn); + ASSERT_TRUE(MaybeMainLoc.has_value()); + auto MainLoc = MaybeMainLoc.value(); + auto ExpectedMainLoc = FileLocRange(FileLoc{0, 0}, FileLoc{4, 1}); + ASSERT_EQ_LOC(MainLoc, ExpectedMainLoc); + + auto &EntryBB = MainFn->getEntryBlock(); + auto MaybeEntryBBLoc = ParserContext.getBlockLocation(&EntryBB); + ASSERT_TRUE(MaybeEntryBBLoc.has_value()); + auto EntryBBLoc = 
MaybeEntryBBLoc.value(); + auto ExpectedEntryBBLoc = FileLocRange(FileLoc{1, 0}, FileLoc{3, 14}); + ASSERT_EQ_LOC(EntryBBLoc, ExpectedEntryBBLoc); + + SmallVector InstructionLocations = { + FileLocRange(FileLoc{2, 4}, FileLoc{2, 21}), + FileLocRange(FileLoc{3, 4}, FileLoc{3, 14})}; + + for (const auto &[Inst, ExpectedLoc] : zip(EntryBB, InstructionLocations)) { + auto MaybeInstLoc = ParserContext.getInstructionLocation(&Inst); + ASSERT_TRUE(MaybeMainLoc.has_value()); + auto InstLoc = MaybeInstLoc.value(); + ASSERT_EQ_LOC(InstLoc, ExpectedLoc); + } +} + } // end anonymous namespace From b0c5318d100ecb44684c5c31de1b19ab774b5549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Thu, 28 Aug 2025 10:10:43 +0000 Subject: [PATCH 07/33] Fix clang format --- llvm/include/llvm/AsmParser/AsmParserContext.h | 2 +- llvm/lib/AsmParser/LLLexer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h index bc4d93ef727ef..78ea32ac7ca08 100644 --- a/llvm/include/llvm/AsmParser/AsmParserContext.h +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -24,7 +24,7 @@ namespace llvm { /// When available, it can answer queries about what is at a given /// file location, as well as where in a file a given IR construct /// is. -/// +/// /// This information is optionally emitted by the LLParser while /// it reads LLVM textual IR. 
class AsmParserContext { diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index cd9d5b37d86e5..be5b2b9bce0ca 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -180,7 +180,7 @@ int LLLexer::getNextChar() { if (CurPtr == CurBuf.end()) return EOF; // Increment line number if this is the first character after a newline - if (CurPtr > CurBuf.begin() && *(CurPtr-1) == '\n'){ + if (CurPtr > CurBuf.begin() && *(CurPtr - 1) == '\n') { CurLineNum++; CurColNum = 0; } else From 416514e8eb5de149dffe5bd49035b7a91904d70c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Fri, 29 Aug 2025 07:49:27 +0000 Subject: [PATCH 08/33] Move private members to top of class definition --- llvm/include/llvm/AsmParser/AsmParserContext.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h index 78ea32ac7ca08..39c0e0d9df4de 100644 --- a/llvm/include/llvm/AsmParser/AsmParserContext.h +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -28,6 +28,10 @@ namespace llvm { /// This information is optionally emitted by the LLParser while /// it reads LLVM textual IR. 
class AsmParserContext { + DenseMap Functions; + DenseMap Blocks; + DenseMap Instructions; + public: std::optional getFunctionLocation(const Function *) const; std::optional getBlockLocation(const BasicBlock *) const; @@ -42,11 +46,6 @@ class AsmParserContext { bool addFunctionLocation(Function *, const FileLocRange &); bool addBlockLocation(BasicBlock *, const FileLocRange &); bool addInstructionLocation(Instruction *, const FileLocRange &); - -private: - DenseMap Functions; - DenseMap Blocks; - DenseMap Instructions; }; } // namespace llvm From 35ca1a501ac7a7c969df05d8e40ea3530bad75b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Tue, 2 Sep 2025 12:12:51 +0000 Subject: [PATCH 09/33] Use SourceMgr to resolve Line:Column position --- llvm/include/llvm/AsmParser/LLLexer.h | 8 ----- llvm/lib/AsmParser/LLLexer.cpp | 45 +++++++-------------------- 2 files changed, 12 insertions(+), 41 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index 5008ef029f3ff..bacf124d07d20 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -29,10 +29,6 @@ namespace llvm { const char *CurPtr; StringRef CurBuf; - // The line number at `CurPtr-1`, zero-indexed - unsigned CurLineNum = 0; - // The column number at `CurPtr-1`, zero-indexed - unsigned CurColNum = -1; // The line number of the start of the current token, zero-indexed unsigned CurTokLineNum = 0; // The column number of the start of the current token, zero-indexed @@ -91,10 +87,6 @@ namespace llvm { IgnoreColonInIdentifiers = val; } - // Get the current line number, zero-indexed - unsigned getLineNum() { return CurLineNum; } - // Get the current column number, zero-indexed - unsigned getColNum() { return CurColNum; } // Get the line number of the start of the current token, zero-indexed unsigned getTokLineNum() { return CurTokLineNum; } // Get the column number of the start of the current token, zero-indexed 
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index be5b2b9bce0ca..0041cc5fd95fa 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -180,11 +180,6 @@ int LLLexer::getNextChar() { if (CurPtr == CurBuf.end()) return EOF; // Increment line number if this is the first character after a newline - if (CurPtr > CurBuf.begin() && *(CurPtr - 1) == '\n') { - CurLineNum++; - CurColNum = 0; - } else - CurColNum++; return *CurPtr++; } @@ -195,44 +190,28 @@ const char *LLLexer::skipNChars(unsigned N) { } void LLLexer::advancePositionTo(const char *Ptr) { - bool RecalculateColumn = false; - while (CurPtr != Ptr) { - if (CurPtr > Ptr) { - --CurPtr; - --CurColNum; - // Since CurPtr is one char ahead of the stored position, check if the - // previous char is not a newline - if (CurPtr != CurBuf.begin() && *(CurPtr - 1) == '\n') { - --CurLineNum; - RecalculateColumn = true; - } - } else - getNextChar(); + if (CurBuf.begin() > Ptr) { + CurPtr = CurBuf.begin(); + return; } - if (RecalculateColumn) { - CurColNum = 0; - // Count the number of chars to the previous newline or start of buffer - for (const char *Ptr = CurPtr; Ptr != CurBuf.begin() && *(Ptr - 1) != '\n'; - --Ptr, ++CurColNum) - ; + if (CurBuf.end() < Ptr) { + CurPtr = CurBuf.end(); + return; } + + CurPtr = Ptr; } lltok::Kind LLLexer::LexToken() { // Set token end to next location, since the end is // exclusive - if (CurPtr != CurBuf.begin() && *(CurPtr - 1) == '\n') { - PrevTokEndLineNum = CurLineNum + 1; - PrevTokEndColNum = 0; - } else { - PrevTokEndLineNum = CurLineNum; - PrevTokEndColNum = CurColNum + 1; - } + std::tie(PrevTokEndLineNum, PrevTokEndColNum) = + SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr)); while (true) { TokStart = CurPtr; + std::tie(CurTokLineNum, CurTokColNum) = + SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr)); int CurChar = getNextChar(); - CurTokColNum = CurColNum; - CurTokLineNum = CurLineNum; switch (CurChar) { default: From 
b3d8254fadec29bde061f89dd74ef85e758419e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Tue, 2 Sep 2025 13:44:38 +0000 Subject: [PATCH 10/33] Fix zeroindexing on token positions --- llvm/lib/AsmParser/LLLexer.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 0041cc5fd95fa..8ce963702f330 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -207,10 +207,14 @@ lltok::Kind LLLexer::LexToken() { // exclusive std::tie(PrevTokEndLineNum, PrevTokEndColNum) = SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr)); + --PrevTokEndLineNum; + --PrevTokEndColNum; while (true) { TokStart = CurPtr; std::tie(CurTokLineNum, CurTokColNum) = SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr)); + --CurTokLineNum; + --CurTokColNum; int CurChar = getNextChar(); switch (CurChar) { From 23dcc6b4d05c58dc359c5334fcba0061f92499be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 3 Sep 2025 13:21:54 +0000 Subject: [PATCH 11/33] Replace Line:Column storage with Pointers and on demand conversion --- llvm/include/llvm/AsmParser/LLLexer.h | 39 +++++++++++++-------------- llvm/include/llvm/IR/Value.h | 1 + llvm/lib/AsmParser/LLLexer.cpp | 9 +------ llvm/lib/AsmParser/LLParser.cpp | 15 +++++------ 4 files changed, 27 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index bacf124d07d20..5e4d43ebbd4ed 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -17,27 +17,20 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" #include namespace llvm { class Type; class SMDiagnostic; - class SourceMgr; class LLVMContext; class LLLexer { const char *CurPtr; StringRef CurBuf; - // The line number of the start of the current token, zero-indexed - unsigned 
CurTokLineNum = 0; - // The column number of the start of the current token, zero-indexed - unsigned CurTokColNum = 0; - // The line number of the end of the current token, zero-indexed - unsigned PrevTokEndLineNum = -1; - // The column number of the end (exclusive) of the current token, - // zero-indexed - unsigned PrevTokEndColNum = -1; + // The the end (exclusive) of the current token + const char *PrevTokEnd = nullptr; enum class ErrorPriority { None, // No error message present. @@ -87,16 +80,22 @@ namespace llvm { IgnoreColonInIdentifiers = val; } - // Get the line number of the start of the current token, zero-indexed - unsigned getTokLineNum() { return CurTokLineNum; } - // Get the column number of the start of the current token, zero-indexed - unsigned getTokColNum() { return CurTokColNum; } - // Get the line number of the end of the previous token, zero-indexed, - // exclusive - unsigned getPrevTokEndLineNum() { return PrevTokEndLineNum; } - // Get the column number of the end of the previous token, zero-indexed, - // exclusive - unsigned getPrevTokEndColNum() { return PrevTokEndColNum; } + // Get the line, column position of the start of the current token, + // zero-indexed + std::pair getTokLineColumnPos() { + auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(TokStart)); + --LC.first; + --LC.second; + return LC; + } + // Get the line, column position of the end of the previous token, + // zero-indexed exclusive + std::pair getPrevTokEndLineColumnPos() { + auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(PrevTokEnd)); + --LC.first; + --LC.second; + return LC; + } // This returns true as a convenience for the parser functions that return // true on error. 
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index 9e27797d151e2..2617981cc090c 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -68,6 +68,7 @@ struct FileLoc { } FileLoc(unsigned L, unsigned C) : Line(L), Col(C) {} + FileLoc(std::pair LC) : Line(LC.first), Col(LC.second) {} }; struct FileLocRange { diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 8ce963702f330..0e378bc81fd69 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -205,16 +205,9 @@ void LLLexer::advancePositionTo(const char *Ptr) { lltok::Kind LLLexer::LexToken() { // Set token end to next location, since the end is // exclusive - std::tie(PrevTokEndLineNum, PrevTokEndColNum) = - SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr)); - --PrevTokEndLineNum; - --PrevTokEndColNum; + PrevTokEnd = CurPtr; while (true) { TokStart = CurPtr; - std::tie(CurTokLineNum, CurTokColNum) = - SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr)); - --CurTokLineNum; - --CurTokColNum; int CurChar = getNextChar(); switch (CurChar) { diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 03fe1097f8612..65daaf5be318d 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -740,7 +740,7 @@ bool LLParser::parseDeclare() { /// ::= 'define' FunctionHeader (!dbg !56)* '{' ... 
bool LLParser::parseDefine() { assert(Lex.getKind() == lltok::kw_define); - FileLoc FunctionStart(Lex.getTokLineNum(), Lex.getTokColNum()); + FileLoc FunctionStart(Lex.getTokLineColumnPos()); Lex.Lex(); Function *F; @@ -752,8 +752,7 @@ bool LLParser::parseDefine() { parseFunctionBody(*F, FunctionNumber, UnnamedArgNums); if (ParserContext) ParserContext->addFunctionLocation( - F, FileLocRange(FunctionStart, {Lex.getPrevTokEndLineNum(), - Lex.getPrevTokEndColNum()})); + F, FileLocRange(FunctionStart, Lex.getPrevTokEndLineColumnPos())); return RetValue; } @@ -6959,7 +6958,7 @@ bool LLParser::parseFunctionBody(Function &Fn, unsigned FunctionNumber, /// parseBasicBlock /// ::= (LabelStr|LabelID)? Instruction* bool LLParser::parseBasicBlock(PerFunctionState &PFS) { - FileLoc BBStart(Lex.getTokLineNum(), Lex.getTokColNum()); + FileLoc BBStart(Lex.getTokLineColumnPos()); // If this basic block starts out with a name, remember it. std::string Name; @@ -7002,7 +7001,7 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { TrailingDbgRecord.emplace_back(DR, DeleteDbgRecord); } - FileLoc InstStart(Lex.getTokLineNum(), Lex.getTokColNum()); + FileLoc InstStart(Lex.getTokLineColumnPos()); // This instruction may have three possibilities for a name: a) none // specified, b) name specified "%foo =", c) number specified: "%4 =". 
LocTy NameLoc = Lex.getLoc(); @@ -7054,15 +7053,13 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { TrailingDbgRecord.clear(); if (ParserContext) { ParserContext->addInstructionLocation( - Inst, FileLocRange(InstStart, {Lex.getPrevTokEndLineNum(), - Lex.getPrevTokEndColNum()})); + Inst, FileLocRange(InstStart, Lex.getPrevTokEndLineColumnPos())); } } while (!Inst->isTerminator()); if (ParserContext) ParserContext->addBlockLocation( - BB, FileLocRange(BBStart, {Lex.getPrevTokEndLineNum(), - Lex.getPrevTokEndColNum()})); + BB, FileLocRange(BBStart, Lex.getPrevTokEndLineColumnPos())); assert(TrailingDbgRecord.empty() && "All debug values should have been attached to an instruction."); From 06d526544de7973b5abf4779c08bc2e45c444983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Thu, 4 Sep 2025 09:11:51 +0000 Subject: [PATCH 12/33] Use nullptr as missing value --- .../include/llvm/AsmParser/AsmParserContext.h | 31 ++++++++++++++----- llvm/lib/AsmParser/AsmParserContext.cpp | 20 ++++++------ 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h index 39c0e0d9df4de..092485d23437d 100644 --- a/llvm/include/llvm/AsmParser/AsmParserContext.h +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -36,13 +36,30 @@ class AsmParserContext { std::optional getFunctionLocation(const Function *) const; std::optional getBlockLocation(const BasicBlock *) const; std::optional getInstructionLocation(const Instruction *) const; - std::optional getFunctionAtLocation(const FileLocRange &) const; - std::optional getFunctionAtLocation(const FileLoc &) const; - std::optional getBlockAtLocation(const FileLocRange &) const; - std::optional getBlockAtLocation(const FileLoc &) const; - std::optional - getInstructionAtLocation(const FileLocRange &) const; - std::optional getInstructionAtLocation(const FileLoc &) const; + /// Get the function at the requested 
location range. + /// If no single function occupies the queried range, or the record is + /// missing, a nullptr is returned. + Function *getFunctionAtLocation(const FileLocRange &) const; + /// Get the function at the requested location. + /// If no function occupies the queried location, or the record is missing, a + /// nullptr is returned. + Function *getFunctionAtLocation(const FileLoc &) const; + /// Get the block at the requested location range. + /// If no single block occupies the queried range, or the record is missing, a + /// nullptr is returned. + BasicBlock *getBlockAtLocation(const FileLocRange &) const; + /// Get the block at the requested location. + /// If no block occupies the queried location, or the record is missing, a + /// nullptr is returned. + BasicBlock *getBlockAtLocation(const FileLoc &) const; + /// Get the instruction at the requested location range. + /// If no single instruction occupies the queried range, or the record is + /// missing, a nullptr is returned. + Instruction *getInstructionAtLocation(const FileLocRange &) const; + /// Get the instruction at the requested location. + /// If no instruction occupies the queried location, or the record is missing, + /// a nullptr is returned. 
+ Instruction *getInstructionAtLocation(const FileLoc &) const; bool addFunctionLocation(Function *, const FileLocRange &); bool addBlockLocation(BasicBlock *, const FileLocRange &); bool addInstructionLocation(Instruction *, const FileLocRange &); diff --git a/llvm/lib/AsmParser/AsmParserContext.cpp b/llvm/lib/AsmParser/AsmParserContext.cpp index f5e3d83f5d346..7de2bfc67acfb 100644 --- a/llvm/lib/AsmParser/AsmParserContext.cpp +++ b/llvm/lib/AsmParser/AsmParserContext.cpp @@ -31,44 +31,42 @@ AsmParserContext::getInstructionLocation(const Instruction *I) const { return Instructions.at(I); } -std::optional +Function * AsmParserContext::getFunctionAtLocation(const FileLocRange &Query) const { for (auto &[F, Loc] : Functions) { if (Loc.contains(Query)) return F; } - return std::nullopt; + return nullptr; } -std::optional -AsmParserContext::getFunctionAtLocation(const FileLoc &Query) const { +Function *AsmParserContext::getFunctionAtLocation(const FileLoc &Query) const { return getFunctionAtLocation(FileLocRange(Query, Query)); } -std::optional +BasicBlock * AsmParserContext::getBlockAtLocation(const FileLocRange &Query) const { for (auto &[BB, Loc] : Blocks) { if (Loc.contains(Query)) return BB; } - return std::nullopt; + return nullptr; } -std::optional -AsmParserContext::getBlockAtLocation(const FileLoc &Query) const { +BasicBlock *AsmParserContext::getBlockAtLocation(const FileLoc &Query) const { return getBlockAtLocation(FileLocRange(Query, Query)); } -std::optional +Instruction * AsmParserContext::getInstructionAtLocation(const FileLocRange &Query) const { for (auto &[I, Loc] : Instructions) { if (Loc.contains(Query)) return I; } - return std::nullopt; + return nullptr; } -std::optional +Instruction * AsmParserContext::getInstructionAtLocation(const FileLoc &Query) const { return getInstructionAtLocation(FileLocRange(Query, Query)); } From 4e08921093d0031fa135f5ffff55ff6beabe9e3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Thu, 4 Sep 
2025 09:12:32 +0000 Subject: [PATCH 13/33] Enclose debug prints of tests in LLVM_DEBUG --- llvm/unittests/AsmParser/AsmParserTest.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp index 49f59696709f2..700864c2d12b9 100644 --- a/llvm/unittests/AsmParser/AsmParserTest.cpp +++ b/llvm/unittests/AsmParser/AsmParserTest.cpp @@ -22,6 +22,8 @@ #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" +#define DEBUG_TYPE "Unittest-asm-parser-tests" + using namespace llvm; namespace { @@ -486,14 +488,14 @@ TEST(AsmParserTest, DIExpressionBodyAtBeginningWithSlotMappingParsing) { #define ASSERT_EQ_LOC(Loc1, Loc2) \ do { \ bool AreLocsEqual = Loc1.contains(Loc2) && Loc2.contains(Loc1); \ - if (!AreLocsEqual) { \ + LLVM_DEBUG(if (!AreLocsEqual) { \ dbgs() << #Loc1 " location: " << Loc1.Start.Line << ":" \ << Loc1.Start.Col << " - " << Loc1.End.Line << ":" \ << Loc1.End.Col << "\n"; \ dbgs() << #Loc2 " location: " << Loc2.Start.Line << ":" \ << Loc2.Start.Col << " - " << Loc2.End.Line << ":" \ << Loc2.End.Col << "\n"; \ - } \ + }); \ ASSERT_TRUE(AreLocsEqual); \ } while (false) From 3da9e9db2de1dfcea062522eb2ebdcd7d2eba715 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Mon, 15 Sep 2025 11:42:06 +0000 Subject: [PATCH 14/33] Decapitalize DEBUG_TYPE --- llvm/unittests/AsmParser/AsmParserTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp index 700864c2d12b9..0ca21eaea800f 100644 --- a/llvm/unittests/AsmParser/AsmParserTest.cpp +++ b/llvm/unittests/AsmParser/AsmParserTest.cpp @@ -22,7 +22,7 @@ #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" -#define DEBUG_TYPE "Unittest-asm-parser-tests" +#define DEBUG_TYPE "unittest-asm-parser-tests" using namespace llvm; From 4b3bc0ee923a1c402d1279696d191f1907c51d11 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Fri, 26 Sep 2025 10:11:24 +0000 Subject: [PATCH 15/33] Move FileLoc from Value.h to FileLoc.h --- .../include/llvm/AsmParser/AsmParserContext.h | 1 + llvm/include/llvm/AsmParser/FileLoc.h | 48 +++++++++++++++++++ llvm/include/llvm/IR/Value.h | 33 ------------- 3 files changed, 49 insertions(+), 33 deletions(-) create mode 100644 llvm/include/llvm/AsmParser/FileLoc.h diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h index 092485d23437d..0bc383ff147fd 100644 --- a/llvm/include/llvm/AsmParser/AsmParserContext.h +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -9,6 +9,7 @@ #ifndef LLVM_ASMPARSER_ASMPARSER_STATE_H #define LLVM_ASMPARSER_ASMPARSER_STATE_H +#include "FileLoc.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Value.h" #include diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h new file mode 100644 index 0000000000000..66fd4f21df9c0 --- /dev/null +++ b/llvm/include/llvm/AsmParser/FileLoc.h @@ -0,0 +1,48 @@ +//===-- FileLoc.h ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ASMPARSER_FILELOC_H +#define LLVM_ASMPARSER_FILELOC_H + +#include +#include + +struct FileLoc { + unsigned Line; + unsigned Col; + + bool operator<=(const FileLoc &RHS) const { + return Line < RHS.Line || (Line == RHS.Line && Col <= RHS.Col); + } + + bool operator<(const FileLoc &RHS) const { + return Line < RHS.Line || (Line == RHS.Line && Col < RHS.Col); + } + + FileLoc(unsigned L, unsigned C) : Line(L), Col(C) {} + FileLoc(std::pair LC) : Line(LC.first), Col(LC.second) {} +}; + +struct FileLocRange { + FileLoc Start; + FileLoc End; + + FileLocRange() : Start(0, 0), End(0, 0) {} + + FileLocRange(FileLoc S, FileLoc E) : Start(S), End(E) { + assert(Start <= End); + } + + bool contains(FileLoc L) const { return Start <= L && L <= End; } + + bool contains(FileLocRange LR) const { + return contains(LR.Start) && contains(LR.End); + } +}; + +#endif diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index 2617981cc090c..04d0391c04098 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -55,39 +55,6 @@ class User; using ValueName = StringMapEntry; -struct FileLoc { - unsigned Line; - unsigned Col; - - bool operator<=(const FileLoc &RHS) const { - return Line < RHS.Line || (Line == RHS.Line && Col <= RHS.Col); - } - - bool operator<(const FileLoc &RHS) const { - return Line < RHS.Line || (Line == RHS.Line && Col < RHS.Col); - } - - FileLoc(unsigned L, unsigned C) : Line(L), Col(C) {} - FileLoc(std::pair LC) : Line(LC.first), Col(LC.second) {} -}; - -struct FileLocRange { - FileLoc Start; - FileLoc End; - - FileLocRange() : Start(0, 0), End(0, 0) {} - - FileLocRange(FileLoc S, FileLoc E) : Start(S), End(E) { - assert(Start <= End); - } - - bool contains(FileLoc L) const { return Start <= L && L <= End; } - - bool contains(FileLocRange LR) const { - return 
contains(LR.Start) && contains(LR.End); - } -}; - //===----------------------------------------------------------------------===// // Value Class //===----------------------------------------------------------------------===// From ed7a04a5c6ab05963bdde3285e663280a78434c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Fri, 26 Sep 2025 10:23:44 +0000 Subject: [PATCH 16/33] Rename include guard defines to reflect filename --- llvm/include/llvm/AsmParser/AsmParserContext.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h index 0bc383ff147fd..eb4b9c4013b9a 100644 --- a/llvm/include/llvm/AsmParser/AsmParserContext.h +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ASMPARSER_ASMPARSER_STATE_H -#define LLVM_ASMPARSER_ASMPARSER_STATE_H +#ifndef LLVM_ASMPARSER_ASMPARSERCONTEXT_H +#define LLVM_ASMPARSER_ASMPARSERCONTEXT_H #include "FileLoc.h" #include "llvm/ADT/DenseMap.h" From e6142b5bd2b768d65ee1215e1add309ddeff197e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 1 Oct 2025 12:14:41 +0000 Subject: [PATCH 17/33] include in namespace llvm --- llvm/include/llvm/AsmParser/FileLoc.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h index 66fd4f21df9c0..d209fd56f9bfb 100644 --- a/llvm/include/llvm/AsmParser/FileLoc.h +++ b/llvm/include/llvm/AsmParser/FileLoc.h @@ -12,6 +12,8 @@ #include #include +namespace llvm { + struct FileLoc { unsigned Line; unsigned Col; @@ -45,4 +47,6 @@ struct FileLocRange { } }; +} // namespace llvm + #endif From f5da73c5c3df56034de8caeb7fd4aea256c8c040 Mon Sep 17 00:00:00 2001 From: Bertik23 <39457484+Bertik23@users.noreply.github.com> Date: Mon, 6 Oct 2025 15:20:10 +0200 Subject: [PATCH
18/33] Fix typo in comment Co-authored-by: Nikita Popov --- llvm/include/llvm/AsmParser/AsmParserContext.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h index eb4b9c4013b9a..95d8ab6c8cafe 100644 --- a/llvm/include/llvm/AsmParser/AsmParserContext.h +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -16,7 +16,7 @@ namespace llvm { -/// Registry of file location information for LLVM IR constructs +/// Registry of file location information for LLVM IR constructs. /// /// This class provides access to the file location information /// for various LLVM IR constructs. Currently, it supports Function, From 10a2b75948159f5482103b402602fba0a85e332f Mon Sep 17 00:00:00 2001 From: Bertik23 <39457484+Bertik23@users.noreply.github.com> Date: Mon, 6 Oct 2025 15:22:11 +0200 Subject: [PATCH 19/33] Path to llvm/AsmParser/FileLoc.h Co-authored-by: Nikita Popov --- llvm/include/llvm/AsmParser/AsmParserContext.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h index 95d8ab6c8cafe..54c0de0f1fcf7 100644 --- a/llvm/include/llvm/AsmParser/AsmParserContext.h +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -9,7 +9,7 @@ #ifndef LLVM_ASMPARSER_ASMPARSERCONTEXT_H #define LLVM_ASMPARSER_ASMPARSERCONTEXT_H -#include "FileLoc.h" +#include "llvm/AsmParser/FileLoc.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Value.h" #include From 17b5753f286472c28a4fd6e5374d6c0b397058bb Mon Sep 17 00:00:00 2001 From: Bertik23 <39457484+Bertik23@users.noreply.github.com> Date: Mon, 6 Oct 2025 15:22:43 +0200 Subject: [PATCH 20/33] assert.h -> cassert Co-authored-by: Nikita Popov --- llvm/include/llvm/AsmParser/FileLoc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h index 
d209fd56f9bfb..0a7045a089c95 100644 --- a/llvm/include/llvm/AsmParser/FileLoc.h +++ b/llvm/include/llvm/AsmParser/FileLoc.h @@ -9,7 +9,7 @@ #ifndef LLVM_ASMPARSER_FILELOC_H #define LLVM_ASMPARSER_FILELOC_H -#include +#include #include namespace llvm { From 737c5e02ed1f1350f9c1121ecc522f15719a832f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 08:48:42 +0000 Subject: [PATCH 21/33] Remove filename and emacs marker --- llvm/include/llvm/AsmParser/AsmParserContext.h | 2 +- llvm/lib/AsmParser/AsmParserContext.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h index 54c0de0f1fcf7..89b1626c48b6d 100644 --- a/llvm/include/llvm/AsmParser/AsmParserContext.h +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -1,4 +1,4 @@ -//===-- AsmParserContext.h --------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/AsmParser/AsmParserContext.cpp b/llvm/lib/AsmParser/AsmParserContext.cpp index 7de2bfc67acfb..43f33652efc28 100644 --- a/llvm/lib/AsmParser/AsmParserContext.cpp +++ b/llvm/lib/AsmParser/AsmParserContext.cpp @@ -1,4 +1,4 @@ -//===-- AsmParserContext.cpp ------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
From 72b89e5a4aba84d1ab0f2c648accd1d0e77f3dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 09:22:11 +0000 Subject: [PATCH 22/33] optimize lookup --- llvm/lib/AsmParser/AsmParserContext.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/AsmParser/AsmParserContext.cpp b/llvm/lib/AsmParser/AsmParserContext.cpp index 43f33652efc28..59d3ffcb470e4 100644 --- a/llvm/lib/AsmParser/AsmParserContext.cpp +++ b/llvm/lib/AsmParser/AsmParserContext.cpp @@ -12,23 +12,23 @@ namespace llvm { std::optional AsmParserContext::getFunctionLocation(const Function *F) const { - if (!Functions.contains(F)) - return std::nullopt; - return Functions.at(F); + if (auto FIt = Functions.find(F); FIt != Functions.end()) + return FIt->second; + return std::nullopt; } std::optional AsmParserContext::getBlockLocation(const BasicBlock *BB) const { - if (!Blocks.contains(BB)) - return std::nullopt; - return Blocks.at(BB); + if (auto BBIt = Blocks.find(BB); BBIt != Blocks.end()) + return BBIt->second; + return std::nullopt; } std::optional AsmParserContext::getInstructionLocation(const Instruction *I) const { - if (!Instructions.contains(I)) - return std::nullopt; - return Instructions.at(I); + if (auto IIt = Instructions.find(I); IIt != Instructions.end()) + return IIt->second; + return std::nullopt; } Function * From 41284dfb8403dff7681ce7c4222b8211fa773ce9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 09:50:59 +0000 Subject: [PATCH 23/33] FileLoc docs and fix range --- llvm/include/llvm/AsmParser/FileLoc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h index 0a7045a089c95..155780a076587 100644 --- a/llvm/include/llvm/AsmParser/FileLoc.h +++ b/llvm/include/llvm/AsmParser/FileLoc.h @@ -14,8 +14,11 @@ namespace llvm { +/// Struct holding Line:Column location struct
FileLoc { + // 0-based line number unsigned Line; + // 0-based column number unsigned Col; bool operator<=(const FileLoc &RHS) const { @@ -30,6 +33,7 @@ struct FileLoc { FileLoc(std::pair LC) : Line(LC.first), Col(LC.second) {} }; +// Struct holding a semiopen range [Start; End) struct FileLocRange { FileLoc Start; FileLoc End; @@ -43,7 +47,7 @@ struct FileLocRange { bool contains(FileLoc L) const { return Start <= L && L <= End; } bool contains(FileLocRange LR) const { - return contains(LR.Start) && contains(LR.End); + return Start <= LR.Start && LR.End <= End; } }; From ff9a33d4665b2615a0f49a3cc6de865f1ebe6ef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 09:54:29 +0000 Subject: [PATCH 24/33] full path to includes --- llvm/include/llvm/AsmParser/LLLexer.h | 2 +- llvm/include/llvm/AsmParser/LLParser.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index 5e4d43ebbd4ed..de2c44da9f9d3 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -13,9 +13,9 @@ #ifndef LLVM_ASMPARSER_LLLEXER_H #define LLVM_ASMPARSER_LLLEXER_H -#include "LLToken.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" +#include "llvm/AsmParser/LLToken.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 02460e5e52203..dd3360c022829 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -13,9 +13,9 @@ #ifndef LLVM_ASMPARSER_LLPARSER_H #define LLVM_ASMPARSER_LLPARSER_H -#include "AsmParserContext.h" -#include "LLLexer.h" #include "llvm/ADT/StringMap.h" +#include "llvm/AsmParser/AsmParserContext.h" +#include "llvm/AsmParser/LLLexer.h" #include "llvm/AsmParser/NumberedValues.h" #include "llvm/AsmParser/Parser.h" #include 
"llvm/IR/Attributes.h" From 008ae63dff7a6a6cf01190d154d655790f8f18b4 Mon Sep 17 00:00:00 2001 From: Bertik23 <39457484+Bertik23@users.noreply.github.com> Date: Wed, 8 Oct 2025 12:45:44 +0200 Subject: [PATCH 25/33] Apply suggestion from @nikic Co-authored-by: Nikita Popov --- llvm/include/llvm/AsmParser/LLLexer.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index de2c44da9f9d3..c38bf5d148ed2 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -84,9 +84,7 @@ namespace llvm { // zero-indexed std::pair getTokLineColumnPos() { auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(TokStart)); - --LC.first; - --LC.second; - return LC; + return {LC.first - 1, LC.second - 1}; } // Get the line, column position of the end of the previous token, // zero-indexed exclusive From a44ef20c291a25df7705bf2a9abdbb221dc7a5ce Mon Sep 17 00:00:00 2001 From: Bertik23 <39457484+Bertik23@users.noreply.github.com> Date: Wed, 8 Oct 2025 12:51:24 +0200 Subject: [PATCH 26/33] Typo add period Co-authored-by: Nikita Popov --- llvm/include/llvm/AsmParser/LLParser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index dd3360c022829..9eb31d7e0a451 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -178,7 +178,7 @@ namespace llvm { // Map of module ID to path. std::map ModuleIdMap; - /// Keeps track of source locations for Values, BasicBlocks, and Functions + /// Keeps track of source locations for Values, BasicBlocks, and Functions. 
AsmParserContext *ParserContext; /// Only the llvm-as tool may set this to false to bypass From f201d1f4e1dbeaefb7fa8764272755a3c3919891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 10:26:33 +0000 Subject: [PATCH 27/33] actually fix filelocrange openness --- llvm/include/llvm/AsmParser/FileLoc.h | 2 +- llvm/unittests/AsmParser/AsmParserTest.cpp | 17 ++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h index 155780a076587..d1b22f5187e75 100644 --- a/llvm/include/llvm/AsmParser/FileLoc.h +++ b/llvm/include/llvm/AsmParser/FileLoc.h @@ -44,7 +44,7 @@ struct FileLocRange { assert(Start <= End); } - bool contains(FileLoc L) const { return Start <= L && L <= End; } + bool contains(FileLoc L) const { return Start <= L && L < End; } bool contains(FileLocRange LR) const { return Start <= LR.Start && LR.End <= End; diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp index 0ca21eaea800f..32ca4b2c29493 100644 --- a/llvm/unittests/AsmParser/AsmParserTest.cpp +++ b/llvm/unittests/AsmParser/AsmParserTest.cpp @@ -487,16 +487,11 @@ TEST(AsmParserTest, DIExpressionBodyAtBeginningWithSlotMappingParsing) { #define ASSERT_EQ_LOC(Loc1, Loc2) \ do { \ - bool AreLocsEqual = Loc1.contains(Loc2) && Loc2.contains(Loc1); \ - LLVM_DEBUG(if (!AreLocsEqual) { \ - dbgs() << #Loc1 " location: " << Loc1.Start.Line << ":" \ - << Loc1.Start.Col << " - " << Loc1.End.Line << ":" \ - << Loc1.End.Col << "\n"; \ - dbgs() << #Loc2 " location: " << Loc2.Start.Line << ":" \ - << Loc2.Start.Col << " - " << Loc2.End.Line << ":" \ - << Loc2.End.Col << "\n"; \ - }); \ - ASSERT_TRUE(AreLocsEqual); \ + EXPECT_TRUE(Loc1.contains(Loc2) && Loc2.contains(Loc1)) \ + << #Loc1 " location: " << Loc1.Start.Line << ":" << Loc1.Start.Col \ + << " - " << Loc1.End.Line << ":" << Loc1.End.Col << "\n" \ + << #Loc2 " location: " << 
Loc2.Start.Line << ":" << Loc2.Start.Col \ + << " - " << Loc2.End.Line << ":" << Loc2.End.Col << "\n"; \ } while (false) TEST(AsmParserTest, ParserObjectLocations) { @@ -517,7 +512,7 @@ TEST(AsmParserTest, ParserObjectLocations) { ASSERT_TRUE(MainFn != nullptr); auto MaybeMainLoc = ParserContext.getFunctionLocation(MainFn); - ASSERT_TRUE(MaybeMainLoc.has_value()); + EXPECT_TRUE(MaybeMainLoc.has_value()); auto MainLoc = MaybeMainLoc.value(); auto ExpectedMainLoc = FileLocRange(FileLoc{0, 0}, FileLoc{4, 1}); ASSERT_EQ_LOC(MainLoc, ExpectedMainLoc); From 1de2447dbd77bcd4cd920b6409da72753743484b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 10:37:18 +0000 Subject: [PATCH 28/33] remove old irrelevant comment --- llvm/unittests/AsmParser/AsmParserTest.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp index 32ca4b2c29493..898a8293925b6 100644 --- a/llvm/unittests/AsmParser/AsmParserTest.cpp +++ b/llvm/unittests/AsmParser/AsmParserTest.cpp @@ -495,8 +495,6 @@ TEST(AsmParserTest, DIExpressionBodyAtBeginningWithSlotMappingParsing) { } while (false) TEST(AsmParserTest, ParserObjectLocations) { - // Expected to fail with function location starting one character later, needs - // a fix StringRef Source = "define i32 @main() {\n" "entry:\n" " %a = add i32 1, 2\n" From 4d5183995957e289d0b2b3e14e71890e1e375993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 10:53:11 +0000 Subject: [PATCH 29/33] Doc comments with /// --- llvm/include/llvm/AsmParser/LLLexer.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index c38bf5d148ed2..4801a61c72348 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -29,7 +29,7 @@ namespace llvm { const char *CurPtr; StringRef CurBuf; -
// The the end (exclusive) of the current token + /// The end (exclusive) of the previous token. const char *PrevTokEnd = nullptr; enum class ErrorPriority { @@ -80,14 +80,14 @@ namespace llvm { IgnoreColonInIdentifiers = val; } - // Get the line, column position of the start of the current token, - // zero-indexed + /// Get the line, column position of the start of the current token, + /// zero-indexed std::pair getTokLineColumnPos() { auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(TokStart)); return {LC.first - 1, LC.second - 1}; } - // Get the line, column position of the end of the previous token, - // zero-indexed exclusive + /// Get the line, column position of the end of the previous token, + /// zero-indexed exclusive std::pair getPrevTokEndLineColumnPos() { auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(PrevTokEnd)); --LC.first; From 77385c085357f7b130abb056eb90987bc2bb83c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 10:56:11 +0000 Subject: [PATCH 30/33] Doc comments with /// --- llvm/include/llvm/AsmParser/FileLoc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h index d1b22f5187e75..02c1849fa986e 100644 --- a/llvm/include/llvm/AsmParser/FileLoc.h +++ b/llvm/include/llvm/AsmParser/FileLoc.h @@ -16,9 +16,9 @@ namespace llvm { /// Struct holding Line:Column location struct FileLoc { - // 0-based line number + /// 0-based line number unsigned Line; - // 0-based column number + /// 0-based column number unsigned Col; bool operator<=(const FileLoc &RHS) const { @@ -33,7 +33,7 @@ struct FileLoc { FileLoc(std::pair LC) : Line(LC.first), Col(LC.second) {} }; -// Struct holding a semiopen range [Start; End) +/// Struct holding a semiopen range [Start; End) struct FileLocRange { FileLoc Start; FileLoc End; From 75e5b57d7a5b8c0f1b7f13e00d00d090610bdf51 Mon Sep 17 00:00:00 2001 From:
=?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 11:14:14 +0000 Subject: [PATCH 31/33] Revert changes irrelevant in LLLexer --- llvm/include/llvm/AsmParser/LLLexer.h | 5 - llvm/lib/AsmParser/LLLexer.cpp | 141 ++++++++++++-------------- 2 files changed, 64 insertions(+), 82 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index 4801a61c72348..8f0ae6989d7d4 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -109,12 +109,7 @@ namespace llvm { private: lltok::Kind LexToken(); - // Return closest pointer after `Ptr` that is an end of a label. - // Returns nullptr if `Ptr` doesn't point into a label. - const char *getLabelTail(const char *Ptr); int getNextChar(); - const char *skipNChars(unsigned N); - void advancePositionTo(const char *Ptr); void SkipLineComment(); bool SkipCComment(); lltok::Kind ReadString(lltok::Kind kind); diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 585fc0c0fcaad..10ab4b658553c 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -155,6 +155,17 @@ static bool isLabelChar(char C) { C == '.' || C == '_'; } +/// isLabelTail - Return true if this pointer points to a valid end of a label. +static const char *isLabelTail(const char *CurPtr) { + while (true) { + if (CurPtr[0] == ':') + return CurPtr + 1; + if (!isLabelChar(CurPtr[0])) + return nullptr; + ++CurPtr; + } +} + //===----------------------------------------------------------------------===// // Lexer definition. 
//===----------------------------------------------------------------------===// @@ -165,41 +176,21 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err, CurPtr = CurBuf.begin(); } -const char *LLLexer::getLabelTail(const char *Ptr) { - while (Ptr != CurBuf.end()) { - if (Ptr[0] == ':') - return Ptr + 1; - if (!isLabelChar(Ptr[0])) - return nullptr; - ++Ptr; - } - return nullptr; -} - int LLLexer::getNextChar() { - if (CurPtr == CurBuf.end()) + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr - 1 != CurBuf.end()) + return 0; // Just whitespace. + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. return EOF; - // Increment line number if this is the first character after a newline - return *CurPtr++; -} - -const char *LLLexer::skipNChars(unsigned N) { - while (N--) - getNextChar(); - return CurPtr; -} - -void LLLexer::advancePositionTo(const char *Ptr) { - if (CurBuf.begin() > Ptr) { - CurPtr = CurBuf.begin(); - return; } - if (CurBuf.end() < Ptr) { - CurPtr = CurBuf.end(); - return; - } - - CurPtr = Ptr; } lltok::Kind LLLexer::LexToken() { @@ -208,8 +199,8 @@ lltok::Kind LLLexer::LexToken() { PrevTokEnd = CurPtr; while (true) { TokStart = CurPtr; - int CurChar = getNextChar(); + int CurChar = getNextChar(); switch (CurChar) { default: // Handle letters: [a-zA-Z_] @@ -230,13 +221,13 @@ lltok::Kind LLLexer::LexToken() { case '%': return LexPercent(); case '"': return LexQuote(); case '.': - if (const char *Ptr = getLabelTail(CurPtr)) { - advancePositionTo(Ptr); + if (const char *Ptr = isLabelTail(CurPtr)) { + CurPtr = Ptr; StrVal.assign(TokStart, CurPtr-1); return lltok::LabelStr; } if (CurPtr[0] == '.' 
&& CurPtr[1] == '.') { - skipNChars(2); + CurPtr += 2; return lltok::dotdotdot; } return lltok::Error; @@ -313,15 +304,15 @@ lltok::Kind LLLexer::LexAt() { } lltok::Kind LLLexer::LexDollar() { - if (const char *Ptr = getLabelTail(TokStart)) { - advancePositionTo(Ptr); + if (const char *Ptr = isLabelTail(TokStart)) { + CurPtr = Ptr; StrVal.assign(TokStart, CurPtr - 1); return lltok::LabelStr; } // Handle DollarStringConstant: $\"[^\"]*\" if (CurPtr[0] == '"') { - getNextChar(); + ++CurPtr; while (true) { int CurChar = getNextChar(); @@ -373,11 +364,11 @@ bool LLLexer::ReadVarName() { if (isalpha(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') { - getNextChar(); + ++CurPtr; while (isalnum(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') - getNextChar(); + ++CurPtr; StrVal.assign(NameStart, CurPtr); return true; @@ -391,8 +382,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { if (!isdigit(static_cast(CurPtr[0]))) return lltok::Error; - for (getNextChar(); isdigit(static_cast(CurPtr[0])); - getNextChar()) + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; uint64_t Val = atoull(TokStart + 1, CurPtr); @@ -405,7 +395,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { // Handle StringConstant: \"[^\"]*\" if (CurPtr[0] == '"') { - getNextChar(); + ++CurPtr; while (true) { int CurChar = getNextChar(); @@ -451,7 +441,7 @@ lltok::Kind LLLexer::LexQuote() { return kind; if (CurPtr[0] == ':') { - getNextChar(); + ++CurPtr; if (StringRef(StrVal).contains(0)) { LexError("NUL character is not allowed in names"); kind = lltok::Error; @@ -471,11 +461,11 @@ lltok::Kind LLLexer::LexExclaim() { if (isalpha(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' 
|| CurPtr[0] == '_' || CurPtr[0] == '\\') { - getNextChar(); + ++CurPtr; while (isalnum(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') - getNextChar(); + ++CurPtr; StrVal.assign(TokStart+1, CurPtr); // Skip ! UnEscapeLexed(StrVal); @@ -511,7 +501,7 @@ lltok::Kind LLLexer::LexIdentifier() { const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar; const char *KeywordEnd = nullptr; - for (; isLabelChar(*CurPtr); getNextChar()) { + for (; isLabelChar(*CurPtr); ++CurPtr) { // If we decide this is an integer, remember the end of the sequence. if (!IntEnd && !isdigit(static_cast(*CurPtr))) IntEnd = CurPtr; @@ -523,8 +513,7 @@ lltok::Kind LLLexer::LexIdentifier() { // If we stopped due to a colon, unless we were directed to ignore it, // this really is a label. if (!IgnoreColonInIdentifiers && *CurPtr == ':') { - StrVal.assign(StartChar - 1, CurPtr); - getNextChar(); + StrVal.assign(StartChar - 1, CurPtr++); return lltok::LabelStr; } @@ -532,7 +521,7 @@ lltok::Kind LLLexer::LexIdentifier() { // return it. if (!IntEnd) IntEnd = CurPtr; if (IntEnd != StartChar) { - advancePositionTo(IntEnd); + CurPtr = IntEnd; uint64_t NumBits = atoull(StartChar, CurPtr); if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { @@ -545,7 +534,7 @@ lltok::Kind LLLexer::LexIdentifier() { // Otherwise, this was a letter sequence. See which keyword this is. if (!KeywordEnd) KeywordEnd = CurPtr; - advancePositionTo(KeywordEnd); + CurPtr = KeywordEnd; --StartChar; StringRef Keyword(StartChar, CurPtr - StartChar); @@ -1063,7 +1052,7 @@ lltok::Kind LLLexer::LexIdentifier() { StringRef HexStr(TokStart + 3, len); if (!all_of(HexStr, isxdigit)) { // Bad token, return it as an error. 
- advancePositionTo(TokStart + 3); + CurPtr = TokStart + 3; return lltok::Error; } APInt Tmp(bits, HexStr, 16); @@ -1076,12 +1065,12 @@ lltok::Kind LLLexer::LexIdentifier() { // If this is "cc1234", return this as just "cc". if (TokStart[0] == 'c' && TokStart[1] == 'c') { - advancePositionTo(TokStart + 2); + CurPtr = TokStart + 2; return lltok::kw_cc; } // Finally, if this isn't known, return an error. - advancePositionTo(TokStart + 1); + CurPtr = TokStart + 1; return lltok::Error; } @@ -1094,25 +1083,24 @@ lltok::Kind LLLexer::LexIdentifier() { /// HexHalfConstant 0xH[0-9A-Fa-f]+ /// HexBFloatConstant 0xR[0-9A-Fa-f]+ lltok::Kind LLLexer::Lex0x() { - advancePositionTo(TokStart + 2); + CurPtr = TokStart + 2; char Kind; if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' || CurPtr[0] == 'R') { - Kind = *CurPtr; - getNextChar(); + Kind = *CurPtr++; } else { Kind = 'J'; } if (!isxdigit(static_cast(CurPtr[0]))) { // Bad token, return it as an error. - advancePositionTo(TokStart + 1); + CurPtr = TokStart + 1; return lltok::Error; } while (isxdigit(static_cast(CurPtr[0]))) - getNextChar(); + ++CurPtr; if (Kind == 'J') { // HexFPConstant - Floating point constant represented in IEEE format as a @@ -1167,9 +1155,9 @@ lltok::Kind LLLexer::LexDigitOrNegative() { if (!isdigit(static_cast(TokStart[0])) && !isdigit(static_cast(CurPtr[0]))) { // Okay, this is not a number after the -, it's probably a label. - if (const char *End = getLabelTail(CurPtr)) { + if (const char *End = isLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); - advancePositionTo(End); + CurPtr = End; return lltok::LabelStr; } @@ -1179,13 +1167,13 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // At this point, it is either a label, int or fp constant. // Skip digits, we have at least one. 
- for (; isdigit(static_cast(CurPtr[0])); getNextChar()) + for (; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; // Check if this is a fully-numeric label: if (isdigit(TokStart[0]) && CurPtr[0] == ':') { uint64_t Val = atoull(TokStart, CurPtr); - getNextChar(); // Skip the colon. + ++CurPtr; // Skip the colon. if ((unsigned)Val != Val) LexError("invalid value number (too large)"); UIntVal = unsigned(Val); @@ -1194,9 +1182,9 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // Check to see if this really is a string label, e.g. "-1:". if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { - if (const char *End = getLabelTail(CurPtr)) { + if (const char *End = isLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); - advancePositionTo(End); + CurPtr = End; return lltok::LabelStr; } } @@ -1210,19 +1198,19 @@ lltok::Kind LLLexer::LexDigitOrNegative() { return lltok::APSInt; } - getNextChar(); + ++CurPtr; // Skip over [0-9]*([eE][-+]?[0-9]+)? while (isdigit(static_cast(CurPtr[0]))) - getNextChar(); + ++CurPtr; if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { - skipNChars(2); + CurPtr += 2; while (isdigit(static_cast(CurPtr[0]))) - getNextChar(); + ++CurPtr; } } @@ -1240,29 +1228,28 @@ lltok::Kind LLLexer::LexPositive() { return lltok::Error; // Skip digits. - for (getNextChar(); isdigit(static_cast(CurPtr[0])); - getNextChar()) + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; // At this point, we need a '.'. if (CurPtr[0] != '.') { - advancePositionTo(TokStart + 1); + CurPtr = TokStart + 1; return lltok::Error; } - getNextChar(); + ++CurPtr; // Skip over [0-9]*([eE][-+]?[0-9]+)? 
while (isdigit(static_cast(CurPtr[0]))) - getNextChar(); + ++CurPtr; if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { - skipNChars(2); + CurPtr += 2; while (isdigit(static_cast(CurPtr[0]))) - getNextChar(); + ++CurPtr; } } From 07689fcef17275b4293aff0397a2c3668d01305c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 11:22:23 +0000 Subject: [PATCH 32/33] revert formatting --- llvm/lib/AsmParser/LLLexer.cpp | 39 ++++++++++++++-------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 10ab4b658553c..6d21009ddd7a2 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -158,10 +158,8 @@ static bool isLabelChar(char C) { /// isLabelTail - Return true if this pointer points to a valid end of a label. static const char *isLabelTail(const char *CurPtr) { while (true) { - if (CurPtr[0] == ':') - return CurPtr + 1; - if (!isLabelChar(CurPtr[0])) - return nullptr; + if (CurPtr[0] == ':') return CurPtr+1; + if (!isLabelChar(CurPtr[0])) return nullptr; ++CurPtr; } } @@ -179,16 +177,15 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err, int LLLexer::getNextChar() { char CurChar = *CurPtr++; switch (CurChar) { - default: - return (unsigned char)CurChar; + default: return (unsigned char)CurChar; case 0: // A nul character in the stream is either the end of the current buffer or // a random nul in the file. Disambiguate that here. - if (CurPtr - 1 != CurBuf.end()) - return 0; // Just whitespace. + if (CurPtr-1 != CurBuf.end()) + return 0; // Just whitespace. // Otherwise, return end of file. - --CurPtr; // Another call to lex will return EOF again. + --CurPtr; // Another call to lex will return EOF again. 
return EOF; } } @@ -513,7 +510,7 @@ lltok::Kind LLLexer::LexIdentifier() { // If we stopped due to a colon, unless we were directed to ignore it, // this really is a label. if (!IgnoreColonInIdentifiers && *CurPtr == ':') { - StrVal.assign(StartChar - 1, CurPtr++); + StrVal.assign(StartChar-1, CurPtr++); return lltok::LabelStr; } @@ -1052,7 +1049,7 @@ lltok::Kind LLLexer::LexIdentifier() { StringRef HexStr(TokStart + 3, len); if (!all_of(HexStr, isxdigit)) { // Bad token, return it as an error. - CurPtr = TokStart + 3; + CurPtr = TokStart+3; return lltok::Error; } APInt Tmp(bits, HexStr, 16); @@ -1065,12 +1062,12 @@ lltok::Kind LLLexer::LexIdentifier() { // If this is "cc1234", return this as just "cc". if (TokStart[0] == 'c' && TokStart[1] == 'c') { - CurPtr = TokStart + 2; + CurPtr = TokStart+2; return lltok::kw_cc; } // Finally, if this isn't known, return an error. - CurPtr = TokStart + 1; + CurPtr = TokStart+1; return lltok::Error; } @@ -1095,7 +1092,7 @@ lltok::Kind LLLexer::Lex0x() { if (!isxdigit(static_cast(CurPtr[0]))) { // Bad token, return it as an error. - CurPtr = TokStart + 1; + CurPtr = TokStart+1; return lltok::Error; } @@ -1201,16 +1198,14 @@ lltok::Kind LLLexer::LexDigitOrNegative() { ++CurPtr; // Skip over [0-9]*([eE][-+]?[0-9]+)? - while (isdigit(static_cast(CurPtr[0]))) - ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { CurPtr += 2; - while (isdigit(static_cast(CurPtr[0]))) - ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; } } @@ -1233,23 +1228,21 @@ lltok::Kind LLLexer::LexPositive() { // At this point, we need a '.'. if (CurPtr[0] != '.') { - CurPtr = TokStart + 1; + CurPtr = TokStart+1; return lltok::Error; } ++CurPtr; // Skip over [0-9]*([eE][-+]?[0-9]+)? 
- while (isdigit(static_cast(CurPtr[0]))) - ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { CurPtr += 2; - while (isdigit(static_cast(CurPtr[0]))) - ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; } } From 66ce6b6528f3e862b5ce93b435686465173d30ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= Date: Wed, 8 Oct 2025 13:18:47 +0000 Subject: [PATCH 33/33] make clang-format happy --- llvm/include/llvm/AsmParser/AsmParserContext.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h index 89b1626c48b6d..1a397486cba4f 100644 --- a/llvm/include/llvm/AsmParser/AsmParserContext.h +++ b/llvm/include/llvm/AsmParser/AsmParserContext.h @@ -9,8 +9,8 @@ #ifndef LLVM_ASMPARSER_ASMPARSERCONTEXT_H #define LLVM_ASMPARSER_ASMPARSERCONTEXT_H -#include "llvm/AsmParser/FileLoc.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/AsmParser/FileLoc.h" #include "llvm/IR/Value.h" #include