Skip to content

Commit 7378fd2

Browse files
Bertik23 authored and github-actions[bot] committed
Automerge: [LLVM][IR] Add location tracking to LLVM IR parser (#155797)
This PR is part of the LLVM IR LSP server project ([RFC](https://discourse.llvm.org/t/rfc-ir-visualization-with-vs-code-extension-using-an-lsp-server/87773)). To be able to build an LSP server, it is crucial to have location information about the LLVM objects (Functions, BasicBlocks and Instructions). This PR adds:
* Position tracking to the Lexer
* A new AsmParserContext class to hold the new position info
* Tests to check that the locations are correct

The AsmParserContext can be passed as an optional parameter to the parser, which populates it; it can then be used by other tools, such as the LSP server. The AsmParserContext idea was borrowed from MLIR, as we didn't want to store data that no one else uses inside the objects themselves. The implementation is different, however: this class holds several maps that map Functions, BasicBlocks and Instructions to their locations. Some utility methods were also added to get the positions of the processed tokens.
2 parents ffa98aa + 18d4ba5 commit 7378fd2

File tree

13 files changed

+367
-42
lines changed

13 files changed

+367
-42
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_ASMPARSER_ASMPARSERCONTEXT_H
10+
#define LLVM_ASMPARSER_ASMPARSERCONTEXT_H
11+
12+
#include "llvm/ADT/DenseMap.h"
13+
#include "llvm/AsmParser/FileLoc.h"
14+
#include "llvm/IR/Value.h"
15+
#include <optional>
16+
17+
namespace llvm {
18+
19+
/// Registry of file location information for LLVM IR constructs.
20+
///
21+
/// This class provides access to the file location information
22+
/// for various LLVM IR constructs. Currently, it supports Function,
23+
/// BasicBlock and Instruction locations.
24+
///
25+
/// When available, it can answer queries about what is at a given
26+
/// file location, as well as where in a file a given IR construct
27+
/// is.
28+
///
29+
/// This information is optionally emitted by the LLParser while
30+
/// it reads LLVM textual IR.
31+
class AsmParserContext {
32+
DenseMap<Function *, FileLocRange> Functions;
33+
DenseMap<BasicBlock *, FileLocRange> Blocks;
34+
DenseMap<Instruction *, FileLocRange> Instructions;
35+
36+
public:
37+
std::optional<FileLocRange> getFunctionLocation(const Function *) const;
38+
std::optional<FileLocRange> getBlockLocation(const BasicBlock *) const;
39+
std::optional<FileLocRange> getInstructionLocation(const Instruction *) const;
40+
/// Get the function at the requested location range.
41+
/// If no single function occupies the queried range, or the record is
42+
/// missing, a nullptr is returned.
43+
Function *getFunctionAtLocation(const FileLocRange &) const;
44+
/// Get the function at the requested location.
45+
/// If no function occupies the queried location, or the record is missing, a
46+
/// nullptr is returned.
47+
Function *getFunctionAtLocation(const FileLoc &) const;
48+
/// Get the block at the requested location range.
49+
/// If no single block occupies the queried range, or the record is missing, a
50+
/// nullptr is returned.
51+
BasicBlock *getBlockAtLocation(const FileLocRange &) const;
52+
/// Get the block at the requested location.
53+
/// If no block occupies the queried location, or the record is missing, a
54+
/// nullptr is returned.
55+
BasicBlock *getBlockAtLocation(const FileLoc &) const;
56+
/// Get the instruction at the requested location range.
57+
/// If no single instruction occupies the queried range, or the record is
58+
/// missing, a nullptr is returned.
59+
Instruction *getInstructionAtLocation(const FileLocRange &) const;
60+
/// Get the instruction at the requested location.
61+
/// If no instruction occupies the queried location, or the record is missing,
62+
/// a nullptr is returned.
63+
Instruction *getInstructionAtLocation(const FileLoc &) const;
64+
bool addFunctionLocation(Function *, const FileLocRange &);
65+
bool addBlockLocation(BasicBlock *, const FileLocRange &);
66+
bool addInstructionLocation(Instruction *, const FileLocRange &);
67+
};
68+
} // namespace llvm
69+
70+
#endif
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
//===-- FileLoc.h ---------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_ASMPARSER_FILELOC_H
10+
#define LLVM_ASMPARSER_FILELOC_H
11+
12+
#include <cassert>
13+
#include <utility>
14+
15+
namespace llvm {
16+
17+
/// Struct holding Line:Column location
///
/// Locations are ordered lexicographically: by line first, then by column.
struct FileLoc {
  /// 0-based line number
  unsigned Line;
  /// 0-based column number
  unsigned Col;

  FileLoc(unsigned L, unsigned C) : Line(L), Col(C) {}

  /// Build a location from a (line, column) pair. Left non-explicit so that
  /// existing implicit conversions from such a pair keep compiling.
  FileLoc(std::pair<unsigned, unsigned> LC) : Line(LC.first), Col(LC.second) {}

  bool operator==(const FileLoc &RHS) const {
    return Line == RHS.Line && Col == RHS.Col;
  }

  bool operator!=(const FileLoc &RHS) const { return !(*this == RHS); }

  bool operator<(const FileLoc &RHS) const {
    // The line decides unless both locations share it.
    if (Line != RHS.Line)
      return Line < RHS.Line;
    return Col < RHS.Col;
  }

  bool operator<=(const FileLoc &RHS) const {
    if (Line != RHS.Line)
      return Line < RHS.Line;
    return Col <= RHS.Col;
  }
};
35+
36+
/// Struct holding a semiopen range [Start; End)
37+
struct FileLocRange {
38+
FileLoc Start;
39+
FileLoc End;
40+
41+
FileLocRange() : Start(0, 0), End(0, 0) {}
42+
43+
FileLocRange(FileLoc S, FileLoc E) : Start(S), End(E) {
44+
assert(Start <= End);
45+
}
46+
47+
bool contains(FileLoc L) const { return Start <= L && L < End; }
48+
49+
bool contains(FileLocRange LR) const {
50+
return Start <= LR.Start && LR.End <= End;
51+
}
52+
};
53+
54+
} // namespace llvm
55+
56+
#endif

llvm/include/llvm/AsmParser/LLLexer.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,25 @@
1313
#ifndef LLVM_ASMPARSER_LLLEXER_H
1414
#define LLVM_ASMPARSER_LLLEXER_H
1515

16-
#include "LLToken.h"
1716
#include "llvm/ADT/APFloat.h"
1817
#include "llvm/ADT/APSInt.h"
18+
#include "llvm/AsmParser/LLToken.h"
1919
#include "llvm/Support/SMLoc.h"
20+
#include "llvm/Support/SourceMgr.h"
2021
#include <string>
2122

2223
namespace llvm {
2324
class Type;
2425
class SMDiagnostic;
25-
class SourceMgr;
2626
class LLVMContext;
2727

2828
class LLLexer {
2929
const char *CurPtr;
3030
StringRef CurBuf;
3131

32+
/// The end (exclusive) of the previous token.
33+
const char *PrevTokEnd = nullptr;
34+
3235
enum class ErrorPriority {
3336
None, // No error message present.
3437
Parser, // Errors issued by parser.
@@ -62,9 +65,7 @@ namespace llvm {
6265
explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &,
6366
LLVMContext &C);
6467

65-
lltok::Kind Lex() {
66-
return CurKind = LexToken();
67-
}
68+
lltok::Kind Lex() { return CurKind = LexToken(); }
6869

6970
typedef SMLoc LocTy;
7071
LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
@@ -79,6 +80,19 @@ namespace llvm {
7980
IgnoreColonInIdentifiers = val;
8081
}
8182

83+
/// Get the line, column position of the start of the current token,
84+
/// zero-indexed
85+
std::pair<unsigned, unsigned> getTokLineColumnPos() {
86+
auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(TokStart));
87+
return {LC.first - 1, LC.second - 1};
88+
}
89+
/// Get the line, column position of the end of the previous token,
90+
/// zero-indexed exclusive
91+
std::pair<unsigned, unsigned> getPrevTokEndLineColumnPos() {
92+
auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(PrevTokEnd));
93+
return {LC.first - 1, LC.second - 1};
94+
}
95+
8296
// This returns true as a convenience for the parser functions that return
8397
// true on error.
8498
bool ParseError(LocTy ErrorLoc, const Twine &Msg) {

llvm/include/llvm/AsmParser/LLParser.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
#ifndef LLVM_ASMPARSER_LLPARSER_H
1414
#define LLVM_ASMPARSER_LLPARSER_H
1515

16-
#include "LLLexer.h"
1716
#include "llvm/ADT/StringMap.h"
17+
#include "llvm/AsmParser/AsmParserContext.h"
18+
#include "llvm/AsmParser/LLLexer.h"
1819
#include "llvm/AsmParser/NumberedValues.h"
1920
#include "llvm/AsmParser/Parser.h"
2021
#include "llvm/IR/Attributes.h"
@@ -177,6 +178,9 @@ namespace llvm {
177178
// Map of module ID to path.
178179
std::map<unsigned, StringRef> ModuleIdMap;
179180

181+
/// Optional registry of source locations for Functions, BasicBlocks, and
/// Instructions. Null when the caller did not ask for location tracking.
182+
AsmParserContext *ParserContext;
183+
180184
/// Only the llvm-as tool may set this to false to bypass
181185
/// UpgradeDebuginfo so it can generate broken bitcode.
182186
bool UpgradeDebugInfo;
@@ -189,10 +193,11 @@ namespace llvm {
189193
public:
190194
LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *M,
191195
ModuleSummaryIndex *Index, LLVMContext &Context,
192-
SlotMapping *Slots = nullptr)
196+
SlotMapping *Slots = nullptr,
197+
AsmParserContext *ParserContext = nullptr)
193198
: Context(Context), OPLex(F, SM, Err, Context),
194199
Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots),
195-
BlockAddressPFS(nullptr) {}
200+
BlockAddressPFS(nullptr), ParserContext(ParserContext) {}
196201
bool Run(
197202
bool UpgradeDebugInfo,
198203
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {

llvm/include/llvm/AsmParser/Parser.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "llvm/ADT/STLFunctionalExtras.h"
1717
#include "llvm/ADT/StringRef.h"
18+
#include "llvm/AsmParser/AsmParserContext.h"
1819
#include "llvm/Support/Compiler.h"
1920
#include <memory>
2021
#include <optional>
@@ -62,7 +63,8 @@ parseAssemblyFile(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
6263
/// parsing.
6364
LLVM_ABI std::unique_ptr<Module>
6465
parseAssemblyString(StringRef AsmString, SMDiagnostic &Err,
65-
LLVMContext &Context, SlotMapping *Slots = nullptr);
66+
LLVMContext &Context, SlotMapping *Slots = nullptr,
67+
AsmParserContext *ParserContext = nullptr);
6668

6769
/// Holds the Module and ModuleSummaryIndex returned by the interfaces
6870
/// that parse both.
@@ -128,9 +130,9 @@ parseSummaryIndexAssemblyString(StringRef AsmString, SMDiagnostic &Err);
128130
LLVM_ABI std::unique_ptr<Module> parseAssembly(
129131
MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context,
130132
SlotMapping *Slots = nullptr,
131-
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
132-
return std::nullopt;
133-
});
133+
DataLayoutCallbackTy DataLayoutCallback =
134+
[](StringRef, StringRef) { return std::nullopt; },
135+
AsmParserContext *ParserContext = nullptr);
134136

135137
/// Parse LLVM Assembly including the summary index from a MemoryBuffer.
136138
///
@@ -169,9 +171,9 @@ parseSummaryIndexAssembly(MemoryBufferRef F, SMDiagnostic &Err);
169171
LLVM_ABI bool parseAssemblyInto(
170172
MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err,
171173
SlotMapping *Slots = nullptr,
172-
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
173-
return std::nullopt;
174-
});
174+
DataLayoutCallbackTy DataLayoutCallback =
175+
[](StringRef, StringRef) { return std::nullopt; },
176+
AsmParserContext *ParserContext = nullptr);
175177

176178
/// Parse a type and a constant value in the given string.
177179
///

llvm/include/llvm/IRReader/IRReader.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_IRREADER_IRREADER_H
1616

1717
#include "llvm/ADT/StringRef.h"
18+
#include "llvm/AsmParser/AsmParserContext.h"
1819
#include "llvm/Bitcode/BitcodeReader.h"
1920
#include "llvm/Support/Compiler.h"
2021
#include <memory>
@@ -50,19 +51,19 @@ getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
5051
/// for it. Otherwise, attempt to parse it as LLVM Assembly and return
5152
/// a Module for it.
5253
/// \param DataLayoutCallback Override datalayout in the llvm assembly.
53-
LLVM_ABI std::unique_ptr<Module> parseIR(MemoryBufferRef Buffer,
54-
SMDiagnostic &Err,
55-
LLVMContext &Context,
56-
ParserCallbacks Callbacks = {});
54+
LLVM_ABI std::unique_ptr<Module>
55+
parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, LLVMContext &Context,
56+
ParserCallbacks Callbacks = {},
57+
AsmParserContext *ParserContext = nullptr);
5758

5859
/// If the given file holds a bitcode image, return a Module for it.
5960
/// Otherwise, attempt to parse it as LLVM Assembly and return a Module
6061
/// for it.
6162
/// \param DataLayoutCallback Override datalayout in the llvm assembly.
62-
LLVM_ABI std::unique_ptr<Module> parseIRFile(StringRef Filename,
63-
SMDiagnostic &Err,
64-
LLVMContext &Context,
65-
ParserCallbacks Callbacks = {});
63+
LLVM_ABI std::unique_ptr<Module>
64+
parseIRFile(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
65+
ParserCallbacks Callbacks = {},
66+
AsmParserContext *ParserContext = nullptr);
6667
}
6768

6869
#endif
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/AsmParser/AsmParserContext.h"
10+
11+
namespace llvm {
12+
13+
std::optional<FileLocRange>
14+
AsmParserContext::getFunctionLocation(const Function *F) const {
15+
if (auto FIt = Functions.find(F); FIt != Functions.end())
16+
return FIt->second;
17+
return std::nullopt;
18+
}
19+
20+
std::optional<FileLocRange>
21+
AsmParserContext::getBlockLocation(const BasicBlock *BB) const {
22+
if (auto BBIt = Blocks.find(BB); BBIt != Blocks.end())
23+
return BBIt->second;
24+
return std::nullopt;
25+
}
26+
27+
std::optional<FileLocRange>
28+
AsmParserContext::getInstructionLocation(const Instruction *I) const {
29+
if (auto IIt = Instructions.find(I); IIt != Instructions.end())
30+
return IIt->second;
31+
return std::nullopt;
32+
}
33+
34+
/// Scan the recorded functions and return the first one encountered whose
/// range fully contains \p Query; nullptr when there is no such function.
Function *
AsmParserContext::getFunctionAtLocation(const FileLocRange &Query) const {
  for (auto It = Functions.begin(), End = Functions.end(); It != End; ++It)
    if (It->second.contains(Query))
      return It->first;
  return nullptr;
}
42+
43+
/// Return the function whose recorded range contains the position \p Query,
/// or nullptr when no recorded function does.
///
/// The position is tested directly against each semiopen range
/// [Start; End). Wrapping it in an empty range and using the range overload
/// of contains() would also accept a position equal to End, so a position on
/// the boundary between two adjacent records could match both.
Function *AsmParserContext::getFunctionAtLocation(const FileLoc &Query) const {
  for (const auto &[F, Loc] : Functions)
    if (Loc.contains(Query))
      return F;
  return nullptr;
}
46+
47+
/// Scan the recorded blocks and return the first one encountered whose range
/// fully contains \p Query; nullptr when there is no such block.
BasicBlock *
AsmParserContext::getBlockAtLocation(const FileLocRange &Query) const {
  for (auto It = Blocks.begin(), End = Blocks.end(); It != End; ++It)
    if (It->second.contains(Query))
      return It->first;
  return nullptr;
}
55+
56+
/// Return the block whose recorded range contains the position \p Query, or
/// nullptr when no recorded block does.
///
/// The position is tested directly against each semiopen range
/// [Start; End). Wrapping it in an empty range and using the range overload
/// of contains() would also accept a position equal to End, so a position on
/// the boundary between two adjacent records could match both.
BasicBlock *AsmParserContext::getBlockAtLocation(const FileLoc &Query) const {
  for (const auto &[BB, Loc] : Blocks)
    if (Loc.contains(Query))
      return BB;
  return nullptr;
}
59+
60+
/// Scan the recorded instructions and return the first one encountered whose
/// range fully contains \p Query; nullptr when there is no such instruction.
Instruction *
AsmParserContext::getInstructionAtLocation(const FileLocRange &Query) const {
  for (auto It = Instructions.begin(), End = Instructions.end(); It != End;
       ++It)
    if (It->second.contains(Query))
      return It->first;
  return nullptr;
}
68+
69+
/// Return the instruction whose recorded range contains the position
/// \p Query, or nullptr when no recorded instruction does.
///
/// The position is tested directly against each semiopen range
/// [Start; End). Wrapping it in an empty range and using the range overload
/// of contains() would also accept a position equal to End, so a position on
/// the boundary between two adjacent records could match both.
Instruction *
AsmParserContext::getInstructionAtLocation(const FileLoc &Query) const {
  for (const auto &[I, Loc] : Instructions)
    if (Loc.contains(Query))
      return I;
  return nullptr;
}
73+
74+
/// Record \p Loc for \p F. Returns false, leaving the existing record
/// untouched, when \p F already has a location.
bool AsmParserContext::addFunctionLocation(Function *F,
                                           const FileLocRange &Loc) {
  const auto Result = Functions.insert({F, Loc});
  return Result.second;
}
78+
79+
/// Record \p Loc for \p BB. Returns false, leaving the existing record
/// untouched, when \p BB already has a location.
bool AsmParserContext::addBlockLocation(BasicBlock *BB,
                                        const FileLocRange &Loc) {
  const auto Result = Blocks.insert({BB, Loc});
  return Result.second;
}
83+
84+
/// Record \p Loc for \p I. Returns false, leaving the existing record
/// untouched, when \p I already has a location.
bool AsmParserContext::addInstructionLocation(Instruction *I,
                                              const FileLocRange &Loc) {
  const auto Result = Instructions.insert({I, Loc});
  return Result.second;
}
88+
89+
} // namespace llvm

llvm/lib/AsmParser/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# AsmParser
22
add_llvm_component_library(LLVMAsmParser
3+
AsmParserContext.cpp
34
LLLexer.cpp
45
LLParser.cpp
56
Parser.cpp

0 commit comments

Comments
 (0)