Skip to content

Commit ccf5203

Browse files
committed
Address latest review comments:
- Remove 'namespace' as a keyword (make it a normal identifier) - Remove 'invalid' and 'none' token types. - Remove unnecessary SetKind and GetLength methods from Tokens. - Re-arrange Lexer: - Give it a static Create method, which pre-lexes all the tokens - Make Lex method static - Pull IsWord method out of Lexer class - Make the Lexer constructor private. - Remove LexAll, GetLocation, UpdateLexedTokens, AcceptLookAhead, GetNextToken, and IncrementTokenIdx methods from Lexer class. - Add new 'Advance' method (to help replace some of the removed methods). - Update indexing in LookAhead (LookAhead(0) now means the 'current' token). - Remove m_cur_pos data member from Lexer class. - Replace m_invalid_token with m_eof_token. - Use 'remainder' StringRef to help with lexing. - Update the unit tests to handle all the code changes in the Lexer. - Update the unit tests to use ASSERT_THAT_EXPECTED to check llvm::Expected return values. - Update the unit tests to use "testing::ElementsAre(testing::Pair ..." to verify all the lexed tokens; also added helper function ExtractTokenData, and deleted function VerifyExpectedTokens.
1 parent 5e2ee55 commit ccf5203

File tree

4 files changed

+187
-296
lines changed

4 files changed

+187
-296
lines changed

lldb/include/lldb/ValueObject/DILLexer.h

Lines changed: 35 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@
1818
#include <string>
1919
#include <vector>
2020

21-
namespace lldb_private {
22-
23-
namespace dil {
21+
namespace lldb_private::dil {
2422

2523
/// Class defining the tokens generated by the DIL lexer and used by the
2624
/// DIL parser.
@@ -30,27 +28,18 @@ class Token {
3028
coloncolon,
3129
eof,
3230
identifier,
33-
invalid,
34-
kw_namespace,
3531
l_paren,
36-
none,
3732
r_paren,
3833
unknown,
3934
};
4035

4136
Token(Kind kind, std::string spelling, uint32_t start)
4237
: m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
4338

44-
Token() : m_kind(Kind::none), m_spelling(""), m_start_pos(0) {}
45-
46-
void SetKind(Kind kind) { m_kind = kind; }
47-
4839
Kind GetKind() const { return m_kind; }
4940

5041
std::string GetSpelling() const { return m_spelling; }
5142

52-
uint32_t GetLength() const { return m_spelling.size(); }
53-
5443
bool Is(Kind kind) const { return m_kind == kind; }
5544

5645
bool IsNot(Kind kind) const { return m_kind != kind; }
@@ -74,85 +63,69 @@ class Token {
7463
/// Class for doing the simple lexing required by DIL.
7564
class DILLexer {
7665
public:
77-
DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr) {
78-
m_cur_pos = m_expr.begin();
79-
// Use UINT_MAX to indicate invalid/uninitialized value.
80-
m_tokens_idx = UINT_MAX;
81-
m_invalid_token = Token(Token::invalid, "", 0);
82-
}
83-
84-
llvm::Expected<bool> LexAll();
85-
86-
/// Return the lexed token N+1 positions ahead of the 'current' token
87-
/// being handled by the DIL parser.
88-
const Token &LookAhead(uint32_t N);
89-
90-
const Token &AcceptLookAhead(uint32_t N);
91-
92-
const Token &GetNextToken();
93-
94-
/// Return the index for the 'current' token being handled by the DIL parser.
95-
uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
66+
/// Lexes all the tokens in expr and calls the private constructor
67+
/// with the lexed tokens.
68+
static llvm::Expected<DILLexer> Create(llvm::StringRef expr);
9669

9770
/// Return the current token to be handled by the DIL parser.
9871
const Token &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
9972

100-
uint32_t NumLexedTokens() { return m_lexed_tokens.size(); }
73+
/// Advance the current token position by N.
74+
void Advance(uint32_t N = 1) {
75+
// UINT_MAX means uninitialized, no "current" position, so move to start.
76+
if (m_tokens_idx == UINT_MAX)
77+
m_tokens_idx = 0;
78+
else if (m_tokens_idx + N >= m_lexed_tokens.size())
79+
// N is too large; advance to the end of the lexed tokens.
80+
m_tokens_idx = m_lexed_tokens.size() - 1;
81+
else
82+
m_tokens_idx += N;
83+
}
10184

102-
/// Update the index for the 'current' token, to point to the next lexed
103-
/// token.
104-
bool IncrementTokenIdx() {
105-
if (m_tokens_idx >= m_lexed_tokens.size() - 1)
106-
return false;
85+
/// Return the lexed token N positions ahead of the 'current' token
86+
/// being handled by the DIL parser.
87+
const Token &LookAhead(uint32_t N) {
88+
if (m_tokens_idx + N < m_lexed_tokens.size())
89+
return m_lexed_tokens[m_tokens_idx + N];
10790

108-
m_tokens_idx++;
109-
return true;
91+
return m_eof_token;
11092
}
11193

94+
/// Return the index for the 'current' token being handled by the DIL parser.
95+
uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
96+
11297
/// Set the index for the 'current' token (to be handled by the parser)
11398
/// to a particular position. Used for either committing 'look ahead' parsing
11499
/// or rolling back tentative parsing.
115-
bool ResetTokenIdx(uint32_t new_value) {
116-
if (new_value > m_lexed_tokens.size() - 1)
117-
return false;
118-
100+
void ResetTokenIdx(uint32_t new_value) {
101+
assert(new_value == UINT_MAX || new_value < m_lexed_tokens.size());
119102
m_tokens_idx = new_value;
120-
return true;
121103
}
122104

123-
uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
105+
uint32_t NumLexedTokens() { return m_lexed_tokens.size(); }
124106

125107
private:
126-
llvm::Expected<Token> Lex();
108+
DILLexer(llvm::StringRef dil_expr, std::vector<Token> lexed_tokens)
109+
: m_expr(dil_expr), m_lexed_tokens(lexed_tokens), m_tokens_idx(UINT_MAX),
110+
m_eof_token(Token(Token::eof, "", 0)) {}
127111

128-
llvm::iterator_range<llvm::StringRef::iterator> IsWord();
129-
130-
/// Update 'result' with the other paremeter values, create a
131-
/// duplicate token, and push the duplicate token onto the vector of
132-
/// lexed tokens.
133-
void UpdateLexedTokens(Token &result, Token::Kind tok_kind,
134-
std::string tok_str, uint32_t tok_pos);
112+
static llvm::Expected<Token> Lex(llvm::StringRef expr,
113+
llvm::StringRef &remainder);
135114

136115
// The input string we are lexing & parsing.
137116
llvm::StringRef m_expr;
138117

139-
// The current position of the lexer within m_expr (the character position,
140-
// within the string, of the next item to be lexed).
141-
llvm::StringRef::iterator m_cur_pos;
142-
143118
// Holds all of the tokens lexed so far.
144119
std::vector<Token> m_lexed_tokens;
145120

146121
// Index into m_lexed_tokens; indicates which token the DIL parser is
147122
// currently trying to parse/handle.
148123
uint32_t m_tokens_idx;
149124

150-
// "invalid" token; to be returned by lexer when 'look ahead' fails.
151-
Token m_invalid_token;
125+
// "eof" token; to be returned by lexer when 'look ahead' fails.
126+
Token m_eof_token;
152127
};
153128

154-
} // namespace dil
155-
156-
} // namespace lldb_private
129+
} // namespace lldb_private::dil
157130

158131
#endif // LLDB_VALUEOBJECT_DILLEXER_H_

lldb/source/ValueObject/DILLexer.cpp

Lines changed: 48 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
#include "lldb/Utility/Status.h"
1616
#include "llvm/ADT/StringSwitch.h"
1717

18-
namespace lldb_private {
19-
20-
namespace dil {
18+
namespace lldb_private::dil {
2119

2220
llvm::StringRef Token::GetTokenName(Kind kind) {
2321
switch (kind) {
@@ -27,14 +25,8 @@ llvm::StringRef Token::GetTokenName(Kind kind) {
2725
return "eof";
2826
case Kind::identifier:
2927
return "identifier";
30-
case Kind::invalid:
31-
return "invalid";
32-
case Kind::kw_namespace:
33-
return "namespace";
3428
case Kind::l_paren:
3529
return "l_paren";
36-
case Kind::none:
37-
return "none";
3830
case Kind::r_paren:
3931
return "r_paren";
4032
case Kind::unknown:
@@ -50,140 +42,91 @@ static bool IsDigit(char c) { return '0' <= c && c <= '9'; }
5042

5143
// A word starts with a letter, underscore, or dollar sign, followed by
5244
// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
53-
llvm::iterator_range<llvm::StringRef::iterator> DILLexer::IsWord() {
54-
llvm::StringRef::iterator start = m_cur_pos;
45+
static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
46+
llvm::StringRef &remainder) {
47+
llvm::StringRef::iterator cur_pos = expr.end() - remainder.size();
48+
llvm::StringRef::iterator start = cur_pos;
5549
bool dollar_start = false;
5650

5751
// Must not start with a digit.
58-
if (m_cur_pos == m_expr.end() || IsDigit(*m_cur_pos))
59-
return llvm::make_range(m_cur_pos, m_cur_pos);
52+
if (cur_pos == expr.end() || IsDigit(*cur_pos))
53+
return std::nullopt;
6054

6155
// First character *may* be a '$', for a register name or convenience
6256
// variable.
63-
if (*m_cur_pos == '$') {
57+
if (*cur_pos == '$') {
6458
dollar_start = true;
65-
++m_cur_pos;
59+
++cur_pos;
6660
}
6761

6862
// Contains only letters, digits or underscores
69-
for (; m_cur_pos != m_expr.end(); ++m_cur_pos) {
70-
char c = *m_cur_pos;
63+
for (; cur_pos != expr.end(); ++cur_pos) {
64+
char c = *cur_pos;
7165
if (!IsLetter(c) && !IsDigit(c) && c != '_')
7266
break;
7367
}
7468

7569
// If first char is '$', make sure there's at least one more char, or it's
7670
// invalid.
77-
if (dollar_start && (m_cur_pos - start <= 1)) {
78-
m_cur_pos = start;
79-
return llvm::make_range(start, start); // Empty range
71+
if (dollar_start && (cur_pos - start <= 1)) {
72+
cur_pos = start;
73+
return std::nullopt;
8074
}
8175

82-
return llvm::make_range(start, m_cur_pos);
83-
}
76+
if (cur_pos == start)
77+
return std::nullopt;
78+
79+
llvm::StringRef word = expr.substr(start - expr.begin(), cur_pos - start);
80+
if (remainder.consume_front(word))
81+
return word;
8482

85-
void DILLexer::UpdateLexedTokens(Token &result, Token::Kind tok_kind,
86-
std::string tok_str, uint32_t tok_pos) {
87-
Token new_token(tok_kind, tok_str, tok_pos);
88-
result = new_token;
89-
m_lexed_tokens.push_back(std::move(new_token));
83+
return std::nullopt;
9084
}
9185

92-
llvm::Expected<bool> DILLexer::LexAll() {
93-
bool done = false;
94-
while (!done) {
95-
auto tok_or_err = Lex();
96-
if (!tok_or_err)
97-
return tok_or_err.takeError();
98-
Token token = *tok_or_err;
99-
if (token.GetKind() == Token::eof) {
100-
done = true;
86+
llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
87+
std::vector<Token> tokens;
88+
llvm::StringRef remainder = expr;
89+
do {
90+
if (llvm::Expected<Token> t = Lex(expr, remainder)) {
91+
tokens.push_back(std::move(*t));
92+
} else {
93+
return t.takeError();
10194
}
102-
}
103-
return true;
95+
} while (tokens.back().GetKind() != Token::eof);
96+
return DILLexer(expr, std::move(tokens));
10497
}
10598

106-
llvm::Expected<Token> DILLexer::Lex() {
107-
Token result;
108-
99+
llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
100+
llvm::StringRef &remainder) {
109101
// Skip over leading whitespace.
110-
while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ')
111-
m_cur_pos++;
102+
remainder = remainder.ltrim();
103+
llvm::StringRef::iterator cur_pos = expr.end() - remainder.size();
112104

113105
// Check to see if we've reached the end of our input string.
114-
if (m_cur_pos == m_expr.end()) {
115-
UpdateLexedTokens(result, Token::eof, "", (uint32_t)m_expr.size());
116-
return result;
106+
if (remainder.empty() || cur_pos == expr.end())
107+
return Token(Token::eof, "", (uint32_t)expr.size());
108+
109+
uint32_t position = cur_pos - expr.begin();
110+
std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
111+
if (maybe_word) {
112+
llvm::StringRef word = *maybe_word;
113+
return Token(Token::identifier, word.str(), position);
117114
}
118115

119-
uint32_t position = m_cur_pos - m_expr.begin();
120-
llvm::StringRef::iterator start = m_cur_pos;
121-
llvm::iterator_range<llvm::StringRef::iterator> word_range = IsWord();
122-
if (!word_range.empty()) {
123-
uint32_t length = word_range.end() - word_range.begin();
124-
llvm::StringRef word(m_expr.substr(position, length));
125-
// We will be adding more keywords here in the future...
126-
Token::Kind kind = llvm::StringSwitch<Token::Kind>(word)
127-
.Case("namespace", Token::kw_namespace)
128-
.Default(Token::identifier);
129-
UpdateLexedTokens(result, kind, word.str(), position);
130-
return result;
131-
}
132-
133-
m_cur_pos = start;
134-
llvm::StringRef remainder(m_expr.substr(position, m_expr.end() - m_cur_pos));
135-
std::vector<std::pair<Token::Kind, const char *>> operators = {
116+
constexpr std::pair<Token::Kind, const char *> operators[] = {
136117
{Token::l_paren, "("},
137118
{Token::r_paren, ")"},
138119
{Token::coloncolon, "::"},
139120
};
140121
for (auto [kind, str] : operators) {
141122
if (remainder.consume_front(str)) {
142-
m_cur_pos += strlen(str);
143-
UpdateLexedTokens(result, kind, str, position);
144-
return result;
123+
cur_pos += strlen(str);
124+
return Token(kind, str, position);
145125
}
146126
}
147127

148128
// Unrecognized character(s) in string; unable to lex it.
149-
Status error("Unable to lex input string");
150-
return error.ToError();
151-
}
152-
153-
const Token &DILLexer::LookAhead(uint32_t N) {
154-
if (m_tokens_idx + N + 1 < m_lexed_tokens.size())
155-
return m_lexed_tokens[m_tokens_idx + N + 1];
156-
157-
return m_invalid_token;
129+
return llvm::createStringError("Unable to lex input string");
158130
}
159131

160-
const Token &DILLexer::AcceptLookAhead(uint32_t N) {
161-
if (m_tokens_idx + N + 1 > m_lexed_tokens.size())
162-
return m_invalid_token;
163-
164-
m_tokens_idx += N + 1;
165-
return m_lexed_tokens[m_tokens_idx];
166-
}
167-
168-
const Token &DILLexer::GetNextToken() {
169-
if (m_tokens_idx == UINT_MAX)
170-
m_tokens_idx = 0;
171-
else
172-
m_tokens_idx++;
173-
174-
// Return the next token in the vector of lexed tokens.
175-
if (m_tokens_idx < m_lexed_tokens.size())
176-
return m_lexed_tokens[m_tokens_idx];
177-
178-
// We're already at/beyond the end of our lexed tokens. If the last token
179-
// is an eof token, return it.
180-
if (m_lexed_tokens[m_lexed_tokens.size() - 1].GetKind() == Token::eof)
181-
return m_lexed_tokens[m_lexed_tokens.size() - 1];
182-
183-
// Return the invalid token.
184-
return m_invalid_token;
185-
}
186-
187-
} // namespace dil
188-
189-
} // namespace lldb_private
132+
} // namespace lldb_private::dil

lldb/unittests/ValueObject/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ add_lldb_unittest(LLDBValueObjectTests
66
lldbValueObject
77
lldbPluginPlatformLinux
88
lldbPluginScriptInterpreterNone
9+
LLVMTestingSupport
910

1011
LINK_COMPONENTS
1112
Support

0 commit comments

Comments
 (0)