Skip to content

Commit 5e2ee55

Browse files
committed
Many changes, to address all the review comments:
- Remove "DIL" prefix from DILTokenKind and DILToken.
- Change the token kind from an enum class to an enum inside the Token class.
- Use CamelCase for all the method names.
- Replace Token::SetValues method with assignments.
- Use a StringRef, not std::string, to hold the input string in the lexer.
- Update the lexer to lex all the tokens at one time. Added two new methods for this: LexAll and GetNextToken.
- Made some of the Lexer methods private.
- Replaced StringMap with StringSwitch for fast keyword lookups.
- Updated GetTokenName to directly return StringRefs; removed default case from switch statement.
- Cleaned up code format in IsLetter & IsDigit.
- Updated IsWord to return an iterator range containing the word (if any).
- Updated Lex function (now called by LexAll) to return an llvm::Expected token; removed look_ahead checks; changed the operator lexing to use a vector of operators (as suggested).
- Cleaned up LookAhead method, now that we know all tokens have already been lexed.
- Added helper function to unittests, to help check a sequence of tokens.
- Generally cleaned up the tests to deal with all the code changes.
1 parent 61a2607 commit 5e2ee55

File tree

3 files changed

+302
-291
lines changed

3 files changed

+302
-291
lines changed

lldb/include/lldb/ValueObject/DILLexer.h

Lines changed: 56 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#define LLDB_VALUEOBJECT_DILLEXER_H_
1111

1212
#include "llvm/ADT/StringRef.h"
13+
#include "llvm/ADT/iterator_range.h"
14+
#include "llvm/Support/Error.h"
1315
#include <cstdint>
1416
#include <limits.h>
1517
#include <memory>
@@ -20,94 +22,82 @@ namespace lldb_private {
2022

2123
namespace dil {
2224

23-
enum class TokenKind {
24-
coloncolon,
25-
eof,
26-
identifier,
27-
invalid,
28-
kw_namespace,
29-
l_paren,
30-
none,
31-
r_paren,
32-
unknown,
33-
};
34-
3525
/// Class defining the tokens generated by the DIL lexer and used by the
3626
/// DIL parser.
37-
class DILToken {
27+
class Token {
3828
public:
39-
DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
29+
enum Kind {
30+
coloncolon,
31+
eof,
32+
identifier,
33+
invalid,
34+
kw_namespace,
35+
l_paren,
36+
none,
37+
r_paren,
38+
unknown,
39+
};
40+
41+
Token(Kind kind, std::string spelling, uint32_t start)
4042
: m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
4143

42-
DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {}
44+
Token() : m_kind(Kind::none), m_spelling(""), m_start_pos(0) {}
4345

44-
void setKind(dil::TokenKind kind) { m_kind = kind; }
45-
dil::TokenKind getKind() const { return m_kind; }
46+
void SetKind(Kind kind) { m_kind = kind; }
4647

47-
std::string getSpelling() const { return m_spelling; }
48+
Kind GetKind() const { return m_kind; }
4849

49-
uint32_t getLength() const { return m_spelling.size(); }
50+
std::string GetSpelling() const { return m_spelling; }
5051

51-
bool is(dil::TokenKind kind) const { return m_kind == kind; }
52+
uint32_t GetLength() const { return m_spelling.size(); }
5253

53-
bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
54+
bool Is(Kind kind) const { return m_kind == kind; }
5455

55-
bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
56-
return is(kind1) || is(kind2);
57-
}
56+
bool IsNot(Kind kind) const { return m_kind != kind; }
5857

59-
template <typename... Ts> bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
60-
return is(kind) || isOneOf(Ks...);
61-
}
58+
bool IsOneOf(Kind kind1, Kind kind2) const { return Is(kind1) || Is(kind2); }
6259

63-
uint32_t getLocation() const { return m_start_pos; }
64-
65-
void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
66-
m_kind = kind;
67-
m_spelling = spelling;
68-
m_start_pos = start;
60+
template <typename... Ts> bool IsOneOf(Kind kind, Ts... Ks) const {
61+
return Is(kind) || IsOneOf(Ks...);
6962
}
7063

71-
static const std::string getTokenName(dil::TokenKind kind);
64+
uint32_t GetLocation() const { return m_start_pos; }
65+
66+
static llvm::StringRef GetTokenName(Kind kind);
7267

7368
private:
74-
dil::TokenKind m_kind;
69+
Kind m_kind;
7570
std::string m_spelling;
7671
uint32_t m_start_pos; // within entire expression string
7772
};
7873

7974
/// Class for doing the simple lexing required by DIL.
8075
class DILLexer {
8176
public:
82-
DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) {
77+
DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr) {
8378
m_cur_pos = m_expr.begin();
8479
// Use UINT_MAX to indicate invalid/uninitialized value.
8580
m_tokens_idx = UINT_MAX;
81+
m_invalid_token = Token(Token::invalid, "", 0);
8682
}
8783

88-
bool Lex(DILToken &result, bool look_ahead = false);
89-
90-
bool Is_Word(std::string::iterator start, uint32_t &length);
91-
92-
uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
93-
94-
/// Update 'result' with the other parameter values, create a
95-
/// duplicate token, and push the duplicate token onto the vector of
96-
/// lexed tokens.
97-
void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
98-
std::string tok_str, uint32_t tok_pos);
84+
llvm::Expected<bool> LexAll();
9985

10086
/// Return the lexed token N+1 positions ahead of the 'current' token
10187
/// being handled by the DIL parser.
102-
const DILToken &LookAhead(uint32_t N);
88+
const Token &LookAhead(uint32_t N);
89+
90+
const Token &AcceptLookAhead(uint32_t N);
10391

104-
const DILToken &AcceptLookAhead(uint32_t N);
92+
const Token &GetNextToken();
10593

10694
/// Return the index for the 'current' token being handled by the DIL parser.
10795
uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
10896

10997
/// Return the current token to be handled by the DIL parser.
110-
DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
98+
const Token &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
99+
100+
uint32_t NumLexedTokens() { return m_lexed_tokens.size(); }
111101

112102
/// Update the index for the 'current' token, to point to the next lexed
113103
/// token.
@@ -130,23 +120,35 @@ class DILLexer {
130120
return true;
131121
}
132122

123+
uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
124+
133125
private:
126+
llvm::Expected<Token> Lex();
127+
128+
llvm::iterator_range<llvm::StringRef::iterator> IsWord();
129+
130+
/// Update 'result' with the other parameter values, create a
131+
/// duplicate token, and push the duplicate token onto the vector of
132+
/// lexed tokens.
133+
void UpdateLexedTokens(Token &result, Token::Kind tok_kind,
134+
std::string tok_str, uint32_t tok_pos);
135+
134136
// The input string we are lexing & parsing.
135-
std::string m_expr;
137+
llvm::StringRef m_expr;
136138

137139
// The current position of the lexer within m_expr (the character position,
138140
// within the string, of the next item to be lexed).
139-
std::string::iterator m_cur_pos;
141+
llvm::StringRef::iterator m_cur_pos;
140142

141143
// Holds all of the tokens lexed so far.
142-
std::vector<DILToken> m_lexed_tokens;
144+
std::vector<Token> m_lexed_tokens;
143145

144146
// Index into m_lexed_tokens; indicates which token the DIL parser is
145147
// currently trying to parse/handle.
146148
uint32_t m_tokens_idx;
147149

148150
// "invalid" token; to be returned by lexer when 'look ahead' fails.
149-
DILToken m_invalid_token;
151+
Token m_invalid_token;
150152
};
151153

152154
} // namespace dil

0 commit comments

Comments
 (0)