Skip to content

Commit de364d8

Browse files
committed
review: remove api for pre-allocating tokens
- we want to prevent the pre-allocation of all the tokens and rather let the parser allocate the tokens as it goes along - this allows for a quicker cycle to the user if there is a parsing error as we don't need to lex all of the tokens before trying to parse - as such, this change provides a new api for interacting with the lexer through ConsumeToken and PeekNextToken as opposed to generated the entire list of lexed tokens
1 parent f557d5a commit de364d8

File tree

5 files changed

+143
-53
lines changed

5 files changed

+143
-53
lines changed

clang/include/clang/Basic/DiagnosticLexKinds.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,7 @@ let CategoryName = "Root Signature Lexical Issue" in {
10251025
Error<"integer literal is too large to be represented in any %select{signed |}0 integer type">;
10261026
def err_hlsl_invalid_token: Error<"unable to lex a valid Root Signature token">;
10271027
def err_hlsl_invalid_register_literal: Error<"expected unsigned integer literal as part of register">;
1028+
def err_hlsl_rootsig_unexpected_eos : Error<"unexpected end to token stream">;
10281029
}
10291030

10301031
}

clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151

5252
// General Tokens:
5353
TOK(invalid)
54+
TOK(end_of_stream)
5455
TOK(int_literal)
5556

5657
// Register Tokens:

clang/include/clang/Parse/ParseHLSLRootSignature.h

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ struct RootSignatureToken {
3939
APValue NumLiteral = APValue();
4040

4141
// Constructors
42+
RootSignatureToken() : TokLoc(SourceLocation()) {}
4243
RootSignatureToken(clang::SourceLocation TokLoc) : TokLoc(TokLoc) {}
44+
RootSignatureToken(enum Kind Kind, clang::SourceLocation TokLoc) : Kind(Kind), TokLoc(TokLoc) {}
4345
};
4446
using TokenKind = enum RootSignatureToken::Kind;
4547

@@ -49,28 +51,38 @@ class RootSignatureLexer {
4951
clang::Preprocessor &PP)
5052
: Buffer(Signature), SourceLoc(SourceLoc), PP(PP) {}
5153

52-
// Consumes the internal buffer as a list of tokens and will emplace them
53-
// onto the given tokens.
54-
//
55-
// It will consume until it successfully reaches the end of the buffer,
56-
// or, until the first error is encountered. The return value denotes if
57-
// there was a failure.
58-
bool Lex(SmallVector<RootSignatureToken> &Tokens);
54+
/// Updates CurToken to the next token. Either it will take the previously
55+
/// lexed NextToken, or it will lex a token.
56+
///
57+
/// The return value denotes if there was a failure.
58+
bool ConsumeToken();
59+
60+
/// Returns the token that comes after CurToken or std::nullopt if an
61+
/// error is encountered during lexing of the next token.
62+
std::optional<RootSignatureToken> PeekNextToken();
63+
64+
RootSignatureToken GetCurToken() { return CurToken; }
65+
66+
/// Check if we have reached the end of input
67+
bool EndOfBuffer() {
68+
AdvanceBuffer(Buffer.take_while(isspace).size());
69+
return Buffer.empty();
70+
}
5971

6072
private:
6173
// Internal buffer to iterate over
6274
StringRef Buffer;
6375

76+
// Current Token state
77+
RootSignatureToken CurToken;
78+
std::optional<RootSignatureToken> NextToken = std::nullopt;
79+
6480
// Passed down parameters from Sema
6581
clang::SourceLocation SourceLoc;
6682
clang::Preprocessor &PP;
6783

6884
bool LexNumber(RootSignatureToken &Result);
69-
70-
// Consumes the internal buffer for a single token.
71-
//
72-
// The return value denotes if there was a failure.
73-
bool LexToken(RootSignatureToken &Token);
85+
bool LexToken(RootSignatureToken &Result);
7486

7587
// Advance the buffer by the specified number of characters. Updates the
7688
// SourceLocation appropriately.

clang/lib/Parse/ParseHLSLRootSignature.cpp

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -60,27 +60,13 @@ bool RootSignatureLexer::LexNumber(RootSignatureToken &Result) {
6060
return false;
6161
}
6262

63-
bool RootSignatureLexer::Lex(SmallVector<RootSignatureToken> &Tokens) {
63+
bool RootSignatureLexer::LexToken(RootSignatureToken &Result) {
6464
// Discard any leading whitespace
6565
AdvanceBuffer(Buffer.take_while(isspace).size());
6666

67-
while (!Buffer.empty()) {
68-
// Record where this token is in the text for usage in parser diagnostics
69-
RootSignatureToken Result(SourceLoc);
70-
if (LexToken(Result))
71-
return true;
72-
73-
// Successfully Lexed the token so we can store it
74-
Tokens.push_back(Result);
67+
// Record where this token is in the text for usage in parser diagnostics
68+
Result = RootSignatureToken(SourceLoc);
7569

76-
// Discard any trailing whitespace
77-
AdvanceBuffer(Buffer.take_while(isspace).size());
78-
}
79-
80-
return false;
81-
}
82-
83-
bool RootSignatureLexer::LexToken(RootSignatureToken &Result) {
8470
char C = Buffer.front();
8571

8672
// Punctuators
@@ -168,5 +154,40 @@ bool RootSignatureLexer::LexToken(RootSignatureToken &Result) {
168154
return false;
169155
}
170156

157+
bool RootSignatureLexer::ConsumeToken() {
158+
// If we previously peeked then just copy the value over
159+
if (NextToken && NextToken->Kind != TokenKind::end_of_stream) {
160+
CurToken = *NextToken;
161+
NextToken = std::nullopt;
162+
return false;
163+
}
164+
165+
// This will be implicity be true if NextToken->Kind == end_of_stream
166+
if (EndOfBuffer()) {
167+
// Report unexpected end of tokens error
168+
PP.getDiagnostics().Report(SourceLoc, diag::err_hlsl_rootsig_unexpected_eos);
169+
return true;
170+
}
171+
172+
return LexToken(CurToken);
173+
}
174+
175+
std::optional<RootSignatureToken> RootSignatureLexer::PeekNextToken() {
176+
// Already peeked from the current token
177+
if (NextToken.has_value())
178+
return NextToken;
179+
180+
if (EndOfBuffer()) {
181+
// We have reached the end of the stream, but only error on consume
182+
return RootSignatureToken(TokenKind::end_of_stream, SourceLoc);
183+
}
184+
185+
RootSignatureToken Result;
186+
if (LexToken(Result))
187+
return std::nullopt;
188+
NextToken = Result;
189+
return Result;
190+
}
191+
171192
} // namespace hlsl
172193
} // namespace clang

clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp

Lines changed: 79 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,19 @@ class ParseHLSLRootSignatureTest : public ::testing::Test {
9191
return PP;
9292
}
9393

94-
void CheckTokens(SmallVector<hlsl::RootSignatureToken> &Computed,
94+
void CheckTokens(hlsl::RootSignatureLexer &Lexer,
95+
SmallVector<hlsl::RootSignatureToken> &Computed,
9596
SmallVector<hlsl::TokenKind> &Expected) {
96-
ASSERT_EQ(Computed.size(), Expected.size());
9797
for (unsigned I = 0, E = Expected.size(); I != E; ++I) {
98-
ASSERT_EQ(Computed[I].Kind, Expected[I]);
98+
if (Expected[I] == hlsl::TokenKind::invalid ||
99+
Expected[I] == hlsl::TokenKind::end_of_stream)
100+
continue;
101+
ASSERT_FALSE(Lexer.ConsumeToken());
102+
hlsl::RootSignatureToken Result = Lexer.GetCurToken();
103+
ASSERT_EQ(Result.Kind, Expected[I]);
104+
Computed.push_back(Result);
99105
}
106+
ASSERT_TRUE(Lexer.EndOfBuffer());
100107
}
101108

102109
FileSystemOptions FileMgrOpts;
@@ -128,16 +135,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidLexNumbersTest) {
128135
hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP);
129136

130137
SmallVector<hlsl::RootSignatureToken> Tokens;
131-
ASSERT_FALSE(Lexer.Lex(Tokens));
132-
ASSERT_TRUE(Consumer->IsSatisfied());
133-
134138
SmallVector<hlsl::TokenKind> Expected = {
135139
hlsl::TokenKind::int_literal,
136140
hlsl::TokenKind::int_literal,
137141
hlsl::TokenKind::int_literal,
138142
hlsl::TokenKind::int_literal,
139143
};
140-
CheckTokens(Tokens, Expected);
144+
CheckTokens(Lexer, Tokens, Expected);
145+
ASSERT_TRUE(Consumer->IsSatisfied());
141146

142147
// Sample negative int
143148
hlsl::RootSignatureToken IntToken = Tokens[0];
@@ -204,23 +209,77 @@ TEST_F(ParseHLSLRootSignatureTest, ValidLexAllTokensTest) {
204209

205210
hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP);
206211

207-
SmallVector<hlsl::RootSignatureToken> Tokens = {
208-
hlsl::RootSignatureToken(
209-
SourceLocation()) // invalid token for completeness
210-
};
211-
ASSERT_FALSE(Lexer.Lex(Tokens));
212-
ASSERT_TRUE(Consumer->IsSatisfied());
213-
212+
SmallVector<hlsl::RootSignatureToken> Tokens;
214213
SmallVector<hlsl::TokenKind> Expected = {
215214
#define TOK(NAME) hlsl::TokenKind::NAME,
216215
#include "clang/Parse/HLSLRootSignatureTokenKinds.def"
217216
};
218217

219-
CheckTokens(Tokens, Expected);
218+
CheckTokens(Lexer, Tokens, Expected);
219+
ASSERT_TRUE(Consumer->IsSatisfied());
220+
}
221+
222+
TEST_F(ParseHLSLRootSignatureTest, ValidLexPeekTest) {
223+
// This test will check that we can lex all defined tokens as defined in
224+
// HLSLRootSignatureTokenKinds.def, plus some additional integer variations
225+
const llvm::StringLiteral Source = R"cc(
226+
)1
227+
)cc";
228+
229+
TrivialModuleLoader ModLoader;
230+
auto PP = CreatePP(Source, ModLoader);
231+
auto TokLoc = SourceLocation();
232+
233+
// Test no diagnostics produced
234+
Consumer->SetNoDiag();
235+
236+
hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP);
237+
// Test basic peek
238+
auto Res = Lexer.PeekNextToken();
239+
ASSERT_TRUE(Res.has_value());
240+
ASSERT_EQ(Res->Kind, hlsl::TokenKind::pu_r_paren);
241+
242+
// Ensure it doesn't peek past one element
243+
Res = Lexer.PeekNextToken();
244+
ASSERT_TRUE(Res.has_value());
245+
ASSERT_EQ(Res->Kind, hlsl::TokenKind::pu_r_paren);
246+
247+
ASSERT_FALSE(Lexer.ConsumeToken());
248+
249+
// Invoke after reseting the NextToken
250+
Res = Lexer.PeekNextToken();
251+
ASSERT_TRUE(Res.has_value());
252+
ASSERT_EQ(Res->Kind, hlsl::TokenKind::int_literal);
253+
254+
// Ensure we can still consume the second token
255+
ASSERT_FALSE(Lexer.ConsumeToken());
256+
257+
// Ensure no error raised when peeking past end of stream
258+
Res = Lexer.PeekNextToken();
259+
ASSERT_TRUE(Res.has_value());
260+
ASSERT_EQ(Res->Kind, hlsl::TokenKind::end_of_stream);
261+
262+
ASSERT_TRUE(Consumer->IsSatisfied());
220263
}
221264

222265
// Invalid Lexing Tests
223266

267+
TEST_F(ParseHLSLRootSignatureTest, InvalidParseUnexpectedEOSTest) {
268+
const llvm::StringLiteral Source = R"cc()cc";
269+
270+
TrivialModuleLoader ModLoader;
271+
auto PP = CreatePP(Source, ModLoader);
272+
auto TokLoc = SourceLocation();
273+
274+
hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP);
275+
276+
// Test correct diagnostic produced
277+
Consumer->SetExpected(diag::err_hlsl_rootsig_unexpected_eos);
278+
ASSERT_TRUE(Lexer.ConsumeToken());
279+
280+
ASSERT_TRUE(Consumer->IsSatisfied());
281+
}
282+
224283
TEST_F(ParseHLSLRootSignatureTest, InvalidLexOverflowedNumberTest) {
225284
// This test will check that the lexing fails due to an integer overflow
226285
const llvm::StringLiteral Source = R"cc(
@@ -236,8 +295,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexOverflowedNumberTest) {
236295

237296
hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP);
238297

239-
SmallVector<hlsl::RootSignatureToken> Tokens;
240-
ASSERT_TRUE(Lexer.Lex(Tokens));
298+
ASSERT_TRUE(Lexer.ConsumeToken());
241299
ASSERT_TRUE(Consumer->IsSatisfied());
242300
}
243301

@@ -256,8 +314,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexEmptyNumberTest) {
256314

257315
hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP);
258316

259-
SmallVector<hlsl::RootSignatureToken> Tokens;
260-
ASSERT_TRUE(Lexer.Lex(Tokens));
317+
ASSERT_TRUE(Lexer.ConsumeToken());
261318
ASSERT_TRUE(Consumer->IsSatisfied());
262319
}
263320

@@ -276,9 +333,8 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexRegNumberTest) {
276333

277334
hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP);
278335

279-
SmallVector<hlsl::RootSignatureToken> Tokens;
280-
ASSERT_TRUE(Lexer.Lex(Tokens));
281-
// FIXME(#120472): This should be TRUE once we can lex a floating
336+
ASSERT_TRUE(Lexer.ConsumeToken());
337+
// FIXME(#126565): This should be TRUE once we can lex a float
282338
ASSERT_FALSE(Consumer->IsSatisfied());
283339
}
284340

@@ -297,8 +353,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexIdentifierTest) {
297353

298354
hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP);
299355

300-
SmallVector<hlsl::RootSignatureToken> Tokens;
301-
ASSERT_TRUE(Lexer.Lex(Tokens));
356+
ASSERT_TRUE(Lexer.ConsumeToken());
302357
ASSERT_TRUE(Consumer->IsSatisfied());
303358
}
304359

0 commit comments

Comments
 (0)