Skip to content

Commit f6de927

Browse files
committed
Add basic support to recognize C keywords
1 parent 5b11c35 commit f6de927

File tree

12 files changed

+510
-37
lines changed

12 files changed

+510
-37
lines changed

packages/cxx-gen-ast/src/tokens.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ export const CXX_TOKEN_ALIASES = {
278278
};
279279

280280
export const C_KEYWORDS: string[] = [
281+
"asm",
281282
"alignas",
282283
"alignof",
283284
"auto",
@@ -332,11 +333,35 @@ export const C_KEYWORDS: string[] = [
332333
"_Generic",
333334
"_Imaginary",
334335
"_Noreturn",
336+
337+
"__attribute__",
338+
"__builtin_bit_cast",
339+
"__builtin_offsetof",
340+
"__builtin_va_arg",
341+
"__builtin_va_list",
342+
"__complex__",
343+
"__extension__",
344+
"__float128",
345+
"__float80",
346+
"__imag__",
347+
"__int128",
348+
"__int64",
349+
"__real__",
350+
"__restrict__",
351+
"__thread",
352+
"__underlying_type",
353+
"_Atomic",
354+
"_Complex",
335355
];
336356

337357
export const C_TOKEN_ALIASES = {
358+
__asm__: "ASM",
359+
__asm: "ASM",
360+
__inline__: "INLINE",
361+
__inline: "INLINE",
338362
_Alignas: "ALIGNAS",
339363
_Alignof: "ALIGNOF",
364+
_asm: "ASM",
340365
_Bool: "BOOL",
341366
_Static_assert: "STATIC_ASSERT",
342367
_Thread_local: "THREAD_LOCAL",

src/frontend/cxx/frontend.cc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ auto readAll(const std::string& fileName) -> std::optional<std::string> {
117117
}
118118

119119
void dumpTokens(const CLI& cli, TranslationUnit& unit, std::ostream& output) {
120+
const auto lang =
121+
cli.getSingle("-x") == "c" ? LanguageKind::kC : LanguageKind::kCXX;
122+
120123
std::string flags;
121124

122125
for (SourceLocation loc(1);; loc = loc.next()) {
@@ -134,7 +137,7 @@ void dumpTokens(const CLI& cli, TranslationUnit& unit, std::ostream& output) {
134137

135138
auto kind = tk.kind();
136139
if (kind == TokenKind::T_IDENTIFIER) {
137-
kind = Lexer::classifyKeyword(tk.spell());
140+
kind = Lexer::classifyKeyword(tk.spell(), lang);
138141
}
139142

140143
output << std::format("{} '{}'{}", Token::name(kind), tk.spell(), flags);
@@ -153,6 +156,13 @@ auto runOnFile(const CLI& cli, const std::string& fileName) -> bool {
153156

154157
auto preprocessor = unit.preprocessor();
155158

159+
const auto lang = cli.getSingle("-x");
160+
161+
if (lang == "c") {
162+
// set the language to C
163+
preprocessor->setLanguage(LanguageKind::kC);
164+
}
165+
156166
std::unique_ptr<Toolchain> toolchain;
157167

158168
if (cli.opt_verify) {

src/parser/cxx/cli.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ std::vector<CLIOptionDescr> options{
139139
{"-o", "<file>", "Place output into <file>",
140140
CLIOptionDescrKind::kSeparated},
141141

142-
{"-x", "Specify the language from the compiler driver",
142+
{"-x", "Specify the language from the compiler driver, e.g. c, or c++",
143143
CLIOptionDescrKind::kSeparated},
144144

145145
{"-fcheck", "Enable type checker (WIP)", &CLI::opt_fcheck},

src/parser/cxx/cxx_fwd.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ class Arena;
2828
class Control;
2929
class TranslationUnit;
3030

31+
enum struct LanguageKind {
32+
kC,
33+
kCXX,
34+
};
35+
3136
[[noreturn]] void cxx_runtime_error(const std::string& msg);
3237

3338
} // namespace cxx

src/parser/cxx/lexer.cc

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,18 @@ inline auto skipBOM(It& it, It end) -> bool {
107107
}
108108
} // namespace
109109

110-
Lexer::Lexer(std::string_view source)
111-
: source_(source), pos_(cbegin(source_)), end_(cend(source_)) {
110+
Lexer::Lexer(std::string_view source, LanguageKind lang)
111+
: source_(source), pos_(cbegin(source_)), end_(cend(source_)), lang_(lang) {
112112
hasBOM_ = skipBOM(pos_, end_);
113113
currentChar_ = pos_ < end_ ? peekNext(pos_, end_) : 0;
114114
}
115115

116-
Lexer::Lexer(std::string buffer)
116+
Lexer::Lexer(std::string buffer, LanguageKind lang)
117117
: buffer_(std::move(buffer)),
118118
source_(buffer_),
119119
pos_(cbegin(source_)),
120-
end_(cend(source_)) {
120+
end_(cend(source_)),
121+
lang_(lang) {
121122
hasBOM_ = skipBOM(pos_, end_);
122123
currentChar_ = pos_ < end_ ? peekNext(pos_, end_) : 0;
123124
}
@@ -206,13 +207,7 @@ auto Lexer::readToken() -> TokenKind {
206207

207208
if (preprocessing_) return TokenKind::T_IDENTIFIER;
208209

209-
if (auto keyword =
210-
classify(text_.c_str(), static_cast<int>(text_.length()));
211-
keyword != TokenKind::T_IDENTIFIER) {
212-
return keyword;
213-
}
214-
215-
return TokenKind::T_IDENTIFIER;
210+
return classifyKeyword(text_, lang_);
216211
}
217212
}
218213

@@ -258,15 +253,17 @@ auto Lexer::readToken() -> TokenKind {
258253
}
259254
}
260255

261-
bool ud = false;
262-
if (std::isalpha(LA()) || LA() == '_') {
263-
ud = true;
264-
do {
265-
consume();
266-
} while (pos_ != end_ && is_idcont(LA()));
267-
}
256+
if (lang_ == LanguageKind::kCXX) {
257+
bool ud = false;
258+
if (std::isalpha(LA()) || LA() == '_') {
259+
ud = true;
260+
do {
261+
consume();
262+
} while (pos_ != end_ && is_idcont(LA()));
263+
}
268264

269-
if (ud) return TokenKind::T_USER_DEFINED_STRING_LITERAL;
265+
if (ud) return TokenKind::T_USER_DEFINED_STRING_LITERAL;
266+
}
270267

271268
switch (encodingPrefix) {
272269
case EncodingPrefix::kWide:
@@ -569,8 +566,12 @@ auto Lexer::skipSpaces() -> bool {
569566
return pos_ != end_;
570567
}
571568

572-
auto Lexer::classifyKeyword(const std::string_view& text) -> TokenKind {
573-
return classify(text.data(), static_cast<int>(text.size()));
569+
auto Lexer::classifyKeyword(const std::string_view& text, LanguageKind lang)
570+
-> TokenKind {
571+
if (lang == LanguageKind::kCXX) {
572+
return classify(text.data(), static_cast<int>(text.size()));
573+
}
574+
return classifyC(text.data(), static_cast<int>(text.size()));
574575
}
575576

576577
void Lexer::clearBuffer() { buffer_.clear(); }

src/parser/cxx/lexer.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@ namespace cxx {
2929

3030
class Lexer {
3131
public:
32-
explicit Lexer(std::string_view source);
33-
explicit Lexer(std::string buffer);
32+
explicit Lexer(std::string_view source,
33+
LanguageKind lang = LanguageKind::kCXX);
34+
35+
explicit Lexer(std::string buffer, LanguageKind lang = LanguageKind::kCXX);
3436

3537
[[nodiscard]] auto preprocessing() const -> bool { return preprocessing_; }
3638
void setPreprocessing(bool preprocessing) { preprocessing_ = preprocessing; }
@@ -77,7 +79,8 @@ class Lexer {
7779
[[nodiscard]] auto text() -> std::string& { return text_; }
7880
[[nodiscard]] auto text() const -> const std::string& { return text_; }
7981

80-
static auto classifyKeyword(const std::string_view& text) -> TokenKind;
82+
static auto classifyKeyword(const std::string_view& text, LanguageKind lang)
83+
-> TokenKind;
8184

8285
struct State {
8386
std::string_view::const_iterator pos_;
@@ -114,6 +117,7 @@ class Lexer {
114117
std::string_view::const_iterator pos_;
115118
std::string_view::const_iterator end_;
116119
std::string text_;
120+
LanguageKind lang_ = LanguageKind::kCXX;
117121
bool leadingSpace_ = false;
118122
bool startOfLine_ = true;
119123
bool keepComments_ = false;

src/parser/cxx/preprocessor.cc

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ struct Preprocessor::Private {
701701
Control *control_ = nullptr;
702702
DiagnosticsClient *diagnosticsClient_ = nullptr;
703703
CommentHandler *commentHandler_ = nullptr;
704+
LanguageKind language_ = LanguageKind::kCXX;
704705
bool canResolveFiles_ = true;
705706
std::vector<std::string> systemIncludePaths_;
706707
std::vector<std::string> quoteIncludePaths_;
@@ -1621,7 +1622,7 @@ void Preprocessor::Private::finalizeToken(std::vector<Token> &tokens,
16211622

16221623
switch (tk->kind) {
16231624
case TokenKind::T_IDENTIFIER: {
1624-
kind = Lexer::classifyKeyword(tk->text);
1625+
kind = Lexer::classifyKeyword(tk->text, language_);
16251626

16261627
if (kind == TokenKind::T_IDENTIFIER) {
16271628
value.idValue = control_->getIdentifier(tk->text);
@@ -1710,7 +1711,7 @@ void Preprocessor::Private::finalizeToken(std::vector<Token> &tokens,
17101711

17111712
auto Preprocessor::Private::tokenize(const std::string_view &source,
17121713
int sourceFile, bool bol) -> TokList * {
1713-
cxx::Lexer lex(source);
1714+
cxx::Lexer lex(source, language_);
17141715
lex.setKeepComments(true);
17151716
lex.setPreprocessing(true);
17161717
TokList *ts = nullptr;
@@ -2824,7 +2825,7 @@ auto Preprocessor::Private::merge(const Tok *left, const Tok *right)
28242825
if (!right) return left;
28252826
const auto hideset = makeIntersection(left->hideset, right->hideset);
28262827
auto text = string(std::string(left->text) + std::string(right->text));
2827-
Lexer lex(text);
2828+
Lexer lex(text, language_);
28282829
lex.setPreprocessing(true);
28292830
lex.next();
28302831
auto tok = gen(lex.tokenKind(), lex.tokenText(), hideset);
@@ -2925,6 +2926,10 @@ auto Preprocessor::diagnosticsClient() const -> DiagnosticsClient * {
29252926
return d->diagnosticsClient_;
29262927
}
29272928

2929+
auto Preprocessor::language() const -> LanguageKind { return d->language_; }
2930+
2931+
void Preprocessor::setLanguage(LanguageKind lang) { d->language_ = lang; }
2932+
29282933
auto Preprocessor::commentHandler() const -> CommentHandler * {
29292934
return d->commentHandler_;
29302935
}
@@ -3091,7 +3096,7 @@ void Preprocessor::getPreprocessedText(const std::vector<Token> &tokens,
30913096
const auto &prevToken = tokens[index - 2];
30923097
std::string s = prevToken.spell();
30933098
s += token.spell();
3094-
Lexer lex(s);
3099+
Lexer lex(s, d->language_);
30953100
// lex.setPreprocessing(true);
30963101
lex.next();
30973102
if (lex.tokenKind() != prevToken.kind()) {

src/parser/cxx/preprocessor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ class Preprocessor {
5858

5959
[[nodiscard]] auto diagnosticsClient() const -> DiagnosticsClient *;
6060

61+
[[nodiscard]] auto language() const -> LanguageKind;
62+
void setLanguage(LanguageKind lang);
63+
64+
[[nodiscard]] auto preprocessorDelegate() const -> PreprocessorDelegate *;
65+
6166
[[nodiscard]] auto commentHandler() const -> CommentHandler *;
6267
void setCommentHandler(CommentHandler *commentHandler);
6368

0 commit comments

Comments
 (0)