Skip to content

Commit 008844c

Browse files
committed
A module directive may only appear as the first preprocessing tokens in a file
Signed-off-by: yronglin <[email protected]>
1 parent ece6382 commit 008844c

File tree

15 files changed

+177
-98
lines changed

15 files changed

+177
-98
lines changed

clang/include/clang/Lex/Lexer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ class Lexer : public PreprocessorLexer {
143143
/// True if this is the first time we're lexing the input file.
144144
bool IsFirstTimeLexingFile;
145145

146+
/// True if the token currently being lexed is the first pp-token.
147+
bool IsFirstPPToken;
148+
146149
// NewLinePtr - A pointer to new line character '\n' being lexed. For '\r\n',
147150
// it also points to '\n.'
148151
const char *NewLinePtr;

clang/include/clang/Lex/Preprocessor.h

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,24 @@ struct CXXStandardLibraryVersionInfo {
137137
std::uint64_t Version;
138138
};
139139

140+
/// Remembers an 'export' token seen by the preprocessor, together with
/// whether that token was at the physical start of its line.
///
/// Preprocessor keeps one instance (LastTokenWasExportKeyword) so that a
/// following 'module' or 'import' can be treated as part of an
/// 'export module' / 'export import' directive.
class ExportContextualKeywordInfo {
141+
Token ExportTok;
142+
bool AtPhysicalStartOfLine = false;
143+
144+
public:
145+
ExportContextualKeywordInfo() = default;
146+
ExportContextualKeywordInfo(const Token &Tok, bool AtPhysicalStartOfLine)
147+
: ExportTok(Tok), AtPhysicalStartOfLine(AtPhysicalStartOfLine) {}
148+
149+
/// True when the stored token is the 'export' keyword.
bool isValid() const { return ExportTok.is(tok::kw_export); }
150+
/// The start-of-line flag supplied at construction (false after reset()).
/// Callers use it to decide whether 'export' can introduce a directive.
bool isAtPhysicalStartOfLine() const { return AtPhysicalStartOfLine; }
151+
/// Returns a copy of the stored token; only meaningful while isValid().
Token getExportTok() const { return ExportTok; }
152+
/// Forget the remembered 'export' token.
/// NOTE(review): relies on Token::startToken() leaving the token with a
/// kind other than tok::kw_export so that isValid() becomes false -- confirm
/// against Token.h.
void reset() {
153+
ExportTok.startToken();
154+
AtPhysicalStartOfLine = false;
155+
}
156+
};
157+
140158
/// Engages in a tight little dance with the lexer to efficiently
141159
/// preprocess tokens.
142160
///
@@ -359,14 +377,11 @@ class Preprocessor {
359377
/// Whether we're declaring a standard C++20 named module.
360378
bool DeclaringCXXNamedModules = false;
361379

362-
struct ExportContextualKeywordInfo {
363-
Token ExportTok;
364-
bool TokAtPhysicalStartOfLine;
365-
};
366-
367380
/// Whether the last token we lexed was an 'export' keyword.
368-
std::optional<ExportContextualKeywordInfo> LastTokenWasExportKeyword =
369-
std::nullopt;
381+
ExportContextualKeywordInfo LastTokenWasExportKeyword;
382+
383+
/// The first pp-token in the current translation unit.
384+
Token FirstPPToken;
370385

371386
/// A position within a C++20 import-seq.
372387
class StdCXXImportSeq {
@@ -1774,6 +1789,9 @@ class Preprocessor {
17741789
void HandleCXXImportDirective(Token Import);
17751790
void HandleCXXModuleDirective(Token Module);
17761791

1792+
/// Record \p Tok as the first pp-token. Lexer::Lex calls this when a
/// successfully lexed token carries the Token::FirstPPToken flag and a
/// Preprocessor is attached to the lexer.
/// NOTE(review): Lexer::InitLexer sets IsFirstPPToken = true for every lexer,
/// so this may be invoked once per lexed buffer (overwriting the previous
/// value) rather than once per translation unit -- confirm.
void setFirstPPToken(const Token &Tok) { FirstPPToken = Tok; }
1793+
/// Returns a copy of the token most recently stored via setFirstPPToken().
Token getFirstPPToken() const { return FirstPPToken; }
1794+
17771795
/// Callback invoked when the lexer sees one of export, import or module token
17781796
/// at the start of a line.
17791797
///

clang/include/clang/Lex/Token.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ class Token {
8989
IsReinjected = 0x800, // A phase 4 token that was produced before and
9090
// re-added, e.g. via EnterTokenStream. Annotation
9191
// tokens are *not* reinjected.
92+
FirstPPToken = 0x1000, // This token is the first pp token in the
93+
// translation unit.
9294
};
9395

9496
tok::TokenKind getKind() const { return Kind; }
@@ -325,6 +327,9 @@ class Token {
325327
/// represented as characters between '<#' and '#>' in the source code. The
326328
/// lexer uses identifier tokens to represent placeholders.
327329
bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); }
330+
331+
/// Returns true if this token is the first pp-token.
332+
bool isFirstPPToken() const { return getFlag(FirstPPToken); }
328333
};
329334

330335
/// Information about the conditional stack (\#if directives)

clang/include/clang/Sema/Sema.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9937,9 +9937,9 @@ class Sema final : public SemaBase {
99379937
/// of a module interface or implementation.
99389938
DeclGroupPtrTy ActOnModuleDecl(SourceLocation StartLoc,
99399939
SourceLocation ModuleLoc, ModuleDeclKind MDK,
9940-
ModuleNameLoc *PathLoc,
9941-
ModuleNameLoc *PartitionLoc,
9942-
ModuleImportState &ImportState);
9940+
ModuleIdPath Path, ModuleIdPath Partition,
9941+
ModuleImportState &ImportState,
9942+
bool AtStartOfTU);
99439943

99449944
/// The parser has processed a global-module-fragment declaration that begins
99459945
/// the definition of the global module fragment of the current module unit.

clang/lib/Lex/Lexer.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
187187
ExtendedTokenMode = 0;
188188

189189
NewLinePtr = nullptr;
190+
191+
IsFirstPPToken = true;
190192
}
191193

192194
/// Lexer constructor - Create a new lexer object for the specified buffer
@@ -3739,11 +3741,20 @@ bool Lexer::Lex(Token &Result) {
37393741
HasLeadingEmptyMacro = false;
37403742
}
37413743

3744+
if (IsFirstPPToken) {
3745+
Result.setFlag(Token::FirstPPToken);
3746+
IsFirstPPToken = false;
3747+
}
3748+
37423749
bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
37433750
IsAtPhysicalStartOfLine = false;
37443751
bool isRawLex = isLexingRawMode();
37453752
(void) isRawLex;
37463753
bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
3754+
3755+
if (returnedToken && Result.isFirstPPToken() && PP)
3756+
PP->setFirstPPToken(Result);
3757+
37473758
// (After the LexTokenInternal call, the lexer might be destroyed.)
37483759
assert((returnedToken || !isRawLex) && "Raw lex must succeed");
37493760
return returnedToken;

clang/lib/Lex/PPDirectives.cpp

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4089,7 +4089,7 @@ void Preprocessor::HandleCXXImportDirective(Token ImportTok) {
40894089
this->ImportingCXXNamedModules);
40904090
ImportingCXXNamedModules = true;
40914091

4092-
if (LastTokenWasExportKeyword)
4092+
if (LastTokenWasExportKeyword.isValid())
40934093
LastTokenWasExportKeyword.reset();
40944094

40954095
Token Tok;
@@ -4216,13 +4216,26 @@ void Preprocessor::HandleCXXImportDirective(Token ImportTok) {
42164216

42174217
void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) {
42184218
assert(getLangOpts().CPlusPlusModules && ModuleTok.is(tok::kw_module));
4219-
SourceLocation StartLoc = ModuleTok.getLocation();
4220-
if (LastTokenWasExportKeyword) {
4221-
StartLoc = LastTokenWasExportKeyword->ExportTok.getLocation();
4219+
Token Introducer = ModuleTok;
4220+
if (LastTokenWasExportKeyword.isValid()) {
4221+
Introducer = LastTokenWasExportKeyword.getExportTok();
42224222
LastTokenWasExportKeyword.reset();
42234223
}
4224-
bool IsInHeaderInclusion = !IncludeMacroStack.empty();
4225-
bool IsInConditionBlock = CurPPLexer->getConditionalStackDepth() != 0;
4224+
4225+
SourceLocation StartLoc = Introducer.getLocation();
4226+
if (!IncludeMacroStack.empty()) {
4227+
SourceLocation End = DiscardUntilEndOfDirective().getEnd();
4228+
Diag(StartLoc, diag::err_module_decl_in_header)
4229+
<< SourceRange(StartLoc, End);
4230+
return;
4231+
}
4232+
4233+
if (CurPPLexer->getConditionalStackDepth() != 0) {
4234+
SourceLocation End = DiscardUntilEndOfDirective().getEnd();
4235+
Diag(StartLoc, diag::err_pp_cond_span_module_decl)
4236+
<< SourceRange(StartLoc, End);
4237+
return;
4238+
}
42264239

42274240
Token Tok;
42284241
SourceLocation UseLoc = ModuleTok.getLocation();
@@ -4305,13 +4318,6 @@ void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) {
43054318
End = CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName());
43064319
else
43074320
End = DirToks.pop_back_val().getLocation();
4308-
if (IsInHeaderInclusion)
4309-
Diag(StartLoc, diag::err_module_decl_in_header)
4310-
<< SourceRange(StartLoc, End);
4311-
4312-
if (IsInConditionBlock)
4313-
Diag(StartLoc, diag::err_pp_cond_span_module_decl)
4314-
<< SourceRange(StartLoc, End);
43154321

43164322
EnterModuleSuffixTokenStream(DirToks);
43174323
}

clang/lib/Lex/Preprocessor.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,11 +1185,10 @@ bool Preprocessor::HandleModuleContextualKeyword(
11851185
return false;
11861186
}
11871187

1188-
if (LastTokenWasExportKeyword) {
1189-
auto Export = *LastTokenWasExportKeyword;
1188+
if (LastTokenWasExportKeyword.isValid()) {
11901189
// The export keyword was not at the start of the line, so it's not a
11911190
// directive-introducing token.
1192-
if (!Export.TokAtPhysicalStartOfLine)
1191+
if (!LastTokenWasExportKeyword.isAtPhysicalStartOfLine())
11931192
return false;
11941193
// [cpp.pre]/1.4
11951194
// export // not a preprocessing directive

clang/lib/Parse/Parser.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "clang/AST/DeclTemplate.h"
1818
#include "clang/Basic/DiagnosticParse.h"
1919
#include "clang/Basic/StackExhaustionHandler.h"
20+
#include "clang/Lex/ModuleLoader.h"
2021
#include "clang/Parse/RAIIObjectsForParser.h"
2122
#include "clang/Sema/DeclSpec.h"
2223
#include "clang/Sema/EnterExpressionEvaluationContext.h"
@@ -2303,7 +2304,8 @@ void Parser::ParseMicrosoftIfExistsExternalDeclaration() {
23032304

23042305
Parser::DeclGroupPtrTy
23052306
Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) {
2306-
SourceLocation StartLoc = Tok.getLocation();
2307+
Token Introducer = Tok;
2308+
SourceLocation StartLoc = Introducer.getLocation();
23072309

23082310
Sema::ModuleDeclKind MDK = TryConsumeToken(tok::kw_export)
23092311
? Sema::ModuleDeclKind::Interface
@@ -2322,7 +2324,7 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) {
23222324
// Parse a global-module-fragment, if present.
23232325
if (getLangOpts().CPlusPlusModules && Tok.is(tok::semi)) {
23242326
SourceLocation SemiLoc = ConsumeToken();
2325-
if (ImportState != Sema::ModuleImportState::FirstDecl) {
2327+
if (!Introducer.isFirstPPToken()) {
23262328
Diag(StartLoc, diag::err_global_module_introducer_not_at_start)
23272329
<< SourceRange(StartLoc, SemiLoc);
23282330
return nullptr;
@@ -2378,8 +2380,10 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) {
23782380

23792381
ExpectAndConsumeSemi(diag::err_module_expected_semi);
23802382

2381-
return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path, Partition,
2382-
ImportState);
2383+
return Actions.ActOnModuleDecl(
2384+
StartLoc, ModuleLoc, MDK, Path->getModuleIdPath(),
2385+
Partition ? Partition->getModuleIdPath() : ModuleIdPath{}, ImportState,
2386+
Introducer.isFirstPPToken());
23832387
}
23842388

23852389
Decl *Parser::ParseModuleImport(SourceLocation AtLoc,

clang/lib/Sema/SemaModule.cpp

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313

1414
#include "clang/AST/ASTConsumer.h"
1515
#include "clang/AST/ASTMutationListener.h"
16+
#include "clang/Basic/SourceLocation.h"
1617
#include "clang/Lex/HeaderSearch.h"
18+
#include "clang/Lex/ModuleLoader.h"
1719
#include "clang/Lex/Preprocessor.h"
1820
#include "clang/Sema/ParsedAttr.h"
1921
#include "clang/Sema/SemaInternal.h"
@@ -238,22 +240,21 @@ static bool DiagReservedModuleName(Sema &S, const IdentifierInfo *II,
238240
llvm_unreachable("fell off a fully covered switch");
239241
}
240242

241-
Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
242-
SourceLocation ModuleLoc,
243-
ModuleDeclKind MDK,
244-
ModuleNameLoc *PathLoc,
245-
ModuleNameLoc *PartitionLoc,
246-
ModuleImportState &ImportState) {
243+
Sema::DeclGroupPtrTy
244+
Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc,
245+
ModuleDeclKind MDK, ModuleIdPath Path,
246+
ModuleIdPath Partition, ModuleImportState &ImportState,
247+
bool AtStartOfTU) {
247248
assert(getLangOpts().CPlusPlusModules &&
248249
"should only have module decl in standard C++ modules");
249250

250-
bool IsFirstDecl = ImportState == ModuleImportState::FirstDecl;
251251
bool SeenGMF = ImportState == ModuleImportState::GlobalFragment;
252252
// If any of the steps here fail, we count that as invalidating C++20
253253
// module state.
254254
ImportState = ModuleImportState::NotACXX20Module;
255255

256-
if (PartitionLoc)
256+
bool IsPartition = !Partition.empty();
257+
if (IsPartition)
257258
switch (MDK) {
258259
case ModuleDeclKind::Implementation:
259260
MDK = ModuleDeclKind::PartitionImplementation;
@@ -282,7 +283,7 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
282283
// We were asked to compile a module interface unit but this is a module
283284
// implementation unit.
284285
Diag(ModuleLoc, diag::err_module_interface_implementation_mismatch)
285-
<< FixItHint::CreateInsertion(ModuleLoc, "export ");
286+
<< FixItHint::CreateInsertion(ModuleLoc, "export ");
286287
MDK = ModuleDeclKind::Interface;
287288
break;
288289

@@ -314,7 +315,7 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
314315

315316
// In C++20, the module-declaration must be the first declaration if there
316317
// is no global module fragment.
317-
if (getLangOpts().CPlusPlusModules && !IsFirstDecl && !SeenGMF) {
318+
if (getLangOpts().CPlusPlusModules && !AtStartOfTU && !SeenGMF) {
318319
Diag(ModuleLoc, diag::err_module_decl_not_at_start);
319320
SourceLocation BeginLoc =
320321
ModuleScopes.empty()
@@ -335,39 +336,41 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
335336

336337
// Test the first part of the path to see if it's std[0-9]+ but allow the
337338
// name in a system header.
338-
StringRef FirstComponentName =
339-
PathLoc->getModuleIdPath()[0].getIdentifierInfo()->getName();
340-
if (!getSourceManager().isInSystemHeader(PathLoc->getBeginLoc()) &&
339+
StringRef FirstComponentName = Path[0].getIdentifierInfo()->getName();
340+
if (!getSourceManager().isInSystemHeader(Path[0].getLoc()) &&
341341
(FirstComponentName == "std" ||
342342
(FirstComponentName.starts_with("std") &&
343343
llvm::all_of(FirstComponentName.drop_front(3), &llvm::isDigit))))
344-
Diag(PathLoc->getBeginLoc(), diag::warn_reserved_module_name)
345-
<< PathLoc->getModuleIdPath()[0].getIdentifierInfo();
344+
Diag(Path[0].getLoc(), diag::warn_reserved_module_name)
345+
<< Path[0].getIdentifierInfo();
346346

347347
// Then test all of the components in the path to see if any of them are
348348
// using another kind of reserved or invalid identifier.
349-
for (auto Part : PathLoc->getModuleIdPath()) {
349+
for (auto Part : Path) {
350350
if (DiagReservedModuleName(*this, Part.getIdentifierInfo(), Part.getLoc()))
351351
return nullptr;
352352
}
353353

354354
// Flatten the dots in a module name. Unlike Clang's hierarchical module map
355355
// modules, the dots here are just another character that can appear in a
356356
// module name.
357-
std::string ModuleName = PathLoc->str();
358-
if (PartitionLoc) {
357+
std::string ModuleName = ModuleNameLoc::stringFromModuleIdPath(Path);
358+
if (IsPartition) {
359359
ModuleName += ":";
360-
ModuleName += PartitionLoc->str();
360+
ModuleName += ModuleNameLoc::stringFromModuleIdPath(Partition);
361361
}
362362
// If a module name was explicitly specified on the command line, it must be
363363
// correct.
364364
if (!getLangOpts().CurrentModule.empty() &&
365365
getLangOpts().CurrentModule != ModuleName) {
366-
Diag(PathLoc->getBeginLoc(), diag::err_current_module_name_mismatch)
367-
<< PathLoc->getRange() << getLangOpts().CurrentModule;
366+
Diag(Path.front().getLoc(), diag::err_current_module_name_mismatch)
367+
<< SourceRange(Path.front().getLoc(), IsPartition
368+
? Partition.back().getLoc()
369+
: Path.back().getLoc())
370+
<< getLangOpts().CurrentModule;
368371
return nullptr;
369372
}
370-
const_cast<LangOptions&>(getLangOpts()).CurrentModule = ModuleName;
373+
const_cast<LangOptions &>(getLangOpts()).CurrentModule = ModuleName;
371374

372375
auto &Map = PP.getHeaderSearchInfo().getModuleMap();
373376
Module *Mod; // The module we are creating.
@@ -378,7 +381,7 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
378381
// We can't have parsed or imported a definition of this module or parsed a
379382
// module map defining it already.
380383
if (auto *M = Map.findOrLoadModule(ModuleName)) {
381-
Diag(PathLoc->getBeginLoc(), diag::err_module_redefinition) << ModuleName;
384+
Diag(Path[0].getLoc(), diag::err_module_redefinition) << ModuleName;
382385
if (M->DefinitionLoc.isValid())
383386
Diag(M->DefinitionLoc, diag::note_prev_module_definition);
384387
else if (OptionalFileEntryRef FE = M->getASTFile())
@@ -401,7 +404,7 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
401404
// keyword nor a module-partition implicitly imports the primary
402405
// module interface unit of the module as if by a module-import-
403406
// declaration.
404-
IdentifierLoc ModuleNameLoc(PathLoc->getBeginLoc(),
407+
IdentifierLoc ModuleNameLoc(Path[0].getLoc(),
405408
PP.getIdentifierInfo(ModuleName));
406409

407410
// The module loader will assume we're trying to import the module that
@@ -412,7 +415,7 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
412415
Interface = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc},
413416
Module::AllVisible,
414417
/*IsInclusionDirective=*/false);
415-
const_cast<LangOptions&>(getLangOpts()).CurrentModule = ModuleName;
418+
const_cast<LangOptions &>(getLangOpts()).CurrentModule = ModuleName;
416419

417420
if (!Interface) {
418421
Diag(ModuleLoc, diag::err_module_not_defined) << ModuleName;
@@ -474,7 +477,7 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc,
474477

475478
// Make the import decl for the interface in the impl module.
476479
ImportDecl *Import = ImportDecl::Create(Context, CurContext, ModuleLoc,
477-
Interface, PathLoc->getBeginLoc());
480+
Interface, Path[0].getLoc());
478481
CurContext->addDecl(Import);
479482

480483
// Sequence initialization of the imported module before that of the current

0 commit comments

Comments
 (0)