diff --git a/clang/examples/AnnotateFunctions/AnnotateFunctions.cpp b/clang/examples/AnnotateFunctions/AnnotateFunctions.cpp index d872020c2d8a3..22a3eb97f938b 100644 --- a/clang/examples/AnnotateFunctions/AnnotateFunctions.cpp +++ b/clang/examples/AnnotateFunctions/AnnotateFunctions.cpp @@ -65,7 +65,7 @@ class PragmaAnnotateHandler : public PragmaHandler { Token Tok; PP.LexUnexpandedToken(Tok); if (Tok.isNot(tok::eod)) - PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma"; if (HandledDecl) { DiagnosticsEngine &D = PP.getDiagnostics(); diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index c03c4033cd3a6..c8f04e408ad82 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -465,7 +465,7 @@ def err_pp_embed_device_file : Error< "device files are not yet supported by '#embed' directive">; def ext_pp_extra_tokens_at_eol : ExtWarn< - "extra tokens at end of #%0 directive">, InGroup; + "extra tokens at end of %0 directive">, InGroup; def ext_pp_comma_expr : Extension<"comma operator in operand of #if">; def ext_pp_bad_vaargs_use : Extension< @@ -495,8 +495,8 @@ def warn_cxx98_compat_variadic_macro : Warning< InGroup, DefaultIgnore; def ext_named_variadic_macro : Extension< "named variadic macros are a GNU extension">, InGroup; -def err_embedded_directive : Error< - "embedding a #%0 directive within macro arguments is not supported">; +def err_embedded_directive : Error<"embedding a %select{#|C++ }0%1 directive " + "within macro arguments is not supported">; def ext_embedded_directive : Extension< "embedding a directive within macro arguments has undefined behavior">, InGroup>; @@ -991,6 +991,10 @@ def warn_module_conflict : Warning< InGroup; // C++20 modules +def err_module_decl_in_header + : Error<"module declaration must not come from an #include directive">; +def 
err_pp_cond_span_module_decl + : Error<"preprocessor conditionals shall not span a module declaration">; def err_header_import_semi_in_macro : Error< "semicolon terminating header import declaration cannot be produced " "by a macro">; diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index ff506fb258b64..fa10a69b021f5 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1778,10 +1778,20 @@ def ext_bit_int : Extension< } // end of Parse Issue category. let CategoryName = "Modules Issue" in { -def err_unexpected_module_decl : Error< - "module declaration can only appear at the top level">; +def err_invalid_module_or_import_directive : Error< + "the %select{module|import}0 directive is ill-formed, " + "%select{module contextual keyword must be immediately " + "followed on the same line by an identifier, " + "or a ';' after being at the start of a line, or preceded by " + "an export keyword at the start of a line|" + "import contextual keyword must be immediately followed " + "on the same line by an identifier, '<', '\"', or ':', but not '::', " + "after being at the start of a line or preceded by an export at " + "the start of the line}0">; +def err_unexpected_module_or_import_decl : Error< + "%select{module|import}0 declaration can only appear at the top level">; def err_module_expected_ident : Error< - "expected a module name after '%select{module|import}0'">; + "expected %select{identifier after '.' 
in |}0module name">; def err_attribute_not_module_attr : Error< "%0 attribute cannot be applied to a module">; def err_keyword_not_module_attr : Error< @@ -1792,6 +1802,8 @@ def err_keyword_not_import_attr : Error< "%0 cannot be applied to a module import">; def err_module_expected_semi : Error< "expected ';' after module name">; +def err_expected_semi_after_module_or_import + : Error<"'%0' directive must end with a ';' on the same line">; def err_global_module_introducer_not_at_start : Error< "'module;' introducing a global module fragment can appear only " "at the start of the translation unit">; diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index e4044bcdfcc60..488c1bf9599a0 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -179,6 +179,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsModulesImport : 1; + // True if this is the 'module' contextual keyword. + LLVM_PREFERRED_TYPE(bool) + unsigned IsModulesDecl : 1; + // True if this is a mangled OpenMP variant name. 
LLVM_PREFERRED_TYPE(bool) unsigned IsMangledOpenMPVariantName : 1; @@ -215,8 +219,9 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), IsModulesImport(false), - IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false), - IsRestrictExpansion(false), IsFinal(false), IsKeywordInCpp(false) {} + IsModulesDecl(false), IsMangledOpenMPVariantName(false), + IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false), + IsKeywordInCpp(false) {} public: IdentifierInfo(const IdentifierInfo &) = delete; @@ -528,6 +533,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { RecomputeNeedsHandleIdentifier(); } + /// Determine whether this is the contextual keyword \c module. + bool isModulesDeclaration() const { return IsModulesDecl; } + + /// Set whether this identifier is the contextual keyword \c module. + void setModulesDeclaration(bool I) { + IsModulesDecl = I; + if (I) + NeedsHandleIdentifier = true; + else + RecomputeNeedsHandleIdentifier(); + } + /// Determine whether this is the mangled name of an OpenMP variant. bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } @@ -745,10 +762,11 @@ class IdentifierTable { // contents. II->Entry = &Entry; - // If this is the 'import' contextual keyword, mark it as such. + // If this is the 'import' or 'module' contextual keyword, mark it as such. 
if (Name == "import") II->setModulesImport(true); - + else if (Name == "module") + II->setModulesDeclaration(true); return *II; } diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 94e72fea56a68..9df539b7eea09 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -133,6 +133,9 @@ PPKEYWORD(pragma) // C23 & C++26 #embed PPKEYWORD(embed) +// C++20 Module Directive +PPKEYWORD(module) + // GNU Extensions. PPKEYWORD(import) PPKEYWORD(include_next) diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h index d84f3598cbf33..f0d11d43bdf97 100644 --- a/clang/include/clang/Basic/TokenKinds.h +++ b/clang/include/clang/Basic/TokenKinds.h @@ -76,6 +76,10 @@ const char *getPunctuatorSpelling(TokenKind Kind) LLVM_READNONE; /// tokens like 'int' and 'dynamic_cast'. Returns NULL for other token kinds. const char *getKeywordSpelling(TokenKind Kind) LLVM_READNONE; +/// Determines the spelling of simple Objective-C keyword tokens like '@import'. +/// Returns NULL for other token kinds. +const char *getObjCKeywordSpelling(ObjCKeywordKind Kind) LLVM_READNONE; + /// Returns the spelling of preprocessor keywords, such as "else". const char *getPPKeywordSpelling(PPKeywordKind Kind) LLVM_READNONE; diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index 64ebb70a6a24c..0619d3d4672e3 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -866,7 +866,7 @@ class CompilerInstance : public ModuleLoader { /// load it. ModuleLoadResult findOrCompileModuleAndReadAST(StringRef ModuleName, SourceLocation ImportLoc, - SourceLocation ModuleNameLoc, + SourceRange ModuleNameRange, bool IsInclusionDirective); /// Creates a \c CompilerInstance for compiling a module. 
diff --git a/clang/include/clang/Lex/CodeCompletionHandler.h b/clang/include/clang/Lex/CodeCompletionHandler.h index bd3e05a36bb33..2ef29743415ae 100644 --- a/clang/include/clang/Lex/CodeCompletionHandler.h +++ b/clang/include/clang/Lex/CodeCompletionHandler.h @@ -13,12 +13,15 @@ #ifndef LLVM_CLANG_LEX_CODECOMPLETIONHANDLER_H #define LLVM_CLANG_LEX_CODECOMPLETIONHANDLER_H +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/SourceLocation.h" #include "llvm/ADT/StringRef.h" namespace clang { class IdentifierInfo; class MacroInfo; +using ModuleIdPath = ArrayRef; /// Callback handler that receives notifications when performing code /// completion within the preprocessor. @@ -70,6 +73,11 @@ class CodeCompletionHandler { /// file where we expect natural language, e.g., a comment, string, or /// \#error directive. virtual void CodeCompleteNaturalLanguage() { } + + /// Callback invoked when performing code completion inside the module name + /// part of an import directive. + virtual void CodeCompleteModuleImport(SourceLocation ImportLoc, + ModuleIdPath Path) {} }; } diff --git a/clang/include/clang/Lex/ModuleLoader.h b/clang/include/clang/Lex/ModuleLoader.h index a58407200c41c..042a5ab1f4a57 100644 --- a/clang/include/clang/Lex/ModuleLoader.h +++ b/clang/include/clang/Lex/ModuleLoader.h @@ -159,6 +159,7 @@ class ModuleLoader { /// \returns Returns true if any modules with that symbol found. 
virtual bool lookupMissingImports(StringRef Name, SourceLocation TriggerLoc) = 0; + static std::string getFlatNameFromPath(ModuleIdPath Path); bool HadFatalFailure = false; }; diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 39754847a93e4..44bf97c994093 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -48,6 +48,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Registry.h" +#include "llvm/Support/TrailingObjects.h" #include #include #include @@ -339,8 +340,9 @@ class Preprocessor { /// lexed, if any. SourceLocation ModuleImportLoc; - /// The import path for named module that we're currently processing. - SmallVector NamedModuleImportPath; + /// The source location of the \c module contextual keyword we just + /// lexed, if any. + SourceLocation ModuleDeclLoc; llvm::DenseMap> CheckPoints; unsigned CheckPointCounter = 0; @@ -351,6 +353,15 @@ class Preprocessor { /// Whether the last token we lexed was an '@'. bool LastTokenWasAt = false; + /// Whether we're importing a standard C++20 named Modules. + bool ImportingCXXNamedModules = false; + + /// Whether we're declaring a standard C++20 named Modules. + bool DeclaringCXXNamedModules = false; + + /// Whether the last token we lexed was an 'export' keyword. + std::optional ModuleLikeDirectiveIntroducer; + /// First pp-token source location in current translation unit. SourceLocation FirstPPTokenLoc; @@ -639,10 +650,6 @@ class Preprocessor { ModuleDeclSeq ModuleDeclState; - /// Whether the module import expects an identifier next. Otherwise, - /// it expects a '.' or ';'. - bool ModuleImportExpectsIdentifier = false; - /// The identifier and source location of the currently-active /// \#pragma clang arc_cf_code_audited begin. 
IdentifierLoc PragmaARCCFCodeAuditedInfo; @@ -1776,6 +1783,22 @@ class Preprocessor { /// Lex the parameters for an #embed directive, returns nullopt on error. std::optional LexEmbedParameters(Token &Current, bool ForHasEmbed); + bool LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, + SmallVectorImpl &Suffix, + SmallVectorImpl &Path, + bool AllowMacroExpansion = true); + void EnterModuleSuffixTokenStream(ArrayRef Toks); + void HandleCXXImportDirective(Token Import); + void HandleCXXModuleDirective(Token Module); + void HandleObjCAtImportDirective(Token &ImportTok); + + /// Callback invoked when the lexer sees one of export, import or module token + /// at the start of a line. + /// + /// This consumes the import/module directive, modifies the + /// lexer/preprocessor state, and advances the lexer(s) so that the next token + /// read is the correct one. + bool HandleModuleContextualKeyword(Token &Result); /// Get the start location of the first pp-token in main file. SourceLocation getMainFileFirstPPTokenLoc() const { @@ -1784,8 +1807,10 @@ class Preprocessor { return FirstPPTokenLoc; } - bool LexAfterModuleImport(Token &Result); - void CollectPpImportSuffix(SmallVectorImpl &Toks); + void CollectPPImportSuffix(SmallVectorImpl &Toks, + bool StopUntilEOD = false); + bool CollectPPImportSuffixAndEnterStream(SmallVectorImpl &Toks, + bool StopUntilEOD = false); void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports = true); @@ -2312,6 +2337,7 @@ class Preprocessor { template bool isNextPPTokenOneOf(Ts... Ks) { static_assert(sizeof...(Ts) > 0, "requires at least one tok::TokenKind specified"); + // Do some quick tests for rejection cases. std::optional Val; if (CurLexer) @@ -2401,20 +2427,27 @@ class Preprocessor { /// If \p EnableMacros is true, then we consider macros that expand to zero /// tokens as being ok. /// + /// If \p ExtraToks not null, the extra tokens will be saved in this + /// container. 
+ /// /// \return The location of the end of the directive (the terminating /// newline). - SourceLocation CheckEndOfDirective(const char *DirType, - bool EnableMacros = false); + SourceLocation + CheckEndOfDirective(StringRef DirType, bool EnableMacros = false, + SmallVectorImpl *ExtraToks = nullptr); /// Read and discard all tokens remaining on the current line until /// the tok::eod token is found. Returns the range of the skipped tokens. - SourceRange DiscardUntilEndOfDirective() { + SourceRange + DiscardUntilEndOfDirective(SmallVectorImpl *DiscardedToks = nullptr) { Token Tmp; - return DiscardUntilEndOfDirective(Tmp); + return DiscardUntilEndOfDirective(Tmp, DiscardedToks); } /// Same as above except retains the token that was found. - SourceRange DiscardUntilEndOfDirective(Token &Tok); + SourceRange + DiscardUntilEndOfDirective(Token &Tok, + SmallVectorImpl *DiscardedToks = nullptr); /// Returns true if the preprocessor has seen a use of /// __DATE__ or __TIME__ in the file so far. @@ -2485,11 +2518,16 @@ class Preprocessor { } /// If we're importing a standard C++20 Named Modules. - bool isInImportingCXXNamedModules() const { - // NamedModuleImportPath will be non-empty only if we're importing - // Standard C++ named modules. - return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules && - !IsAtImport; + bool isImportingCXXNamedModules() const { + assert(getLangOpts().CPlusPlusModules && + "Import C++ named modules are only valid for C++20 modules"); + return ImportingCXXNamedModules; + } + + bool isDeclaringCXXNamedModules() const { + assert(getLangOpts().CPlusPlusModules && + "Declare C++ named modules are only valid for C++20 modules"); + return DeclaringCXXNamedModules; } /// Allocate a new MacroInfo object with the provided SourceLocation. 
@@ -3119,9 +3157,6 @@ class Preprocessor { static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) { return P.CurLexer->LexDependencyDirectiveToken(Result); } - static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) { - return P.LexAfterModuleImport(Result); - } }; /// Abstract base class that describes a handler that will receive diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index d9dc5a562d802..91785bd14510d 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -90,8 +90,10 @@ class Token { // re-added, e.g. via EnterTokenStream. Annotation // tokens are *not* reinjected. HasSeenNoTrivialPPDirective = - 0x1000, // Whether we've seen any 'no-trivial' pp-directives before + 0x1000, // Whether we've seen any 'no-trivial' pp-directives before // current position. + PhysicalStartOfLine = + 0x2000, // At the physical start of line or only after whitespace. }; tok::TokenKind getKind() const { return Kind; } @@ -277,6 +279,10 @@ class Token { /// bool isAtStartOfLine() const { return getFlag(StartOfLine); } + /// isAtPhysicalStartOfLine - Return true if this token is at the physical + /// start of a line. + bool isAtPhysicalStartOfLine() const { return getFlag(PhysicalStartOfLine); } + /// Return true if this token has whitespace before it. /// bool hasLeadingSpace() const { return getFlag(LeadingSpace); } @@ -291,6 +297,11 @@ class Token { /// Return the ObjC keyword kind. tok::ObjCKeywordKind getObjCKeywordID() const; + /// Return true if we have a C++20 Modules contextual keyword(export, import + /// or module). + bool isModuleContextualKeyword(const LangOptions &LangOpts, + bool AllowExport = true) const; + bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const; /// Return true if this token has trigraphs or escaped newlines in it. 
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index e9437e6d46366..7ff72d1662c8c 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -566,10 +566,6 @@ class Parser : public CodeCompletionHandler { /// Contextual keywords for Microsoft extensions. IdentifierInfo *Ident__except; - // C++2a contextual keywords. - mutable IdentifierInfo *Ident_import; - mutable IdentifierInfo *Ident_module; - std::unique_ptr CommentSemaHandler; /// Gets set to true after calling ProduceSignatureHelp, it is for a @@ -1081,6 +1077,9 @@ class Parser : public CodeCompletionHandler { bool ParseModuleName(SourceLocation UseLoc, SmallVectorImpl &Path, bool IsImport); + void DiagnoseInvalidCXXModuleDecl(const Sema::ModuleImportState &ImportState); + void DiagnoseInvalidCXXModuleImport(); + //===--------------------------------------------------------------------===// // Preprocessor code-completion pass-through void CodeCompleteDirective(bool InConditional) override; @@ -1091,6 +1090,8 @@ class Parser : public CodeCompletionHandler { unsigned ArgumentIndex) override; void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled) override; void CodeCompleteNaturalLanguage() override; + void CodeCompleteModuleImport(SourceLocation ImportLoc, + ModuleIdPath Path) override; ///@} diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 4a2b77cd16bfc..3a4a3d379e021 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -349,8 +349,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { if (LangOpts.IEEE128) AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this); - // Add the 'import' contextual keyword. + // Add the 'import' and 'module' contextual keywords. 
get("import").setModulesImport(true); + get("module").setModulesDeclaration(true); } /// Checks if the specified token kind represents a keyword in the @@ -464,6 +465,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { unsigned Len = getLength(); if (Len < 2) return tok::pp_not_keyword; const char *Name = getNameStart(); + + // clang-format off switch (HASH(Len, Name[0], Name[2])) { default: return tok::pp_not_keyword; CASE( 2, 'i', '\0', if); @@ -482,6 +485,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 6, 'd', 'f', define); CASE( 6, 'i', 'n', ifndef); CASE( 6, 'i', 'p', import); + CASE( 6, 'm', 'd', module); CASE( 6, 'p', 'a', pragma); CASE( 7, 'd', 'f', defined); @@ -501,6 +505,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { #undef CASE #undef HASH } + // clang-format on } //===----------------------------------------------------------------------===// diff --git a/clang/lib/Basic/TokenKinds.cpp b/clang/lib/Basic/TokenKinds.cpp index c300175ce90ba..a5b8c998d9b8e 100644 --- a/clang/lib/Basic/TokenKinds.cpp +++ b/clang/lib/Basic/TokenKinds.cpp @@ -46,6 +46,18 @@ const char *tok::getKeywordSpelling(TokenKind Kind) { return nullptr; } +const char *tok::getObjCKeywordSpelling(ObjCKeywordKind Kind) { + switch (Kind) { +#define OBJC_AT_KEYWORD(X) \ + case objc_##X: \ + return "@" #X; +#include "clang/Basic/TokenKinds.def" + default: + break; + } + return nullptr; +} + const char *tok::getPPKeywordSpelling(tok::PPKeywordKind Kind) { switch (Kind) { #define PPKEYWORD(x) case tok::pp_##x: return #x; diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index b2c566f44c27f..e1ad2b2089bdd 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1888,8 +1888,8 @@ static ModuleSource selectModuleSource( } ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST( - StringRef ModuleName, SourceLocation ImportLoc, - SourceLocation 
ModuleNameLoc, bool IsInclusionDirective) { + StringRef ModuleName, SourceLocation ImportLoc, SourceRange ModuleNameRange, + bool IsInclusionDirective) { // Search for a module with the given name. HeaderSearch &HS = PP->getHeaderSearchInfo(); Module *M = @@ -1906,10 +1906,11 @@ ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST( std::string ModuleFilename; ModuleSource Source = selectModuleSource(M, ModuleName, ModuleFilename, BuiltModules, HS); + SourceLocation ModuleNameLoc = ModuleNameRange.getBegin(); if (Source == MS_ModuleNotFound) { // We can't find a module, error out here. getDiagnostics().Report(ModuleNameLoc, diag::err_module_not_found) - << ModuleName << SourceRange(ImportLoc, ModuleNameLoc); + << ModuleName << ModuleNameRange; return nullptr; } if (ModuleFilename.empty()) { @@ -2095,8 +2096,11 @@ CompilerInstance::loadModule(SourceLocation ImportLoc, MM.cacheModuleLoad(*Path[0].getIdentifierInfo(), Module); } else { + SourceLocation ModuleNameEndLoc = Path.back().getLoc().getLocWithOffset( + Path.back().getIdentifierInfo()->getLength()); ModuleLoadResult Result = findOrCompileModuleAndReadAST( - ModuleName, ImportLoc, ModuleNameLoc, IsInclusionDirective); + ModuleName, ImportLoc, SourceRange{ModuleNameLoc, ModuleNameEndLoc}, + IsInclusionDirective); if (!Result.isNormal()) return Result; if (!Result) diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index eee57c786442a..85ffc87017619 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -544,6 +544,12 @@ static void skipWhitespace(const char *&First, const char *const End) { bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, const char *const End) { + assert(Kind == DirectiveKind::cxx_export_import_decl || + Kind == DirectiveKind::cxx_export_module_decl || + Kind == DirectiveKind::cxx_import_decl || + Kind == DirectiveKind::cxx_module_decl || + 
Kind == DirectiveKind::decl_at_import); + const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; for (;;) { // Keep a copy of the First char incase it needs to be reset. @@ -555,7 +561,7 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, First = Previous; return false; } - if (Tok.is(tok::eof)) + if (Tok.isOneOf(tok::eof, tok::eod)) return reportError( DirectiveLoc, diag::err_dep_source_scanner_missing_semi_after_at_import); @@ -563,6 +569,16 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, break; } + bool IsCXXModules = Kind == DirectiveKind::cxx_export_import_decl || + Kind == DirectiveKind::cxx_export_module_decl || + Kind == DirectiveKind::cxx_import_decl || + Kind == DirectiveKind::cxx_module_decl; + if (IsCXXModules) { + lexPPDirectiveBody(First, End); + pushDirective(Kind); + return false; + } + const auto &Tok = lexToken(First, End); pushDirective(Kind); if (Tok.is(tok::eof) || Tok.is(tok::eod)) @@ -924,7 +940,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { auto ScEx2 = make_scope_exit( [&]() { TheLexer.setParsingPreprocessorDirective(false); }); - // Handle "@import". + // FIXME: Should we handle @import as a preprocessing directive? if (*First == '@') return lexAt(First, End); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index b282a600c0e56..4ad2dbd63c830 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -73,6 +73,19 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; } +/// Return true if we have a C++20 Modules contextual keyword(export, import +/// or module). 
+bool Token::isModuleContextualKeyword(const LangOptions &LangOpts, + bool AllowExport) const { + if (!LangOpts.CPlusPlusModules || isAnnotation()) + return false; + if (AllowExport && is(tok::kw_export)) + return true; + if (const auto *II = getIdentifierInfo()) + return II->isModulesImport() || II->isModulesDeclaration(); + return false; +} + /// Determine whether the token kind starts a simple-type-specifier. bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const { switch (getKind()) { @@ -3742,6 +3755,9 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { assert(!Result.needsCleaning() && "Result needs cleaning"); assert(!Result.hasPtrData() && "Result has not been reset"); + if (TokAtPhysicalStartOfLine) + Result.setFlag(Token::PhysicalStartOfLine); + // CurPtr - Cache BufferPtr in an automatic variable. const char *CurPtr = BufferPtr; @@ -4020,11 +4036,16 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ case 'v': case 'w': case 'x': case 'y': case 'z': - case '_': + case '_': { // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexIdentifierContinue(Result, CurPtr); - + bool returnedToken = LexIdentifierContinue(Result, CurPtr); + if (returnedToken && Result.isNot(tok::eof) && !LexingRawMode && + !Is_PragmaLexer && !ParsingPreprocessorDirective && PP && + PP->HandleModuleContextualKeyword(Result)) + goto HandleDirective; + return returnedToken; + } case '$': // $ in identifiers. if (LangOpts.DollarIdents) { if (!isLexingRawMode()) @@ -4227,8 +4248,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? 
- if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) + if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) { + // We parsed a # character and it's the start of a preprocessing + // directive. + FormTokenWithChars(Result, CurPtr, tok::hash); goto HandleDirective; + } Kind = tok::hash; } @@ -4415,8 +4440,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) + if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) { + // We parsed a # character and it's the start of a preprocessing + // directive. + FormTokenWithChars(Result, CurPtr, tok::hash); goto HandleDirective; + } Kind = tok::hash; } @@ -4424,9 +4453,14 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case '@': // Objective C support. - if (CurPtr[-1] == '@' && LangOpts.ObjC) + if (CurPtr[-1] == '@' && LangOpts.ObjC) { + if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) { + FormTokenWithChars(Result, CurPtr, tok::at); + (void)PP->HandleModuleContextualKeyword(Result); + return true; + } Kind = tok::at; - else + } else Kind = tok::unknown; break; @@ -4506,15 +4540,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { return true; HandleDirective: - // We parsed a # character and it's the start of a preprocessing directive. - - FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); - if (PP->hadModuleLoaderFatalFailure()) - // With a fatal failure in the module loader, we abort parsing. - return true; - // We parsed the directive; lex a token with the new state. 
return false; @@ -4531,6 +4558,10 @@ const char *Lexer::convertDependencyDirectiveToken( Result.setKind(DDTok.Kind); Result.setFlag((Token::TokenFlags)DDTok.Flags); Result.setLength(DDTok.Length); + if (Result.is(tok::raw_identifier)) + Result.setRawIdentifierData(TokPtr); + else if (Result.isLiteral()) + Result.setLiteralData(TokPtr); BufferPtr = TokPtr + DDTok.Length; return TokPtr; } @@ -4577,26 +4608,35 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) { const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result); - if (Result.is(tok::hash) && Result.isAtStartOfLine()) { + if (Result.is(tok::hash) && Result.isAtStartOfLine() && !isLexingRawMode()) { PP->HandleDirective(Result); if (PP->hadModuleLoaderFatalFailure()) // With a fatal failure in the module loader, we abort parsing. return true; return false; } + if (Result.is(tok::at) && Result.isAtStartOfLine() && !isLexingRawMode()) { + (void)PP->HandleModuleContextualKeyword(Result); + return false; + } if (Result.is(tok::raw_identifier)) { Result.setRawIdentifierData(TokPtr); if (!isLexingRawMode()) { const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + if (PP->HandleModuleContextualKeyword(Result)) { + PP->HandleDirective(Result); + if (PP->hadModuleLoaderFatalFailure()) + // With a fatal failure in the module loader, we abort parsing. + return true; + return false; + } if (II->isHandleIdentifierCase()) return PP->HandleIdentifier(Result); } return true; } - if (Result.isLiteral()) { - Result.setLiteralData(TokPtr); + if (Result.isLiteral()) return true; - } if (Result.is(tok::colon)) { // Convert consecutive colons to 'tok::coloncolon'. 
if (*BufferPtr == ':') { diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 9d01b8d99e227..de80f28f1e9f5 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -82,14 +82,19 @@ Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc, /// Read and discard all tokens remaining on the current line until /// the tok::eod token is found. -SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) { +SourceRange Preprocessor::DiscardUntilEndOfDirective( + Token &Tmp, SmallVectorImpl *DiscardedToks) { SourceRange Res; - - LexUnexpandedToken(Tmp); + auto ReadNextTok = [&]() { + LexUnexpandedToken(Tmp); + if (DiscardedToks && Tmp.isNot(tok::eod)) + DiscardedToks->push_back(Tmp); + }; + ReadNextTok(); Res.setBegin(Tmp.getLocation()); while (Tmp.isNot(tok::eod)) { assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens"); - LexUnexpandedToken(Tmp); + ReadNextTok(); } Res.setEnd(Tmp.getLocation()); return Res; @@ -439,21 +444,27 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef, /// true, then we consider macros that expand to zero tokens as being ok. /// /// Returns the location of the end of the directive. -SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, - bool EnableMacros) { +SourceLocation +Preprocessor::CheckEndOfDirective(StringRef DirType, bool EnableMacros, + SmallVectorImpl *ExtraToks) { Token Tmp; + auto ReadNextTok = [this, ExtraToks, &Tmp](auto &&LexFn) { + std::invoke(LexFn, this, Tmp); + if (ExtraToks && Tmp.isNot(tok::eod)) + ExtraToks->push_back(Tmp); + }; // Lex unexpanded tokens for most directives: macros might expand to zero // tokens, causing us to miss diagnosing invalid lines. Some directives (like // #line) allow empty macros. 
if (EnableMacros) - Lex(Tmp); + ReadNextTok(&Preprocessor::Lex); else - LexUnexpandedToken(Tmp); + ReadNextTok(&Preprocessor::LexUnexpandedToken); // There should be no tokens after the directive, but we allow them as an // extension. while (Tmp.is(tok::comment)) // Skip comments in -C mode. - LexUnexpandedToken(Tmp); + ReadNextTok(&Preprocessor::LexUnexpandedToken); if (Tmp.is(tok::eod)) return Tmp.getLocation(); @@ -466,7 +477,16 @@ SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && !CurTokenLexer) Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); - Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; + + if (getLangOpts().CPlusPlusModules && + (DirType == "import" || DirType == "module")) + Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; + else if (IsAtImport && DirType == "import") + Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) + << llvm::Twine("@").concat(DirType).str() << Hint; + else + Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) + << llvm::Twine("#").concat(DirType).str() << Hint; return DiscardUntilEndOfDirective().getEnd(); } @@ -1242,12 +1262,14 @@ void Preprocessor::HandleDirective(Token &Result) { // pp-directive. bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal(); - // Save the '#' token in case we need to return it later. - Token SavedHash = Result; + // Save the directive-introducing token('#' and import/module in C++20) in + // case we need to return it later. + Token Introducer = Result; // Read the next token, the directive flavor. This isn't expanded due to // C99 6.10.3p8. - LexUnexpandedToken(Result); + if (Introducer.is(tok::hash)) + LexUnexpandedToken(Result); // C99 6.10.3p11: Is this preprocessor directive in macro invocation? 
e.g.: // #define A(x) #x @@ -1266,7 +1288,11 @@ void Preprocessor::HandleDirective(Token &Result) { case tok::pp___include_macros: case tok::pp_pragma: case tok::pp_embed: - Diag(Result, diag::err_embedded_directive) << II->getName(); + case tok::pp_module: + Diag(Result, diag::err_embedded_directive) + << Introducer.isModuleContextualKeyword(getLangOpts(), + /*AllowExport=*/false) + << II->getName(); Diag(*ArgMacro, diag::note_macro_expansion_here) << ArgMacro->getIdentifierInfo(); DiscardUntilEndOfDirective(); @@ -1283,7 +1309,8 @@ void Preprocessor::HandleDirective(Token &Result) { ResetMacroExpansionHelper helper(this); if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop) - return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation()); + return HandleSkippedDirectiveWhileUsingPCH(Result, + Introducer.getLocation()); switch (Result.getKind()) { case tok::eod: @@ -1303,7 +1330,7 @@ void Preprocessor::HandleDirective(Token &Result) { // directive. However do permit it in the predefines file, as we use line // markers to mark the builtin macros as being in a system header. if (getLangOpts().AsmPreprocessor && - SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID()) + SourceMgr.getFileID(Introducer.getLocation()) != getPredefinesFileID()) break; return HandleDigitDirective(Result); default: @@ -1315,30 +1342,32 @@ void Preprocessor::HandleDirective(Token &Result) { default: break; // C99 6.10.1 - Conditional Inclusion. 
case tok::pp_if: - return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective); + return HandleIfDirective(Result, Introducer, + ReadAnyTokensBeforeDirective); case tok::pp_ifdef: - return HandleIfdefDirective(Result, SavedHash, false, + return HandleIfdefDirective(Result, Introducer, false, true /*not valid for miopt*/); case tok::pp_ifndef: - return HandleIfdefDirective(Result, SavedHash, true, + return HandleIfdefDirective(Result, Introducer, true, ReadAnyTokensBeforeDirective); case tok::pp_elif: case tok::pp_elifdef: case tok::pp_elifndef: - return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID()); + return HandleElifFamilyDirective(Result, Introducer, + II->getPPKeywordID()); case tok::pp_else: - return HandleElseDirective(Result, SavedHash); + return HandleElseDirective(Result, Introducer); case tok::pp_endif: return HandleEndifDirective(Result); // C99 6.10.2 - Source File Inclusion. case tok::pp_include: // Handle #include. - return HandleIncludeDirective(SavedHash.getLocation(), Result); + return HandleIncludeDirective(Introducer.getLocation(), Result); case tok::pp___include_macros: // Handle -imacros. - return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result); + return HandleIncludeMacrosDirective(Introducer.getLocation(), Result); // C99 6.10.3 - Macro Replacement. case tok::pp_define: @@ -1356,13 +1385,25 @@ void Preprocessor::HandleDirective(Token &Result) { // C99 6.10.6 - Pragma Directive. case tok::pp_pragma: - return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()}); - + return HandlePragmaDirective({PIK_HashPragma, Introducer.getLocation()}); + case tok::pp_module: + return HandleCXXModuleDirective(Result); // GNU Extensions. 
case tok::pp_import: - return HandleImportDirective(SavedHash.getLocation(), Result); + switch (Introducer.getKind()) { + case tok::kw_import: + if (ModuleLikeDirectiveIntroducer && + ModuleLikeDirectiveIntroducer->is(tok::at)) + return HandleObjCAtImportDirective(Result); + return HandleCXXImportDirective(Result); + case tok::hash: + return HandleImportDirective(Introducer.getLocation(), Result); + default: + llvm_unreachable("Not a valid import directive"); + } + break; case tok::pp_include_next: - return HandleIncludeNextDirective(SavedHash.getLocation(), Result); + return HandleIncludeNextDirective(Introducer.getLocation(), Result); case tok::pp_warning: if (LangOpts.CPlusPlus) @@ -1381,7 +1422,7 @@ void Preprocessor::HandleDirective(Token &Result) { case tok::pp_sccs: return HandleIdentSCCSDirective(Result); case tok::pp_embed: - return HandleEmbedDirective(SavedHash.getLocation(), Result, + return HandleEmbedDirective(Introducer.getLocation(), Result, getCurrentFileLexer() ? *getCurrentFileLexer()->getFileEntry() : static_cast(nullptr)); @@ -1412,7 +1453,7 @@ void Preprocessor::HandleDirective(Token &Result) { if (getLangOpts().AsmPreprocessor) { auto Toks = std::make_unique(2); // Return the # and the token after it. - Toks[0] = SavedHash; + Toks[0] = Introducer; Toks[1] = Result; // If the second token is a hashhash token, then we need to translate it to @@ -4052,3 +4093,294 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, StringRef(static_cast(Mem), OriginalFilename.size()); HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents, FilenameToGo); } + +/// Lex a token following the 'import' contextual keyword. +/// +/// [ObjC] @ import module-name ; +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +/// +/// We respond to a pp-import by importing macros from the named module. 
+void Preprocessor::HandleObjCAtImportDirective(Token &ImportTok) { + assert(ModuleLikeDirectiveIntroducer && + ModuleLikeDirectiveIntroducer->is(tok::at) && + "@ token must set during pervious lexing"); + ModuleLikeDirectiveIntroducer.reset(); + ModuleImportLoc = ImportTok.getLocation(); + SmallVector DirToks{ImportTok}; + SmallVector Path; + Token Tok; + Lex(Tok); + if (LexModuleNameContinue(Tok, ModuleImportLoc, DirToks, Path)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + EnterModuleSuffixTokenStream(DirToks); + return; + } + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + if (!DirToks.back().isOneOf(tok::semi, tok::eod)) + CollectPPImportSuffix(DirToks); + + if (DirToks.back().isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName()); + else + DirToks.pop_back(); + + // This is not a pp-import after all. + if (DirToks.back().isNot(tok::semi)) { + EnterModuleSuffixTokenStream(DirToks); + return; + } + + SourceLocation SemiLoc = DirToks.back().getLocation(); + Module *Imported = nullptr; + if (getLangOpts().Modules) { + Imported = TheModuleLoader.loadModule(ImportTok.getLocation(), Path, + Module::Hidden, + /*IsInclusionDirective=*/false); + if (Imported) + makeModuleVisible(Imported, SemiLoc); + + // We hit an error processing the import. Bail out. + if (hadModuleLoaderFatalFailure()) { + // With a fatal failure in the module loader, we abort parsing. 
+ assert(CurLexer && "#include but no current lexer set!"); + CurLexer->cutOffLexing(); + } + } + + if (Callbacks) + Callbacks->moduleImport(ModuleImportLoc, Path, Imported); + EnterModuleSuffixTokenStream(DirToks); +} + +void Preprocessor::HandleCXXImportDirective(Token ImportTok) { + assert(getLangOpts().CPlusPlusModules && ImportTok.is(tok::kw_import)); + llvm::SaveAndRestore SaveImportingCXXModules( + this->ImportingCXXNamedModules); + ImportingCXXNamedModules = true; + + if (ModuleLikeDirectiveIntroducer) + ModuleLikeDirectiveIntroducer.reset(); + + Token Tok; + if (LexHeaderName(Tok)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName()); + return; + } + + SourceLocation UseLoc = ImportTok.getLocation(); + SmallVector DirToks{ImportTok}; + SmallVector Path; + bool ImportingHeader = false; + bool IsPartition = false; + std::string FlatName; + switch (Tok.getKind()) { + case tok::header_name: + ImportingHeader = true; + DirToks.push_back(Tok); + break; + case tok::colon: + IsPartition = true; + DirToks.push_back(Tok); + UseLoc = Tok.getLocation(); + Lex(Tok); + [[fallthrough]]; + case tok::identifier: { + if (LexModuleNameContinue(Tok, UseLoc, DirToks, Path)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + EnterModuleSuffixTokenStream(DirToks); + return; + } + + bool IsValid = + (IsPartition && ModuleDeclState.isNamedModule()) || !IsPartition; + if (Callbacks && IsValid) { + if (IsPartition && ModuleDeclState.isNamedModule()) { + FlatName += ModuleDeclState.getPrimaryName(); + FlatName += ":"; + } + + FlatName += ModuleLoader::getFlatNameFromPath(Path); + SourceLocation StartLoc = IsPartition ? UseLoc : Path[0].getLoc(); + IdentifierLoc FlatNameLoc(StartLoc, getIdentifierInfo(FlatName)); + + // We don't/shouldn't load the standard c++20 modules when preprocessing. + // so the imported module is nullptr. 
+ Callbacks->moduleImport(ImportTok.getLocation(), + ModuleIdPath(FlatNameLoc), + /*Imported=*/nullptr); + } + break; + } + default: + DirToks.push_back(Tok); + break; + } + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + if (!DirToks.back().isOneOf(tok::semi, tok::eod)) + CollectPPImportSuffix(DirToks); + + if (DirToks.back().isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName()); + else + DirToks.pop_back(); + + // This is not a pp-import after all. + if (DirToks.back().isNot(tok::semi)) { + EnterModuleSuffixTokenStream(DirToks); + return; + } + + if (ImportingHeader) { + // C++2a [cpp.module]p1: + // The ';' preprocessing-token terminating a pp-import shall not have + // been produced by macro replacement. + SourceLocation SemiLoc = DirToks.back().getLocation(); + if (SemiLoc.isMacroID()) + Diag(SemiLoc, diag::err_header_import_semi_in_macro); + + auto Action = HandleHeaderIncludeOrImport( + /*HashLoc*/ SourceLocation(), ImportTok, Tok, SemiLoc); + switch (Action.Kind) { + case ImportAction::None: + break; + + case ImportAction::ModuleBegin: + // Let the parser know we're textually entering the module. + DirToks.emplace_back(); + DirToks.back().startToken(); + DirToks.back().setKind(tok::annot_module_begin); + DirToks.back().setLocation(SemiLoc); + DirToks.back().setAnnotationEndLoc(SemiLoc); + DirToks.back().setAnnotationValue(Action.ModuleForHeader); + [[fallthrough]]; + + case ImportAction::ModuleImport: + case ImportAction::HeaderUnitImport: + case ImportAction::SkippedModuleImport: + // We chose to import (or textually enter) the file. Convert the + // header-name token into a header unit annotation token. + DirToks[1].setKind(tok::annot_header_unit); + DirToks[1].setAnnotationEndLoc(DirToks[0].getLocation()); + DirToks[1].setAnnotationValue(Action.ModuleForHeader); + // FIXME: Call the moduleImport callback? 
+ break; + case ImportAction::Failure: + assert(TheModuleLoader.HadFatalFailure && + "This should be an early exit only to a fatal error"); + CurLexer->cutOffLexing(); + return; + } + } + + EnterModuleSuffixTokenStream(DirToks); +} + +void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) { + assert(getLangOpts().CPlusPlusModules && ModuleTok.is(tok::kw_module)); + Token Introducer = ModuleTok; + if (ModuleLikeDirectiveIntroducer) { + Introducer = *ModuleLikeDirectiveIntroducer; + ModuleLikeDirectiveIntroducer.reset(); + } + + SourceLocation StartLoc = Introducer.getLocation(); + if (!IncludeMacroStack.empty()) { + SourceLocation End = DiscardUntilEndOfDirective().getEnd(); + Diag(StartLoc, diag::err_module_decl_in_header) + << SourceRange(StartLoc, End); + return; + } + + if (CurPPLexer->getConditionalStackDepth() != 0) { + SourceLocation End = DiscardUntilEndOfDirective().getEnd(); + Diag(StartLoc, diag::err_pp_cond_span_module_decl) + << SourceRange(StartLoc, End); + return; + } + + Token Tok; + SourceLocation UseLoc = ModuleTok.getLocation(); + SmallVector DirToks{ModuleTok}; + SmallVector Path, Partition; + LexUnexpandedToken(Tok); + + switch (Tok.getKind()) { + // Global Module Fragment. + case tok::semi: + DirToks.push_back(Tok); + break; + case tok::colon: + DirToks.push_back(Tok); + LexUnexpandedToken(Tok); + if (Tok.isNot(tok::kw_private)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + EnterModuleSuffixTokenStream(DirToks); + return; + } + DirToks.push_back(Tok); + break; + case tok::identifier: { + // C++ [cpp.module]p3: Any preprocessing tokens after the module + // preprocessing token in the module directive are processed just as in + // normal text. + // + // P3034R1 Module Declarations Shouldn’t be Macros. 
+ if (LexModuleNameContinue(Tok, UseLoc, DirToks, Path, + /*AllowMacroExpansion=*/false)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + EnterModuleSuffixTokenStream(DirToks); + return; + } + + // C++20 [cpp.module]p + // The pp-tokens, if any, of a pp-module shall be of the form: + // pp-module-name pp-module-partition[opt] pp-tokens[opt] + if (Tok.is(tok::colon)) { + LexUnexpandedToken(Tok); + if (LexModuleNameContinue(Tok, UseLoc, DirToks, Partition)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + EnterModuleSuffixTokenStream(DirToks); + return; + } + } + break; + } + default: + DirToks.push_back(Tok); + break; + } + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + SourceLocation End = DirToks.back().getLocation(); + if (!DirToks.back().isOneOf(tok::semi, tok::eod)) { + CollectPPImportSuffix(DirToks); + End = DirToks.back().getLocation(); + } + + if (DirToks.back().isNot(tok::eod)) + End = CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + else + End = DirToks.pop_back_val().getLocation(); + EnterModuleSuffixTokenStream(DirToks); +} diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index d8f61c02a9837..8bd3749dd8ca2 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -115,10 +115,9 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, CurPPLexer = TheLexer; CurDirLookup = CurDir; CurLexerSubmodule = nullptr; - if (CurLexerCallback != CLK_LexAfterModuleImport) - CurLexerCallback = TheLexer->isDependencyDirectivesLexer() - ? CLK_DependencyDirectivesLexer - : CLK_Lexer; + CurLexerCallback = TheLexer->isDependencyDirectivesLexer() + ? 
CLK_DependencyDirectivesLexer + : CLK_Lexer; // Notify the client, if desired, that we are in a new source file. if (Callbacks && !CurLexer->Is_PragmaLexer) { @@ -154,8 +153,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerCallback != CLK_LexAfterModuleImport) - CurLexerCallback = CLK_TokenLexer; + CurLexerCallback = CLK_TokenLexer; } /// EnterTokenStream - Add a "macro" context to the top of the include stack, @@ -209,8 +207,7 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerCallback != CLK_LexAfterModuleImport) - CurLexerCallback = CLK_TokenLexer; + CurLexerCallback = CLK_TokenLexer; } /// Compute the relative path that names the given file relative to diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index bba3c89bed38f..914974ef5f450 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -817,7 +817,7 @@ void Preprocessor::HandlePragmaModuleBuild(Token &Tok) { LexUnexpandedToken(Tok); if (Tok.isNot(tok::eod)) { - Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma"; DiscardUntilEndOfDirective(); } @@ -898,7 +898,7 @@ void Preprocessor::HandlePragmaHdrstop(Token &Tok) { } if (Tok.isNot(tok::eod)) Diag(Tok.getLocation(), diag::ext_pp_extra_tokens_at_eol) - << "pragma hdrstop"; + << "#pragma hdrstop"; if (creatingPCHWithPragmaHdrStop() && SourceMgr.isInMainFile(Tok.getLocation())) { @@ -1236,7 +1236,7 @@ struct PragmaDebugHandler : public PragmaHandler { if (Tok.isNot(tok::eod)) { PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) - << "pragma clang __debug captured"; + << "#pragma clang __debug captured"; return; } @@ -1538,7 +1538,7 @@ struct PragmaWarningHandler : public PragmaHandler { PP.Lex(Tok); if (Tok.isNot(tok::eod)) - 
PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma warning"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma warning"; } }; @@ -1602,7 +1602,8 @@ struct PragmaExecCharsetHandler : public PragmaHandler { PP.Lex(Tok); if (Tok.isNot(tok::eod)) - PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma execution_character_set"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) + << "#pragma execution_character_set"; } }; @@ -1719,7 +1720,7 @@ struct PragmaModuleImportHandler : public PragmaHandler { return; if (Tok.isNot(tok::eod)) - PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma"; // If we have a non-empty module path, load the named module. Module *Imported = @@ -1755,7 +1756,7 @@ struct PragmaModuleBeginHandler : public PragmaHandler { return; if (Tok.isNot(tok::eod)) - PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma"; // We can only enter submodules of the current module. StringRef Current = PP.getLangOpts().CurrentModule; @@ -1814,7 +1815,7 @@ struct PragmaModuleEndHandler : public PragmaHandler { PP.LexUnexpandedToken(Tok); if (Tok.isNot(tok::eod)) - PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma"; Module *M = PP.LeaveSubmodule(/*ForPragma*/true); if (M) @@ -1848,7 +1849,7 @@ struct PragmaModuleLoadHandler : public PragmaHandler { return; if (Tok.isNot(tok::eod)) - PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma"; // Load the module, don't make it visible. PP.getModuleLoader().loadModule(Loc, ModuleName, Module::Hidden, @@ -1905,7 +1906,7 @@ struct PragmaARCCFCodeAuditedHandler : public PragmaHandler { // Verify that this is followed by EOD. 
PP.LexUnexpandedToken(Tok); if (Tok.isNot(tok::eod)) - PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma"; // The start location of the active audit. SourceLocation BeginLoc = PP.getPragmaARCCFCodeAuditedInfo().getLoc(); @@ -1960,7 +1961,7 @@ struct PragmaAssumeNonNullHandler : public PragmaHandler { // Verify that this is followed by EOD. PP.LexUnexpandedToken(Tok); if (Tok.isNot(tok::eod)) - PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "#pragma"; // The start location of the active audit. SourceLocation BeginLoc = PP.getPragmaAssumeNonNullLoc(); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index e003ad3a95570..5436122da00e1 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -55,6 +55,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Capacity.h" @@ -866,26 +867,6 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { // like "#define TY typeof", "TY(1) x". if (II.isExtensionToken() && !DisableMacroExpansion) Diag(Identifier, diag::ext_token_used); - - // If this is the 'import' contextual keyword following an '@', note - // that the next token indicates a module name. - // - // Note that we do not treat 'import' as a contextual - // keyword when we're in a caching lexer, because caching lexers only get - // used in contexts where import declarations are disallowed. - // - // Likewise if this is the standard C++ import keyword. 
- if (((LastTokenWasAt && II.isModulesImport()) || - Identifier.is(tok::kw_import)) && - !InMacroArgs && !DisableMacroExpansion && - (getLangOpts().Modules || getLangOpts().DebuggerSupport) && - CurLexerCallback != CLK_CachingLexer) { - ModuleImportLoc = Identifier.getLocation(); - NamedModuleImportPath.clear(); - IsAtImport = true; - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - } return true; } @@ -932,6 +913,7 @@ void Preprocessor::Lex(Token &Result) { // This token is injected to represent the translation of '#include "a.h"' // into "import a.h;". Mimic the notional ';'. case tok::annot_module_include: + case tok::annot_repl_input_end: case tok::semi: TrackGMFState.handleSemi(); StdCXXImportSeqState.handleSemi(); @@ -954,31 +936,21 @@ void Preprocessor::Lex(Token &Result) { case tok::period: ModuleDeclState.handlePeriod(); break; - case tok::eod: + case tok::kw_import: + if (StdCXXImportSeqState.atTopLevel()) { + TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); + StdCXXImportSeqState.handleImport(); + } break; - case tok::identifier: - // Check "import" and "module" when there is no open bracket. The two - // identifiers are not meaningful with open brackets. 
+ case tok::kw_module: if (StdCXXImportSeqState.atTopLevel()) { - if (Result.getIdentifierInfo()->isModulesImport()) { - TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); - StdCXXImportSeqState.handleImport(); - if (StdCXXImportSeqState.afterImportSeq()) { - ModuleImportLoc = Result.getLocation(); - NamedModuleImportPath.clear(); - IsAtImport = false; - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - } - break; - } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { - if (hasSeenNoTrivialPPDirective()) - Result.setFlag(Token::HasSeenNoTrivialPPDirective); - TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); - ModuleDeclState.handleModule(); - break; - } + if (hasSeenNoTrivialPPDirective()) + Result.setFlag(Token::HasSeenNoTrivialPPDirective); + TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); + ModuleDeclState.handleModule(); } + break; + case tok::identifier: ModuleDeclState.handleIdentifier(Result.getIdentifierInfo()); if (ModuleDeclState.isModuleCandidate()) break; @@ -997,6 +969,9 @@ void Preprocessor::Lex(Token &Result) { } LastTokenWasAt = Result.is(tok::at); + if (!Result.isOneOf(tok::at, tok::kw_export)) + ModuleLikeDirectiveIntroducer.reset(); + --LexLevel; if ((LexLevel == 0 || PreprocessToken) && @@ -1119,248 +1094,164 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { return false; } -/// Collect the tokens of a C++20 pp-import-suffix. -void Preprocessor::CollectPpImportSuffix(SmallVectorImpl &Toks) { - // FIXME: For error recovery, consider recognizing attribute syntax here - // and terminating / diagnosing a missing semicolon if we find anything - // else? (Can we leave that to the parser?) 
- unsigned BracketDepth = 0; - while (true) { - Toks.emplace_back(); - Lex(Toks.back()); +// We represent the primary and partition names as 'Paths' which are sections +// of the hierarchical access path for a clang module. However for C++20 +// the periods in a name are just another character, and we will need to +// flatten them into a string. +std::string ModuleLoader::getFlatNameFromPath(ModuleIdPath Path) { + std::string Name; + if (Path.empty()) + return Name; + + for (auto &Piece : Path) { + assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid()); + if (!Name.empty()) + Name += "."; + Name += Piece.getIdentifierInfo()->getName(); + } + return Name; +} - switch (Toks.back().getKind()) { - case tok::l_paren: case tok::l_square: case tok::l_brace: - ++BracketDepth; - break; +bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, + SmallVectorImpl &Suffix, + SmallVectorImpl &Path, + bool AllowMacroExpansion) { + auto ConsumeToken = [&]() { + if (AllowMacroExpansion) + Lex(Tok); + else + LexUnexpandedToken(Tok); + Suffix.push_back(Tok); + }; - case tok::r_paren: case tok::r_square: case tok::r_brace: - if (BracketDepth == 0) - return; - --BracketDepth; - break; + Suffix.push_back(Tok); + while (true) { + if (Tok.isNot(tok::identifier)) + return true; - case tok::semi: - if (BracketDepth == 0) - return; - break; + // Record this part of the module path. + Path.emplace_back(Tok.getLocation(), Tok.getIdentifierInfo()); + ConsumeToken(); - case tok::eof: - return; + if (Tok.isNot(tok::period)) + return false; - default: - break; - } + ConsumeToken(); } } - -/// Lex a token following the 'import' contextual keyword. 
+/// P1857R3: Modules Dependency Discovery /// -/// pp-import: [C++20] -/// import header-name pp-import-suffix[opt] ; -/// import header-name-tokens pp-import-suffix[opt] ; -/// [ObjC] @ import module-name ; -/// [Clang] import module-name ; -/// -/// header-name-tokens: -/// string-literal -/// < [any sequence of preprocessing-tokens other than >] > -/// -/// module-name: -/// module-name-qualifier[opt] identifier -/// -/// module-name-qualifier -/// module-name-qualifier[opt] identifier . -/// -/// We respond to a pp-import by importing macros from the named module. -bool Preprocessor::LexAfterModuleImport(Token &Result) { - // Figure out what kind of lexer we actually have. - recomputeCurLexerKind(); - - // Lex the next token. The header-name lexing rules are used at the start of - // a pp-import. - // - // For now, we only support header-name imports in C++20 mode. - // FIXME: Should we allow this in all language modes that support an import - // declaration as an extension? - if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { - if (LexHeaderName(Result)) - return true; +/// At the start of phase 4 an import or module token is treated as starting a +/// directive and is converted to its respective keyword iff: +/// - After skipping horizontal whitespace, it is +/// - at the start of a logical line, or +/// - preceded by an 'export' at the start of the logical line. +/// - It is followed by an identifier pp token (before macro expansion), or +/// - <, ", or : (but not ::) pp tokens for 'import', or +/// - ; for 'module' +/// Otherwise the token is treated as an identifier. 
+bool Preprocessor::HandleModuleContextualKeyword(Token &Result) { + if (!Result.isModuleContextualKeyword(getLangOpts()) && + Result.isNot(tok::at) && !Result.isObjCAtKeyword(tok::objc_import)) + return false; - if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) { - std::string Name = ModuleDeclState.getPrimaryName().str(); - Name += ":"; - NamedModuleImportPath.emplace_back(Result.getLocation(), - getIdentifierInfo(Name)); - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - } else { - Lex(Result); + if (Result.isOneOf(tok::kw_export, tok::at)) { + ModuleLikeDirectiveIntroducer = Result; + return false; } - // Allocate a holding buffer for a sequence of tokens and introduce it into - // the token stream. - auto EnterTokens = [this](ArrayRef Toks) { - auto ToksCopy = std::make_unique(Toks.size()); - std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); - EnterTokenStream(std::move(ToksCopy), Toks.size(), - /*DisableMacroExpansion*/ true, /*IsReinject*/ false); - }; - - bool ImportingHeader = Result.is(tok::header_name); - // Check for a header-name. - SmallVector Suffix; - if (ImportingHeader) { - // Enter the header-name token into the token stream; a Lex action cannot - // both return a token and cache tokens (doing so would corrupt the token - // cache if the call to Lex comes from CachingLex / PeekAhead). - Suffix.push_back(Result); - - // Consume the pp-import-suffix and expand any macros in it now. We'll add - // it back into the token stream later. - CollectPpImportSuffix(Suffix); - if (Suffix.back().isNot(tok::semi)) { - // This is not a pp-import after all. - EnterTokens(Suffix); + if (ModuleLikeDirectiveIntroducer) { + // The saved introducer ('export' or '@') was not at the start of the line, + // so it is not a directive-introducing token. 
+ if (!ModuleLikeDirectiveIntroducer->isAtPhysicalStartOfLine()) + return false; + // [cpp.pre]/1.4 + // export // not a preprocessing directive + // import foo; // preprocessing directive (ill-formed at phase7) + if (Result.isAtPhysicalStartOfLine()) return false; - } - - // C++2a [cpp.module]p1: - // The ';' preprocessing-token terminating a pp-import shall not have - // been produced by macro replacement. - SourceLocation SemiLoc = Suffix.back().getLocation(); - if (SemiLoc.isMacroID()) - Diag(SemiLoc, diag::err_header_import_semi_in_macro); - - // Reconstitute the import token. - Token ImportTok; - ImportTok.startToken(); - ImportTok.setKind(tok::kw_import); - ImportTok.setLocation(ModuleImportLoc); - ImportTok.setIdentifierInfo(getIdentifierInfo("import")); - ImportTok.setLength(6); - - auto Action = HandleHeaderIncludeOrImport( - /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); - switch (Action.Kind) { - case ImportAction::None: - break; - - case ImportAction::ModuleBegin: - // Let the parser know we're textually entering the module. - Suffix.emplace_back(); - Suffix.back().startToken(); - Suffix.back().setKind(tok::annot_module_begin); - Suffix.back().setLocation(SemiLoc); - Suffix.back().setAnnotationEndLoc(SemiLoc); - Suffix.back().setAnnotationValue(Action.ModuleForHeader); - [[fallthrough]]; - case ImportAction::ModuleImport: - case ImportAction::HeaderUnitImport: - case ImportAction::SkippedModuleImport: - // We chose to import (or textually enter) the file. Convert the - // header-name token into a header unit annotation token. - Suffix[0].setKind(tok::annot_header_unit); - Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); - Suffix[0].setAnnotationValue(Action.ModuleForHeader); - // FIXME: Call the moduleImport callback? 
- break; - case ImportAction::Failure: - assert(TheModuleLoader.HadFatalFailure && - "This should be an early exit only to a fatal error"); - Result.setKind(tok::eof); - CurLexer->cutOffLexing(); - EnterTokens(Suffix); - return true; + if (ModuleLikeDirectiveIntroducer->is(tok::at)) { + if (Result.isObjCAtKeyword(tok::objc_import)) { + Result.setKind(tok::kw_import); + ModuleImportLoc = Result.getLocation(); + IsAtImport = true; + return true; + } + return false; } - - EnterTokens(Suffix); + } else if (!Result.isAtPhysicalStartOfLine()) return false; - } - // The token sequence - // - // import identifier (. identifier)* - // - // indicates a module import directive. We already saw the 'import' - // contextual keyword, so now we're looking for the identifiers. - if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { - // We expected to see an identifier here, and we did; continue handling - // identifiers. - NamedModuleImportPath.emplace_back(Result.getLocation(), - Result.getIdentifierInfo()); - ModuleImportExpectsIdentifier = false; - CurLexerCallback = CLK_LexAfterModuleImport; + bool SavedParsingPreprocessorDirective = + CurPPLexer->ParsingPreprocessorDirective; + CurPPLexer->ParsingPreprocessorDirective = true; + auto _ = llvm::make_scope_exit([&]() { + CurPPLexer->ParsingPreprocessorDirective = + SavedParsingPreprocessorDirective; + }); + + if (Result.getIdentifierInfo()->isModulesImport() && + isNextPPTokenOneOf(tok::raw_identifier, tok::less, tok::string_literal, + tok::colon)) { + Result.setKind(tok::kw_import); + ModuleImportLoc = Result.getLocation(); + IsAtImport = false; return true; } - // If we're expecting a '.' or a ';', and we got a '.', then wait until we - // see the next identifier. (We can also see a '[[' that begins an - // attribute-specifier-seq here under the Standard C++ Modules.) 
- if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; + if (Result.getIdentifierInfo()->isModulesDeclaration() && + isNextPPTokenOneOf(tok::raw_identifier, tok::colon, tok::semi)) { + Result.setKind(tok::kw_module); + ModuleDeclLoc = Result.getLocation(); return true; } - // If we didn't recognize a module name at all, this is not a (valid) import. - if (NamedModuleImportPath.empty() || Result.is(tok::eof)) - return true; + // Ok, it's an identifier. + return false; +} - // Consume the pp-import-suffix and expand any macros in it now, if we're not - // at the semicolon already. - SourceLocation SemiLoc = Result.getLocation(); - if (Result.isNot(tok::semi)) { - Suffix.push_back(Result); - CollectPpImportSuffix(Suffix); - if (Suffix.back().isNot(tok::semi)) { - // This is not an import after all. - EnterTokens(Suffix); - return false; - } - SemiLoc = Suffix.back().getLocation(); - } +bool Preprocessor::CollectPPImportSuffixAndEnterStream( + SmallVectorImpl &Toks, bool StopUntilEOD) { + CollectPPImportSuffix(Toks); + EnterModuleSuffixTokenStream(Toks); + return false; +} - // Under the standard C++ Modules, the dot is just part of the module name, - // and not a real hierarchy separator. Flatten such module names now. - // - // FIXME: Is this the right level to be performing this transformation? - std::string FlatModuleName; - if (getLangOpts().CPlusPlusModules) { - for (auto &Piece : NamedModuleImportPath) { - // If the FlatModuleName ends with colon, it implies it is a partition. 
- if (!FlatModuleName.empty() && FlatModuleName.back() != ':') - FlatModuleName += "."; - FlatModuleName += Piece.getIdentifierInfo()->getName(); - } - SourceLocation FirstPathLoc = NamedModuleImportPath[0].getLoc(); - NamedModuleImportPath.clear(); - NamedModuleImportPath.emplace_back(FirstPathLoc, - getIdentifierInfo(FlatModuleName)); - } +/// Collect the tokens of a C++20 pp-import-suffix. +void Preprocessor::CollectPPImportSuffix(SmallVectorImpl &Toks, + bool StopUntilEOD) { + // FIXME: For error recovery, consider recognizing attribute syntax here + // and terminating / diagnosing a missing semicolon if we find anything + // else? (Can we leave that to the parser?) + while (true) { + Toks.emplace_back(); + Lex(Toks.back()); - Module *Imported = nullptr; - // We don't/shouldn't load the standard c++20 modules when preprocessing. - if (getLangOpts().Modules && !isInImportingCXXNamedModules()) { - Imported = TheModuleLoader.loadModule(ModuleImportLoc, - NamedModuleImportPath, - Module::Hidden, - /*IsInclusionDirective=*/false); - if (Imported) - makeModuleVisible(Imported, SemiLoc); + switch (Toks.back().getKind()) { + case tok::semi: + if (!StopUntilEOD) + return; + [[fallthrough]]; + case tok::eod: + case tok::eof: + return; + default: + break; + } } +} - if (Callbacks) - Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported); - - if (!Suffix.empty()) { - EnterTokens(Suffix); - return false; - } - return true; +// Allocate a holding buffer for a sequence of tokens and introduce it into +// the token stream. 
+void Preprocessor::EnterModuleSuffixTokenStream(ArrayRef Toks) { + if (Toks.empty()) + return; + auto ToksCopy = std::make_unique(Toks.size()); + std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); + EnterTokenStream(std::move(ToksCopy), Toks.size(), + /*DisableMacroExpansion*/ true, /*IsReinject*/ false); } void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc, diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 47f4134fb1465..9846ffc3a5f86 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -699,7 +699,10 @@ bool TokenLexer::Lex(Token &Tok) { HasLeadingSpace = false; // Handle recursive expansion! - if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) { + if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr && + (!PP.getLangOpts().CPlusPlusModules || + !Tok.isModuleContextualKeyword(PP.getLangOpts())) && + (!PP.getLangOpts().ObjC || !Tok.isObjCAtKeyword(tok::objc_import))) { // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. 
IdentifierInfo *II = Tok.getIdentifierInfo(); diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index a17398b84c6a6..e0cc6bc7792ff 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -17,6 +17,8 @@ #include "clang/AST/DeclTemplate.h" #include "clang/Basic/DiagnosticParse.h" #include "clang/Basic/StackExhaustionHandler.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Lex/ModuleLoader.h" #include "clang/Parse/RAIIObjectsForParser.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/EnterExpressionEvaluationContext.h" @@ -517,8 +519,6 @@ void Parser::Initialize() { Ident_trivially_relocatable_if_eligible = nullptr; Ident_replaceable_if_eligible = nullptr; Ident_GNU_final = nullptr; - Ident_import = nullptr; - Ident_module = nullptr; Ident_super = &PP.getIdentifierTable().get("super"); @@ -574,11 +574,6 @@ void Parser::Initialize() { PP.SetPoisonReason(Ident_AbnormalTermination,diag::err_seh___finally_block); } - if (getLangOpts().CPlusPlusModules) { - Ident_import = PP.getIdentifierInfo("import"); - Ident_module = PP.getIdentifierInfo("module"); - } - Actions.Initialize(); // Prime the lexer look-ahead. @@ -626,25 +621,26 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, switch (NextToken().getKind()) { case tok::kw_module: goto module_decl; - - // Note: no need to handle kw_import here. We only form kw_import under - // the Standard C++ Modules, and in that case 'export import' is parsed as - // an export-declaration containing an import-declaration. - - // Recognize context-sensitive C++20 'export module' and 'export import' - // declarations. + case tok::kw_import: + goto import_decl; + // Error recovery and recognize context-sensitive C++20 'export module' and + // 'export import' declarations. If the module/import directive is + // well-formed, it should be converted to a keyword in preprocessor, but not + // an identifier we saw here. 
+ // + // FIXME: We should generate better diagnostic information here to explain + // why the module/import directive is ill-formed. case tok::identifier: { - IdentifierInfo *II = NextToken().getIdentifierInfo(); - if ((II == Ident_module || II == Ident_import) && + if (NextToken().isModuleContextualKeyword(getLangOpts()) && GetLookAheadToken(2).isNot(tok::coloncolon)) { - if (II == Ident_module) + if (NextToken().getIdentifierInfo()->isStr( + tok::getKeywordSpelling(tok::kw_module))) goto module_decl; else goto import_decl; } break; } - default: break; } @@ -712,22 +708,25 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, Actions.ActOnEndOfTranslationUnit(); //else don't tell Sema that we ended parsing: more input might come. return true; - case tok::identifier: - // C++2a [basic.link]p3: + // C++20 [basic.link]p3: // A token sequence beginning with 'export[opt] module' or // 'export[opt] import' and not immediately followed by '::' // is never interpreted as the declaration of a top-level-declaration. - if ((Tok.getIdentifierInfo() == Ident_module || - Tok.getIdentifierInfo() == Ident_import) && + // + // Error recovery and recognize context-sensitive C++20 'export module' and + // 'export import' declarations. If the module/import directive is + // well-formed, it should be converted to a keyword in preprocessor, but not + // an identifier we saw here. 
+ if (Tok.isModuleContextualKeyword(getLangOpts()) && NextToken().isNot(tok::coloncolon)) { - if (Tok.getIdentifierInfo() == Ident_module) + if (Tok.getIdentifierInfo()->isStr( + tok::getKeywordSpelling(tok::kw_module))) goto module_decl; else goto import_decl; } break; - default: break; } @@ -920,8 +919,10 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs, case tok::kw_import: { Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module; if (getLangOpts().CPlusPlusModules) { - llvm_unreachable("not expecting a c++20 import here"); - ProhibitAttributes(Attrs); + Diag(Tok, diag::err_unexpected_module_or_import_decl) + << /*IsImport*/ true; + SkipUntil(tok::semi); + return nullptr; } SingleDecl = ParseModuleImport(SourceLocation(), IS); } break; @@ -1013,7 +1014,7 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs, return nullptr; case tok::kw_module: - Diag(Tok, diag::err_unexpected_module_decl); + Diag(Tok, diag::err_unexpected_module_or_import_decl) << /*IsImport*/ false; SkipUntil(tok::semi); return nullptr; @@ -2239,6 +2240,11 @@ void Parser::CodeCompleteNaturalLanguage() { Actions.CodeCompletion().CodeCompleteNaturalLanguage(); } +void Parser::CodeCompleteModuleImport(SourceLocation ImportLoc, + ModuleIdPath Path) { + Actions.CodeCompletion().CodeCompleteModuleImport(ImportLoc, Path); +} + bool Parser::ParseMicrosoftIfExistsCondition(IfExistsCondition& Result) { assert((Tok.is(tok::kw___if_exists) || Tok.is(tok::kw___if_not_exists)) && "Expected '__if_exists' or '__if_not_exists'"); @@ -2350,10 +2356,16 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { ? 
Sema::ModuleDeclKind::Interface : Sema::ModuleDeclKind::Implementation; - assert( - (Tok.is(tok::kw_module) || - (Tok.is(tok::identifier) && Tok.getIdentifierInfo() == Ident_module)) && - "not a module declaration"); + assert((Tok.is(tok::kw_module) || + (Tok.is(tok::identifier) && + Tok.getIdentifierInfo()->isStr( + tok::getKeywordSpelling(tok::kw_module)))) && + "not a module declaration"); + + if (getLangOpts().CPlusPlusModules && Tok.is(tok::identifier)) + Diag(StartLoc, diag::err_invalid_module_or_import_directive) + << /*IsImport=*/false; + SourceLocation ModuleLoc = ConsumeToken(); // Attributes appear after the module name, not before. @@ -2418,7 +2430,8 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { /*DiagnoseEmptyAttrs=*/false, /*WarnOnUnknownAttrs=*/true); - ExpectAndConsumeSemi(diag::err_module_expected_semi); + ExpectAndConsumeSemi(diag::err_expected_semi_after_module_or_import, + tok::getKeywordSpelling(tok::kw_module)); return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path, Partition, ImportState, @@ -2436,6 +2449,12 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, : Tok.isObjCAtKeyword(tok::objc_import)) && "Improper start to module import"); bool IsObjCAtImport = Tok.isObjCAtKeyword(tok::objc_import); + if (getLangOpts().CPlusPlusModules && !IsObjCAtImport && + Tok.is(tok::identifier)) { + Diag(StartLoc, diag::err_invalid_module_or_import_directive) + << /*IsImport=*/true; + } + SourceLocation ImportLoc = ConsumeToken(); // For C++20 modules, we can have "name" or ":Partition name" as valid input. @@ -2457,12 +2476,12 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, Diag(ColonLoc, diag::err_unsupported_module_partition) << SourceRange(ColonLoc, Path.back().getLoc()); // Recover by leaving partition empty. 
- else if (ParseModuleName(ColonLoc, Path, /*IsImport*/ true)) + else if (ParseModuleName(ColonLoc, Path, /*IsImport=*/true)) return nullptr; else IsPartition = true; } else { - if (ParseModuleName(ImportLoc, Path, /*IsImport*/ true)) + if (ParseModuleName(ImportLoc, Path, /*IsImport=*/true)) return nullptr; } @@ -2522,8 +2541,12 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, SeenError = false; break; } - ExpectAndConsumeSemi(diag::err_module_expected_semi); - TryConsumeToken(tok::eod); + + if (getLangOpts().CPlusPlusModules) + ExpectAndConsumeSemi(diag::err_expected_semi_after_module_or_import, + tok::getKeywordSpelling(tok::kw_import)); + else + ExpectAndConsumeSemi(diag::err_module_expected_semi); if (SeenError) return nullptr; @@ -2563,7 +2586,7 @@ bool Parser::ParseModuleName(SourceLocation UseLoc, return true; } - Diag(Tok, diag::err_module_expected_ident) << IsImport; + Diag(Tok, diag::err_module_expected_ident) << Path.empty(); SkipUntil(tok::semi); return true; } diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 773bcb225c188..529521e7185a4 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -58,23 +58,6 @@ static void checkModuleImportContext(Sema &S, Module *M, } } -// We represent the primary and partition names as 'Paths' which are sections -// of the hierarchical access path for a clang module. However for C++20 -// the periods in a name are just another character, and we will need to -// flatten them into a string. -static std::string stringFromPath(ModuleIdPath Path) { - std::string Name; - if (Path.empty()) - return Name; - - for (auto &Piece : Path) { - if (!Name.empty()) - Name += "."; - Name += Piece.getIdentifierInfo()->getName(); - } - return Name; -} - /// Helper function for makeTransitiveImportsVisible to decide whether /// the \param Imported module unit is in the same module with the \param /// CurrentModule. 
@@ -305,7 +288,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // We were asked to compile a module interface unit but this is a module // implementation unit. Diag(ModuleLoc, diag::err_module_interface_implementation_mismatch) - << FixItHint::CreateInsertion(ModuleLoc, "export "); + << FixItHint::CreateInsertion(ModuleLoc, "export "); MDK = ModuleDeclKind::Interface; break; @@ -372,10 +355,10 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // Flatten the dots in a module name. Unlike Clang's hierarchical module map // modules, the dots here are just another character that can appear in a // module name. - std::string ModuleName = stringFromPath(Path); + std::string ModuleName = ModuleLoader::getFlatNameFromPath(Path); if (IsPartition) { ModuleName += ":"; - ModuleName += stringFromPath(Partition); + ModuleName += ModuleLoader::getFlatNameFromPath(Partition); } // If a module name was explicitly specified on the command line, it must be // correct. @@ -388,7 +371,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, << getLangOpts().CurrentModule; return nullptr; } - const_cast(getLangOpts()).CurrentModule = ModuleName; + const_cast(getLangOpts()).CurrentModule = ModuleName; auto &Map = PP.getHeaderSearchInfo().getModuleMap(); Module *Mod; // The module we are creating. @@ -433,7 +416,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, Interface = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc}, Module::AllVisible, /*IsInclusionDirective=*/false); - const_cast(getLangOpts()).CurrentModule = ModuleName; + const_cast(getLangOpts()).CurrentModule = ModuleName; if (!Interface) { Diag(ModuleLoc, diag::err_module_not_defined) << ModuleName; @@ -596,12 +579,12 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, // otherwise, the name of the importing named module. 
ModuleName = NamedMod->getPrimaryModuleInterfaceName().str(); ModuleName += ":"; - ModuleName += stringFromPath(Path); + ModuleName += ModuleLoader::getFlatNameFromPath(Path); ModuleNameLoc = IdentifierLoc(Path[0].getLoc(), PP.getIdentifierInfo(ModuleName)); Path = ModuleIdPath(ModuleNameLoc); } else if (getLangOpts().CPlusPlusModules) { - ModuleName = stringFromPath(Path); + ModuleName = ModuleLoader::getFlatNameFromPath(Path); ModuleNameLoc = IdentifierLoc(Path[0].getLoc(), PP.getIdentifierInfo(ModuleName)); Path = ModuleIdPath(ModuleNameLoc); diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index d67178c153e88..23d89e88279ac 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -631,7 +631,8 @@ void ModuleDepCollectorPP::InclusionDirective( void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) { - if (MDC.ScanInstance.getPreprocessor().isInImportingCXXNamedModules()) { + auto &PP = MDC.ScanInstance.getPreprocessor(); + if (PP.getLangOpts().CPlusPlusModules && PP.isImportingCXXNamedModules()) { P1689ModuleInfo RequiredModule; RequiredModule.ModuleName = Path[0].getIdentifierInfo()->getName().str(); RequiredModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule; diff --git a/clang/test/CXX/basic/basic.link/p3.cpp b/clang/test/CXX/basic/basic.link/p3.cpp index e6633a777ddef..f389b07d39883 100644 --- a/clang/test/CXX/basic/basic.link/p3.cpp +++ b/clang/test/CXX/basic/basic.link/p3.cpp @@ -23,8 +23,8 @@ import::inner xi = {}; module::inner yi = {}; namespace N { - module a; - import b; + module a; // expected-error {{module declaration can only appear at the top level}} + import b; // expected-error {{import declaration can only appear at the top level}} } extern "C++" module cxxm; @@ -45,10 +45,11 @@ constexpr int n = 123; export module 
m; // #1 -import x = {}; // expected-error {{expected ';' after module name}} +import x = {}; // expected-error {{'import' directive must end with a ';' on the same line}} // expected-error@-1 {{module 'x' not found}} //--- ImportError2.cpp +// expected-no-diagnostics module; struct module { struct inner {}; }; @@ -63,7 +64,4 @@ template<> struct import { static X y; }; -// This is not valid because the 'import ' is a pp-import, even though it -// grammatically can't possibly be an import declaration. -struct X {} import::y; // expected-error {{'n' file not found}} - +struct X {} import::y; diff --git a/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp b/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp index fd0038b3f7745..a57919f48afdd 100644 --- a/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp +++ b/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp @@ -107,4 +107,4 @@ void test_late() { // expected-error@-2 {{undeclared identifier}} internal_private = 1; // expected-error {{use of undeclared identifier 'internal_private'}} -} \ No newline at end of file +} diff --git a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp index 0e0e5fec6e9d8..f51066806947f 100644 --- a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp +++ b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp @@ -15,7 +15,7 @@ import ; // CHECK: import ; import ; -// CHECK: foo; import ; +// CHECK: foo; import ; foo; import ; // CHECK: foo import ; @@ -45,7 +45,7 @@ export export import ; import ; UNBALANCED_PAREN -// CHECK: import ; +// CHECK: import ; import ; ) @@ -57,14 +57,19 @@ import ; // CHECK: import ; import HEADER; -// CHECK: import ; +// CHECK: {{^}}foo{{$}} +// CHECK-NEXT: {{^}} bar{{$}} +// CHECK-NEXT: {{^}}>;{{$}} import < foo bar >; // CHECK: import{{$}} -// CHECK: {{^}}; +// CHECK-NEXT: {{^}}<{{$}} +// CHECK-NEXT: {{^}}foo{{$}} +// CHECK-NEXT: {{^}} bar{{$}} +// CHECK-NEXT: {{^}}>;{{$}} import < foo @@ -72,7 +77,7 @@ 
foo >; // CHECK: import{{$}} -// CHECK: {{^}}; +// CHECK: {{^}}; import ; diff --git a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp index 4bdcc9e5f278e..10254cd064de0 100644 --- a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp +++ b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp @@ -46,7 +46,7 @@ export module z; export module x; //--- invalid_module_name.cppm -export module z elderberry; // expected-error {{expected ';'}} \ +export module z elderberry; // expected-error {{'module' directive must end with a ';' on the same line}} \ // expected-error {{a type specifier is required}} //--- empty_attribute.cppm diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp new file mode 100644 index 0000000000000..a3928f3753e16 --- /dev/null +++ b/clang/test/CXX/module/cpp.pre/p1.cpp @@ -0,0 +1,75 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 %t/hash.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/module.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/rightpad.cppm -emit-module-interface -o %t/rightpad.pcm +// RUN: %clang_cc1 -std=c++20 %t/M_part.cppm -emit-module-interface -o %t/M_part.pcm +// RUN: %clang_cc1 -std=c++20 -xc++-system-header %t/string -emit-header-unit -o %t/string.pcm +// RUN: %clang_cc1 -std=c++20 -xc++-user-header %t/squee -emit-header-unit -o %t/squee.pcm +// RUN: %clang_cc1 -std=c++20 %t/import.cpp -isystem %t \ +// RUN: -fmodule-file=rightpad=%t/rightpad.pcm \ +// RUN: -fmodule-file=M:part=%t/M_part.pcm \ +// RUN: -fmodule-file=%t/string.pcm \ +// RUN: -fmodule-file=%t/squee.pcm \ +// RUN: -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/module_decl_not_in_same_line.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -std=c++20 
%t/import_decl_not_in_same_line.cpp -fmodule-file=foo=%t/foo.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/not_import.cpp -fsyntax-only -verify + +//--- hash.cpp +// expected-no-diagnostics +# // preprocessing directive + +//--- module.cpp +// expected-no-diagnostics +module ; // preprocessing directive +export module leftpad; // preprocessing directive + +//--- string +#ifndef STRING_H +#define STRING_H +#endif // STRING_H + +//--- squee +#ifndef SQUEE_H +#define SQUEE_H +#endif + +//--- rightpad.cppm +export module rightpad; + +//--- M_part.cppm +export module M:part; + +//--- import.cpp +export module M; +import ; // expected-warning {{the implementation of header units is in an experimental phase}} +export import "squee"; // expected-warning {{the implementation of header units is in an experimental phase}} +import rightpad; // preprocessing directive +import :part; // preprocessing directive + +//--- module_decl_not_in_same_line.cpp +module // expected-error {{the module directive is ill-formed, module contextual keyword must be immediately followed on the same line by an identifier, or a ';' after being at the start of a line, or preceded by an export keyword at the start of a line}} +;export module M; // expected-error {{the module directive is ill-formed, module contextual keyword must be immediately followed on the same line by an identifier, or a ';' after being at the start of a line, or preceded by an export keyword at the start of a line}} + +//--- foo.cppm +export module foo; + +//--- import_decl_not_in_same_line.cpp +export module M; +export // expected-error {{the import directive is ill-formed, import contextual keyword must be immediately followed on the same line by an identifier, '<', '"', or ':', but not '::', after being at the start of a line or preceded by an export at the start of the line}} +import +foo; + +export // expected-error {{the import directive is ill-formed, import contextual keyword must be immediately followed on 
the same line by an identifier, '<', '"', or ':', but not '::', after being at the start of a line or preceded by an export at the start of the line}} +import foo; + +//--- not_import.cpp +export module M; +import :: // expected-error {{use of undeclared identifier 'import'}} +import -> // expected-error {{cannot use arrow operator on a type}} diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm index f65f050a3c7bd..c4842d2642f41 100644 --- a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm +++ b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm @@ -44,8 +44,8 @@ import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applie import x [[blarg::noreturn]]; // expected-warning-re {{unknown attribute 'blarg::noreturn' ignored{{.*}}}} import x.y; -import x.; // expected-error {{expected a module name after 'import'}} -import .x; // expected-error {{expected a module name after 'import'}} +import x.; // expected-error {{expected identifier after '.' in module name}} +import .x; // expected-error-re {{the import directive is ill-formed{{.*}}}} expected-error {{expected module name}} import blarg; // expected-error {{module 'blarg' not found}} @@ -62,8 +62,8 @@ import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applie import x [[blarg::noreturn]]; // expected-warning-re {{unknown attribute 'blarg::noreturn' ignored{{.*}}}} import x.y; -import x.; // expected-error {{expected a module name after 'import'}} -import .x; // expected-error {{expected a module name after 'import'}} +import x.; // expected-error {{expected identifier after '.' 
in module name}} +import .x; // expected-error-re {{the import directive is ill-formed{{.*}}}} expected-error {{expected module name}} import blarg; // expected-error {{module 'blarg' not found}} diff --git a/clang/test/Modules/no-stale-modtime.m b/clang/test/Modules/no-stale-modtime.m index 92c18ac591add..6c62853db358e 100644 --- a/clang/test/Modules/no-stale-modtime.m +++ b/clang/test/Modules/no-stale-modtime.m @@ -4,7 +4,8 @@ // RUN: rm -rf %t // RUN: mkdir -p %t // This could be replaced by diamond_*, except we want to modify the top header -// RUN: echo '@import l; @import r;' > %t/b.h +// RUN: echo '@import l;' > %t/b.h +// RUN: echo '@import r;' >> %t/b.h // RUN: echo '@import t; // fromt l' > %t/l.h // RUN: echo '@import t; // fromt r' > %t/r.h diff --git a/clang/test/Modules/pr121066.cpp b/clang/test/Modules/pr121066.cpp index e92a81c53d683..33bedd22ac487 100644 --- a/clang/test/Modules/pr121066.cpp +++ b/clang/test/Modules/pr121066.cpp @@ -1,4 +1,6 @@ // RUN: %clang_cc1 -std=c++20 -fsyntax-only %s -verify -import mod // expected-error {{expected ';' after module name}} +// This import directive is ill-formed, it's missing an ';' after +// module name, but we try to recovery from error and import the module. +import mod // expected-error {{'import' directive must end with a ';' on the same line}} // expected-error@-1 {{module 'mod' not found}} diff --git a/clang/test/Modules/pr62359.cppm b/clang/test/Modules/pr62359.cppm index fab0b7d03d814..8d9bd0a8c1376 100644 --- a/clang/test/Modules/pr62359.cppm +++ b/clang/test/Modules/pr62359.cppm @@ -56,4 +56,4 @@ int use2() { } // CHECK: OpenMP{{.*}}differs in precompiled file '{{.*}}Hello.pcm' vs. 
current file -// CHECK: use of undeclared identifier 'pragma' +// CHECK: use of undeclared identifier 'hello' diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index d7df9cae01f33..e8e943f44162d 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -193,7 +193,8 @@ TEST_P(ASTMatchersTest, ExportDecl) { if (!GetParam().isCXX20OrLater()) { return; } - const std::string moduleHeader = "module;export module ast_matcher_test;"; + const std::string moduleHeader = + "module;\n export module ast_matcher_test;\n"; EXPECT_TRUE(matches(moduleHeader + "export void foo();", exportDecl(has(functionDecl())))); EXPECT_TRUE(matches(moduleHeader + "export { void foo(); int v; }", diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index ddc87921ea084..156de36b185ef 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -639,8 +639,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { ASSERT_FALSE(minimizeSourceToDependencyDirectives(" @ import A;\n", Out)); EXPECT_STREQ("@import A;\n", Out.data()); - ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out)); - EXPECT_STREQ("@import A\n;\n", Out.data()); + ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n;", Out)); + EXPECT_STREQ("@import A;\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out)); EXPECT_STREQ("@import A.B;\n", Out.data()); @@ -685,18 +685,19 @@ TEST(MinimizeSourceToDependencyDirectivesTest, ImportFailures) { minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); - ASSERT_FALSE(minimizeSourceToDependencyDirectives("import \n" + 
ASSERT_FALSE(minimizeSourceToDependencyDirectives("import ;\n" "@import Foo;", Out)); - EXPECT_STREQ("@import Foo;\n", Out.data()); + EXPECT_STREQ("import;\n@import Foo;\n", Out.data()); ASSERT_FALSE( - minimizeSourceToDependencyDirectives("import \n" + minimizeSourceToDependencyDirectives("import ;\n" "#import \n" "@;\n" "#pragma clang module import Foo", Out)); - EXPECT_STREQ("#import \n" + EXPECT_STREQ("import;\n" + "#import \n" "#pragma clang module import Foo\n", Out.data()); } diff --git a/clang/unittests/Lex/ModuleDeclStateTest.cpp b/clang/unittests/Lex/ModuleDeclStateTest.cpp index ac2ddfaf52cd0..3117c4f2f1af0 100644 --- a/clang/unittests/Lex/ModuleDeclStateTest.cpp +++ b/clang/unittests/Lex/ModuleDeclStateTest.cpp @@ -40,7 +40,7 @@ class CheckNamedModuleImportingCB : public PPCallbacks { void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) override { ASSERT_TRUE(NextCheckingIndex < IsImportingNamedModulesAssertions.size()); - EXPECT_EQ(PP.isInImportingCXXNamedModules(), + EXPECT_EQ(PP.isImportingCXXNamedModules(), IsImportingNamedModulesAssertions[NextCheckingIndex]); NextCheckingIndex++; diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index bb7144b827c3c..ed310310ff187 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -915,7 +915,7 @@

C++20 implementation status

P1703R1 - Subsumed by P1857 + Subsumed by P1857 P1874R1 @@ -931,7 +931,7 @@

C++20 implementation status

P1857R3 - No + Clang 21 P2115R0