-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[clang-format] Fix a bug that changes keyword `or` to an identifier
#128996
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-format
Author: Owen Pan (owenca)
Changes
Backports ffc61dc 0968df9 2d585cc
Fixes #105482
Full diff: https://github.com/llvm/llvm-project/pull/128996.diff
11 Files Affected:
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index bbb912eb10e94..4b4c412a13323 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -4735,15 +4735,24 @@ the configuration (without a prefix: ``Auto``).
.. _Language:
**Language** (``LanguageKind``) :versionbadge:`clang-format 3.5` :ref:`¶ <Language>`
- Language, this format style is targeted at.
+ The language that this format style targets.
+
+ .. note::
+
+ You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h``
+ files by adding a ``// clang-format Language:`` line before the first
+ non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
Possible values:
* ``LK_None`` (in configuration: ``None``)
Do not use.
+ * ``LK_C`` (in configuration: ``C``)
+ Should be used for C.
+
* ``LK_Cpp`` (in configuration: ``Cpp``)
- Should be used for C, C++.
+ Should be used for C++.
* ``LK_CSharp`` (in configuration: ``CSharp``)
Should be used for C#.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 153afdb3d59e3..57a567509a068 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1358,6 +1358,10 @@ clang-format
- Adds ``WrapNamespaceBodyWithEmptyLines`` option.
- Adds the ``IndentExportBlock`` option.
- Adds ``PenaltyBreakBeforeMemberAccess`` option.
+- Add the C language instead of treating it like C++.
+- Allow specifying the language (C, C++, or Objective-C) for a ``.h`` file by
+ adding a special comment (e.g. ``// clang-format Language: ObjC``) near the
+ top of the file.
libclang
--------
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 6f432d1d50315..abab543518222 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -3275,7 +3275,9 @@ struct FormatStyle {
enum LanguageKind : int8_t {
/// Do not use.
LK_None,
- /// Should be used for C, C++.
+ /// Should be used for C.
+ LK_C,
+ /// Should be used for C++.
LK_Cpp,
/// Should be used for C#.
LK_CSharp,
@@ -3300,7 +3302,9 @@ struct FormatStyle {
/// https://sci-hub.st/10.1109/IEEESTD.2018.8299595
LK_Verilog
};
- bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; }
+ bool isCpp() const {
+ return Language == LK_Cpp || Language == LK_C || Language == LK_ObjC;
+ }
bool isCSharp() const { return Language == LK_CSharp; }
bool isJson() const { return Language == LK_Json; }
bool isJavaScript() const { return Language == LK_JavaScript; }
@@ -3310,7 +3314,12 @@ struct FormatStyle {
}
bool isTableGen() const { return Language == LK_TableGen; }
- /// Language, this format style is targeted at.
+ /// The language that this format style targets.
+ /// \note
+ /// You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h``
+ /// files by adding a ``// clang-format Language:`` line before the first
+ /// non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
+ /// \endnote
/// \version 3.5
LanguageKind Language;
@@ -5665,6 +5674,8 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code);
// Returns a string representation of ``Language``.
inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
switch (Language) {
+ case FormatStyle::LK_C:
+ return "C";
case FormatStyle::LK_Cpp:
return "C++";
case FormatStyle::LK_CSharp:
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index f02bf95cfeed7..0bb8545884442 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -401,6 +401,7 @@ template <> struct MappingTraits<FormatStyle::KeepEmptyLinesStyle> {
template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
+ IO.enumCase(Value, "C", FormatStyle::LK_C);
IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
IO.enumCase(Value, "Java", FormatStyle::LK_Java);
IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
@@ -3952,7 +3953,12 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11;
LangOpts.LineComment = 1;
- LangOpts.CXXOperatorNames = Style.isCpp();
+
+ const auto Language = Style.Language;
+ LangOpts.C17 = Language == FormatStyle::LK_C;
+ LangOpts.CXXOperatorNames =
+ Language == FormatStyle::LK_Cpp || Language == FormatStyle::LK_ObjC;
+
LangOpts.Bool = 1;
LangOpts.ObjC = 1;
LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
@@ -3977,6 +3983,8 @@ const char *StyleOptionHelpDescription =
" --style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
+ if (FileName.ends_with(".c"))
+ return FormatStyle::LK_C;
if (FileName.ends_with(".java"))
return FormatStyle::LK_Java;
if (FileName.ends_with_insensitive(".js") ||
@@ -4016,6 +4024,35 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
return FormatStyle::LK_Cpp;
}
+static FormatStyle::LanguageKind getLanguageByComment(const Environment &Env) {
+ const auto ID = Env.getFileID();
+ const auto &SourceMgr = Env.getSourceManager();
+
+ LangOptions LangOpts;
+ LangOpts.CPlusPlus = 1;
+ LangOpts.LineComment = 1;
+
+ Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
+ Lex.SetCommentRetentionState(true);
+
+ for (Token Tok; !Lex.LexFromRawLexer(Tok) && Tok.is(tok::comment);) {
+ auto Text = StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
+ Tok.getLength());
+ if (!Text.consume_front("// clang-format Language:"))
+ continue;
+
+ Text = Text.trim();
+ if (Text == "C")
+ return FormatStyle::LK_C;
+ if (Text == "Cpp")
+ return FormatStyle::LK_Cpp;
+ if (Text == "ObjC")
+ return FormatStyle::LK_ObjC;
+ }
+
+ return FormatStyle::LK_None;
+}
+
FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) {
const auto GuessedLanguage = getLanguageByFileName(FileName);
if (GuessedLanguage == FormatStyle::LK_Cpp) {
@@ -4025,6 +4062,10 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) {
if (!Code.empty() && (Extension.empty() || Extension == ".h")) {
auto NonEmptyFileName = FileName.empty() ? "guess.h" : FileName;
Environment Env(Code, NonEmptyFileName, /*Ranges=*/{});
+ if (const auto Language = getLanguageByComment(Env);
+ Language != FormatStyle::LK_None) {
+ return Language;
+ }
ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle());
Guesser.process();
if (Guesser.isObjC())
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index 963e8f87793fa..60e428123d26d 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -42,11 +42,11 @@ static SmallVector<StringRef> CppNonKeywordTypes = {
};
bool FormatToken::isTypeName(const LangOptions &LangOpts) const {
- const bool IsCpp = LangOpts.CXXOperatorNames;
- return is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts) ||
- (IsCpp && is(tok::identifier) &&
- std::binary_search(CppNonKeywordTypes.begin(),
- CppNonKeywordTypes.end(), TokenText));
+ if (is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts))
+ return true;
+ return (LangOpts.CXXOperatorNames || LangOpts.C17) && is(tok::identifier) &&
+ std::binary_search(CppNonKeywordTypes.begin(),
+ CppNonKeywordTypes.end(), TokenText);
}
bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 29aba281ae103..02429970599c0 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -743,29 +743,6 @@ struct FormatToken {
return isOneOf(tok::star, tok::amp, tok::ampamp);
}
- bool isCppAlternativeOperatorKeyword() const {
- assert(!TokenText.empty());
- if (!isalpha(TokenText[0]))
- return false;
-
- switch (Tok.getKind()) {
- case tok::ampamp:
- case tok::ampequal:
- case tok::amp:
- case tok::pipe:
- case tok::tilde:
- case tok::exclaim:
- case tok::exclaimequal:
- case tok::pipepipe:
- case tok::pipeequal:
- case tok::caret:
- case tok::caretequal:
- return true;
- default:
- return false;
- }
- }
-
bool isUnaryOperator() const {
switch (Tok.getKind()) {
case tok::plus:
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index ac5b25d52ce84..976c4d888e1fd 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -129,7 +129,7 @@ class AnnotatingParser {
: Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)),
Keywords(Keywords), Scopes(Scopes), TemplateDeclarationDepth(0) {
- assert(IsCpp == LangOpts.CXXOperatorNames);
+ assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17));
Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
resetTokenMetadata();
}
@@ -3820,7 +3820,7 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
};
const auto *Next = Current.Next;
- const bool IsCpp = LangOpts.CXXOperatorNames;
+ const bool IsCpp = LangOpts.CXXOperatorNames || LangOpts.C17;
// Find parentheses of parameter list.
if (Current.is(tok::kw_operator)) {
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 6aea310a56d69..c0c13941ef4f7 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -225,7 +225,7 @@ class TokenAnnotator {
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
: Style(Style), IsCpp(Style.isCpp()),
LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
- assert(IsCpp == LangOpts.CXXOperatorNames);
+ assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17));
}
/// Adapts the indent levels of comment lines to the indent of the
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 1411197e32554..9b4257fdd8c8f 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -168,7 +168,7 @@ UnwrappedLineParser::UnwrappedLineParser(
: IG_Inited),
IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
- assert(IsCpp == LangOpts.CXXOperatorNames);
+ assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17));
}
void UnwrappedLineParser::reset() {
@@ -1712,12 +1712,6 @@ void UnwrappedLineParser::parseStructuralElement(
OpeningBrace && OpeningBrace->isOneOf(TT_RequiresExpressionLBrace,
TT_CompoundRequirementLBrace);
!eof();) {
- if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
- if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
- Next && Next->isBinaryOperator()) {
- FormatTok->Tok.setKind(tok::identifier);
- }
- }
const FormatToken *Previous = FormatTok->Previous;
switch (FormatTok->Tok.getKind()) {
case tok::at:
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 3b7856d6ee150..d1e96e0fa544a 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -17784,9 +17784,11 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeAssignmentOperators) {
verifyFormat("int a = 5;");
verifyFormat("a += 42;");
verifyFormat("a or_eq 8;");
- verifyFormat("xor = foo;");
- FormatStyle Spaces = getLLVMStyle();
+ auto Spaces = getLLVMStyle(FormatStyle::LK_C);
+ verifyFormat("xor = foo;", Spaces);
+
+ Spaces.Language = FormatStyle::LK_Cpp;
Spaces.SpaceBeforeAssignmentOperators = false;
verifyFormat("int a= 5;", Spaces);
verifyFormat("a+= 42;", Spaces);
@@ -24683,6 +24685,7 @@ TEST_F(FormatTest, StructuredBindings) {
}
TEST_F(FormatTest, FileAndCode) {
+ EXPECT_EQ(FormatStyle::LK_C, guessLanguage("foo.c", ""));
EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.cc", ""));
EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.m", ""));
EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.mm", ""));
@@ -24848,6 +24851,18 @@ TEST_F(FormatTest, GuessLanguageWithChildLines) {
guessLanguage("foo.h", "#define FOO ({ foo(); ({ NSString *s; }) })"));
}
+TEST_F(FormatTest, GetLanguageByComment) {
+ EXPECT_EQ(FormatStyle::LK_C,
+ guessLanguage("foo.h", "// clang-format Language: C\n"
+ "int i;"));
+ EXPECT_EQ(FormatStyle::LK_Cpp,
+ guessLanguage("foo.h", "// clang-format Language: Cpp\n"
+ "int DoStuff(CGRect rect);"));
+ EXPECT_EQ(FormatStyle::LK_ObjC,
+ guessLanguage("foo.h", "// clang-format Language: ObjC\n"
+ "int i;"));
+}
+
TEST_F(FormatTest, TypenameMacros) {
std::vector<std::string> TypenameMacros = {"STACK_OF", "LIST", "TAILQ_ENTRY"};
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index dffb07c89bacc..f1a6999cfdfb8 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3646,6 +3646,11 @@ TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) {
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[3], tok::pipepipe, TT_BinaryOperator);
+ Tokens = annotate("return segment < *this or *this < segment;");
+ ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+ EXPECT_TOKEN(Tokens[5], tok::pipepipe, TT_BinaryOperator);
+ EXPECT_TOKEN(Tokens[6], tok::star, TT_UnaryOperator);
+
Tokens = annotate("a = b or_eq c;");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[3], tok::pipeequal, TT_BinaryOperator);
@@ -3658,11 +3663,13 @@ TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) {
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[3], tok::caretequal, TT_BinaryOperator);
- Tokens = annotate("xor = foo;");
+ const auto StyleC = getLLVMStyle(FormatStyle::LK_C);
+
+ Tokens = annotate("xor = foo;", StyleC);
ASSERT_EQ(Tokens.size(), 5u) << Tokens;
EXPECT_TOKEN(Tokens[0], tok::identifier, TT_Unknown);
- Tokens = annotate("int xor = foo;");
+ Tokens = annotate("int xor = foo;", StyleC);
ASSERT_EQ(Tokens.size(), 6u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
}
|
|
This has a header change that will impact the ABI, so it should be merged prior to 20.1.0. |
|
@owenca (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backports ffc61dc 0968df9 2d585cc
Fixes #105482