-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LLVM][AsmParser] Add support for C style comments #111554
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
60f0ab0 to
629e20e
Compare
|
@llvm/pr-subscribers-llvm-ir Author: Rahul Joshi (jurahul) Changes: Add support for C style comments in LLVM assembly. Full diff: https://github.com/llvm/llvm-project/pull/111554.diff 5 Files Affected:
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index a9f51fb925f5d5..8e0c5638eef37d 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -94,7 +94,9 @@ namespace llvm {
lltok::Kind LexToken();
int getNextChar();
+ int peekNextChar() const;
void SkipLineComment();
+ bool SkipCComment();
lltok::Kind ReadString(lltok::Kind kind);
bool ReadVarName();
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 759db6db60774c..dbccfcde74caa8 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -175,17 +175,25 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
}
int LLLexer::getNextChar() {
- char CurChar = *CurPtr++;
+ int NextChar = peekNextChar();
+ // Keeping CurPtr unchanged at EOF, so that another call to `getNextChar`
+ // returns EOF again.
+ if (NextChar != EOF)
+ ++CurPtr;
+ return NextChar;
+}
+
+int LLLexer::peekNextChar() const {
+ char CurChar = *CurPtr;
switch (CurChar) {
default: return (unsigned char)CurChar;
case 0:
// A nul character in the stream is either the end of the current buffer or
// a random nul in the file. Disambiguate that here.
- if (CurPtr-1 != CurBuf.end())
+ if (CurPtr != CurBuf.end())
return 0; // Just whitespace.
// Otherwise, return end of file.
- --CurPtr; // Another call to lex will return EOF again.
return EOF;
}
}
@@ -251,6 +259,10 @@ lltok::Kind LLLexer::LexToken() {
case ',': return lltok::comma;
case '*': return lltok::star;
case '|': return lltok::bar;
+ case '/':
+ if (peekNextChar() == '*' && SkipCComment())
+ return lltok::Error;
+ continue;
}
}
}
@@ -262,6 +274,38 @@ void LLLexer::SkipLineComment() {
}
}
+/// SkipCComment - Skip a C-style /**/ comment; unlike C, nesting is allowed.
+/// Returns true (after reporting a lex error) on EOF, false once it is closed.
+bool LLLexer::SkipCComment() {
+ getNextChar(); // Eat the '*' that opens the comment.
+ unsigned CommentDepth = 1;
+
+ while (true) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ LexError("Unterminated comment!");
+ return true;
+ case '*':
+ // End of the current comment level?
+ if (peekNextChar() != '/')
+ break;
+
+ getNextChar(); // Eat the '/'.
+ if (--CommentDepth == 0)
+ return false;
+ break;
+ case '/':
+ // Start of a nested comment?
+ if (peekNextChar() != '*')
+ break;
+ getNextChar(); // Eat the '*'.
+ ++CommentDepth;
+ break;
+ }
+ }
+}
+
/// Lex all tokens that start with an @ character.
/// GlobalVar @\"[^\"]*\"
/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
diff --git a/llvm/test/Assembler/c-style-comment.ll b/llvm/test/Assembler/c-style-comment.ll
new file mode 100644
index 00000000000000..b24a3e560e0e90
--- /dev/null
+++ b/llvm/test/Assembler/c-style-comment.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+/* Simple C style comment */
+
+; CHECK: @B = external global i32
+@B = external global i32
+
+/* multiline C style comment at "top-level"
+ * This is the second line
+ * and this is third
+ */
+
+
+; CHECK: @foo
+define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind {
+entry: /* inline comment */
+ %cmp = fcmp olt <4 x float> %a, /* to be ignored */ %b
+ ret <4 x i1> %cmp /* ignore */
+ /* C style nested comment
+ /* Nest
+ /*
+ * ; ignored
+ */
+ */
+ */
+
+}
+
+/* End of the assembly file */
+
diff --git a/llvm/test/Assembler/invalid-c-style-comment0.ll b/llvm/test/Assembler/invalid-c-style-comment0.ll
new file mode 100644
index 00000000000000..e3d1c6f4ef732d
--- /dev/null
+++ b/llvm/test/Assembler/invalid-c-style-comment0.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck %s -DFILE=%s
+
+@B = external global i32
+
+; CHECK: [[FILE]]:[[@LINE+1]]:1: error: Unterminated comment!
+/* End of the assembly file
diff --git a/llvm/test/Assembler/invalid-c-style-comment1.ll b/llvm/test/Assembler/invalid-c-style-comment1.ll
new file mode 100644
index 00000000000000..9ca97af4b4875c
--- /dev/null
+++ b/llvm/test/Assembler/invalid-c-style-comment1.ll
@@ -0,0 +1,11 @@
+; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck %s -DFILE=%s
+
+@B = external global i32
+
+; CHECK: [[FILE]]:[[@LINE+1]]:1: error: Unterminated comment!
+/* End of the assembly file
+ /* Unterminated comment with multiple nesting depths */
+ /* /* ignored */ */
+ /* /* /* ignored */ */ */
+* /
+
|
629e20e to
7c9b538
Compare
|
@arsenm any further comments? Are we at this point waiting for some consensus to be reached around the RFC? |
|
This is pretty independent of the RFC |
|
It is. Let's wait for @nikic to review as well.
…On Wed, Oct 30, 2024 at 6:57 PM Matt Arsenault ***@***.***> wrote:
This is pretty independent of the RFC
—
Reply to this email directly, view it on GitHub
<#111554 (comment)>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/APRMUBYU4YFQYKO5KOHNYJLZ6GE7VAVCNFSM6AAAAABPSVE2GCVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDINBYHA3DANRSGM>
.
You are receiving this because you authored the thread.Message ID:
***@***.***>
|
|
Yes, it is adopted from TGLexer. There is no good reason to support nested
comments though.
…On Thu, Oct 31, 2024 at 11:58 AM Jessica Clarke ***@***.***> wrote:
***@***.**** commented on this pull request.
------------------------------
In llvm/lib/AsmParser/LLLexer.cpp
<#111554 (comment)>:
> @@ -262,6 +274,38 @@ void LLLexer::SkipLineComment() {
}
}
+/// SkipCComment - This skips C-style /**/ comments. The only difference from C
+/// is that we allow nesting.
Interesting. Dates back to af282ae
<af282ae>,
July 2003.
—
Reply to this email directly, view it on GitHub
<#111554 (comment)>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/APRMUB44Q4KMZTZ2BHVAJE3Z6J4TRAVCNFSM6AAAAABPSVE2GCVHI2DSMVQWIX3LMV43YUDVNRWFEZLROVSXG5CSMV3GSZLXHMZDIMBYHA3DIOJXG4>
.
You are receiving this because you authored the thread.Message ID:
***@***.***>
|
7c9b538 to
bbc9b7d
Compare
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs to be documented in LangRef.
bbc9b7d to
d6e23a6
Compare
Done. |
d6e23a6 to
43e3a01
Compare
Add support for C style comments in LLVM assembly.
43e3a01 to
c7d916d
Compare
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, but please wait for a second approval.
Co-authored-by: Nikita Popov <[email protected]>
Thanks, will do. |
|
Thanks @arsenm |
Add support for C style comments in LLVM assembly.