From 85ea00cbad3001bd3076f3ee52b627b19b6c37de Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Thu, 4 Jul 2024 01:25:21 +0000 Subject: [PATCH 01/29] [lld][ELF] Added tokenizer file for linker script --- lld/ELF/ScriptTokenizer.h | 153 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 lld/ELF/ScriptTokenizer.h diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h new file mode 100644 index 0000000000000..1536091367743 --- /dev/null +++ b/lld/ELF/ScriptTokenizer.h @@ -0,0 +1,153 @@ +//===- ScriptLexer.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SCRIPT_TOKENIZER_H +#define LLD_ELF_SCRIPT_TOKENIZER_H + +namespace lld::elf { +enum CommandTokens { + // Entry point + ENTRY, // ENTRY + + // Commands Files + INCLUDE, + INPUT, + GROUP, + OUTPUT, + SEARCH_DIR, + STARTUP, + + INSERT, // TODO + AFTER, // TODO + + // Commands for object file formats + OUTPUT_FORMAT, + TARGET, + + // Other linker script commands + ASSERT, + EXTERN, + // FORCE_COMMON_ALLOCATION + // INHIBIT_COMMON_ALLOCATION + // NOCROSSREFS + OUTPUT_ARCH + + // Assignment + PROVIDE, + HIDDEN, + PROVIDE_HIDDEN, + + // Section Command + SECTIONS, + // Input Section + EXCLUDE_FILE, + KEEP, + INPUT_SECTION_FLAGS, + + // Read section + OVERLAY, + NOLOAD, + COPY, + INFO + + // Output Section + OUTPUT, + + OVERWRITE_SECTIONS // TODO + + ALIGN, // TODO + SUBALIGN, + ONLY_IF_RO, + ONLY_IF_RW, + FILL, // TODO + SORT, // + + // Builtin Functions + ABSOLUTE, + ADDR, + ALIGN, + // BLOCK, // synonym for ALIGN for compatibility with older linker script + DATA_SEGMENT_ALIGN, + DATA_SEGMENT_END, + DEFINED, + LOADADDR, + LOG2CEIL, // TODO + MAX, + MIN, 
+ ORIGIN, // TODO + SEGMENT_START, // TODO + // NEXT, // This function is closely related to ALIGN(exp); unless you use the + // MEMORY command to define discontinuous memory for the output file, the two + // functions are equivalent. + SIZEOF, + SIZEOF_HEADERS + + // PHDRS Command + FILEHDR, + PHDRS, + AT, + FLAGS, + + // Version Command + VERSION, + + REGION_ALIAS // TODO + AS_NEEDED, // TODO + CONSTRUCTORS, // TODO: readsort? +} + +enum labelTokens { + Local, // local + Global, // global + Extern, // extern +} + +enum TargetType { + ELF, + Binary, + Error +} + +enum SymbolTokens { + CurlyBegin, // { + CurlyEnd, // } + BracektBegin, // ( + BracektEnd, // ) + Comma, // , + Semicolon, // ; + Colon, // : + Asterisk, // * + QuestionMark, // ? + Bacckslash, // "\" + Slash, // / + Greater // > + Less, // < + Minus, // - + Plus, // + + Bitwise, // & + Not, // ^ + VerticalBar, // | + PercentSign, // % + // TODO:: operator +} + +enum AssignmentSymbols { + // Assignmemnt + Assign, // = + PlusAssign, // += + MinussAssign, // -= + MulAssign, // *= + DivAssign, // /= + LeftShiftAssign, // <<= + RightShiftAssign, // >>= + BitWiseAssign, // &= + BarAssign // |= +} +} // namespace lld::elf + +#endif // LLD_ELF_SCRIPT_TOKENIZER_H From 9c3090275d10e49c36d212f57fb8a46ee873d6db Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Sat, 6 Jul 2024 04:57:12 +0000 Subject: [PATCH 02/29] [lld][ELF] update Linkerscript lexer --- lld/ELF/ScriptTokenizer.h | 81 +++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index 1536091367743..b8c7f581ea84e 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -5,14 +5,18 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +// This file defines the nums for LinkerScript lexer +// 
+//===----------------------------------------------------------------------===// #ifndef LLD_ELF_SCRIPT_TOKENIZER_H #define LLD_ELF_SCRIPT_TOKENIZER_H -namespace lld::elf { -enum CommandTokens { - // Entry point - ENTRY, // ENTRY +namespace lld { +namespace elf { +enum Tokens { + ENTRY, // Commands Files INCLUDE, @@ -22,8 +26,8 @@ enum CommandTokens { SEARCH_DIR, STARTUP, - INSERT, // TODO - AFTER, // TODO + INSERT, + AFTER, // Commands for object file formats OUTPUT_FORMAT, @@ -35,15 +39,15 @@ enum CommandTokens { // FORCE_COMMON_ALLOCATION // INHIBIT_COMMON_ALLOCATION // NOCROSSREFS - OUTPUT_ARCH + OUTPUT_ARCH, - // Assignment - PROVIDE, + // Assignment + PROVIDE, HIDDEN, PROVIDE_HIDDEN, - // Section Command SECTIONS, + // Input Section EXCLUDE_FILE, KEEP, @@ -53,19 +57,17 @@ enum CommandTokens { OVERLAY, NOLOAD, COPY, - INFO + INFO, - // Output Section - OUTPUT, - - OVERWRITE_SECTIONS // TODO - - ALIGN, // TODO + // Output Section + OUTPUT, + OVERWRITE_SECTIONS, + ALIGN, SUBALIGN, ONLY_IF_RO, ONLY_IF_RW, - FILL, // TODO - SORT, // + FILL, + SORT, // Builtin Functions ABSOLUTE, @@ -76,19 +78,20 @@ enum CommandTokens { DATA_SEGMENT_END, DEFINED, LOADADDR, - LOG2CEIL, // TODO + + LOG2CEIL, MAX, MIN, - ORIGIN, // TODO - SEGMENT_START, // TODO + ORIGIN, + SEGMENT_START, // NEXT, // This function is closely related to ALIGN(exp); unless you use the // MEMORY command to define discontinuous memory for the output file, the two // functions are equivalent. SIZEOF, - SIZEOF_HEADERS + SIZEOF_HEADERS, - // PHDRS Command - FILEHDR, + // PHDRS Command + FILEHDR, PHDRS, AT, FLAGS, @@ -96,24 +99,22 @@ enum CommandTokens { // Version Command VERSION, - REGION_ALIAS // TODO - AS_NEEDED, // TODO - CONSTRUCTORS, // TODO: readsort? 
-} + REGION_ALIAS, + AS_NEEDED, + CONSTRUCTORS, + + // Symbolic Constants + MAXPAGESIZE, + COMMONPAGESIZE, -enum labelTokens { Local, // local Global, // global Extern, // extern -} -enum TargetType { ELF, Binary, - Error -} + Error, -enum SymbolTokens { CurlyBegin, // { CurlyEnd, // } BracektBegin, // ( @@ -125,18 +126,15 @@ enum SymbolTokens { QuestionMark, // ? Bacckslash, // "\" Slash, // / - Greater // > - Less, // < + Greater, // > + Less, // < Minus, // - Plus, // + Bitwise, // & Not, // ^ VerticalBar, // | PercentSign, // % - // TODO:: operator -} -enum AssignmentSymbols { // Assignmemnt Assign, // = PlusAssign, // += @@ -148,6 +146,7 @@ enum AssignmentSymbols { BitWiseAssign, // &= BarAssign // |= } -} // namespace lld::elf +} // namespace elf +} // namespace lld #endif // LLD_ELF_SCRIPT_TOKENIZER_H From 98ea9726a3a6142c4f88f48dc672db515e4c4f7d Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Sun, 7 Jul 2024 02:16:31 +0000 Subject: [PATCH 03/29] [lld][ELF] added new files for new linker script lexer --- lld/ELF/CMakeLists.txt | 1 + lld/ELF/LinkerScriptLexer.cpp | 22 ++++++++++++++++++++++ lld/ELF/LinkerScriptLexer.h | 31 +++++++++++++++++++++++++++++++ lld/ELF/ScriptTokenizer.h | 6 ++---- 4 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 lld/ELF/LinkerScriptLexer.cpp create mode 100644 lld/ELF/LinkerScriptLexer.h diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt index 83d816ddb0601..18593c8e36940 100644 --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -47,6 +47,7 @@ add_lld_library(lldELF InputSection.cpp LTO.cpp LinkerScript.cpp + LinkerScriptLexer.cpp MapFile.cpp MarkLive.cpp OutputSections.cpp diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp new file mode 100644 index 0000000000000..7d8bb3f8111b2 --- /dev/null +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -0,0 +1,22 @@ +//===- ScriptParser.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LinkerScriptLexer.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; +using namespace lld; +using namespace lld::elf; + +LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB) {} + +ScriptToken LinkerScriptLexer::getScriptToken() { return ScriptToken::ENTRY; } + +unsigned LinkerScriptLexer::getNextChar() { return 0; } diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h new file mode 100644 index 0000000000000..0f824dbd96484 --- /dev/null +++ b/lld/ELF/LinkerScriptLexer.h @@ -0,0 +1,31 @@ +//===- LinkerScriptLexer.h --------------------------------------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_LINKER_SCRIPT_Lexer_H +#define LLD_ELF_LINKER_SCRIPT_Lexer_H + +#include "ScriptTokenizer.h" +#include "lld/Common/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBufferRef.h" + +namespace lld::elf { + +class LinkerScriptLexer { +public: + explicit LinkerScriptLexer(MemoryBufferRef MB); + ScriptToken getScriptToken(); + +private: + size_t pos = 0; + unsigned getNextChar(); +}; +} // namespace lld::elf + +#endif // LLD_ELF_LINKER_SCRIPT_Lexer_H diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index b8c7f581ea84e..2afd376a2a489 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -15,7 +15,7 @@ namespace lld { namespace elf { -enum Tokens { +enum ScriptToken { ENTRY, // Commands Files @@ -60,9 +60,7 @@ enum Tokens { INFO, // Output Section - OUTPUT, OVERWRITE_SECTIONS, - ALIGN, SUBALIGN, ONLY_IF_RO, ONLY_IF_RW, @@ -145,7 +143,7 @@ enum Tokens { RightShiftAssign, // >>= BitWiseAssign, // &= BarAssign // |= -} +}; } // namespace elf } // namespace lld From 571c9b48142bc0602cbc8208616afee6d54e3756 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Sun, 7 Jul 2024 06:22:23 +0000 Subject: [PATCH 04/29] [lld][ELF] update switch current char --- lld/ELF/LinkerScriptLexer.cpp | 46 ++++++++++++++++++++++++++++++++--- lld/ELF/LinkerScriptLexer.h | 5 +++- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index 7d8bb3f8111b2..a2e7865fdc058 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -15,8 +15,48 @@ using namespace llvm; using namespace lld; using namespace lld::elf; -LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB) {} +LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB) { + curStringRef = MB.getBuffer(); +} -ScriptToken 
LinkerScriptLexer::getScriptToken() { return ScriptToken::ENTRY; } +ScriptToken LinkerScriptLexer::getToken() { + while (true) { + int curChar = getNextChar(); -unsigned LinkerScriptLexer::getNextChar() { return 0; } + switch (curChar) { + case 'A' ... 'Z': + case 'a' ... 'z': + // TODO + break; + case '0' ... '9': + // TODO + break; + case '{': + return ScriptToken::CurlyBegin; + case '}': + return ScriptToken::CurlyEnd; + case '?': + return ScriptToken::QuestionMark; + case '(': + return ScriptToken::BracektBegin; + case ')': + return ScriptToken::BracektEnd; + } + } +} + +unsigned LinkerScriptLexer::getNextChar() { + char curChar = *curPtr; + switch (curChar) { + case 0: + if (curPtr != curStringRef.end()) { + curPtr++; + return 0; + } else { + return EOF; + } + default: + curPtr++; + return (unsigned char)curChar; + } +} diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 0f824dbd96484..8d66bae7122b4 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -20,11 +20,14 @@ namespace lld::elf { class LinkerScriptLexer { public: explicit LinkerScriptLexer(MemoryBufferRef MB); - ScriptToken getScriptToken(); private: + const char *curPtr; + llvm::StringRef curStringRef; size_t pos = 0; unsigned getNextChar(); + + ScriptToken getToken(); }; } // namespace lld::elf From ba0cdf35e6673efecb68e46906c11f0510505ba7 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 8 Jul 2024 16:57:54 +0000 Subject: [PATCH 05/29] [lld][ELF] updated token and lexer --- lld/ELF/LinkerScriptLexer.cpp | 37 ++++++++++++++++++++++++++++------- lld/ELF/LinkerScriptLexer.h | 3 +++ lld/ELF/ScriptTokenizer.h | 5 ++++- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index a2e7865fdc058..c095dd390704b 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -24,11 +24,23 @@ ScriptToken LinkerScriptLexer::getToken() { int curChar = getNextChar(); 
switch (curChar) { - case 'A' ... 'Z': - case 'a' ... 'z': + case ' ': + case '\n': + case '\t': + case '\r': + continue; // ignore whitespace // TODO break; - case '0' ... '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': // TODO break; case '{': @@ -41,22 +53,33 @@ ScriptToken LinkerScriptLexer::getToken() { return ScriptToken::BracektBegin; case ')': return ScriptToken::BracektEnd; + case '=': + return ScriptToken::Assign; + case '+': + return ScriptToken::Plus; + default: + // default for [A-Z][a-z] + break; } } } unsigned LinkerScriptLexer::getNextChar() { - char curChar = *curPtr; + char curChar = curStringRef[pos]; switch (curChar) { case 0: - if (curPtr != curStringRef.end()) { - curPtr++; + if (pos < curStringRef.size()) { + pos++; return 0; } else { return EOF; } default: - curPtr++; + pos++; return (unsigned char)curChar; } } + +ScriptToken LinkerScriptLexer::getCommandOrSymbolName() { + return ScriptToken::SymbolName; +} diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 8d66bae7122b4..21bb75ef92c83 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -28,6 +28,9 @@ class LinkerScriptLexer { unsigned getNextChar(); ScriptToken getToken(); + ScriptToken getIdentify(); + ScriptToken getArithmeticOrAssignment(); + ScriptToken getCommandOrSymbolName(); }; } // namespace lld::elf diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index 2afd376a2a489..91cb0ae8c007c 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -15,7 +15,7 @@ namespace lld { namespace elf { -enum ScriptToken { +enum class ScriptToken { ENTRY, // Commands Files @@ -113,6 +113,9 @@ enum ScriptToken { Binary, Error, + SymbolName, + FileName, + CurlyBegin, // { CurlyEnd, // } BracektBegin, // ( From 252e217b209f8d9ec780ddec119cce0001a4a3bb Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 8 Jul 2024 21:43:58 +0000 Subject: 
[PATCH 06/29] [lld][ELF] added llvm source manager and SMDiagnostic For error and warning messages --- lld/ELF/LinkerScriptLexer.cpp | 16 +++++++++++++--- lld/ELF/LinkerScriptLexer.h | 18 +++++++++++++++++- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index c095dd390704b..d695da49a8088 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -8,15 +8,26 @@ #include "LinkerScriptLexer.h" #include "lld/Common/ErrorHandler.h" -#include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; using namespace lld; using namespace lld::elf; -LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB) { +LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, + llvm::SMDiagnostic &Err) + : ErrorInfo(Err), SM(SM) { curStringRef = MB.getBuffer(); + curPtr = curStringRef.begin(); +} + +bool LinkerScriptLexer::Error(SMLoc ErrorLoc, const Twine &Msg) const { + ErrorInfo = SM.GetMessage(ErrorLoc, llvm::SourceMgr::DK_Error, Msg); + return true; +} + +void LinkerScriptLexer::Warning(SMLoc WarningLoc, const Twine &Msg) const { + SM.PrintMessage(WarningLoc, llvm::SourceMgr::DK_Warning, Msg); } ScriptToken LinkerScriptLexer::getToken() { @@ -30,7 +41,6 @@ ScriptToken LinkerScriptLexer::getToken() { case '\r': continue; // ignore whitespace // TODO - break; case '0': case '1': case '2': diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 21bb75ef92c83..7304fc221c959 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -13,20 +13,36 @@ #include "ScriptTokenizer.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/MemoryBufferRef.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" namespace lld::elf { +class SMDiagnostic; +class SourceMgr; class LinkerScriptLexer { public: - explicit 
LinkerScriptLexer(MemoryBufferRef MB); + explicit LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, + llvm::SMDiagnostic &Err); + llvm::SMLoc getLoc() const { return llvm::SMLoc::getFromPointer(tokStart); } + bool Error(llvm::SMLoc ErrorLoc, const llvm::Twine &Msg) const; + bool Error(const llvm::Twine &Msg) const { return Error(getLoc(), Msg); } + void Warning(llvm::SMLoc WarningLoc, const llvm::Twine &Msg) const; + void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); } private: + llvm::SMDiagnostic &ErrorInfo; + llvm::SourceMgr &SM; const char *curPtr; llvm::StringRef curStringRef; + + const char *tokStart; size_t pos = 0; unsigned getNextChar(); + void skipComment(); ScriptToken getToken(); ScriptToken getIdentify(); ScriptToken getArithmeticOrAssignment(); From 4cabbbbc0495fcf34a176b954f3dcaa41ae033a2 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Tue, 9 Jul 2024 05:16:11 +0000 Subject: [PATCH 07/29] [lld][ELF] updated slicing token from char to StringRef Using StringRef would be easier to generate token so plan to change code design. 
--- lld/ELF/LinkerScriptLexer.cpp | 69 +++++++++-------------------------- lld/ELF/LinkerScriptLexer.h | 14 +++---- 2 files changed, 24 insertions(+), 59 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index d695da49a8088..91b1a22172959 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -16,7 +16,7 @@ using namespace lld::elf; LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, llvm::SMDiagnostic &Err) - : ErrorInfo(Err), SM(SM) { + : MB(MB), ErrorInfo(Err), SM(SM) { curStringRef = MB.getBuffer(); curPtr = curStringRef.begin(); } @@ -32,64 +32,29 @@ void LinkerScriptLexer::Warning(SMLoc WarningLoc, const Twine &Msg) const { ScriptToken LinkerScriptLexer::getToken() { while (true) { - int curChar = getNextChar(); - - switch (curChar) { - case ' ': - case '\n': - case '\t': - case '\r': - continue; // ignore whitespace - // TODO - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - // TODO - break; - case '{': - return ScriptToken::CurlyBegin; - case '}': - return ScriptToken::CurlyEnd; - case '?': - return ScriptToken::QuestionMark; - case '(': - return ScriptToken::BracektBegin; - case ')': - return ScriptToken::BracektEnd; - case '=': - return ScriptToken::Assign; - case '+': - return ScriptToken::Plus; - default: - // default for [A-Z][a-z] - break; - } + skipComments(); } } -unsigned LinkerScriptLexer::getNextChar() { - char curChar = curStringRef[pos]; - switch (curChar) { - case 0: - if (pos < curStringRef.size()) { - pos++; - return 0; - } else { - return EOF; +llvm::StringRef LinkerScriptLexer::skipComments() { + // this code now is copied from ScriptLexer.cpp + // and modified so it can use SourceMgr + while (true) { + if (curStringRef.starts_with("/*")) { + size_t e = curStringRef.find("*/", 2); + if (e == llvm::StringRef::npos) { + // TODO: set up error message + } + curStringRef = 
curStringRef.substr(e + 2); + continue; + } + if (curStringRef.starts_with("#")) { + size_t e = curStringRef.find("\n", 1); } - default: - pos++; - return (unsigned char)curChar; } } ScriptToken LinkerScriptLexer::getCommandOrSymbolName() { + return ScriptToken::SymbolName; } diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 7304fc221c959..3a770a045e8b8 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLD_ELF_LINKER_SCRIPT_Lexer_H -#define LLD_ELF_LINKER_SCRIPT_Lexer_H +#ifndef LLD_ELF_LINKER_SCRIPT_LEXER_H +#define LLD_ELF_LINKER_SCRIPT_LEXER_H #include "ScriptTokenizer.h" #include "lld/Common/LLVM.h" @@ -32,22 +32,22 @@ class LinkerScriptLexer { void Warning(llvm::SMLoc WarningLoc, const llvm::Twine &Msg) const; void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); } + void checkToken(ScriptToken token); + private: llvm::SMDiagnostic &ErrorInfo; llvm::SourceMgr &SM; const char *curPtr; + llvm::MemoryBufferRef MB; llvm::StringRef curStringRef; const char *tokStart; - size_t pos = 0; - unsigned getNextChar(); - void skipComment(); + llvm::StringRef skipComments(); ScriptToken getToken(); - ScriptToken getIdentify(); ScriptToken getArithmeticOrAssignment(); ScriptToken getCommandOrSymbolName(); }; } // namespace lld::elf -#endif // LLD_ELF_LINKER_SCRIPT_Lexer_H +#endif // LLD_ELF_LINKER_SCRIPT_LEXER_H From 4cafc77fd7f702f43467448eae8b3195805bd999 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Wed, 10 Jul 2024 05:37:30 +0000 Subject: [PATCH 08/29] [lld][ELF] update lexer and tokens Decided to have the lexer to generate token as parser requests. Current lexer generates all token in vector. Added more TODO comments here. 
--- lld/ELF/LinkerScriptLexer.cpp | 79 +++++++++++++++++++- lld/ELF/LinkerScriptLexer.h | 7 +- lld/ELF/ScriptTokenizer.h | 135 ++++++++++++++++++---------------- 3 files changed, 154 insertions(+), 67 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index 91b1a22172959..d5d55b248d0da 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -32,7 +32,70 @@ void LinkerScriptLexer::Warning(SMLoc WarningLoc, const Twine &Msg) const { ScriptToken LinkerScriptLexer::getToken() { while (true) { - skipComments(); + curStringRef = skipComments(); + + // Quoted token. Note that double-quote characters are parts of a token + // because, in a glob match context, only unquoted tokens are interpreted as + // glob patterns. Double-quoted tokens are literal patterns in that context. + if (curStringRef.starts_with("\"")) { + size_t e = curStringRef.find("\"", 1); + if (e == StringRef::npos) { + StringRef fileName = MB.getBufferIdentifier(); + Error(fileName + ": unclosed quote"); + return ScriptToken::Error; + } + llvm::StringRef quotedRef = curStringRef.take_front(e + 1); + // TODO: need a function to tell if quotedRef is a keyword or not + // and return ScriptToken + curStringRef = curStringRef.substr(e + 1); + continue; + } + + // Unquoted token. This is more relaxed than tokens in C-like language, + // so that you can write "file-name.cpp" as one bare token, for example. 
+ size_t pos = curStringRef.find_first_not_of( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$/\\~=+[]*?-!^:"); + // TODO: a new function for this for keeping the code clean + if (pos == 0) { + // single char + + const char *curChar = curStringRef.begin(); + switch (*curChar) { + case EOF: + return ScriptToken::Eof; + case '(': + return ScriptToken::BracektBegin; + case ')': + return ScriptToken::BracektEnd; + case '{': + return ScriptToken::CurlyBegin; + case '}': + return ScriptToken::CurlyEnd; + case ';': + return ScriptToken::Semicolon; + case ',': + return ScriptToken::Comma; + case ':': + return ScriptToken::Colon; + case '*': + case '/': + case '+': + case '-': + case '<': + case '>': + case '&': + case '^': + case '|': + default: + // TODO + break; + } + } else { + // TODO: COMMAND / user defined symbol/file name match + } + + return ScriptToken::Error; } } @@ -43,18 +106,28 @@ llvm::StringRef LinkerScriptLexer::skipComments() { if (curStringRef.starts_with("/*")) { size_t e = curStringRef.find("*/", 2); if (e == llvm::StringRef::npos) { - // TODO: set up error message + Error("Unclosed comment in a linker script"); + return ""; } curStringRef = curStringRef.substr(e + 2); continue; } if (curStringRef.starts_with("#")) { size_t e = curStringRef.find("\n", 1); + if (e == StringRef::npos) + e = curStringRef.size() - 1; + curStringRef = curStringRef.substr(e + 1); + continue; } + + size_t size = curStringRef.size(); + curStringRef = curStringRef.ltrim(); + if (curStringRef.size() == size) + return curStringRef; } } ScriptToken LinkerScriptLexer::getCommandOrSymbolName() { - + // TODO: use marco like .ll AsmLexer return ScriptToken::SymbolName; } diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 3a770a045e8b8..3b25c81731747 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -26,12 +26,16 @@ class LinkerScriptLexer { public: explicit LinkerScriptLexer(MemoryBufferRef MB, 
llvm::SourceMgr &SM, llvm::SMDiagnostic &Err); - llvm::SMLoc getLoc() const { return llvm::SMLoc::getFromPointer(tokStart); } + llvm::SMLoc getLoc() const { + return llvm::SMLoc::getFromPointer(curStringRef.begin()); + } bool Error(llvm::SMLoc ErrorLoc, const llvm::Twine &Msg) const; bool Error(const llvm::Twine &Msg) const { return Error(getLoc(), Msg); } void Warning(llvm::SMLoc WarningLoc, const llvm::Twine &Msg) const; void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); } + const std::string &getStrVal() const { return strVal; } + void checkToken(ScriptToken token); private: @@ -40,6 +44,7 @@ class LinkerScriptLexer { const char *curPtr; llvm::MemoryBufferRef MB; llvm::StringRef curStringRef; + std::string strVal; const char *tokStart; diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index 91cb0ae8c007c..2c81e19ea638c 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -16,102 +16,103 @@ namespace lld { namespace elf { enum class ScriptToken { - ENTRY, + LS_ENTRY, // Commands Files - INCLUDE, - INPUT, - GROUP, - OUTPUT, - SEARCH_DIR, - STARTUP, + LS_INCLUDE, + LS_INPUT, + LS_GROUP, + LS_OUTPUT, + LS_SEARCH_DIR, + LS_STARTUP, - INSERT, - AFTER, + LS_INSERT, + LS_AFTER, // Commands for object file formats - OUTPUT_FORMAT, - TARGET, + LS_OUTPUT_FORMAT, + LS_TARGET, // Other linker script commands - ASSERT, - EXTERN, + LS_ASSERT, + LS_EXTERN, // FORCE_COMMON_ALLOCATION // INHIBIT_COMMON_ALLOCATION // NOCROSSREFS - OUTPUT_ARCH, + LS_OUTPUT_ARCH, // Assignment - PROVIDE, - HIDDEN, - PROVIDE_HIDDEN, + LS_PROVIDE, + LS_HIDDEN, + LS_PROVIDE_HIDDEN, - SECTIONS, + LS_SECTIONS, // Input Section - EXCLUDE_FILE, - KEEP, - INPUT_SECTION_FLAGS, + LS_EXCLUDE_FILE, + LS_KEEP, + LS_INPUT_SECTION_FLAGS, // Read section - OVERLAY, - NOLOAD, - COPY, - INFO, + LS_OVERLAY, + LS_NOLOAD, + LS_COPY, + LS_INFO, // Output Section - OVERWRITE_SECTIONS, - SUBALIGN, - ONLY_IF_RO, - ONLY_IF_RW, - FILL, - SORT, + LS_OVERWRITE_SECTIONS, + 
LS_SUBALIGN, + LS_ONLY_IF_RO, + LS_ONLY_IF_RW, + LS_FILL, + LS_SORT, // Builtin Functions - ABSOLUTE, - ADDR, - ALIGN, + LS_ABSOLUTE, + LS_ADDR, + LS_ALIGN, // BLOCK, // synonym for ALIGN for compatibility with older linker script - DATA_SEGMENT_ALIGN, - DATA_SEGMENT_END, - DEFINED, - LOADADDR, - - LOG2CEIL, - MAX, - MIN, - ORIGIN, - SEGMENT_START, + LS_DATA_SEGMENT_ALIGN, + LS_DATA_SEGMENT_END, + LS_DEFINED, + LS_LOADADDR, + + LS_LOG2CEIL, + LS_MAX, + LS_MIN, + LS_ORIGIN, + LS_SEGMENT_START, // NEXT, // This function is closely related to ALIGN(exp); unless you use the // MEMORY command to define discontinuous memory for the output file, the two // functions are equivalent. - SIZEOF, - SIZEOF_HEADERS, + LS_SIZEOF, + LS_SIZEOF_HEADERS, // PHDRS Command - FILEHDR, - PHDRS, - AT, - FLAGS, + LS_FILEHDR, + LS_PHDRS, + LS_AT, + LS_FLAGS, // Version Command - VERSION, + LS_VERSION, - REGION_ALIAS, - AS_NEEDED, - CONSTRUCTORS, + LS_REGION_ALIAS, + LS_AS_NEEDED, + LS_CONSTRUCTORS, // Symbolic Constants - MAXPAGESIZE, - COMMONPAGESIZE, + LS_MAXPAGESIZE, + LS_COMMONPAGESIZE, - Local, // local - Global, // global - Extern, // extern + LS_Local, // local + LS_Global, // global + LS_Extern, // extern - ELF, - Binary, + LS_ELF, + LS_Binary, Error, + Eof, SymbolName, FileName, @@ -125,7 +126,7 @@ enum class ScriptToken { Colon, // : Asterisk, // * QuestionMark, // ? 
- Bacckslash, // "\" + Backslash, // "\" Slash, // / Greater, // > Less, // < @@ -145,7 +146,15 @@ enum class ScriptToken { LeftShiftAssign, // <<= RightShiftAssign, // >>= BitWiseAssign, // &= - BarAssign // |= + BarAssign, // |= + + // operator token + NotEqual, // != + Equal, // == + GreatEqual, // >= + LessEqual, // <= + LeftShift, // << + RightShift, // >> }; } // namespace elf } // namespace lld From df59067769e801a9457dcb6da56a2b77aefc2dd7 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Thu, 11 Jul 2024 05:44:34 +0000 Subject: [PATCH 09/29] [lld][ELF] added more switch case for generating tokens This commit contains incomplete code; this does not introduce a build error but API in this commit is not a working one. This commit is for saving changes. --- lld/ELF/LinkerScriptLexer.cpp | 164 ++++++++++++++++++++++++---------- lld/ELF/LinkerScriptLexer.h | 22 +++-- lld/ELF/ScriptTokenizer.h | 26 +++--- 3 files changed, 150 insertions(+), 62 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index d5d55b248d0da..a9d08dc661902 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -19,6 +19,7 @@ LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, : MB(MB), ErrorInfo(Err), SM(SM) { curStringRef = MB.getBuffer(); curPtr = curStringRef.begin(); + // TODO: set up the start for tok1 and tok2 } bool LinkerScriptLexer::Error(SMLoc ErrorLoc, const Twine &Msg) const { @@ -30,6 +31,21 @@ void LinkerScriptLexer::Warning(SMLoc WarningLoc, const Twine &Msg) const { SM.PrintMessage(WarningLoc, llvm::SourceMgr::DK_Warning, Msg); } +ScriptToken LinkerScriptLexer::next() { + tok1 = tok2; + tok1Pos = tok2Pos; + tok1Val = tok2Val; + + tok2 = getToken(); + return tok1; +} + +ScriptToken LinkerScriptLexer::peek() { return tok1; } + +ScriptToken LinkerScriptLexer::peek2() { return tok2; } + +bool LinkerScriptLexer::expect(ScriptToken token) { return token == tok1; } + ScriptToken 
LinkerScriptLexer::getToken() { while (true) { curStringRef = skipComments(); @@ -51,49 +67,108 @@ ScriptToken LinkerScriptLexer::getToken() { continue; } - // Unquoted token. This is more relaxed than tokens in C-like language, - // so that you can write "file-name.cpp" as one bare token, for example. - size_t pos = curStringRef.find_first_not_of( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - "0123456789_.$/\\~=+[]*?-!^:"); - // TODO: a new function for this for keeping the code clean - if (pos == 0) { - // single char - - const char *curChar = curStringRef.begin(); - switch (*curChar) { - case EOF: - return ScriptToken::Eof; - case '(': - return ScriptToken::BracektBegin; - case ')': - return ScriptToken::BracektEnd; - case '{': - return ScriptToken::CurlyBegin; - case '}': - return ScriptToken::CurlyEnd; - case ';': - return ScriptToken::Semicolon; - case ',': - return ScriptToken::Comma; - case ':': - return ScriptToken::Colon; - case '*': - case '/': - case '+': - case '-': - case '<': - case '>': - case '&': - case '^': - case '|': - default: - // TODO - break; + const char *curChar = curStringRef.begin(); + switch (*curChar) { + case EOF: + return ScriptToken::Eof; + case '(': + return ScriptToken::BracektBegin; + case ')': + return ScriptToken::BracektEnd; + case '{': + return ScriptToken::CurlyBegin; + case '}': + return ScriptToken::CurlyEnd; + case ';': + return ScriptToken::Semicolon; + case ',': + return ScriptToken::Comma; + case '<': + if (curStringRef.size() > 2 && curStringRef[1] == '<' && + curStringRef[2] == '=') { + curStringRef = curStringRef.substr(3); + return ScriptToken::RightShiftAssign; } - } else { - // TODO: COMMAND / user defined symbol/file name match - } + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + curStringRef = curStringRef.substr(2); + return ScriptToken::LessEqual; + } else if (curStringRef[1] == '<') { + curStringRef = curStringRef.substr(2); + return ScriptToken::LeftShift; + } + } + 
curStringRef = curStringRef.substr(1); + return ScriptToken::Less; + case '>': + if (curStringRef.size() > 2 && curStringRef[1] == '>' && + curStringRef[2] == '=') { + curStringRef = curStringRef.substr(3); + return ScriptToken::LeftShiftAssign; + } + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + curStringRef = curStringRef.substr(2); + return ScriptToken::GreaterEqual; + } else if (curStringRef[1] == '>') { + curStringRef = curStringRef.substr(2); + return ScriptToken::RightShift; + } + } + curStringRef = curStringRef.substr(1); + return ScriptToken::Greater; + case '&': + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + curStringRef = curStringRef.substr(2); + return ScriptToken::AndAssign; + } else if (curStringRef[1] == '&') { + curStringRef = curStringRef.substr(2); + return ScriptToken::AndGate; + } + } + curStringRef = curStringRef.substr(1); + return ScriptToken::Bitwise; + case '^': + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + curStringRef = curStringRef.substr(2); + return ScriptToken::AndAssign; + } + } + curStringRef = curStringRef.substr(1); + return ScriptToken::Xor; + case '|': + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + curStringRef = curStringRef.substr(2); + return ScriptToken::OrAssign; + } else if (curStringRef[1] == '|') { + curStringRef = curStringRef.substr(2); + return ScriptToken::OrGate; + } + } + curStringRef = curStringRef.substr(1); + return ScriptToken::Or; + default: + // Unquoted token. This is more relaxed than tokens in C-like language, + // so that you can write "file-name.cpp" as one bare token, for example. + size_t pos = curStringRef.find_first_not_of( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$/\\~=+[]*?-!^:"); + + // Quoted strings are literal strings, so we don't want to split it. 
+ if (inExpression && !curStringRef.starts_with("\"")) { + StringRef ops = "!~*/+-<>?^:="; // List of operators + size_t e = curStringRef.find_first_of(ops); + if (e != StringRef::npos && e != 0) { + curStringRef = curStringRef.substr(e); + return ScriptToken::Identify; + } + } + curStringRef = curStringRef.substr(pos); + return ScriptToken::Identify; + }; return ScriptToken::Error; } @@ -127,7 +202,6 @@ llvm::StringRef LinkerScriptLexer::skipComments() { } } -ScriptToken LinkerScriptLexer::getCommandOrSymbolName() { - // TODO: use marco like .ll AsmLexer - return ScriptToken::SymbolName; +ScriptToken LinkerScriptLexer::getCommandOrIdentify(size_t pos) { + return ScriptToken::Identify; } diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 3b25c81731747..09c6b890565fa 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -34,24 +34,36 @@ class LinkerScriptLexer { void Warning(llvm::SMLoc WarningLoc, const llvm::Twine &Msg) const; void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); } - const std::string &getStrVal() const { return strVal; } + ScriptToken next(); // update tok1 and tok2 + ScriptToken peek(); // return tok1 + ScriptToken peek2(); // return tok2 - void checkToken(ScriptToken token); + const std::string &getTok1Val() const { return tok1Val; } + const std::string &getTok2Val() const { return tok2Val; } + + bool expect(ScriptToken token); // check if tok1 matches argument token + bool inExpression = false; private: llvm::SMDiagnostic &ErrorInfo; llvm::SourceMgr &SM; + const char *curPtr; llvm::MemoryBufferRef MB; + llvm::StringRef curStringRef; - std::string strVal; + ScriptToken tok1; + ScriptToken tok2; + size_t tok1Pos = 0; + size_t tok2Pos = 0; + std::string tok1Val; + std::string tok2Val; const char *tokStart; llvm::StringRef skipComments(); ScriptToken getToken(); - ScriptToken getArithmeticOrAssignment(); - ScriptToken getCommandOrSymbolName(); + ScriptToken 
getCommandOrIdentify(size_t pos); }; } // namespace lld::elf diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index 2c81e19ea638c..750d0bc43f76b 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -114,8 +114,7 @@ enum class ScriptToken { Error, Eof, - SymbolName, - FileName, + Identify, CurlyBegin, // { CurlyEnd, // } @@ -133,8 +132,8 @@ enum class ScriptToken { Minus, // - Plus, // + Bitwise, // & - Not, // ^ - VerticalBar, // | + Xor, // ^ + Or, // | PercentSign, // % // Assignmemnt @@ -145,16 +144,19 @@ enum class ScriptToken { DivAssign, // /= LeftShiftAssign, // <<= RightShiftAssign, // >>= - BitWiseAssign, // &= - BarAssign, // |= + AndAssign, // &= + OrAssign, // |= + XorAssign, // ^= // operator token - NotEqual, // != - Equal, // == - GreatEqual, // >= - LessEqual, // <= - LeftShift, // << - RightShift, // >> + NotEqual, // != + Equal, // == + GreaterEqual, // >= + LessEqual, // <= + LeftShift, // << + RightShift, // >> + AndGate, // && + OrGate, // || }; } // namespace elf } // namespace lld From 4b0583fe19f52fe8610f76735eb3bb26a1328089 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Sat, 13 Jul 2024 01:27:29 +0000 Subject: [PATCH 10/29] [lld][ELF] Added TokenInfo struct and unittest files Added TokenInfo struct for token kind and StringRef val and basic file setup for adding unittests for later. 
--- lld/ELF/LinkerScriptLexer.cpp | 292 ++++++++++---------- lld/ELF/LinkerScriptLexer.h | 44 +-- lld/ELF/ScriptTokenizer.h | 10 + lld/unittests/CMakeLists.txt | 1 + lld/unittests/ELF/CMakeLists.txt | 11 + lld/unittests/ELF/LinkerScriptLexerTest.cpp | 45 +++ 6 files changed, 239 insertions(+), 164 deletions(-) create mode 100644 lld/unittests/ELF/CMakeLists.txt create mode 100644 lld/unittests/ELF/LinkerScriptLexerTest.cpp diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index a9d08dc661902..e3684178e5b78 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -18,10 +18,9 @@ LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, llvm::SMDiagnostic &Err) : MB(MB), ErrorInfo(Err), SM(SM) { curStringRef = MB.getBuffer(); - curPtr = curStringRef.begin(); - // TODO: set up the start for tok1 and tok2 } +/* bool LinkerScriptLexer::Error(SMLoc ErrorLoc, const Twine &Msg) const { ErrorInfo = SM.GetMessage(ErrorLoc, llvm::SourceMgr::DK_Error, Msg); return true; @@ -30,148 +29,30 @@ bool LinkerScriptLexer::Error(SMLoc ErrorLoc, const Twine &Msg) const { void LinkerScriptLexer::Warning(SMLoc WarningLoc, const Twine &Msg) const { SM.PrintMessage(WarningLoc, llvm::SourceMgr::DK_Warning, Msg); } +*/ -ScriptToken LinkerScriptLexer::next() { - tok1 = tok2; - tok1Pos = tok2Pos; - tok1Val = tok2Val; +// bool LinkerScriptLexer::expect(ScriptToken token) { return token == tok1; } - tok2 = getToken(); - return tok1; +inline LinkerScriptLexer::TokenInfo +LinkerScriptLexer::advanceTokenInfo(ScriptToken kind, size_t pos = 1) { + // TODO: special case for kind == ScriptToken::Error + llvm::StringRef valRef = curStringRef.substr(0, pos); + curStringRef = curStringRef.substr(pos); + return {kind, valRef}; } -ScriptToken LinkerScriptLexer::peek() { return tok1; } - -ScriptToken LinkerScriptLexer::peek2() { return tok2; } - -bool LinkerScriptLexer::expect(ScriptToken token) { return token == tok1; } - -ScriptToken 
LinkerScriptLexer::getToken() { - while (true) { - curStringRef = skipComments(); - - // Quoted token. Note that double-quote characters are parts of a token - // because, in a glob match context, only unquoted tokens are interpreted as - // glob patterns. Double-quoted tokens are literal patterns in that context. - if (curStringRef.starts_with("\"")) { - size_t e = curStringRef.find("\"", 1); - if (e == StringRef::npos) { - StringRef fileName = MB.getBufferIdentifier(); - Error(fileName + ": unclosed quote"); - return ScriptToken::Error; - } - llvm::StringRef quotedRef = curStringRef.take_front(e + 1); - // TODO: need a function to tell if quotedRef is a keyword or not - // and return ScriptToken - curStringRef = curStringRef.substr(e + 1); - continue; - } - - const char *curChar = curStringRef.begin(); - switch (*curChar) { - case EOF: - return ScriptToken::Eof; - case '(': - return ScriptToken::BracektBegin; - case ')': - return ScriptToken::BracektEnd; - case '{': - return ScriptToken::CurlyBegin; - case '}': - return ScriptToken::CurlyEnd; - case ';': - return ScriptToken::Semicolon; - case ',': - return ScriptToken::Comma; - case '<': - if (curStringRef.size() > 2 && curStringRef[1] == '<' && - curStringRef[2] == '=') { - curStringRef = curStringRef.substr(3); - return ScriptToken::RightShiftAssign; - } - if (curStringRef.size() > 1) { - if (curStringRef[1] == '=') { - curStringRef = curStringRef.substr(2); - return ScriptToken::LessEqual; - } else if (curStringRef[1] == '<') { - curStringRef = curStringRef.substr(2); - return ScriptToken::LeftShift; - } - } - curStringRef = curStringRef.substr(1); - return ScriptToken::Less; - case '>': - if (curStringRef.size() > 2 && curStringRef[1] == '>' && - curStringRef[2] == '=') { - curStringRef = curStringRef.substr(3); - return ScriptToken::LeftShiftAssign; - } - if (curStringRef.size() > 1) { - if (curStringRef[1] == '=') { - curStringRef = curStringRef.substr(2); - return ScriptToken::GreaterEqual; - } else if 
(curStringRef[1] == '>') { - curStringRef = curStringRef.substr(2); - return ScriptToken::RightShift; - } - } - curStringRef = curStringRef.substr(1); - return ScriptToken::Greater; - case '&': - if (curStringRef.size() > 1) { - if (curStringRef[1] == '=') { - curStringRef = curStringRef.substr(2); - return ScriptToken::AndAssign; - } else if (curStringRef[1] == '&') { - curStringRef = curStringRef.substr(2); - return ScriptToken::AndGate; - } - } - curStringRef = curStringRef.substr(1); - return ScriptToken::Bitwise; - case '^': - if (curStringRef.size() > 1) { - if (curStringRef[1] == '=') { - curStringRef = curStringRef.substr(2); - return ScriptToken::AndAssign; - } - } - curStringRef = curStringRef.substr(1); - return ScriptToken::Xor; - case '|': - if (curStringRef.size() > 1) { - if (curStringRef[1] == '=') { - curStringRef = curStringRef.substr(2); - return ScriptToken::OrAssign; - } else if (curStringRef[1] == '|') { - curStringRef = curStringRef.substr(2); - return ScriptToken::OrGate; - } - } - curStringRef = curStringRef.substr(1); - return ScriptToken::Or; - default: - // Unquoted token. This is more relaxed than tokens in C-like language, - // so that you can write "file-name.cpp" as one bare token, for example. - size_t pos = curStringRef.find_first_not_of( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - "0123456789_.$/\\~=+[]*?-!^:"); - - // Quoted strings are literal strings, so we don't want to split it. 
- if (inExpression && !curStringRef.starts_with("\"")) { - StringRef ops = "!~*/+-<>?^:="; // List of operators - size_t e = curStringRef.find_first_of(ops); - if (e != StringRef::npos && e != 0) { - curStringRef = curStringRef.substr(e); - return ScriptToken::Identify; - } - } - curStringRef = curStringRef.substr(pos); - return ScriptToken::Identify; - }; - - return ScriptToken::Error; - } +LinkerScriptLexer::TokenInfo LinkerScriptLexer::getTokenInfo() { + curStringRef = skipComments(); + + // TODO: make sure the empty situation is not an error + if (curStringRef.empty()) + return advanceTokenInfo(ScriptToken::Eof); + const char c = curStringRef.front(); + if (std::isdigit(c)) + return getDigits(); + if (std::isalpha(c)) + return getCommandOrIdentify(); + return getSymbolToken(); } llvm::StringRef LinkerScriptLexer::skipComments() { @@ -181,7 +62,7 @@ llvm::StringRef LinkerScriptLexer::skipComments() { if (curStringRef.starts_with("/*")) { size_t e = curStringRef.find("*/", 2); if (e == llvm::StringRef::npos) { - Error("Unclosed comment in a linker script"); + // TODO: Error("Unclosed comment in a linker script"); return ""; } curStringRef = curStringRef.substr(e + 2); @@ -202,6 +83,131 @@ llvm::StringRef LinkerScriptLexer::skipComments() { } } -ScriptToken LinkerScriptLexer::getCommandOrIdentify(size_t pos) { - return ScriptToken::Identify; +LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() { + const char c = curStringRef.front(); + // TODO: single char token needs to substr(1) + switch (c) { + case EOF: + return advanceTokenInfo(ScriptToken::Eof); + case '"': + return getQuotedToken(); + case '(': + return advanceTokenInfo(ScriptToken::BracektBegin); + case ')': + return advanceTokenInfo(ScriptToken::BracektEnd); + case '{': + return advanceTokenInfo(ScriptToken::CurlyBegin); + case '}': + return advanceTokenInfo(ScriptToken::CurlyEnd); + case ';': + return advanceTokenInfo(ScriptToken::Semicolon); + case ',': + return 
advanceTokenInfo(ScriptToken::Comma); + case '<': + if (curStringRef.size() > 2 && curStringRef[1] == '<' && + curStringRef[2] == '=') { + return advanceTokenInfo(ScriptToken::RightShiftAssign, 3); + } + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + return advanceTokenInfo(ScriptToken::LessEqual, 2); + } else if (curStringRef[1] == '<') { + return advanceTokenInfo(ScriptToken::LeftShift, 2); + } + } + return advanceTokenInfo(ScriptToken::Less); + case '>': + if (curStringRef.size() > 2 && curStringRef[1] == '>' && + curStringRef[2] == '=') { + return advanceTokenInfo(ScriptToken::LeftShiftAssign, 3); + } + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + return advanceTokenInfo(ScriptToken::GreaterEqual, 2); + } else if (curStringRef[1] == '>') { + return advanceTokenInfo(ScriptToken::RightShift, 2); + } + } + return advanceTokenInfo(ScriptToken::Greater); + case '&': + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + return advanceTokenInfo(ScriptToken::AndAssign, 2); + } else if (curStringRef[1] == '&') { + return advanceTokenInfo(ScriptToken::AndGate, 2); + } + } + return advanceTokenInfo(ScriptToken::Bitwise); + case '^': + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + return advanceTokenInfo(ScriptToken::AndAssign, 2); + } + } + return advanceTokenInfo(ScriptToken::Xor); + case '|': + if (curStringRef.size() > 1) { + if (curStringRef[1] == '=') { + return advanceTokenInfo(ScriptToken::OrAssign, 2); + } else if (curStringRef[1] == '|') { + return advanceTokenInfo(ScriptToken::OrGate, 2); + } + } + return advanceTokenInfo(ScriptToken::Or); + default: + return advanceTokenInfo(ScriptToken::Error); + } +} + +LinkerScriptLexer::TokenInfo LinkerScriptLexer::getQuotedToken() { + // Quoted token. Note that double-quote characters are parts of a token + // because, in a glob match context, only unquoted tokens are interpreted as + // glob patterns. 
Double-quoted tokens are literal patterns in that context. + size_t e = curStringRef.find("\"", 1); + if (e == StringRef::npos) { + StringRef fileName = MB.getBufferIdentifier(); + // TODO: Error(fileName + ": unclosed quote"); + return advanceTokenInfo(ScriptToken::Error, e); + } + return advanceTokenInfo(ScriptToken::Quote, e + 1); +} + +LinkerScriptLexer::TokenInfo LinkerScriptLexer::getDigits() { + size_t pos = curStringRef.find_first_not_of("0123456789XxHhKkMm"); + if (curStringRef.starts_with_insensitive("0x")) { + return advanceTokenInfo(ScriptToken::Hexdecimal, pos); + } + const char c = curStringRef[pos]; + switch (c) { + case 'H': + case 'h': + return advanceTokenInfo(ScriptToken::Hexdecimal_H, pos); + case 'K': + case 'k': + return advanceTokenInfo(ScriptToken::Decimal_K, pos); + case 'M': + case 'm': + return advanceTokenInfo(ScriptToken::Decimal_M, pos); + default: + return advanceTokenInfo(ScriptToken::Decimal, pos); + }; +} + +LinkerScriptLexer::TokenInfo LinkerScriptLexer::getCommandOrIdentify() { + // Unquoted token. This is more relaxed than tokens in C-like language, + // so that you can write "file-name.cpp" as one bare token, for example. + size_t pos = curStringRef.find_first_not_of( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$/\\~=+[]*?-!^:"); + + // Quoted strings are literal strings, so we don't want to split it. 
+ if (inExpression && !curStringRef.starts_with("\"")) { + StringRef ops = "!~*/+-<>?^:="; // List of operators + size_t e = curStringRef.find_first_of(ops); + if (e != StringRef::npos && e != 0) { + curStringRef = curStringRef.substr(e); + return advanceTokenInfo(ScriptToken::Identify, e); + } + } + return advanceTokenInfo(ScriptToken::Identify, pos); } diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 09c6b890565fa..15f2ed7979f6f 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -26,44 +26,46 @@ class LinkerScriptLexer { public: explicit LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, llvm::SMDiagnostic &Err); - llvm::SMLoc getLoc() const { + struct TokenInfo { + ScriptToken kind; + llvm::StringRef val; + }; + + // LLVM SourceMgr and SMDiagnostic cannot be used now since + // ctx CommonLinkerContext has ownership of all MemoryBuffer + // by using SmallVector> memoryBuffers in ELF/Config.h + /*llvm::SMLoc getLoc() const { return llvm::SMLoc::getFromPointer(curStringRef.begin()); } + bool Error(llvm::SMLoc ErrorLoc, const llvm::Twine &Msg) const; bool Error(const llvm::Twine &Msg) const { return Error(getLoc(), Msg); } void Warning(llvm::SMLoc WarningLoc, const llvm::Twine &Msg) const; - void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); } - - ScriptToken next(); // update tok1 and tok2 - ScriptToken peek(); // return tok1 - ScriptToken peek2(); // return tok2 - - const std::string &getTok1Val() const { return tok1Val; } - const std::string &getTok2Val() const { return tok2Val; } + void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); }*/ bool expect(ScriptToken token); // check if tok1 matches argument token bool inExpression = false; + // TODO: rewrite next(), peek(), and peek2() since TokenInfo change + private: llvm::SMDiagnostic &ErrorInfo; llvm::SourceMgr &SM; - - const char *curPtr; llvm::MemoryBufferRef MB; - llvm::StringRef curStringRef; - ScriptToken tok1; 
- ScriptToken tok2; - size_t tok1Pos = 0; - size_t tok2Pos = 0; - std::string tok1Val; - std::string tok2Val; - const char *tokStart; + // ScriptToken tok1; + // ScriptToken tok2; + // size_t tok1Pos = 0; + // size_t tok2Pos = 0; llvm::StringRef skipComments(); - ScriptToken getToken(); - ScriptToken getCommandOrIdentify(size_t pos); + TokenInfo advanceTokenInfo(ScriptToken kind, size_t pos); + TokenInfo getTokenInfo(); + TokenInfo getSymbolToken(); + TokenInfo getQuotedToken(); + TokenInfo getDigits(); + TokenInfo getCommandOrIdentify(); }; } // namespace lld::elf diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index 750d0bc43f76b..a960106a35b18 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -16,6 +16,7 @@ namespace lld { namespace elf { enum class ScriptToken { + // LS stands for LinkerScript LS_ENTRY, // Commands Files @@ -115,7 +116,13 @@ enum class ScriptToken { Eof, Identify, + Hexdecimal, // 0x + Hexdecimal_H, // end with H/h + Decimal, + Decimal_K, // end with K/k + Decimal_M, // end with M/m + // Symbol tokens CurlyBegin, // { CurlyEnd, // } BracektBegin, // ( @@ -135,6 +142,9 @@ enum class ScriptToken { Xor, // ^ Or, // | PercentSign, // % + Quote, // Quoted token. Note that double-quote characters are parts of a token + // because, in a glob match context, only unquoted tokens are interpreted as + // glob patterns. Double-quoted tokens are literal patterns in that context. 
// Assignmemnt Assign, // = diff --git a/lld/unittests/CMakeLists.txt b/lld/unittests/CMakeLists.txt index ffaea3f207833..237e8679020a0 100644 --- a/lld/unittests/CMakeLists.txt +++ b/lld/unittests/CMakeLists.txt @@ -4,3 +4,4 @@ endfunction() add_subdirectory(AsLibAll) add_subdirectory(AsLibELF) +add_subdirectory(ELF) diff --git a/lld/unittests/ELF/CMakeLists.txt b/lld/unittests/ELF/CMakeLists.txt new file mode 100644 index 0000000000000..1476323299230 --- /dev/null +++ b/lld/unittests/ELF/CMakeLists.txt @@ -0,0 +1,11 @@ +# Test usage of LLD Linker Script Lexer + +add_lld_unittests(LLDELFLexerTests + LinkerScriptLexerTest.cpp +) + +target_link_libraries(LLDELFLexerTests + PRIVATE + lldELF + lldCommon +) diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp new file mode 100644 index 0000000000000..d135fd34a3e4e --- /dev/null +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -0,0 +1,45 @@ +//===- lld/unittests/ELF/LinkerScriptLexerTest.cpp --------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/ELF/LinkerScriptLexer.h" +#include "llvm/ADT/SmallVector" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuff.h" +#include "llvm/Support/SourceMgr.h" + +#include "gtest/gtest.h" + +namespace lld { +namespace elf { + +class LinkerScriptLexerTest : public testing::Test { +protected: + llvm::SourceMgr SrcMgr; + llvm::SMDiagnostic Err; + std::unique_ptr Lexer; + + /*void setupCallToLinkScriptLexer(llvm::StringRef scriptStr) { + std::unique_ptr + Buffer(llvm::MemoryBuffer::getMemBuffer(scriptStr)); + SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); + EXPECT_EQ(Buffer, nullptr); + + Lexer.reset(LinkerScriptLexer()) + } + + void lexAndCheckTokens(llvm::StringRef scriptStr, + llvm::SmallVector ExpectedTokens) { + + for(size_t I = 0; I < ExpectedTokens.size(); ++I) { + EXPECTED_EQ(); + } + } */ +}; +} // namespace elf +} // namespace lld From 2116448de77830cd79aca3f68a4b2811ef2ffd8b Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Sun, 14 Jul 2024 05:10:43 +0000 Subject: [PATCH 11/29] [lld][ELF] Lexer update Added a private function for checking linker script keyword and public functions for accessing TokenInfo variables. 
--- lld/ELF/LinkerScriptLexer.cpp | 84 ++++++++++++++++++++++++++++++++++- lld/ELF/LinkerScriptLexer.h | 19 ++++---- 2 files changed, 92 insertions(+), 11 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index e3684178e5b78..1f13c2dbf5633 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -33,6 +33,8 @@ void LinkerScriptLexer::Warning(SMLoc WarningLoc, const Twine &Msg) const { // bool LinkerScriptLexer::expect(ScriptToken token) { return token == tok1; } +void LinkerScriptLexer::advanceLexer() { curToken = getTokenInfo(); } + inline LinkerScriptLexer::TokenInfo LinkerScriptLexer::advanceTokenInfo(ScriptToken kind, size_t pos = 1) { // TODO: special case for kind == ScriptToken::Error @@ -205,9 +207,87 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getCommandOrIdentify() { StringRef ops = "!~*/+-<>?^:="; // List of operators size_t e = curStringRef.find_first_of(ops); if (e != StringRef::npos && e != 0) { - curStringRef = curStringRef.substr(e); return advanceTokenInfo(ScriptToken::Identify, e); } } - return advanceTokenInfo(ScriptToken::Identify, pos); + + return advanceTokenInfo(getTokenfromKeyword(curStringRef.substr(0, pos)), + pos); +} + +ScriptToken +LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const { +#define KEYWORD(STR) \ + do { \ + if (keyword == #STR) \ + return ScriptToken::LS_##STR; \ + } while (false) + + KEYWORD(ENTRY); + KEYWORD(INCLUDE); + KEYWORD(GROUP); + KEYWORD(OUTPUT); + KEYWORD(SEARCH_DIR); + KEYWORD(STARTUP); + KEYWORD(INSERT); + KEYWORD(AFTER); + KEYWORD(OUTPUT_FORMAT); + KEYWORD(TARGET); + KEYWORD(OUTPUT_FORMAT); + KEYWORD(ASSERT); + KEYWORD(EXTERN); + KEYWORD(OUTPUT_ARCH); + KEYWORD(PROVIDE); + KEYWORD(HIDDEN); + KEYWORD(PROVIDE_HIDDEN); + KEYWORD(SECTIONS); + KEYWORD(EXCLUDE_FILE); + KEYWORD(KEEP); + KEYWORD(INPUT_SECTION_FLAGS); + KEYWORD(OVERLAY); + KEYWORD(NOLOAD); + KEYWORD(COPY); + KEYWORD(INFO); + KEYWORD(OVERWRITE_SECTIONS); + 
KEYWORD(SUBALIGN); + KEYWORD(ONLY_IF_RO); + KEYWORD(ONLY_IF_RW); + KEYWORD(FILL); + KEYWORD(SORT); + KEYWORD(ABSOLUTE); + KEYWORD(ADDR); + KEYWORD(ALIGN); + KEYWORD(DATA_SEGMENT_ALIGN); + KEYWORD(DATA_SEGMENT_END); + KEYWORD(DEFINED); + KEYWORD(LOADADDR); + KEYWORD(LOG2CEIL); + KEYWORD(MAX); + KEYWORD(MIN); + KEYWORD(ORIGIN); + KEYWORD(SEGMENT_START); + KEYWORD(SIZEOF); + KEYWORD(SIZEOF_HEADERS); + KEYWORD(FILEHDR); + KEYWORD(PHDRS); + KEYWORD(AT); + KEYWORD(FLAGS); + KEYWORD(VERSION); + KEYWORD(REGION_ALIAS); + KEYWORD(AS_NEEDED); + KEYWORD(CONSTRUCTORS); + KEYWORD(MAXPAGESIZE); + KEYWORD(COMMONPAGESIZE); + +#undef KEYWORD + + if (keyword == "local") { + return ScriptToken::LS_Local; + } else if (keyword == "global") { + return ScriptToken::LS_Global; + } else if (keyword == "extern") { + return ScriptToken::LS_Extern; + } else { + return ScriptToken::Identify; + } } diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 15f2ed7979f6f..37298187cd1fc 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -26,10 +26,6 @@ class LinkerScriptLexer { public: explicit LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, llvm::SMDiagnostic &Err); - struct TokenInfo { - ScriptToken kind; - llvm::StringRef val; - }; // LLVM SourceMgr and SMDiagnostic cannot be used now since // ctx CommonLinkerContext has ownership of all MemoryBuffer @@ -47,18 +43,22 @@ class LinkerScriptLexer { bool inExpression = false; // TODO: rewrite next(), peek(), and peek2() since TokenInfo change + void advanceLexer(); + const ScriptToken getTokenKind() const { return curToken.kind; }; + llvm::StringRef getTokenStringRef() const { return curToken.val; }; private: + struct TokenInfo { + ScriptToken kind; + llvm::StringRef val; + }; + + TokenInfo curToken; llvm::SMDiagnostic &ErrorInfo; llvm::SourceMgr &SM; llvm::MemoryBufferRef MB; llvm::StringRef curStringRef; - // ScriptToken tok1; - // ScriptToken tok2; - // size_t tok1Pos = 0; - // size_t 
tok2Pos = 0; - llvm::StringRef skipComments(); TokenInfo advanceTokenInfo(ScriptToken kind, size_t pos); TokenInfo getTokenInfo(); @@ -66,6 +66,7 @@ class LinkerScriptLexer { TokenInfo getQuotedToken(); TokenInfo getDigits(); TokenInfo getCommandOrIdentify(); + ScriptToken getTokenfromKeyword(llvm::StringRef keyword) const; }; } // namespace lld::elf From c6e9ff293e6631564853adf7715f0bd0ed1f6122 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Sun, 14 Jul 2024 06:08:29 +0000 Subject: [PATCH 12/29] [lld][ELF] updated unittest This updates for keeping the commit; the unittests are not working yet --- lld/ELF/LinkerScriptLexer.cpp | 10 ++++-- lld/ELF/LinkerScriptLexer.h | 10 +++--- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 34 ++++++++++----------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index 1f13c2dbf5633..d7b59a05ff1b6 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -14,9 +14,13 @@ using namespace llvm; using namespace lld; using namespace lld::elf; -LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, - llvm::SMDiagnostic &Err) - : MB(MB), ErrorInfo(Err), SM(SM) { +// LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, +// llvm::SMDiagnostic &Err) +// : MB(MB), ErrorInfo(Err), SM(SM) { +// curStringRef = MB.getBuffer(); +// } + +LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB) : MB(MB) { curStringRef = MB.getBuffer(); } diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 37298187cd1fc..6dd7fd680c559 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -24,8 +24,10 @@ class SourceMgr; class LinkerScriptLexer { public: - explicit LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, - llvm::SMDiagnostic &Err); + // explicit LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM, + // llvm::SMDiagnostic &Err); + + explicit 
LinkerScriptLexer(MemoryBufferRef MB); // LLVM SourceMgr and SMDiagnostic cannot be used now since // ctx CommonLinkerContext has ownership of all MemoryBuffer @@ -54,8 +56,8 @@ class LinkerScriptLexer { }; TokenInfo curToken; - llvm::SMDiagnostic &ErrorInfo; - llvm::SourceMgr &SM; + // llvm::SMDiagnostic &ErrorInfo; + // llvm::SourceMgr &SM; llvm::MemoryBufferRef MB; llvm::StringRef curStringRef; diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index d135fd34a3e4e..bb534d472eb00 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -6,12 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "lld/ELF/LinkerScriptLexer.h" +#include "../ELF/LinkerScriptLexer.h" #include "llvm/ADT/SmallVector" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuff.h" -#include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" @@ -20,26 +19,27 @@ namespace elf { class LinkerScriptLexerTest : public testing::Test { protected: - llvm::SourceMgr SrcMgr; - llvm::SMDiagnostic Err; std::unique_ptr Lexer; + std::unique_ptr Buffer; - /*void setupCallToLinkScriptLexer(llvm::StringRef scriptStr) { - std::unique_ptr - Buffer(llvm::MemoryBuffer::getMemBuffer(scriptStr)); - SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); - EXPECT_EQ(Buffer, nullptr); - - Lexer.reset(LinkerScriptLexer()) + void setupCallToLinkScriptLexer(llvm::StringRef scriptStr) { + Buffer.reset(llvm::MemoryBuffer::getMemBuffer(scriptStr)); + Lexer.reset(LinkerScriptLexer(Buffer->getMemBufferRef())) } - void lexAndCheckTokens(llvm::StringRef scriptStr, - llvm::SmallVector ExpectedTokens) { - - for(size_t I = 0; I < ExpectedTokens.size(); ++I) { - EXPECTED_EQ(); + void lexAndCheckTokens(llvm::SmallVector ExpectedTokens) { + for (const auto &expected : ExpectedTokens) { + Lexer->advanceLexer(); + 
EXPECTED_EQ(Lexer->getTokenKind, expected); } - } */ + } }; + +TEST(LinkerScriptLexerTest, CheckEntry) { + llvm::StringRef testRef = " ENTRY"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens({ScriptToken::ENTRY}); + lexAndCheckTokens(ExpectedTokens); +} } // namespace elf } // namespace lld From c5463ebddff347368d7b2b64a96a627fc0c18f26 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 15 Jul 2024 01:52:47 +0000 Subject: [PATCH 13/29] [lld][ELF] update lexer unittest --- lld/unittests/ELF/CMakeLists.txt | 4 ++++ lld/unittests/ELF/LinkerScriptLexerTest.cpp | 16 +++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/lld/unittests/ELF/CMakeLists.txt b/lld/unittests/ELF/CMakeLists.txt index 1476323299230..152a92ee476fd 100644 --- a/lld/unittests/ELF/CMakeLists.txt +++ b/lld/unittests/ELF/CMakeLists.txt @@ -1,4 +1,7 @@ # Test usage of LLD Linker Script Lexer +set(LLVM_LINK_COMPONENTS + Support +) add_lld_unittests(LLDELFLexerTests LinkerScriptLexerTest.cpp @@ -8,4 +11,5 @@ target_link_libraries(LLDELFLexerTests PRIVATE lldELF lldCommon + LLVMTestingSupport ) diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index bb534d472eb00..b042920691f56 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -7,10 +7,11 @@ //===----------------------------------------------------------------------===// #include "../ELF/LinkerScriptLexer.h" -#include "llvm/ADT/SmallVector" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MemoryBuff.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include "gtest/gtest.h" @@ -23,22 +24,23 @@ class LinkerScriptLexerTest : public testing::Test { std::unique_ptr Buffer; void setupCallToLinkScriptLexer(llvm::StringRef scriptStr) { - 
Buffer.reset(llvm::MemoryBuffer::getMemBuffer(scriptStr)); - Lexer.reset(LinkerScriptLexer(Buffer->getMemBufferRef())) + Buffer = llvm::MemoryBuffer::getMemBuffer(scriptStr); + Lexer = std::make_unique( + LinkerScriptLexer(Buffer->getMemBufferRef())); } void lexAndCheckTokens(llvm::SmallVector ExpectedTokens) { for (const auto &expected : ExpectedTokens) { Lexer->advanceLexer(); - EXPECTED_EQ(Lexer->getTokenKind, expected); + EXPECT_EQ(Lexer->getTokenKind(), expected); } } }; -TEST(LinkerScriptLexerTest, CheckEntry) { +TEST_F(LinkerScriptLexerTest, CheckEntry) { llvm::StringRef testRef = " ENTRY"; setupCallToLinkScriptLexer(testRef); - llvm::SmallVector ExpectedTokens({ScriptToken::ENTRY}); + llvm::SmallVector ExpectedTokens({ScriptToken::LS_ENTRY}); lexAndCheckTokens(ExpectedTokens); } } // namespace elf From 1fde4178c40bf0c3ee2d0cb54a91a64a6a0f7eb2 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 15 Jul 2024 03:20:35 +0000 Subject: [PATCH 14/29] [lld][ELF] added more unittest cases for linker script lexer --- lld/ELF/LinkerScriptLexer.cpp | 6 ++++ lld/ELF/LinkerScriptLexer.h | 2 +- lld/ELF/ScriptTokenizer.h | 2 ++ lld/unittests/ELF/LinkerScriptLexerTest.cpp | 31 +++++++++++++++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index d7b59a05ff1b6..f90096db15deb 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -109,6 +109,12 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() { return advanceTokenInfo(ScriptToken::Semicolon); case ',': return advanceTokenInfo(ScriptToken::Comma); + case '_': + return advanceTokenInfo(ScriptToken::Underscore); + case '.': + return advanceTokenInfo(ScriptToken::Dot); + case ':': + return advanceTokenInfo(ScriptToken::Colon); case '<': if (curStringRef.size() > 2 && curStringRef[1] == '<' && curStringRef[2] == '=') { diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index 
6dd7fd680c559..a4aed5978161f 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -46,7 +46,7 @@ class LinkerScriptLexer { // TODO: rewrite next(), peek(), and peek2() since TokenInfo change void advanceLexer(); - const ScriptToken getTokenKind() const { return curToken.kind; }; + ScriptToken getTokenKind() const { return curToken.kind; }; llvm::StringRef getTokenStringRef() const { return curToken.val; }; private: diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index a960106a35b18..c5e44243d1884 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -142,6 +142,8 @@ enum class ScriptToken { Xor, // ^ Or, // | PercentSign, // % + Underscore, // _ + Dot, // . Quote, // Quoted token. Note that double-quote characters are parts of a token // because, in a glob match context, only unquoted tokens are interpreted as // glob patterns. Double-quoted tokens are literal patterns in that context. diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index b042920691f56..e2646995a3c42 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -12,6 +12,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MemoryBufferRef.h" +#include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" @@ -30,9 +31,15 @@ class LinkerScriptLexerTest : public testing::Test { } void lexAndCheckTokens(llvm::SmallVector ExpectedTokens) { + bool outputinfo = true; for (const auto &expected : ExpectedTokens) { Lexer->advanceLexer(); EXPECT_EQ(Lexer->getTokenKind(), expected); + if (outputinfo) { + llvm::errs() << Lexer->getTokenStringRef() << " " + << static_cast(Lexer->getTokenKind()) << " " + << static_cast(expected) << "\n"; + } } } }; @@ -43,5 +50,29 @@ TEST_F(LinkerScriptLexerTest, CheckEntry) { llvm::SmallVector ExpectedTokens({ScriptToken::LS_ENTRY}); 
lexAndCheckTokens(ExpectedTokens); } + +TEST_F(LinkerScriptLexerTest, CheckEntryLabel) { + llvm::StringRef testRef = "ENTRY(_label)"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_ENTRY, ScriptToken::BracektBegin, + ScriptToken::Underscore, ScriptToken::Identify, + ScriptToken::BracektEnd}); + lexAndCheckTokens(ExpectedTokens); +} + +TEST_F(LinkerScriptLexerTest, CheckSECTIONSandALIGN) { + llvm::StringRef testRef = "SECTIONS { \ + .super_aligned : ALIGN(16) { /* ... */ }}"; + + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, ScriptToken::Dot, + ScriptToken::Identify, ScriptToken::Colon, ScriptToken::LS_ALIGN, + ScriptToken::BracektBegin, ScriptToken::Decimal, ScriptToken::BracektEnd, + ScriptToken::CurlyBegin, ScriptToken::CurlyEnd, ScriptToken::CurlyEnd}); + lexAndCheckTokens(ExpectedTokens); +} + } // namespace elf } // namespace lld From 05c781c913fcb18ad405695fc3c0d56c7bf5bae2 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 15 Jul 2024 17:29:39 +0000 Subject: [PATCH 15/29] [lld][ELF] added * and = cases in new lexer --- lld/ELF/LinkerScriptLexer.cpp | 4 +++ lld/unittests/ELF/LinkerScriptLexerTest.cpp | 30 +++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index f90096db15deb..f3ae18207d411 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -115,6 +115,10 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() { return advanceTokenInfo(ScriptToken::Dot); case ':': return advanceTokenInfo(ScriptToken::Colon); + case '*': + return advanceTokenInfo(ScriptToken::Asterisk); + case '=': + return advanceTokenInfo(ScriptToken::Assign); case '<': if (curStringRef.size() > 2 && curStringRef[1] == '<' && curStringRef[2] == '=') { diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp 
b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index e2646995a3c42..cd6c40f7dacf7 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -74,5 +74,35 @@ TEST_F(LinkerScriptLexerTest, CheckSECTIONSandALIGN) { lexAndCheckTokens(ExpectedTokens); } +TEST_F(LinkerScriptLexerTest, CheckHex) { + llvm::StringRef testRef = + "SECTIONS{ \n . = 0x10000;\n .text : { *(.text) }\n \ + . = 0x8000000;\n .data : { *(.data) }\n .bss : { *(.bss) }}"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Dot, ScriptToken::Assign, + ScriptToken::Hexdecimal, ScriptToken::Semicolon, + ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::Colon, ScriptToken::CurlyBegin, + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::Dot, ScriptToken::Assign, + ScriptToken::Hexdecimal, ScriptToken::Semicolon, + ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::Colon, ScriptToken::CurlyBegin, + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::Colon, ScriptToken::CurlyBegin, + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::CurlyEnd}); + lexAndCheckTokens(ExpectedTokens); +} + } // namespace elf } // namespace lld From 200afda6b6f22f073976db0666ecdb6468862a03 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 15 Jul 2024 18:19:07 +0000 Subject: [PATCH 16/29] [lld][ELF] change leading undercase case as identifier --- lld/ELF/LinkerScriptLexer.cpp | 2 +- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git 
a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index f3ae18207d411..01554ec74b8eb 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -110,7 +110,7 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() { case ',': return advanceTokenInfo(ScriptToken::Comma); case '_': - return advanceTokenInfo(ScriptToken::Underscore); + return getCommandOrIdentify(); case '.': return advanceTokenInfo(ScriptToken::Dot); case ':': diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index cd6c40f7dacf7..d13465510c98b 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -55,8 +55,7 @@ TEST_F(LinkerScriptLexerTest, CheckEntryLabel) { llvm::StringRef testRef = "ENTRY(_label)"; setupCallToLinkScriptLexer(testRef); llvm::SmallVector ExpectedTokens( - {ScriptToken::LS_ENTRY, ScriptToken::BracektBegin, - ScriptToken::Underscore, ScriptToken::Identify, + {ScriptToken::LS_ENTRY, ScriptToken::BracektBegin, ScriptToken::Identify, ScriptToken::BracektEnd}); lexAndCheckTokens(ExpectedTokens); } From 7077d18ecb5f7bdc307d060f31822b4190974277 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 15 Jul 2024 18:27:41 +0000 Subject: [PATCH 17/29] [lld][ELF] NFC update identify to identifier --- lld/ELF/LinkerScriptLexer.cpp | 10 +++++----- lld/ELF/LinkerScriptLexer.h | 2 +- lld/ELF/ScriptTokenizer.h | 2 +- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 18 +++++++++--------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index 01554ec74b8eb..50599f459f6ad 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -57,7 +57,7 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getTokenInfo() { if (std::isdigit(c)) return getDigits(); if (std::isalpha(c)) - return getCommandOrIdentify(); + return getCommandOrIdentifier(); return 
getSymbolToken(); } @@ -110,7 +110,7 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() { case ',': return advanceTokenInfo(ScriptToken::Comma); case '_': - return getCommandOrIdentify(); + return getCommandOrIdentifier(); case '.': return advanceTokenInfo(ScriptToken::Dot); case ':': @@ -209,7 +209,7 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getDigits() { }; } -LinkerScriptLexer::TokenInfo LinkerScriptLexer::getCommandOrIdentify() { +LinkerScriptLexer::TokenInfo LinkerScriptLexer::getCommandOrIdentifier() { // Unquoted token. This is more relaxed than tokens in C-like language, // so that you can write "file-name.cpp" as one bare token, for example. size_t pos = curStringRef.find_first_not_of( @@ -221,7 +221,7 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getCommandOrIdentify() { StringRef ops = "!~*/+-<>?^:="; // List of operators size_t e = curStringRef.find_first_of(ops); if (e != StringRef::npos && e != 0) { - return advanceTokenInfo(ScriptToken::Identify, e); + return advanceTokenInfo(ScriptToken::Identifier, e); } } @@ -302,6 +302,6 @@ LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const { } else if (keyword == "extern") { return ScriptToken::LS_Extern; } else { - return ScriptToken::Identify; + return ScriptToken::Identifier; } } diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h index a4aed5978161f..4e75f37a59ed1 100644 --- a/lld/ELF/LinkerScriptLexer.h +++ b/lld/ELF/LinkerScriptLexer.h @@ -67,7 +67,7 @@ class LinkerScriptLexer { TokenInfo getSymbolToken(); TokenInfo getQuotedToken(); TokenInfo getDigits(); - TokenInfo getCommandOrIdentify(); + TokenInfo getCommandOrIdentifier(); ScriptToken getTokenfromKeyword(llvm::StringRef keyword) const; }; } // namespace lld::elf diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index c5e44243d1884..8c7cd44dee93d 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -115,7 +115,7 @@ enum class ScriptToken { Error, 
Eof, - Identify, + Identifier, Hexdecimal, // 0x Hexdecimal_H, // end with H/h Decimal, diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index d13465510c98b..8c9d438437782 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -55,8 +55,8 @@ TEST_F(LinkerScriptLexerTest, CheckEntryLabel) { llvm::StringRef testRef = "ENTRY(_label)"; setupCallToLinkScriptLexer(testRef); llvm::SmallVector ExpectedTokens( - {ScriptToken::LS_ENTRY, ScriptToken::BracektBegin, ScriptToken::Identify, - ScriptToken::BracektEnd}); + {ScriptToken::LS_ENTRY, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd}); lexAndCheckTokens(ExpectedTokens); } @@ -67,7 +67,7 @@ TEST_F(LinkerScriptLexerTest, CheckSECTIONSandALIGN) { setupCallToLinkScriptLexer(testRef); llvm::SmallVector ExpectedTokens( {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, ScriptToken::Dot, - ScriptToken::Identify, ScriptToken::Colon, ScriptToken::LS_ALIGN, + ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::LS_ALIGN, ScriptToken::BracektBegin, ScriptToken::Decimal, ScriptToken::BracektEnd, ScriptToken::CurlyBegin, ScriptToken::CurlyEnd, ScriptToken::CurlyEnd}); lexAndCheckTokens(ExpectedTokens); @@ -82,22 +82,22 @@ TEST_F(LinkerScriptLexerTest, CheckHex) { {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, ScriptToken::Dot, ScriptToken::Assign, ScriptToken::Hexdecimal, ScriptToken::Semicolon, - ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::Dot, ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::CurlyBegin, ScriptToken::Asterisk, ScriptToken::BracektBegin, - ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::Dot, ScriptToken::Identifier, ScriptToken::BracektEnd, ScriptToken::CurlyEnd, ScriptToken::Dot, ScriptToken::Assign, ScriptToken::Hexdecimal, ScriptToken::Semicolon, - ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::Dot, ScriptToken::Identifier, 
ScriptToken::Colon, ScriptToken::CurlyBegin, ScriptToken::Asterisk, ScriptToken::BracektBegin, - ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::Dot, ScriptToken::Identifier, ScriptToken::BracektEnd, ScriptToken::CurlyEnd, - ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::Dot, ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::CurlyBegin, ScriptToken::Asterisk, ScriptToken::BracektBegin, - ScriptToken::Dot, ScriptToken::Identify, + ScriptToken::Dot, ScriptToken::Identifier, ScriptToken::BracektEnd, ScriptToken::CurlyEnd, ScriptToken::CurlyEnd}); lexAndCheckTokens(ExpectedTokens); From ef9b874859f4ed34746c02288d9823d4a1a2a20f Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 15 Jul 2024 21:05:10 +0000 Subject: [PATCH 18/29] [lld][ELF] added test case for PROVIDE --- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index 8c9d438437782..f898b768a7954 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -103,5 +103,26 @@ TEST_F(LinkerScriptLexerTest, CheckHex) { lexAndCheckTokens(ExpectedTokens); } +TEST_F(LinkerScriptLexerTest, CheckPROVIDECommand) { + llvm::StringRef testRef = "SECTIONS\n{.text :\n{\n*(.text)\n\t_etext = .;\n \ + \t PROVIDE(etext = .);\n}\n}"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens({ + ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Dot, ScriptToken::Identifier, + ScriptToken::Colon, ScriptToken::CurlyBegin, + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Dot, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::Identifier, + ScriptToken::Assign, ScriptToken::Dot, + ScriptToken::Semicolon, ScriptToken::LS_PROVIDE, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::Assign, ScriptToken::Dot, + 
ScriptToken::BracektEnd, ScriptToken::Semicolon, + ScriptToken::CurlyEnd, ScriptToken::CurlyEnd, + }); + + lexAndCheckTokens(ExpectedTokens); +} } // namespace elf } // namespace lld From 5960675c32927ddf2cfba5a0fd330df29fa3b8a5 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 15 Jul 2024 22:24:59 +0000 Subject: [PATCH 19/29] [lld][ELF] added test case and change "." tokens If we have single '.' we return ScriptToken::Dot, but if we have characters and symbols following it we generate token based on first not of "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" "0123456789_.$/\\~=+[]*?-!^:" --- lld/ELF/LinkerScriptLexer.cpp | 6 +- lld/ELF/ScriptTokenizer.h | 1 + lld/unittests/ELF/LinkerScriptLexerTest.cpp | 61 ++++++++++++--------- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index 50599f459f6ad..1cbf083c521e7 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -112,7 +112,7 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() { case '_': return getCommandOrIdentifier(); case '.': - return advanceTokenInfo(ScriptToken::Dot); + return getCommandOrIdentifier(); case ':': return advanceTokenInfo(ScriptToken::Colon); case '*': @@ -225,6 +225,9 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getCommandOrIdentifier() { } } + if (pos == 1 && curStringRef[0] == '.') + return advanceTokenInfo(ScriptToken::Dot); + return advanceTokenInfo(getTokenfromKeyword(curStringRef.substr(0, pos)), pos); } @@ -255,6 +258,7 @@ LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const { KEYWORD(HIDDEN); KEYWORD(PROVIDE_HIDDEN); KEYWORD(SECTIONS); + KEYWORD(BEFORE); KEYWORD(EXCLUDE_FILE); KEYWORD(KEEP); KEYWORD(INPUT_SECTION_FLAGS); diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index 8c7cd44dee93d..45e4316702a26 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -48,6 +48,7 @@ enum class 
ScriptToken { LS_PROVIDE_HIDDEN, LS_SECTIONS, + LS_BEFORE, // Input Section LS_EXCLUDE_FILE, diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index f898b768a7954..635dd883936a5 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -66,7 +66,7 @@ TEST_F(LinkerScriptLexerTest, CheckSECTIONSandALIGN) { setupCallToLinkScriptLexer(testRef); llvm::SmallVector ExpectedTokens( - {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, ScriptToken::Dot, + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::LS_ALIGN, ScriptToken::BracektBegin, ScriptToken::Decimal, ScriptToken::BracektEnd, ScriptToken::CurlyBegin, ScriptToken::CurlyEnd, ScriptToken::CurlyEnd}); @@ -79,26 +79,23 @@ TEST_F(LinkerScriptLexerTest, CheckHex) { . = 0x8000000;\n .data : { *(.data) }\n .bss : { *(.bss) }}"; setupCallToLinkScriptLexer(testRef); llvm::SmallVector ExpectedTokens( - {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, - ScriptToken::Dot, ScriptToken::Assign, - ScriptToken::Hexdecimal, ScriptToken::Semicolon, - ScriptToken::Dot, ScriptToken::Identifier, - ScriptToken::Colon, ScriptToken::CurlyBegin, - ScriptToken::Asterisk, ScriptToken::BracektBegin, - ScriptToken::Dot, ScriptToken::Identifier, - ScriptToken::BracektEnd, ScriptToken::CurlyEnd, - ScriptToken::Dot, ScriptToken::Assign, - ScriptToken::Hexdecimal, ScriptToken::Semicolon, - ScriptToken::Dot, ScriptToken::Identifier, - ScriptToken::Colon, ScriptToken::CurlyBegin, - ScriptToken::Asterisk, ScriptToken::BracektBegin, - ScriptToken::Dot, ScriptToken::Identifier, - ScriptToken::BracektEnd, ScriptToken::CurlyEnd, - ScriptToken::Dot, ScriptToken::Identifier, - ScriptToken::Colon, ScriptToken::CurlyBegin, - ScriptToken::Asterisk, ScriptToken::BracektBegin, - ScriptToken::Dot, ScriptToken::Identifier, - ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + 
{ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Dot, ScriptToken::Assign, + ScriptToken::Hexdecimal, ScriptToken::Semicolon, + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, ScriptToken::Asterisk, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::Dot, ScriptToken::Assign, + ScriptToken::Hexdecimal, ScriptToken::Semicolon, + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, ScriptToken::Asterisk, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, ScriptToken::Asterisk, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::CurlyEnd, ScriptToken::CurlyEnd}); lexAndCheckTokens(ExpectedTokens); } @@ -109,10 +106,9 @@ TEST_F(LinkerScriptLexerTest, CheckPROVIDECommand) { setupCallToLinkScriptLexer(testRef); llvm::SmallVector ExpectedTokens({ ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, - ScriptToken::Dot, ScriptToken::Identifier, - ScriptToken::Colon, ScriptToken::CurlyBegin, - ScriptToken::Asterisk, ScriptToken::BracektBegin, - ScriptToken::Dot, ScriptToken::Identifier, + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, ScriptToken::Asterisk, + ScriptToken::BracektBegin, ScriptToken::Identifier, ScriptToken::BracektEnd, ScriptToken::Identifier, ScriptToken::Assign, ScriptToken::Dot, ScriptToken::Semicolon, ScriptToken::LS_PROVIDE, @@ -124,5 +120,20 @@ TEST_F(LinkerScriptLexerTest, CheckPROVIDECommand) { lexAndCheckTokens(ExpectedTokens); } + +TEST_F(LinkerScriptLexerTest, CheckINSERTandBEFORE) { + llvm::StringRef testRef = + "SECTIONS { .foo.data : { *(.foo.data) } } INSERT BEFORE .data;"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Identifier, 
ScriptToken::Colon, ScriptToken::CurlyBegin, + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::CurlyEnd, ScriptToken::LS_INSERT, ScriptToken::LS_BEFORE, + ScriptToken::Identifier, ScriptToken::Semicolon}); + + lexAndCheckTokens(ExpectedTokens); +} } // namespace elf } // namespace lld From 882c59f53ba7bab196bf8d12728546353bee4059 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Mon, 15 Jul 2024 23:03:00 +0000 Subject: [PATCH 20/29] [lld][ELF] fixed pos error in getDigits --- lld/ELF/LinkerScriptLexer.cpp | 2 +- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index 1cbf083c521e7..61a63fc1cdbbf 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -193,7 +193,7 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getDigits() { if (curStringRef.starts_with_insensitive("0x")) { return advanceTokenInfo(ScriptToken::Hexdecimal, pos); } - const char c = curStringRef[pos]; + const char c = curStringRef[pos - 1]; switch (c) { case 'H': case 'h': diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index 635dd883936a5..e0d5b4c34daa7 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -135,5 +135,19 @@ TEST_F(LinkerScriptLexerTest, CheckINSERTandBEFORE) { lexAndCheckTokens(ExpectedTokens); } + +TEST_F(LinkerScriptLexerTest, CheckALIGNandDecimal) { + llvm::StringRef testRef = "SECTIONS {.foo : ALIGN(2M) { *(.foo) }}"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::LS_ALIGN, + ScriptToken::BracektBegin, ScriptToken::Decimal_M, + ScriptToken::BracektEnd, ScriptToken::CurlyBegin, 
ScriptToken::Asterisk, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::CurlyEnd, ScriptToken::CurlyEnd}); + + lexAndCheckTokens(ExpectedTokens); +} } // namespace elf } // namespace lld From b1a46ae5f6fc836cb306da306624e48c20263bff Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Tue, 16 Jul 2024 21:38:18 +0000 Subject: [PATCH 21/29] [lld][ELF] missing '-' and '+' case --- lld/ELF/LinkerScriptLexer.cpp | 10 +++ lld/ELF/ScriptTokenizer.h | 4 +- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 70 +++++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index 61a63fc1cdbbf..a76f98cc97e6f 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -119,6 +119,14 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() { return advanceTokenInfo(ScriptToken::Asterisk); case '=': return advanceTokenInfo(ScriptToken::Assign); + case '+': + if (curStringRef.size() > 1 && curStringRef[1] == '=') + return advanceTokenInfo(ScriptToken::PlusAssign, 2); + return advanceTokenInfo(ScriptToken::Plus); + case '-': + if (curStringRef.size() > 1 && curStringRef[1] == '=') + return advanceTokenInfo(ScriptToken::MinusAssign, 2); + return advanceTokenInfo(ScriptToken::Minus); case '<': if (curStringRef.size() > 2 && curStringRef[1] == '<' && curStringRef[2] == '=') { @@ -252,6 +260,7 @@ LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const { KEYWORD(TARGET); KEYWORD(OUTPUT_FORMAT); KEYWORD(ASSERT); + KEYWORD(CONSTANT); KEYWORD(EXTERN); KEYWORD(OUTPUT_ARCH); KEYWORD(PROVIDE); @@ -275,6 +284,7 @@ LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const { KEYWORD(ABSOLUTE); KEYWORD(ADDR); KEYWORD(ALIGN); + KEYWORD(ALIGNOF); KEYWORD(DATA_SEGMENT_ALIGN); KEYWORD(DATA_SEGMENT_END); KEYWORD(DEFINED); diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index 45e4316702a26..dc11d76e46bef 
100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -36,6 +36,7 @@ enum class ScriptToken { // Other linker script commands LS_ASSERT, + LS_CONSTANT, LS_EXTERN, // FORCE_COMMON_ALLOCATION // INHIBIT_COMMON_ALLOCATION @@ -73,6 +74,7 @@ enum class ScriptToken { LS_ABSOLUTE, LS_ADDR, LS_ALIGN, + LS_ALIGNOF, // BLOCK, // synonym for ALIGN for compatibility with older linker script LS_DATA_SEGMENT_ALIGN, LS_DATA_SEGMENT_END, @@ -152,7 +154,7 @@ enum class ScriptToken { // Assignmemnt Assign, // = PlusAssign, // += - MinussAssign, // -= + MinusAssign, // -= MulAssign, // *= DivAssign, // /= LeftShiftAssign, // <<= diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index e0d5b4c34daa7..9a6b1a3179786 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -149,5 +149,75 @@ TEST_F(LinkerScriptLexerTest, CheckALIGNandDecimal) { lexAndCheckTokens(ExpectedTokens); } + +TEST_F(LinkerScriptLexerTest, CheckAbsoluteExprTest) { + llvm::StringRef testRef = "SECTIONS { \ + .text : { \ + bar1 = ALIGNOF(.text); \ + bar2 = CONSTANT (MAXPAGESIZE); \ + bar3 = SIZEOF (.text); \ + bar4 = SIZEOF_HEADERS; \ + bar5 = 0x42; \ + bar6 = foo + 1; \ + *(.text) \ + } \ +}"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens({ScriptToken::LS_SECTIONS, + ScriptToken::CurlyBegin, + ScriptToken::Identifier, + ScriptToken::Colon, + ScriptToken::CurlyBegin, + ScriptToken::Identifier, + ScriptToken::Assign, + ScriptToken::LS_ALIGNOF, + ScriptToken::BracektBegin, + ScriptToken::Identifier, + ScriptToken::BracektEnd, + ScriptToken::Semicolon, + + ScriptToken::Identifier, + ScriptToken::Assign, + ScriptToken::LS_CONSTANT, + ScriptToken::BracektBegin, + ScriptToken::LS_MAXPAGESIZE, + ScriptToken::BracektEnd, + ScriptToken::Semicolon, + + ScriptToken::Identifier, + ScriptToken::Assign, + ScriptToken::LS_SIZEOF, + ScriptToken::BracektBegin, + 
ScriptToken::Identifier, + ScriptToken::BracektEnd, + ScriptToken::Semicolon, + + ScriptToken::Identifier, + ScriptToken::Assign, + ScriptToken::LS_SIZEOF_HEADERS, + ScriptToken::Semicolon, + + ScriptToken::Identifier, + ScriptToken::Assign, + ScriptToken::Hexdecimal, + ScriptToken::Semicolon, + + ScriptToken::Identifier, + ScriptToken::Assign, + ScriptToken::Identifier, + ScriptToken::Plus, + ScriptToken::Decimal, + ScriptToken::Semicolon, + + ScriptToken::Asterisk, + ScriptToken::BracektBegin, + ScriptToken::Identifier, + ScriptToken::BracektEnd, + ScriptToken::CurlyEnd, + ScriptToken::CurlyEnd}); + + lexAndCheckTokens(ExpectedTokens); +} + } // namespace elf } // namespace lld From bcccd0ab62bad664db9c0296ea5869a63524d58f Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Tue, 16 Jul 2024 22:17:54 +0000 Subject: [PATCH 22/29] [lld][ELF] Added test for addr-zero --- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index 9a6b1a3179786..639649d6b3263 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -219,5 +219,23 @@ TEST_F(LinkerScriptLexerTest, CheckAbsoluteExprTest) { lexAndCheckTokens(ExpectedTokens); } +TEST_F(LinkerScriptLexerTest, checkAddrZeroTest) { + llvm::StringRef testRef = "SECTIONS {\ + foo = ADDR(.text) - ABSOLUTE(ADDR(.text));\ +};"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Identifier, ScriptToken::Assign, ScriptToken::LS_ADDR, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::Minus, ScriptToken::LS_ABSOLUTE, + ScriptToken::BracektBegin, ScriptToken::LS_ADDR, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::BracektEnd, ScriptToken::Semicolon, + 
ScriptToken::CurlyEnd}); + + lexAndCheckTokens(ExpectedTokens); +} + } // namespace elf } // namespace lld From 75fa6a55a50d74c58d4d3de07f873b8717d481e9 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Tue, 16 Jul 2024 22:57:58 +0000 Subject: [PATCH 23/29] [lld][ELF] Add unit test for addr.test --- lld/ELF/LinkerScriptLexer.cpp | 4 +- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 136 ++++++++++++-------- 2 files changed, 87 insertions(+), 53 deletions(-) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index a76f98cc97e6f..444dd217af34d 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -197,7 +197,9 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getQuotedToken() { } LinkerScriptLexer::TokenInfo LinkerScriptLexer::getDigits() { - size_t pos = curStringRef.find_first_not_of("0123456789XxHhKkMm"); + size_t pos = curStringRef.find_first_not_of( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789"); if (curStringRef.starts_with_insensitive("0x")) { return advanceTokenInfo(ScriptToken::Hexdecimal, pos); } diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index 639649d6b3263..4ae95637efa50 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -163,58 +163,33 @@ TEST_F(LinkerScriptLexerTest, CheckAbsoluteExprTest) { } \ }"; setupCallToLinkScriptLexer(testRef); - llvm::SmallVector ExpectedTokens({ScriptToken::LS_SECTIONS, - ScriptToken::CurlyBegin, - ScriptToken::Identifier, - ScriptToken::Colon, - ScriptToken::CurlyBegin, - ScriptToken::Identifier, - ScriptToken::Assign, - ScriptToken::LS_ALIGNOF, - ScriptToken::BracektBegin, - ScriptToken::Identifier, - ScriptToken::BracektEnd, - ScriptToken::Semicolon, - - ScriptToken::Identifier, - ScriptToken::Assign, - ScriptToken::LS_CONSTANT, - ScriptToken::BracektBegin, - ScriptToken::LS_MAXPAGESIZE, - ScriptToken::BracektEnd, - 
ScriptToken::Semicolon, - - ScriptToken::Identifier, - ScriptToken::Assign, - ScriptToken::LS_SIZEOF, - ScriptToken::BracektBegin, - ScriptToken::Identifier, - ScriptToken::BracektEnd, - ScriptToken::Semicolon, - - ScriptToken::Identifier, - ScriptToken::Assign, - ScriptToken::LS_SIZEOF_HEADERS, - ScriptToken::Semicolon, - - ScriptToken::Identifier, - ScriptToken::Assign, - ScriptToken::Hexdecimal, - ScriptToken::Semicolon, - - ScriptToken::Identifier, - ScriptToken::Assign, - ScriptToken::Identifier, - ScriptToken::Plus, - ScriptToken::Decimal, - ScriptToken::Semicolon, - - ScriptToken::Asterisk, - ScriptToken::BracektBegin, - ScriptToken::Identifier, - ScriptToken::BracektEnd, - ScriptToken::CurlyEnd, - ScriptToken::CurlyEnd}); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, ScriptToken::Identifier, + ScriptToken::Assign, ScriptToken::LS_ALIGNOF, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::Semicolon, + + ScriptToken::Identifier, ScriptToken::Assign, + ScriptToken::LS_CONSTANT, ScriptToken::BracektBegin, + ScriptToken::LS_MAXPAGESIZE, ScriptToken::BracektEnd, + ScriptToken::Semicolon, + + ScriptToken::Identifier, ScriptToken::Assign, + ScriptToken::LS_SIZEOF, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::Semicolon, ScriptToken::Identifier, + ScriptToken::Assign, ScriptToken::LS_SIZEOF_HEADERS, + + ScriptToken::Identifier, ScriptToken::Assign, + ScriptToken::Hexdecimal, ScriptToken::Semicolon, + ScriptToken::Identifier, ScriptToken::Assign, + ScriptToken::Identifier, ScriptToken::Plus, + ScriptToken::Decimal, ScriptToken::Semicolon, + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::CurlyEnd, ScriptToken::CurlyEnd}); lexAndCheckTokens(ExpectedTokens); } @@ -237,5 +212,62 @@ 
TEST_F(LinkerScriptLexerTest, checkAddrZeroTest) { lexAndCheckTokens(ExpectedTokens); } +TEST_F(LinkerScriptLexerTest, checkAddrTest) { + llvm::StringRef testRef = "SECTIONS {\ + . = 0x1000; \ + .text : { \ + *(.text*) \ + x1 = ADDR(.text) + 1; x2 = 1 + ADDR(.text);\ + x3 = ADDR(.text) & 0xffff;\ + }\ + .foo-1 : { *(.foo-1) }\ + .foo-2 ADDR(.foo-1) + 0x100 : { *(.foo-2) }\ + .foo-3 : { *(.foo-3) }\ +}"; + + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Dot, ScriptToken::Assign, + ScriptToken::Hexdecimal, ScriptToken::Semicolon, + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, ScriptToken::Asterisk, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::Identifier, + ScriptToken::Assign, ScriptToken::LS_ADDR, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::Plus, + ScriptToken::Decimal, ScriptToken::Semicolon, + + ScriptToken::Identifier, ScriptToken::Assign, + ScriptToken::Decimal, ScriptToken::Plus, + ScriptToken::LS_ADDR, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::Semicolon, ScriptToken::Identifier, + ScriptToken::Assign, ScriptToken::LS_ADDR, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::Bitwise, + ScriptToken::Hexdecimal, ScriptToken::Semicolon, + ScriptToken::CurlyEnd, + + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, ScriptToken::Asterisk, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::Identifier, ScriptToken::LS_ADDR, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::Plus, + ScriptToken::Hexdecimal, ScriptToken::Colon, + ScriptToken::CurlyBegin, ScriptToken::Asterisk, + ScriptToken::BracektBegin, ScriptToken::Identifier, 
+ ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, ScriptToken::Asterisk, + ScriptToken::BracektBegin, ScriptToken::Identifier, + ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::CurlyEnd}); + + lexAndCheckTokens(ExpectedTokens); +} } // namespace elf } // namespace lld From eecdc0bd1b893bcbe6a1ef0c23dfe37b217886dd Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Tue, 16 Jul 2024 23:15:03 +0000 Subject: [PATCH 24/29] [lld][ELF] Add unittest for align-empty.test --- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 39 +++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index 4ae95637efa50..50433617472dd 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -181,6 +181,7 @@ TEST_F(LinkerScriptLexerTest, CheckAbsoluteExprTest) { ScriptToken::Identifier, ScriptToken::BracektEnd, ScriptToken::Semicolon, ScriptToken::Identifier, ScriptToken::Assign, ScriptToken::LS_SIZEOF_HEADERS, + ScriptToken::Semicolon, ScriptToken::Identifier, ScriptToken::Assign, ScriptToken::Hexdecimal, ScriptToken::Semicolon, @@ -269,5 +270,43 @@ TEST_F(LinkerScriptLexerTest, checkAddrTest) { lexAndCheckTokens(ExpectedTokens); } + +TEST_F(LinkerScriptLexerTest, checkAlignEmptyTest) { + llvm::StringRef testRef = "SECTIONS { \ + . = SIZEOF_HEADERS; \ + abc : {} \ + . 
= ALIGN(0x1000); \ + foo : { *(foo) } \ +}"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens({ScriptToken::LS_SECTIONS, + ScriptToken::CurlyBegin, + ScriptToken::Dot, + ScriptToken::Assign, + ScriptToken::LS_SIZEOF_HEADERS, + ScriptToken::Semicolon, + ScriptToken::Identifier, + ScriptToken::Colon, + ScriptToken::CurlyBegin, + ScriptToken::CurlyEnd, + ScriptToken::Dot, + ScriptToken::Assign, + ScriptToken::LS_ALIGN, + ScriptToken::BracektBegin, + ScriptToken::Hexdecimal, + ScriptToken::BracektEnd, + ScriptToken::Semicolon, + ScriptToken::Identifier, + ScriptToken::Colon, + ScriptToken::CurlyBegin, + ScriptToken::Asterisk, + ScriptToken::BracektBegin, + ScriptToken::Identifier, + ScriptToken::BracektEnd, + ScriptToken::CurlyEnd, + ScriptToken::CurlyEnd}); + + lexAndCheckTokens(ExpectedTokens); +} } // namespace elf } // namespace lld From 21b84b820766ae506a91289cb50270869801896e Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Tue, 16 Jul 2024 23:29:35 +0000 Subject: [PATCH 25/29] [lld][ELF] Add test case for Memory --- lld/ELF/LinkerScriptLexer.cpp | 2 + lld/ELF/ScriptTokenizer.h | 2 + lld/unittests/ELF/LinkerScriptLexerTest.cpp | 43 +++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index 444dd217af34d..c7c0a6cc65e65 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -253,6 +253,7 @@ LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const { KEYWORD(ENTRY); KEYWORD(INCLUDE); KEYWORD(GROUP); + KEYWORD(MEMORY); KEYWORD(OUTPUT); KEYWORD(SEARCH_DIR); KEYWORD(STARTUP); @@ -290,6 +291,7 @@ LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const { KEYWORD(DATA_SEGMENT_ALIGN); KEYWORD(DATA_SEGMENT_END); KEYWORD(DEFINED); + KEYWORD(LENGTH); KEYWORD(LOADADDR); KEYWORD(LOG2CEIL); KEYWORD(MAX); diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index dc11d76e46bef..718a9bf3c579f 100644 --- 
a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -23,6 +23,7 @@ enum class ScriptToken { LS_INCLUDE, LS_INPUT, LS_GROUP, + LS_MEMORY, LS_OUTPUT, LS_SEARCH_DIR, LS_STARTUP, @@ -79,6 +80,7 @@ enum class ScriptToken { LS_DATA_SEGMENT_ALIGN, LS_DATA_SEGMENT_END, LS_DEFINED, + LS_LENGTH, LS_LOADADDR, LS_LOG2CEIL, diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index 50433617472dd..da55b2afeb4ba 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -308,5 +308,48 @@ TEST_F(LinkerScriptLexerTest, checkAlignEmptyTest) { lexAndCheckTokens(ExpectedTokens); } + +TEST_F(LinkerScriptLexerTest, checkMemoryTest) { + llvm::StringRef testRef = "MEMORY { \ + AX (ax) : ORIGIN = 0x2000, LENGTH = 0x100 \ + AW (aw) : ORIGIN = 0x3000, LENGTH = 0x100 \ + FLASH (ax) : ORIGIN = 0x6000, LENGTH = 0x100 \ + RAM (aw) : ORIGIN = 0x7000, LENGTH = 0x100}"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_MEMORY, ScriptToken::CurlyBegin, + ScriptToken::Identifier, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::Colon, ScriptToken::LS_ORIGIN, + ScriptToken::Assign, ScriptToken::Hexdecimal, + ScriptToken::Comma, ScriptToken::LS_LENGTH, + ScriptToken::Assign, ScriptToken::Hexdecimal, + + ScriptToken::Identifier, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::Colon, ScriptToken::LS_ORIGIN, + ScriptToken::Assign, ScriptToken::Hexdecimal, + ScriptToken::Comma, ScriptToken::LS_LENGTH, + ScriptToken::Assign, ScriptToken::Hexdecimal, + + ScriptToken::Identifier, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::Colon, ScriptToken::LS_ORIGIN, + ScriptToken::Assign, ScriptToken::Hexdecimal, + ScriptToken::Comma, ScriptToken::LS_LENGTH, + ScriptToken::Assign, ScriptToken::Hexdecimal, + + 
ScriptToken::Identifier, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::Colon, ScriptToken::LS_ORIGIN, + ScriptToken::Assign, ScriptToken::Hexdecimal, + ScriptToken::Comma, ScriptToken::LS_LENGTH, + ScriptToken::Assign, ScriptToken::Hexdecimal, + + ScriptToken::CurlyEnd}); + + lexAndCheckTokens(ExpectedTokens); +} + } // namespace elf } // namespace lld From 42af51064d4100a087ca572937f175a3d9d36b41 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Tue, 16 Jul 2024 23:40:35 +0000 Subject: [PATCH 26/29] [lld][ELF] add unittest case from bss-fill.test --- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index da55b2afeb4ba..b02c70b693f14 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -309,6 +309,26 @@ TEST_F(LinkerScriptLexerTest, checkAlignEmptyTest) { lexAndCheckTokens(ExpectedTokens); } +TEST_F(LinkerScriptLexerTest, checkBSSFillTest) { + llvm::StringRef testRef = "SECTIONS {\ + .bss : {\ + . 
+= 0x10000; \ + *(.bss)\ + } =0xFF};"; + + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::CurlyBegin, + ScriptToken::Dot, ScriptToken::PlusAssign, ScriptToken::Hexdecimal, + ScriptToken::Semicolon, ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, ScriptToken::CurlyEnd, + ScriptToken::Assign, ScriptToken::Hexdecimal, ScriptToken::CurlyEnd, + ScriptToken::Semicolon}); + + lexAndCheckTokens(ExpectedTokens); +} + TEST_F(LinkerScriptLexerTest, checkMemoryTest) { llvm::StringRef testRef = "MEMORY { \ AX (ax) : ORIGIN = 0x2000, LENGTH = 0x100 \ From e25a2b40588b005c2d4597fd513aefa652fb6abb Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Wed, 17 Jul 2024 03:07:47 +0000 Subject: [PATCH 27/29] [lld][ELF] Add unit test case for CONSTRUCTORS --- lld/unittests/ELF/LinkerScriptLexerTest.cpp | 23 ++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index b02c70b693f14..024ddbfe0aacf 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -273,11 +273,10 @@ TEST_F(LinkerScriptLexerTest, checkAddrTest) { TEST_F(LinkerScriptLexerTest, checkAlignEmptyTest) { llvm::StringRef testRef = "SECTIONS { \ - . = SIZEOF_HEADERS; \ - abc : {} \ - . = ALIGN(0x1000); \ - foo : { *(foo) } \ -}"; + . = SIZEOF_HEADERS; \ + abc : {} \ + . 
= ALIGN(0x1000); \ + foo : { *(foo) }}"; setupCallToLinkScriptLexer(testRef); llvm::SmallVector ExpectedTokens({ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, @@ -371,5 +370,19 @@ TEST_F(LinkerScriptLexerTest, checkMemoryTest) { lexAndCheckTokens(ExpectedTokens); } +TEST_F(LinkerScriptLexerTest, checkCONSTRUCTORS) { + llvm::StringRef testRef = "SECTIONS {foo : {*(.foo) CONSTRUCTORS}}"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::CurlyBegin, + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_CONSTRUCTORS, ScriptToken::CurlyEnd, + ScriptToken::CurlyEnd}); + + lexAndCheckTokens(ExpectedTokens); +} + } // namespace elf } // namespace lld From b4ec7d74750223ae833f6e41640028a9c2c6c98f Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Wed, 17 Jul 2024 03:50:29 +0000 Subject: [PATCH 28/29] [lld][ELF] Add unit test based on data-commands2.test --- lld/ELF/LinkerScriptLexer.cpp | 4 + lld/ELF/ScriptTokenizer.h | 5 ++ lld/unittests/ELF/LinkerScriptLexerTest.cpp | 96 +++++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp index c7c0a6cc65e65..f83d609e2e734 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -310,6 +310,10 @@ LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const { KEYWORD(CONSTRUCTORS); KEYWORD(MAXPAGESIZE); KEYWORD(COMMONPAGESIZE); + KEYWORD(BYTE); + KEYWORD(SHORT); + KEYWORD(LONG); + KEYWORD(QUAD); #undef KEYWORD diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h index 718a9bf3c579f..51693378aade9 100644 --- a/lld/ELF/ScriptTokenizer.h +++ b/lld/ELF/ScriptTokenizer.h @@ -111,6 +111,11 @@ enum class ScriptToken { LS_MAXPAGESIZE, LS_COMMONPAGESIZE, + LS_BYTE, + LS_SHORT, + LS_LONG, + LS_QUAD, + LS_Local, // 
local LS_Global, // global LS_Extern, // extern diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index 024ddbfe0aacf..04eb6f15cc498 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -384,5 +384,101 @@ TEST_F(LinkerScriptLexerTest, checkCONSTRUCTORS) { lexAndCheckTokens(ExpectedTokens); } +TEST_F(LinkerScriptLexerTest, checkDataCommands) { + // this test case comes from lld/test/ELF/linkerscript/data-commands2.test + llvm::StringRef testRef = "MEMORY {\ + rom (rwx) : ORIGIN = 0x00, LENGTH = 2K\ + } \ + SECTIONS {\ + .foo : {\ + *(.foo.1) \ + BYTE(0x11)\ + *(.foo.2)\ + SHORT(0x1122)\ + *(.foo.3)\ + LONG(0x11223344)\ + *(.foo.4)\ + QUAD(0x1122334455667788)\ + } > rom \ + .bar : { \ + *(.bar.1) \ + BYTE(a + 1) \ + *(.bar.2) \ + SHORT(b) \ + *(.bar.3) \ + LONG(c + 2) \ + *(.bar.4) \ + QUAD(d) \ + } > rom}"; + + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_MEMORY, ScriptToken::CurlyBegin, + ScriptToken::Identifier, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::Colon, ScriptToken::LS_ORIGIN, + ScriptToken::Assign, ScriptToken::Hexdecimal, + ScriptToken::Comma, ScriptToken::LS_LENGTH, + ScriptToken::Assign, ScriptToken::Decimal_K, + ScriptToken::CurlyEnd, + + ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, + + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_BYTE, ScriptToken::BracektBegin, + ScriptToken::Hexdecimal, ScriptToken::BracektEnd, + + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_SHORT, ScriptToken::BracektBegin, + ScriptToken::Hexdecimal, ScriptToken::BracektEnd, + + ScriptToken::Asterisk, ScriptToken::BracektBegin, + 
ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_LONG, ScriptToken::BracektBegin, + ScriptToken::Hexdecimal, ScriptToken::BracektEnd, + + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_QUAD, ScriptToken::BracektBegin, + ScriptToken::Hexdecimal, ScriptToken::BracektEnd, + + ScriptToken::CurlyEnd, ScriptToken::Greater, + ScriptToken::Identifier, + + ScriptToken::Identifier, ScriptToken::Colon, + ScriptToken::CurlyBegin, + + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_BYTE, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::Plus, + ScriptToken::Decimal, ScriptToken::BracektEnd, + + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_SHORT, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_LONG, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::Plus, + ScriptToken::Decimal, ScriptToken::BracektEnd, + + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_QUAD, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + + ScriptToken::CurlyEnd, ScriptToken::Greater, + ScriptToken::Identifier, ScriptToken::CurlyEnd}); + + lexAndCheckTokens(ExpectedTokens); +} + } // namespace elf } // namespace lld From 1f9979d3f77a7d41e6ce52ce2e1136f149d831b4 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Wed, 17 Jul 2024 04:05:29 +0000 Subject: [PATCH 29/29] [lld][ELF] Add unittest for DEFINED --- lld/ELF/LinkerScriptLexer.cpp | 2 ++ lld/unittests/ELF/LinkerScriptLexerTest.cpp | 22 +++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/lld/ELF/LinkerScriptLexer.cpp 
b/lld/ELF/LinkerScriptLexer.cpp index f83d609e2e734..165d6044d7090 100644 --- a/lld/ELF/LinkerScriptLexer.cpp +++ b/lld/ELF/LinkerScriptLexer.cpp @@ -119,6 +119,8 @@ LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() { return advanceTokenInfo(ScriptToken::Asterisk); case '=': return advanceTokenInfo(ScriptToken::Assign); + case '?': + return advanceTokenInfo(ScriptToken::QuestionMark); case '+': if (curStringRef.size() > 1 && curStringRef[1] == '=') return advanceTokenInfo(ScriptToken::PlusAssign, 2); diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp index 04eb6f15cc498..77b2e3ac52d0f 100644 --- a/lld/unittests/ELF/LinkerScriptLexerTest.cpp +++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp @@ -480,5 +480,27 @@ TEST_F(LinkerScriptLexerTest, checkDataCommands) { lexAndCheckTokens(ExpectedTokens); } +TEST_F(LinkerScriptLexerTest, checkDefinedTest) { + llvm::StringRef testRef = "EXTERN(extern_defined) \nSECTIONS { . = \ + DEFINED(defined) ? 0x11000 : .;.foo : { *(.foo*) }"; + setupCallToLinkScriptLexer(testRef); + llvm::SmallVector ExpectedTokens( + {ScriptToken::LS_EXTERN, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin, + ScriptToken::Dot, ScriptToken::Assign, + ScriptToken::LS_DEFINED, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::QuestionMark, ScriptToken::Hexdecimal, + ScriptToken::Colon, ScriptToken::Dot, + ScriptToken::Semicolon, ScriptToken::Identifier, + ScriptToken::Colon, ScriptToken::CurlyBegin, + ScriptToken::Asterisk, ScriptToken::BracektBegin, + ScriptToken::Identifier, ScriptToken::BracektEnd, + ScriptToken::CurlyEnd}); + + lexAndCheckTokens(ExpectedTokens); +} + } // namespace elf } // namespace lld