diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 83d816ddb0601..18593c8e36940 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -47,6 +47,7 @@ add_lld_library(lldELF
   InputSection.cpp
   LTO.cpp
   LinkerScript.cpp
+  LinkerScriptLexer.cpp
   MapFile.cpp
   MarkLive.cpp
   OutputSections.cpp
diff --git a/lld/ELF/LinkerScriptLexer.cpp b/lld/ELF/LinkerScriptLexer.cpp
new file mode 100644
index 0000000000000..165d6044d7090
--- /dev/null
+++ b/lld/ELF/LinkerScriptLexer.cpp
@@ -0,0 +1,344 @@
+//===- LinkerScriptLexer.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LinkerScriptLexer, which splits a linker script buffer
+// into ScriptToken-tagged tokens, one token per advanceLexer() call.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LinkerScriptLexer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::elf;
+
+// LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM,
+//                                      llvm::SMDiagnostic &Err)
+//     : MB(MB), ErrorInfo(Err), SM(SM) {
+//   curStringRef = MB.getBuffer();
+// }
+
+// Starts lexing at the beginning of MB's buffer.
+LinkerScriptLexer::LinkerScriptLexer(MemoryBufferRef MB)
+    : MB(MB), curStringRef(MB.getBuffer()) {}
+
+/*
+bool LinkerScriptLexer::Error(SMLoc ErrorLoc, const Twine &Msg) const {
+  ErrorInfo = SM.GetMessage(ErrorLoc, llvm::SourceMgr::DK_Error, Msg);
+  return true;
+}
+
+void LinkerScriptLexer::Warning(SMLoc WarningLoc, const Twine &Msg) const {
+  SM.PrintMessage(WarningLoc, llvm::SourceMgr::DK_Warning, Msg);
+}
+*/
+
+// Returns true if the current token has kind `token`.
+bool LinkerScriptLexer::expect(ScriptToken token) {
+  return curToken.kind == token;
+}
+
+// Lexes the next token and makes it the current one.
+void LinkerScriptLexer::advanceLexer() { curToken = getTokenInfo(); }
+
+// Splits off the first `pos` characters of the remaining input and returns
+// them as a token of kind `kind`. StringRef::substr clamps out-of-range
+// arguments, so a `pos` past the end consumes whatever input is left.
+LinkerScriptLexer::TokenInfo
+LinkerScriptLexer::advanceTokenInfo(ScriptToken kind, size_t pos) {
+  // TODO: special case for kind == ScriptToken::Error
+  llvm::StringRef valRef = curStringRef.substr(0, pos);
+  curStringRef = curStringRef.substr(pos);
+  return {kind, valRef};
+}
+
+// Skips whitespace and comments, then lexes one token, dispatching on the
+// first remaining character.
+LinkerScriptLexer::TokenInfo LinkerScriptLexer::getTokenInfo() {
+  curStringRef = skipComments();
+
+  // TODO: make sure the empty situation is not an error
+  if (curStringRef.empty())
+    return advanceTokenInfo(ScriptToken::Eof, 0);
+  // llvm::isDigit/isAlpha are ASCII-only and, unlike std::isdigit/isalpha,
+  // well defined for every char value, including negative ones.
+  const char c = curStringRef.front();
+  if (llvm::isDigit(c))
+    return getDigits();
+  if (llvm::isAlpha(c))
+    return getCommandOrIdentifier();
+  return getSymbolToken();
+}
+
+// Returns the remaining input with leading whitespace, /* */ comments and
+// '#' line comments removed. An unterminated /* comment swallows the rest of
+// the buffer.
+llvm::StringRef LinkerScriptLexer::skipComments() {
+  // this code now is copied from ScriptLexer.cpp
+  // and modified so it can use SourceMgr
+  while (true) {
+    if (curStringRef.starts_with("/*")) {
+      size_t e = curStringRef.find("*/", 2);
+      if (e == llvm::StringRef::npos) {
+        // TODO: Error("Unclosed comment in a linker script");
+        // Return an empty view at the end of the buffer so that the
+        // resulting Eof token still points into the script.
+        return curStringRef.substr(curStringRef.size());
+      }
+      curStringRef = curStringRef.substr(e + 2);
+      continue;
+    }
+    if (curStringRef.starts_with("#")) {
+      size_t e = curStringRef.find('\n', 1);
+      // No newline: the comment runs to the end of the buffer.
+      if (e == StringRef::npos)
+        e = curStringRef.size() - 1;
+      curStringRef = curStringRef.substr(e + 1);
+      continue;
+    }
+
+    // Stop once trimming whitespace no longer makes progress.
+    size_t size = curStringRef.size();
+    curStringRef = curStringRef.ltrim();
+    if (curStringRef.size() == size)
+      return curStringRef;
+  }
+}
+
+// Lexes punctuation and operators, preferring the longest match ("<<=" over
+// "<<" over "<"). The caller guarantees that the input is not empty.
+LinkerScriptLexer::TokenInfo LinkerScriptLexer::getSymbolToken() {
+  const char c = curStringRef.front();
+  // One- and two-character lookahead; NUL stands for "past the end".
+  const char next = curStringRef.size() > 1 ? curStringRef[1] : '\0';
+  const char next2 = curStringRef.size() > 2 ? curStringRef[2] : '\0';
+  switch (c) {
+  case '"':
+    return getQuotedToken();
+  case '(':
+    return advanceTokenInfo(ScriptToken::BracektBegin);
+  case ')':
+    return advanceTokenInfo(ScriptToken::BracektEnd);
+  case '{':
+    return advanceTokenInfo(ScriptToken::CurlyBegin);
+  case '}':
+    return advanceTokenInfo(ScriptToken::CurlyEnd);
+  case ';':
+    return advanceTokenInfo(ScriptToken::Semicolon);
+  case ',':
+    return advanceTokenInfo(ScriptToken::Comma);
+  case '_':
+  case '.':
+    return getCommandOrIdentifier();
+  case ':':
+    return advanceTokenInfo(ScriptToken::Colon);
+  case '*':
+    return advanceTokenInfo(ScriptToken::Asterisk);
+  case '=':
+    return advanceTokenInfo(ScriptToken::Assign);
+  case '?':
+    return advanceTokenInfo(ScriptToken::QuestionMark);
+  case '+':
+    if (next == '=')
+      return advanceTokenInfo(ScriptToken::PlusAssign, 2);
+    return advanceTokenInfo(ScriptToken::Plus);
+  case '-':
+    if (next == '=')
+      return advanceTokenInfo(ScriptToken::MinusAssign, 2);
+    return advanceTokenInfo(ScriptToken::Minus);
+  case '<':
+    if (next == '<' && next2 == '=')
+      return advanceTokenInfo(ScriptToken::LeftShiftAssign, 3);
+    if (next == '=')
+      return advanceTokenInfo(ScriptToken::LessEqual, 2);
+    if (next == '<')
+      return advanceTokenInfo(ScriptToken::LeftShift, 2);
+    return advanceTokenInfo(ScriptToken::Less);
+  case '>':
+    if (next == '>' && next2 == '=')
+      return advanceTokenInfo(ScriptToken::RightShiftAssign, 3);
+    if (next == '=')
+      return advanceTokenInfo(ScriptToken::GreaterEqual, 2);
+    if (next == '>')
+      return advanceTokenInfo(ScriptToken::RightShift, 2);
+    return advanceTokenInfo(ScriptToken::Greater);
+  case '&':
+    if (next == '=')
+      return advanceTokenInfo(ScriptToken::AndAssign, 2);
+    if (next == '&')
+      return advanceTokenInfo(ScriptToken::AndGate, 2);
+    return advanceTokenInfo(ScriptToken::Bitwise);
+  case '^':
+    if (next == '=')
+      return advanceTokenInfo(ScriptToken::XorAssign, 2);
+    return advanceTokenInfo(ScriptToken::Xor);
+  case '|':
+    if (next == '=')
+      return advanceTokenInfo(ScriptToken::OrAssign, 2);
+    if (next == '|')
+      return advanceTokenInfo(ScriptToken::OrGate, 2);
+    return advanceTokenInfo(ScriptToken::Or);
+  default:
+    return advanceTokenInfo(ScriptToken::Error);
+  }
+}
+
+LinkerScriptLexer::TokenInfo LinkerScriptLexer::getQuotedToken() {
+  // Quoted token. Note that double-quote characters are parts of a token
+  // because, in a glob match context, only unquoted tokens are interpreted as
+  // glob patterns. Double-quoted tokens are literal patterns in that context.
+  size_t e = curStringRef.find('"', 1);
+  if (e == StringRef::npos) {
+    // TODO: Error(MB.getBufferIdentifier() + ": unclosed quote");
+    // An unterminated quote turns the rest of the buffer into an Error token.
+    return advanceTokenInfo(ScriptToken::Error, curStringRef.size());
+  }
+  return advanceTokenInfo(ScriptToken::Quote, e + 1);
+}
+
+// Lexes a token that starts with a digit: the maximal run of ASCII letters
+// and digits, classified by its "0x" prefix or its H/K/M suffix.
+LinkerScriptLexer::TokenInfo LinkerScriptLexer::getDigits() {
+  size_t pos = curStringRef.find_first_not_of(
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+      "0123456789");
+  // npos means the number runs to the end of the buffer. Without this clamp
+  // the suffix lookup below would index out of bounds.
+  if (pos == StringRef::npos)
+    pos = curStringRef.size();
+  if (curStringRef.starts_with_insensitive("0x"))
+    return advanceTokenInfo(ScriptToken::Hexdecimal, pos);
+  // The caller only dispatches here on a digit, so pos >= 1.
+  switch (curStringRef[pos - 1]) {
+  case 'H':
+  case 'h':
+    return advanceTokenInfo(ScriptToken::Hexdecimal_H, pos);
+  case 'K':
+  case 'k':
+    return advanceTokenInfo(ScriptToken::Decimal_K, pos);
+  case 'M':
+  case 'm':
+    return advanceTokenInfo(ScriptToken::Decimal_M, pos);
+  default:
+    return advanceTokenInfo(ScriptToken::Decimal, pos);
+  }
+}
+
+LinkerScriptLexer::TokenInfo LinkerScriptLexer::getCommandOrIdentifier() {
+  // Unquoted token. This is more relaxed than tokens in C-like language,
+  // so that you can write "file-name.cpp" as one bare token, for example.
+  size_t pos = curStringRef.find_first_not_of(
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+      "0123456789_.$/\\~=+[]*?-!^:");
+  // npos means the token runs to the end of the buffer.
+  if (pos == StringRef::npos)
+    pos = curStringRef.size();
+
+  // Inside an expression an operator character ends the token. Search only
+  // the token itself (not the rest of the script), and leave an operator in
+  // the first position alone so that the lexer always makes progress.
+  if (inExpression) {
+    size_t e = curStringRef.substr(0, pos).find_first_of("!~*/+-<>?^:=");
+    if (e != StringRef::npos && e != 0)
+      pos = e;
+  }
+
+  StringRef tok = curStringRef.substr(0, pos);
+  if (tok == ".")
+    return advanceTokenInfo(ScriptToken::Dot);
+  return advanceTokenInfo(getTokenfromKeyword(tok), pos);
+}
+
+// Maps a bare token to its keyword kind, or to Identifier if it is not a
+// keyword. Matching is case-sensitive.
+ScriptToken
+LinkerScriptLexer::getTokenfromKeyword(llvm::StringRef keyword) const {
+#define KEYWORD(STR)                                                           \
+  do {                                                                         \
+    if (keyword == #STR)                                                       \
+      return ScriptToken::LS_##STR;                                            \
+  } while (false)
+
+  KEYWORD(ENTRY);
+  KEYWORD(INCLUDE);
+  KEYWORD(INPUT);
+  KEYWORD(GROUP);
+  KEYWORD(MEMORY);
+  KEYWORD(OUTPUT);
+  KEYWORD(SEARCH_DIR);
+  KEYWORD(STARTUP);
+  KEYWORD(INSERT);
+  KEYWORD(AFTER);
+  KEYWORD(OUTPUT_FORMAT);
+  KEYWORD(TARGET);
+  KEYWORD(ASSERT);
+  KEYWORD(CONSTANT);
+  KEYWORD(EXTERN);
+  KEYWORD(OUTPUT_ARCH);
+  KEYWORD(PROVIDE);
+  KEYWORD(HIDDEN);
+  KEYWORD(PROVIDE_HIDDEN);
+  KEYWORD(SECTIONS);
+  KEYWORD(BEFORE);
+  KEYWORD(EXCLUDE_FILE);
+  KEYWORD(KEEP);
+  KEYWORD(INPUT_SECTION_FLAGS);
+  KEYWORD(OVERLAY);
+  KEYWORD(NOLOAD);
+  KEYWORD(COPY);
+  KEYWORD(INFO);
+  KEYWORD(OVERWRITE_SECTIONS);
+  KEYWORD(SUBALIGN);
+  KEYWORD(ONLY_IF_RO);
+  KEYWORD(ONLY_IF_RW);
+  KEYWORD(FILL);
+  KEYWORD(SORT);
+  KEYWORD(ABSOLUTE);
+  KEYWORD(ADDR);
+  KEYWORD(ALIGN);
+  KEYWORD(ALIGNOF);
+  KEYWORD(DATA_SEGMENT_ALIGN);
+  KEYWORD(DATA_SEGMENT_END);
+  KEYWORD(DEFINED);
+  KEYWORD(LENGTH);
+  KEYWORD(LOADADDR);
+  KEYWORD(LOG2CEIL);
+  KEYWORD(MAX);
+  KEYWORD(MIN);
+  KEYWORD(ORIGIN);
+  KEYWORD(SEGMENT_START);
+  KEYWORD(SIZEOF);
+  KEYWORD(SIZEOF_HEADERS);
+  KEYWORD(FILEHDR);
+  KEYWORD(PHDRS);
+  KEYWORD(AT);
+  KEYWORD(FLAGS);
+  KEYWORD(VERSION);
+  KEYWORD(REGION_ALIAS);
+  KEYWORD(AS_NEEDED);
+  KEYWORD(CONSTRUCTORS);
+  KEYWORD(MAXPAGESIZE);
+  KEYWORD(COMMONPAGESIZE);
+  KEYWORD(BYTE);
+  KEYWORD(SHORT);
+  KEYWORD(LONG);
+  KEYWORD(QUAD);
+
+#undef KEYWORD
+
+  if (keyword == "local")
+    return ScriptToken::LS_Local;
+  if (keyword == "global")
+    return ScriptToken::LS_Global;
+  if (keyword == "extern")
+    return ScriptToken::LS_Extern;
+  return ScriptToken::Identifier;
+}
diff --git a/lld/ELF/LinkerScriptLexer.h b/lld/ELF/LinkerScriptLexer.h
new file mode 100644
index 0000000000000..4e75f37a59ed1
--- /dev/null
+++ b/lld/ELF/LinkerScriptLexer.h
@@ -0,0 +1,84 @@
+//===- LinkerScriptLexer.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares LinkerScriptLexer, which splits a linker script buffer
+// into ScriptToken-tagged tokens.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_LINKER_SCRIPT_LEXER_H
+#define LLD_ELF_LINKER_SCRIPT_LEXER_H
+
+#include "ScriptTokenizer.h"
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+
+namespace lld::elf {
+
+class LinkerScriptLexer {
+public:
+  // explicit LinkerScriptLexer(MemoryBufferRef MB, llvm::SourceMgr &SM,
+  //                            llvm::SMDiagnostic &Err);
+
+  explicit LinkerScriptLexer(MemoryBufferRef MB);
+
+  // LLVM SourceMgr and SMDiagnostic cannot be used now since
+  // ctx CommonLinkerContext has ownership of all MemoryBuffer
+  // by using SmallVector<std::unique_ptr<MemoryBuffer>> memoryBuffers in
+  // ELF/Config.h
+  /*llvm::SMLoc getLoc() const {
+    return llvm::SMLoc::getFromPointer(curStringRef.begin());
+  }
+
+  bool Error(llvm::SMLoc ErrorLoc, const llvm::Twine &Msg) const;
+  bool Error(const llvm::Twine &Msg) const { return Error(getLoc(), Msg); }
+  void Warning(llvm::SMLoc WarningLoc, const llvm::Twine &Msg) const;
+  void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); }*/
+
+  // Returns true if the current token has kind `token`.
+  bool expect(ScriptToken token);
+
+  // When true, an operator character ends a bare (unquoted) token.
+  bool inExpression = false;
+
+  // TODO: rewrite next(), peek(), and peek2() since TokenInfo change
+  // Lexes the next token and makes it the current one.
+  void advanceLexer();
+  ScriptToken getTokenKind() const { return curToken.kind; }
+  llvm::StringRef getTokenStringRef() const { return curToken.val; }
+
+private:
+  struct TokenInfo {
+    ScriptToken kind;
+    llvm::StringRef val;
+  };
+
+  // The current token; kind is Error until advanceLexer() is first called.
+  TokenInfo curToken = {ScriptToken::Error, {}};
+  // llvm::SMDiagnostic &ErrorInfo;
+  // llvm::SourceMgr &SM;
+  llvm::MemoryBufferRef MB;
+  // The part of the buffer that has not been lexed yet.
+  llvm::StringRef curStringRef;
+
+  llvm::StringRef skipComments();
+  TokenInfo advanceTokenInfo(ScriptToken kind, size_t pos = 1);
+  TokenInfo getTokenInfo();
+  TokenInfo getSymbolToken();
+  TokenInfo getQuotedToken();
+  TokenInfo getDigits();
+  TokenInfo getCommandOrIdentifier();
+  ScriptToken getTokenfromKeyword(llvm::StringRef keyword) const;
+};
+} // namespace lld::elf
+
+#endif // LLD_ELF_LINKER_SCRIPT_LEXER_H
diff --git a/lld/ELF/ScriptTokenizer.h b/lld/ELF/ScriptTokenizer.h
new file mode 100644
index 0000000000000..51693378aade9
--- /dev/null
+++ b/lld/ELF/ScriptTokenizer.h
@@ -0,0 +1,186 @@
+//===- ScriptTokenizer.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ScriptToken, the token kinds of the linker script lexer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_SCRIPT_TOKENIZER_H
+#define LLD_ELF_SCRIPT_TOKENIZER_H
+
+namespace lld {
+namespace elf {
+enum class ScriptToken {
+  // Keyword tokens; the LS_ prefix stands for LinkerScript.
+  LS_ENTRY,
+
+  // Commands dealing with files
+  LS_INCLUDE,
+  LS_INPUT,
+  LS_GROUP,
+  LS_MEMORY,
+  LS_OUTPUT,
+  LS_SEARCH_DIR,
+  LS_STARTUP,
+
+  LS_INSERT,
+  LS_AFTER,
+
+  // Commands for object file formats
+  LS_OUTPUT_FORMAT,
+  LS_TARGET,
+
+  // Other linker script commands
+  LS_ASSERT,
+  LS_CONSTANT,
+  LS_EXTERN,
+  // FORCE_COMMON_ALLOCATION
+  // INHIBIT_COMMON_ALLOCATION
+  // NOCROSSREFS
+  LS_OUTPUT_ARCH,
+
+  // Assignment
+  LS_PROVIDE,
+  LS_HIDDEN,
+  LS_PROVIDE_HIDDEN,
+
+  LS_SECTIONS,
+  LS_BEFORE,
+
+  // Input Section
+  LS_EXCLUDE_FILE,
+  LS_KEEP,
+  LS_INPUT_SECTION_FLAGS,
+
+  // Read section
+  LS_OVERLAY,
+  LS_NOLOAD,
+  LS_COPY,
+  LS_INFO,
+
+  // Output Section
+  LS_OVERWRITE_SECTIONS,
+  LS_SUBALIGN,
+  LS_ONLY_IF_RO,
+  LS_ONLY_IF_RW,
+  LS_FILL,
+  LS_SORT,
+
+  // Builtin Functions
+  LS_ABSOLUTE,
+  LS_ADDR,
+  LS_ALIGN,
+  LS_ALIGNOF,
+  // BLOCK, // synonym for ALIGN for compatibility with older linker script
+  LS_DATA_SEGMENT_ALIGN,
+  LS_DATA_SEGMENT_END,
+  LS_DEFINED,
+  LS_LENGTH,
+  LS_LOADADDR,
+
+  LS_LOG2CEIL,
+  LS_MAX,
+  LS_MIN,
+  LS_ORIGIN,
+  LS_SEGMENT_START,
+  // NEXT, // This function is closely related to ALIGN(exp); unless you use the
+  // MEMORY command to define discontinuous memory for the output file, the two
+  // functions are equivalent.
+  LS_SIZEOF,
+  LS_SIZEOF_HEADERS,
+
+  // PHDRS Command
+  LS_FILEHDR,
+  LS_PHDRS,
+  LS_AT,
+  LS_FLAGS,
+
+  // Version Command
+  LS_VERSION,
+
+  LS_REGION_ALIAS,
+  LS_AS_NEEDED,
+  LS_CONSTRUCTORS,
+
+  // Symbolic Constants
+  LS_MAXPAGESIZE,
+  LS_COMMONPAGESIZE,
+
+  LS_BYTE,
+  LS_SHORT,
+  LS_LONG,
+  LS_QUAD,
+
+  LS_Local,  // local
+  LS_Global, // global
+  LS_Extern, // extern
+
+  LS_ELF,
+  LS_Binary,
+  Error,
+  Eof,
+
+  Identifier,
+  Hexdecimal,   // starts with 0x
+  Hexdecimal_H, // ends with H/h
+  Decimal,
+  Decimal_K, // ends with K/k
+  Decimal_M, // ends with M/m
+
+  // Symbol tokens
+  CurlyBegin,   // {
+  CurlyEnd,     // }
+  BracektBegin, // (
+  BracektEnd,   // )
+  Comma,        // ,
+  Semicolon,    // ;
+  Colon,        // :
+  Asterisk,     // *
+  QuestionMark, // ?
+  Backslash,    // "\"
+  Slash,        // /
+  Greater,      // >
+  Less,         // <
+  Minus,        // -
+  Plus,         // +
+  Bitwise,      // & (bitwise AND)
+  Xor,          // ^
+  Or,           // |
+  PercentSign,  // %
+  Underscore,   // _
+  Dot,          // .
+  Quote, // Quoted token. Note that double-quote characters are parts of a token
+  // because, in a glob match context, only unquoted tokens are interpreted as
+  // glob patterns. Double-quoted tokens are literal patterns in that context.
+
+  // Assignment operators
+  Assign,           // =
+  PlusAssign,       // +=
+  MinusAssign,      // -=
+  MulAssign,        // *=
+  DivAssign,        // /=
+  LeftShiftAssign,  // <<=
+  RightShiftAssign, // >>=
+  AndAssign,        // &=
+  OrAssign,         // |=
+  XorAssign,        // ^=
+
+  // Comparison, shift and logical operators
+  NotEqual,     // !=
+  Equal,        // ==
+  GreaterEqual, // >=
+  LessEqual,    // <=
+  LeftShift,    // <<
+  RightShift,   // >>
+  AndGate,      // && (logical AND)
+  OrGate,       // || (logical OR)
+};
+} // namespace elf
+} // namespace lld
+
+#endif // LLD_ELF_SCRIPT_TOKENIZER_H
diff --git a/lld/unittests/CMakeLists.txt b/lld/unittests/CMakeLists.txt
index ffaea3f207833..237e8679020a0 100644
--- a/lld/unittests/CMakeLists.txt
+++ b/lld/unittests/CMakeLists.txt
@@ -4,3 +4,4 @@ endfunction()
 
 add_subdirectory(AsLibAll)
 add_subdirectory(AsLibELF)
+add_subdirectory(ELF)
diff --git a/lld/unittests/ELF/CMakeLists.txt b/lld/unittests/ELF/CMakeLists.txt
new file mode 100644
index 0000000000000..152a92ee476fd
--- /dev/null
+++ b/lld/unittests/ELF/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Test usage of LLD Linker Script Lexer
+set(LLVM_LINK_COMPONENTS
+  Support
+)
+
+add_lld_unittests(LLDELFLexerTests
+  LinkerScriptLexerTest.cpp
+)
+
+target_link_libraries(LLDELFLexerTests
+  PRIVATE
+  lldELF
+  lldCommon
+  LLVMTestingSupport
+)
diff --git a/lld/unittests/ELF/LinkerScriptLexerTest.cpp b/lld/unittests/ELF/LinkerScriptLexerTest.cpp
new file mode 100644
index 0000000000000..77b2e3ac52d0f
--- /dev/null
+++ b/lld/unittests/ELF/LinkerScriptLexerTest.cpp
@@ -0,0 +1,506 @@
+//===- lld/unittests/ELF/LinkerScriptLexerTest.cpp --------------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../ELF/LinkerScriptLexer.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "gtest/gtest.h"
+
+namespace lld {
+namespace elf {
+// Fixture that lexes a script and checks the kind of each token in order.
+class LinkerScriptLexerTest : public testing::Test {
+protected:
+  std::unique_ptr<LinkerScriptLexer> Lexer;
+  std::unique_ptr<llvm::MemoryBuffer> Buffer;
+  // Wraps scriptStr in a MemoryBuffer and points a fresh lexer at it.
+  void setupCallToLinkScriptLexer(llvm::StringRef scriptStr) {
+    Buffer = llvm::MemoryBuffer::getMemBuffer(scriptStr);
+    Lexer =
+        std::make_unique<LinkerScriptLexer>(Buffer->getMemBufferRef());
+  }
+  // Advances the lexer once per expected kind and compares the two.
+  void lexAndCheckTokens(llvm::SmallVector<ScriptToken> ExpectedTokens) {
+    bool outputinfo = true; // Also dump each token to stderr for debugging.
+    for (const auto &expected : ExpectedTokens) {
+      Lexer->advanceLexer();
+      EXPECT_EQ(Lexer->getTokenKind(), expected);
+      if (outputinfo) {
+        llvm::errs() << Lexer->getTokenStringRef() << " "
+                     << static_cast<unsigned int>(Lexer->getTokenKind()) << " "
+                     << static_cast<unsigned int>(expected) << "\n";
+      }
+    }
+  }
+};
+
+TEST_F(LinkerScriptLexerTest, CheckEntry) {
+  llvm::StringRef testRef = " ENTRY";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens({ScriptToken::LS_ENTRY});
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, CheckEntryLabel) {
+  llvm::StringRef testRef = "ENTRY(_label)";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_ENTRY, ScriptToken::BracektBegin,
+       ScriptToken::Identifier, ScriptToken::BracektEnd});
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, CheckSECTIONSandALIGN) {
+  llvm::StringRef testRef = "SECTIONS { \
+  .super_aligned : ALIGN(16) { /* ... */ }}";
+
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin,
+       ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::LS_ALIGN,
+       ScriptToken::BracektBegin, ScriptToken::Decimal, ScriptToken::BracektEnd,
+       ScriptToken::CurlyBegin, ScriptToken::CurlyEnd, ScriptToken::CurlyEnd});
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, CheckHex) {
+  llvm::StringRef testRef =
+      "SECTIONS{ \n . = 0x10000;\n .text : { *(.text) }\n \
+      . = 0x8000000;\n .data : { *(.data) }\n .bss : { *(.bss) }}";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_SECTIONS,  ScriptToken::CurlyBegin,
+       ScriptToken::Dot,          ScriptToken::Assign,
+       ScriptToken::Hexdecimal,   ScriptToken::Semicolon,
+       ScriptToken::Identifier,   ScriptToken::Colon,
+       ScriptToken::CurlyBegin,   ScriptToken::Asterisk,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::CurlyEnd,
+       ScriptToken::Dot,          ScriptToken::Assign,
+       ScriptToken::Hexdecimal,   ScriptToken::Semicolon,
+       ScriptToken::Identifier,   ScriptToken::Colon,
+       ScriptToken::CurlyBegin,   ScriptToken::Asterisk,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::CurlyEnd,
+       ScriptToken::Identifier,   ScriptToken::Colon,
+       ScriptToken::CurlyBegin,   ScriptToken::Asterisk,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::CurlyEnd,
+       ScriptToken::CurlyEnd});
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, CheckPROVIDECommand) {
+  llvm::StringRef testRef = "SECTIONS\n{.text :\n{\n*(.text)\n\t_etext = .;\n \
+  \t PROVIDE(etext = .);\n}\n}";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens({
+      ScriptToken::LS_SECTIONS,  ScriptToken::CurlyBegin,
+      ScriptToken::Identifier,   ScriptToken::Colon,
+      ScriptToken::CurlyBegin,   ScriptToken::Asterisk,
+      ScriptToken::BracektBegin, ScriptToken::Identifier,
+      ScriptToken::BracektEnd,   ScriptToken::Identifier,
+      ScriptToken::Assign,       ScriptToken::Dot,
+      ScriptToken::Semicolon,    ScriptToken::LS_PROVIDE,
+      ScriptToken::BracektBegin, ScriptToken::Identifier,
+      ScriptToken::Assign,       ScriptToken::Dot,
+      ScriptToken::BracektEnd,   ScriptToken::Semicolon,
+      ScriptToken::CurlyEnd,     ScriptToken::CurlyEnd,
+  });
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, CheckINSERTandBEFORE) {
+  llvm::StringRef testRef =
+      "SECTIONS { .foo.data : { *(.foo.data) } } INSERT BEFORE .data;";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin,
+       ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::CurlyBegin,
+       ScriptToken::Asterisk, ScriptToken::BracektBegin,
+       ScriptToken::Identifier, ScriptToken::BracektEnd, ScriptToken::CurlyEnd,
+       ScriptToken::CurlyEnd, ScriptToken::LS_INSERT, ScriptToken::LS_BEFORE,
+       ScriptToken::Identifier, ScriptToken::Semicolon});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, CheckALIGNandDecimal) {
+  llvm::StringRef testRef = "SECTIONS {.foo : ALIGN(2M) { *(.foo) }}";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin,
+       ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::LS_ALIGN,
+       ScriptToken::BracektBegin, ScriptToken::Decimal_M,
+       ScriptToken::BracektEnd, ScriptToken::CurlyBegin, ScriptToken::Asterisk,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd, ScriptToken::CurlyEnd, ScriptToken::CurlyEnd});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, CheckAbsoluteExprTest) {
+  llvm::StringRef testRef = "SECTIONS { \
+  .text : { \
+    bar1 = ALIGNOF(.text); \
+    bar2 = CONSTANT (MAXPAGESIZE); \
+    bar3 = SIZEOF (.text); \
+    bar4 = SIZEOF_HEADERS; \
+    bar5 = 0x42; \
+    bar6 = foo + 1; \
+    *(.text) \
+  } \
+}";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_SECTIONS,    ScriptToken::CurlyBegin,
+       ScriptToken::Identifier,     ScriptToken::Colon,
+       ScriptToken::CurlyBegin,     ScriptToken::Identifier,
+       ScriptToken::Assign,         ScriptToken::LS_ALIGNOF,
+       ScriptToken::BracektBegin,   ScriptToken::Identifier,
+       ScriptToken::BracektEnd,     ScriptToken::Semicolon,
+
+       ScriptToken::Identifier,     ScriptToken::Assign,
+       ScriptToken::LS_CONSTANT,    ScriptToken::BracektBegin,
+       ScriptToken::LS_MAXPAGESIZE, ScriptToken::BracektEnd,
+       ScriptToken::Semicolon,
+
+       ScriptToken::Identifier,     ScriptToken::Assign,
+       ScriptToken::LS_SIZEOF,      ScriptToken::BracektBegin,
+       ScriptToken::Identifier,     ScriptToken::BracektEnd,
+       ScriptToken::Semicolon,      ScriptToken::Identifier,
+       ScriptToken::Assign,         ScriptToken::LS_SIZEOF_HEADERS,
+       ScriptToken::Semicolon,
+
+       ScriptToken::Identifier,     ScriptToken::Assign,
+       ScriptToken::Hexdecimal,     ScriptToken::Semicolon,
+       ScriptToken::Identifier,     ScriptToken::Assign,
+       ScriptToken::Identifier,     ScriptToken::Plus,
+       ScriptToken::Decimal,        ScriptToken::Semicolon,
+       ScriptToken::Asterisk,       ScriptToken::BracektBegin,
+       ScriptToken::Identifier,     ScriptToken::BracektEnd,
+       ScriptToken::CurlyEnd,       ScriptToken::CurlyEnd});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, checkAddrZeroTest) {
+  llvm::StringRef testRef = "SECTIONS {\
+  foo = ADDR(.text) - ABSOLUTE(ADDR(.text));\
+};";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin,
+       ScriptToken::Identifier, ScriptToken::Assign, ScriptToken::LS_ADDR,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd, ScriptToken::Minus, ScriptToken::LS_ABSOLUTE,
+       ScriptToken::BracektBegin, ScriptToken::LS_ADDR,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd, ScriptToken::BracektEnd, ScriptToken::Semicolon,
+       ScriptToken::CurlyEnd});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, checkAddrTest) {
+  llvm::StringRef testRef = "SECTIONS {\
+  . = 0x1000; \
+  .text : { \
+    *(.text*) \
+    x1 = ADDR(.text) + 1; x2 = 1 + ADDR(.text);\
+    x3 = ADDR(.text) & 0xffff;\
+  }\
+  .foo-1 : { *(.foo-1) }\
+  .foo-2 ADDR(.foo-1) + 0x100 : { *(.foo-2) }\
+  .foo-3 : { *(.foo-3) }\
+}";
+
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_SECTIONS,  ScriptToken::CurlyBegin,
+       ScriptToken::Dot,          ScriptToken::Assign,
+       ScriptToken::Hexdecimal,   ScriptToken::Semicolon,
+       ScriptToken::Identifier,   ScriptToken::Colon,
+       ScriptToken::CurlyBegin,   ScriptToken::Asterisk,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::Identifier,
+       ScriptToken::Assign,       ScriptToken::LS_ADDR,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::Plus,
+       ScriptToken::Decimal,      ScriptToken::Semicolon,
+
+       ScriptToken::Identifier,   ScriptToken::Assign,
+       ScriptToken::Decimal,      ScriptToken::Plus,
+       ScriptToken::LS_ADDR,      ScriptToken::BracektBegin,
+       ScriptToken::Identifier,   ScriptToken::BracektEnd,
+       ScriptToken::Semicolon,    ScriptToken::Identifier,
+       ScriptToken::Assign,       ScriptToken::LS_ADDR,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::Bitwise,
+       ScriptToken::Hexdecimal,   ScriptToken::Semicolon,
+       ScriptToken::CurlyEnd,
+
+       ScriptToken::Identifier,   ScriptToken::Colon,
+       ScriptToken::CurlyBegin,   ScriptToken::Asterisk,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::CurlyEnd,
+       ScriptToken::Identifier,   ScriptToken::LS_ADDR,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::Plus,
+       ScriptToken::Hexdecimal,   ScriptToken::Colon,
+       ScriptToken::CurlyBegin,   ScriptToken::Asterisk,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::CurlyEnd,
+       ScriptToken::Identifier,   ScriptToken::Colon,
+       ScriptToken::CurlyBegin,   ScriptToken::Asterisk,
+       ScriptToken::BracektBegin, ScriptToken::Identifier,
+       ScriptToken::BracektEnd,   ScriptToken::CurlyEnd,
+       ScriptToken::CurlyEnd});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, checkAlignEmptyTest) {
+  llvm::StringRef testRef = "SECTIONS { \
+  . = SIZEOF_HEADERS; \
+  abc : {} \
+  . = ALIGN(0x1000); \
+  foo : { *(foo) }}";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens({ScriptToken::LS_SECTIONS,
+                                                 ScriptToken::CurlyBegin,
+                                                 ScriptToken::Dot,
+                                                 ScriptToken::Assign,
+                                                 ScriptToken::LS_SIZEOF_HEADERS,
+                                                 ScriptToken::Semicolon,
+                                                 ScriptToken::Identifier,
+                                                 ScriptToken::Colon,
+                                                 ScriptToken::CurlyBegin,
+                                                 ScriptToken::CurlyEnd,
+                                                 ScriptToken::Dot,
+                                                 ScriptToken::Assign,
+                                                 ScriptToken::LS_ALIGN,
+                                                 ScriptToken::BracektBegin,
+                                                 ScriptToken::Hexdecimal,
+                                                 ScriptToken::BracektEnd,
+                                                 ScriptToken::Semicolon,
+                                                 ScriptToken::Identifier,
+                                                 ScriptToken::Colon,
+                                                 ScriptToken::CurlyBegin,
+                                                 ScriptToken::Asterisk,
+                                                 ScriptToken::BracektBegin,
+                                                 ScriptToken::Identifier,
+                                                 ScriptToken::BracektEnd,
+                                                 ScriptToken::CurlyEnd,
+                                                 ScriptToken::CurlyEnd});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, checkBSSFillTest) {
+  llvm::StringRef testRef = "SECTIONS {\
+  .bss : {\
+    . += 0x10000; \
+    *(.bss)\
+  } =0xFF};";
+
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin,
+       ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::CurlyBegin,
+       ScriptToken::Dot, ScriptToken::PlusAssign, ScriptToken::Hexdecimal,
+       ScriptToken::Semicolon, ScriptToken::Asterisk, ScriptToken::BracektBegin,
+       ScriptToken::Identifier, ScriptToken::BracektEnd, ScriptToken::CurlyEnd,
+       ScriptToken::Assign, ScriptToken::Hexdecimal, ScriptToken::CurlyEnd,
+       ScriptToken::Semicolon});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, checkMemoryTest) {
+  llvm::StringRef testRef = "MEMORY { \
+  AX (ax) : ORIGIN = 0x2000, LENGTH = 0x100 \
+  AW (aw) : ORIGIN = 0x3000, LENGTH = 0x100 \
+  FLASH (ax) : ORIGIN = 0x6000, LENGTH = 0x100 \
+  RAM (aw) : ORIGIN = 0x7000, LENGTH = 0x100}";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_MEMORY,  ScriptToken::CurlyBegin,
+       ScriptToken::Identifier, ScriptToken::BracektBegin,
+       ScriptToken::Identifier, ScriptToken::BracektEnd,
+       ScriptToken::Colon,      ScriptToken::LS_ORIGIN,
+       ScriptToken::Assign,     ScriptToken::Hexdecimal,
+       ScriptToken::Comma,      ScriptToken::LS_LENGTH,
+       ScriptToken::Assign,     ScriptToken::Hexdecimal,
+
+       ScriptToken::Identifier, ScriptToken::BracektBegin,
+       ScriptToken::Identifier, ScriptToken::BracektEnd,
+       ScriptToken::Colon,      ScriptToken::LS_ORIGIN,
+       ScriptToken::Assign,     ScriptToken::Hexdecimal,
+       ScriptToken::Comma,      ScriptToken::LS_LENGTH,
+       ScriptToken::Assign,     ScriptToken::Hexdecimal,
+
+       ScriptToken::Identifier, ScriptToken::BracektBegin,
+       ScriptToken::Identifier, ScriptToken::BracektEnd,
+       ScriptToken::Colon,      ScriptToken::LS_ORIGIN,
+       ScriptToken::Assign,     ScriptToken::Hexdecimal,
+       ScriptToken::Comma,      ScriptToken::LS_LENGTH,
+       ScriptToken::Assign,     ScriptToken::Hexdecimal,
+
+       ScriptToken::Identifier, ScriptToken::BracektBegin,
+       ScriptToken::Identifier, ScriptToken::BracektEnd,
+       ScriptToken::Colon,      ScriptToken::LS_ORIGIN,
+       ScriptToken::Assign,     ScriptToken::Hexdecimal,
+       ScriptToken::Comma,      ScriptToken::LS_LENGTH,
+       ScriptToken::Assign,     ScriptToken::Hexdecimal,
+
+       ScriptToken::CurlyEnd});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, checkCONSTRUCTORS) {
+  llvm::StringRef testRef = "SECTIONS {foo : {*(.foo) CONSTRUCTORS}}";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin,
+       ScriptToken::Identifier, ScriptToken::Colon, ScriptToken::CurlyBegin,
+       ScriptToken::Asterisk, ScriptToken::BracektBegin,
+       ScriptToken::Identifier, ScriptToken::BracektEnd,
+       ScriptToken::LS_CONSTRUCTORS, ScriptToken::CurlyEnd,
+       ScriptToken::CurlyEnd});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+
+TEST_F(LinkerScriptLexerTest, checkDataCommands) {
+  // this test case comes from lld/test/ELF/linkerscript/data-commands2.test
+  llvm::StringRef testRef = "MEMORY {\
+    rom (rwx) : ORIGIN = 0x00, LENGTH = 2K\
+  } \
+  SECTIONS {\
+    .foo : {\
+      *(.foo.1) \
+      BYTE(0x11)\
+      *(.foo.2)\
+      SHORT(0x1122)\
+      *(.foo.3)\
+      LONG(0x11223344)\
+      *(.foo.4)\
+      QUAD(0x1122334455667788)\
+    } > rom \
+    .bar : { \
+      *(.bar.1) \
+      BYTE(a + 1) \
+      *(.bar.2) \
+      SHORT(b) \
+      *(.bar.3) \
+      LONG(c + 2) \
+      *(.bar.4) \
+      QUAD(d) \
+    } > rom}";
+
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector<ScriptToken> ExpectedTokens(
+      {ScriptToken::LS_MEMORY,   ScriptToken::CurlyBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+       ScriptToken::Colon,       ScriptToken::LS_ORIGIN,
+       ScriptToken::Assign,      ScriptToken::Hexdecimal,
+       ScriptToken::Comma,       ScriptToken::LS_LENGTH,
+       ScriptToken::Assign,      ScriptToken::Decimal_K,
+       ScriptToken::CurlyEnd,
+
+       ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin,
+       ScriptToken::Identifier,  ScriptToken::Colon,
+       ScriptToken::CurlyBegin,
+
+       ScriptToken::Asterisk,    ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+       ScriptToken::LS_BYTE,     ScriptToken::BracektBegin,
+       ScriptToken::Hexdecimal,  ScriptToken::BracektEnd,
+
+       ScriptToken::Asterisk,    ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+       ScriptToken::LS_SHORT,    ScriptToken::BracektBegin,
+       ScriptToken::Hexdecimal,  ScriptToken::BracektEnd,
+
+       ScriptToken::Asterisk,    ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+       ScriptToken::LS_LONG,     ScriptToken::BracektBegin,
+       ScriptToken::Hexdecimal,  ScriptToken::BracektEnd,
+
+       ScriptToken::Asterisk,    ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+       ScriptToken::LS_QUAD,     ScriptToken::BracektBegin,
+       ScriptToken::Hexdecimal,  ScriptToken::BracektEnd,
+
+       ScriptToken::CurlyEnd,    ScriptToken::Greater,
+       ScriptToken::Identifier,
+
+       ScriptToken::Identifier,  ScriptToken::Colon,
+       ScriptToken::CurlyBegin,
+
+       ScriptToken::Asterisk,    ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+       ScriptToken::LS_BYTE,     ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::Plus,
+       ScriptToken::Decimal,     ScriptToken::BracektEnd,
+
+       ScriptToken::Asterisk,    ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+       ScriptToken::LS_SHORT,    ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+
+       ScriptToken::Asterisk,    ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+       ScriptToken::LS_LONG,     ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::Plus,
+       ScriptToken::Decimal,     ScriptToken::BracektEnd,
+
+       ScriptToken::Asterisk,    ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+       ScriptToken::LS_QUAD,     ScriptToken::BracektBegin,
+       ScriptToken::Identifier,  ScriptToken::BracektEnd,
+
+       ScriptToken::CurlyEnd,    ScriptToken::Greater,
+       ScriptToken::Identifier,  ScriptToken::CurlyEnd});
+
+  lexAndCheckTokens(ExpectedTokens);
+}
+// EXTERN(...) followed by a DEFINED(...) ternary assigned to '.'.
+TEST_F(LinkerScriptLexerTest, checkDefinedTest) {
+  llvm::StringRef testRef = "EXTERN(extern_defined) \nSECTIONS { . = \
+ DEFINED(defined) ? 0x11000 : .;.foo : { *(.foo*) }";
+  setupCallToLinkScriptLexer(testRef);
+  llvm::SmallVector ExpectedTokens(
+      {ScriptToken::LS_EXTERN, ScriptToken::BracektBegin,   // EXTERN (
+       ScriptToken::Identifier, ScriptToken::BracektEnd,    // extern_defined )
+       ScriptToken::LS_SECTIONS, ScriptToken::CurlyBegin,   // SECTIONS {
+       ScriptToken::Dot, ScriptToken::Assign,               // . =
+       ScriptToken::LS_DEFINED, ScriptToken::BracektBegin,  // DEFINED (
+       ScriptToken::Identifier, ScriptToken::BracektEnd,    // defined )
+       ScriptToken::QuestionMark, ScriptToken::Hexdecimal,  // ? 0x11000
+       ScriptToken::Colon, ScriptToken::Dot,                // : .
+       ScriptToken::Semicolon, ScriptToken::Identifier,     // ; .foo
+       ScriptToken::Colon, ScriptToken::CurlyBegin,         // : {
+       ScriptToken::Asterisk, ScriptToken::BracektBegin,    // * (
+       ScriptToken::Identifier, ScriptToken::BracektEnd,    // .foo* )
+       ScriptToken::CurlyEnd}); // } of .foo; the script never closes SECTIONS
+
+  lexAndCheckTokens(ExpectedTokens); // fixture helper, not visible here
+}
+
+} // namespace elf
+} // namespace lld