Skip to content

Commit f04d5a9

Browse files
committed
[AArch64] Refactor @plt, @gotpcrel, and @AUTH to use parseDataExpr
Following PR llvm#132569, which added `parseDataExpr` for parsing expressions in data directives (e.g., `.word`), this PR migrates AArch64 `@plt`, `@gotpcrel`, and `@AUTH` from the `parsePrimaryExpr` workaround to `parseDataExpr`. The goal is to align with the GNU assembler model, where relocation specifiers apply to the entire operand rather than to individual terms. This reduces complexity, which is especially evident in `@AUTH` parsing. Note: AArch64 ELF lacks an official syntax for data directives (llvm#132570). A prefix notation might be a preferable future direction. In the test `elf-reloc-ptrauth.s`, many errors are now reported at parse time.
1 parent 36978fa commit f04d5a9

File tree

10 files changed

+150
-138
lines changed

10 files changed

+150
-138
lines changed

llvm/include/llvm/MC/MCParser/MCAsmLexer.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,9 @@ class MCAsmLexer {
148148
void setSkipSpace(bool val) { SkipSpace = val; }
149149

150150
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
151-
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
151+
bool setAllowAtInIdentifier(bool v) {
152+
return std::exchange(AllowAtInIdentifier, v);
153+
}
152154

153155
void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
154156

llvm/include/llvm/MC/MCParser/MCAsmParser.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -136,8 +136,10 @@ class MCAsmParser {
136136
MCTargetAsmParser *TargetParser = nullptr;
137137

138138
protected: // Can only create subclasses.
139-
MCAsmParser();
139+
MCAsmParser(MCContext &, const MCAsmInfo &);
140140

141+
MCContext &Ctx;
142+
const MCAsmInfo &MAI;
141143
SmallVector<MCPendingError, 0> PendingErrors;
142144

143145
/// Flag tracking whether any errors have been encountered.
@@ -333,6 +335,9 @@ class MCAsmParser {
333335

334336
/// Parse a .gnu_attribute.
335337
bool parseGNUAttribute(SMLoc L, int64_t &Tag, int64_t &IntegerValue);
338+
339+
bool parseAtSpecifier(const MCExpr *&Res, SMLoc &EndLoc);
340+
const MCExpr *applySpecifier(const MCExpr *E, uint32_t Variant);
336341
};
337342

338343
/// Create an MCAsmParser instance for parsing assembly similar to gas syntax

llvm/lib/MC/MCParser/AsmParser.cpp

Lines changed: 26 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/APInt.h"
1515
#include "llvm/ADT/ArrayRef.h"
1616
#include "llvm/ADT/STLExtras.h"
17+
#include "llvm/ADT/ScopeExit.h"
1718
#include "llvm/ADT/SmallSet.h"
1819
#include "llvm/ADT/SmallString.h"
1920
#include "llvm/ADT/SmallVector.h"
@@ -118,9 +119,7 @@ struct ParseStatementInfo {
118119
class AsmParser : public MCAsmParser {
119120
private:
120121
AsmLexer Lexer;
121-
MCContext &Ctx;
122122
MCStreamer &Out;
123-
const MCAsmInfo &MAI;
124123
SourceMgr &SrcMgr;
125124
SourceMgr::DiagHandlerTy SavedDiagHandler;
126125
void *SavedDiagContext;
@@ -680,8 +679,6 @@ class AsmParser : public MCAsmParser {
680679
bool parseEscapedString(std::string &Data) override;
681680
bool parseAngleBracketString(std::string &Data) override;
682681

683-
const MCExpr *applySpecifier(const MCExpr *E, uint32_t Variant);
684-
685682
// Macro-like directives
686683
MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
687684
void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
@@ -773,7 +770,7 @@ enum { DEFAULT_ADDRSPACE = 0 };
773770

774771
AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
775772
const MCAsmInfo &MAI, unsigned CB = 0)
776-
: Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
773+
: MCAsmParser(Ctx, MAI), Lexer(MAI), Out(Out), SrcMgr(SM),
777774
CurBuffer(CB ? CB : SM.getMainFileID()), MacrosEnabledFlag(true) {
778775
HadError = false;
779776
// Save the old handler.
@@ -1204,7 +1201,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
12041201

12051202
Split = std::make_pair(Identifier, VName);
12061203
}
1207-
} else {
1204+
} else if (Lexer.getAllowAtInIdentifier()) {
12081205
Split = Identifier.split('@');
12091206
}
12101207
} else if (MAI.useParensForSpecifier() &&
@@ -1352,7 +1349,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res) {
13521349
return parseExpression(Res, EndLoc);
13531350
}
13541351

1355-
const MCExpr *AsmParser::applySpecifier(const MCExpr *E, uint32_t Spec) {
1352+
const MCExpr *MCAsmParser::applySpecifier(const MCExpr *E, uint32_t Spec) {
13561353
// Ask the target implementation about this expression first.
13571354
const MCExpr *NewE = getTargetParser().applySpecifier(E, Spec, Ctx);
13581355
if (NewE)
@@ -1443,6 +1440,27 @@ static std::string angleBracketString(StringRef AltMacroStr) {
14431440
return Res;
14441441
}
14451442

1443+
bool MCAsmParser::parseAtSpecifier(const MCExpr *&Res, SMLoc &EndLoc) {
1444+
bool SavedAllowAt = getLexer().getAllowAtInIdentifier();
1445+
getLexer().setAllowAtInIdentifier(true);
1446+
auto _ = make_scope_exit(
1447+
[&]() { getLexer().setAllowAtInIdentifier(SavedAllowAt); });
1448+
if (parseOptionalToken(AsmToken::At)) {
1449+
if (getLexer().isNot(AsmToken::Identifier))
1450+
return TokError("expected specifier following '@'");
1451+
1452+
auto Spec = MAI.getSpecifierForName(getTok().getIdentifier());
1453+
if (!Spec)
1454+
return TokError("invalid specifier '@" + getTok().getIdentifier() + "'");
1455+
1456+
const MCExpr *ModifiedRes = applySpecifier(Res, *Spec);
1457+
if (ModifiedRes)
1458+
Res = ModifiedRes;
1459+
Lex();
1460+
}
1461+
return false;
1462+
}
1463+
14461464
/// Parse an expression and return it.
14471465
///
14481466
/// expr ::= expr &&,|| expr -> lowest.
@@ -1463,8 +1481,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
14631481
// As a special case, we support 'a op b @ modifier' by rewriting the
14641482
// expression to include the modifier. This is inefficient, but in general we
14651483
// expect users to use 'a@modifier op b'.
1466-
if (Ctx.getAsmInfo()->useAtForSpecifier() &&
1467-
parseOptionalToken(AsmToken::At)) {
1484+
if (Lexer.getAllowAtInIdentifier() && parseOptionalToken(AsmToken::At)) {
14681485
if (Lexer.isNot(AsmToken::Identifier))
14691486
return TokError("unexpected symbol modifier following '@'");
14701487

llvm/lib/MC/MCParser/MCAsmParser.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,8 @@ cl::opt<unsigned> AsmMacroMaxNestingDepth(
2727
cl::desc("The maximum nesting depth allowed for assembly macros."));
2828
}
2929

30-
MCAsmParser::MCAsmParser() = default;
30+
MCAsmParser::MCAsmParser(MCContext &Ctx, const MCAsmInfo &MAI)
31+
: Ctx(Ctx), MAI(MAI) {}
3132

3233
MCAsmParser::~MCAsmParser() = default;
3334

llvm/lib/MC/MCParser/MasmParser.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -376,9 +376,7 @@ FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
376376
class MasmParser : public MCAsmParser {
377377
private:
378378
AsmLexer Lexer;
379-
MCContext &Ctx;
380379
MCStreamer &Out;
381-
const MCAsmInfo &MAI;
382380
SourceMgr &SrcMgr;
383381
SourceMgr::DiagHandlerTy SavedDiagHandler;
384382
void *SavedDiagContext;
@@ -973,7 +971,7 @@ enum { DEFAULT_ADDRSPACE = 0 };
973971

974972
MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
975973
const MCAsmInfo &MAI, struct tm TM, unsigned CB)
976-
: Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
974+
: MCAsmParser(Ctx, MAI), Lexer(MAI), Out(Out), SrcMgr(SM),
977975
CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
978976
HadError = false;
979977
// Save the old handler.

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 70 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,15 @@
1818
#include "llvm/ADT/APInt.h"
1919
#include "llvm/ADT/ArrayRef.h"
2020
#include "llvm/ADT/STLExtras.h"
21+
#include "llvm/ADT/ScopeExit.h"
2122
#include "llvm/ADT/SmallSet.h"
2223
#include "llvm/ADT/SmallVector.h"
2324
#include "llvm/ADT/StringExtras.h"
2425
#include "llvm/ADT/StringMap.h"
2526
#include "llvm/ADT/StringRef.h"
2627
#include "llvm/ADT/StringSwitch.h"
2728
#include "llvm/ADT/Twine.h"
29+
#include "llvm/MC/MCAsmInfo.h"
2830
#include "llvm/MC/MCContext.h"
2931
#include "llvm/MC/MCExpr.h"
3032
#include "llvm/MC/MCInst.h"
@@ -180,6 +182,7 @@ class AArch64AsmParser : public MCTargetAsmParser {
180182
bool showMatchError(SMLoc Loc, unsigned ErrCode, uint64_t ErrorInfo,
181183
OperandVector &Operands);
182184

185+
bool parseDataExpr(const MCExpr *&Res) override;
183186
bool parseAuthExpr(const MCExpr *&Res, SMLoc &EndLoc);
184187

185188
bool parseDirectiveArch(SMLoc L);
@@ -335,8 +338,6 @@ class AArch64AsmParser : public MCTargetAsmParser {
335338
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
336339
unsigned Kind) override;
337340

338-
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
339-
340341
static bool classifySymbolRef(const MCExpr *Expr,
341342
AArch64MCExpr::Specifier &ELFSpec,
342343
MCSymbolRefExpr::VariantKind &DarwinRefKind,
@@ -4478,6 +4479,19 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
44784479
if (HasELFModifier)
44794480
ImmVal = AArch64MCExpr::create(ImmVal, RefKind, getContext());
44804481

4482+
SMLoc EndLoc;
4483+
if (getContext().getAsmInfo()->hasSubsectionsViaSymbols()) {
4484+
if (getParser().parseAtSpecifier(ImmVal, EndLoc))
4485+
return true;
4486+
const MCExpr *Term;
4487+
if (parseOptionalToken(AsmToken::Plus)) {
4488+
if (getParser().parseExpression(Term, EndLoc))
4489+
return true;
4490+
ImmVal =
4491+
MCBinaryExpr::create(MCBinaryExpr::Add, ImmVal, Term, getContext());
4492+
}
4493+
}
4494+
44814495
return false;
44824496
}
44834497

@@ -5007,11 +5021,18 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
50075021

50085022
// This was not a register so parse other operands that start with an
50095023
// identifier (like labels) as expressions and create them as immediates.
5010-
const MCExpr *IdVal;
5024+
const MCExpr *IdVal, *Term;
50115025
S = getLoc();
50125026
if (getParser().parseExpression(IdVal))
50135027
return true;
5014-
E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
5028+
if (getParser().parseAtSpecifier(IdVal, E))
5029+
return true;
5030+
if (parseOptionalToken(AsmToken::Plus)) {
5031+
if (getParser().parseExpression(Term, E))
5032+
return true;
5033+
IdVal =
5034+
MCBinaryExpr::create(MCBinaryExpr::Add, IdVal, Term, getContext());
5035+
}
50155036
Operands.push_back(AArch64Operand::CreateImm(IdVal, S, E, getContext()));
50165037

50175038
// Parse an optional shift/extend modifier.
@@ -8086,11 +8107,52 @@ bool AArch64AsmParser::parseDirectiveAeabiAArch64Attr(SMLoc L) {
80868107
return false;
80878108
}
80888109

8089-
bool AArch64AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8090-
// Try @AUTH expressions: they're more complex than the usual symbol variants.
8091-
if (!parseAuthExpr(Res, EndLoc))
8110+
bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) {
8111+
SMLoc EndLoc;
8112+
8113+
if (getParser().parseExpression(Res))
8114+
return true;
8115+
MCAsmParser &Parser = getParser();
8116+
if (!parseOptionalToken(AsmToken::At))
80928117
return false;
8093-
return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8118+
if (getLexer().getKind() != AsmToken::Identifier)
8119+
return Error(getLoc(), "expected relocation specifier");
8120+
8121+
std::string Identifier = Parser.getTok().getIdentifier().lower();
8122+
SMLoc Loc = getLoc();
8123+
Lex();
8124+
if (Identifier == "auth")
8125+
return parseAuthExpr(Res, EndLoc);
8126+
8127+
auto Spec = MCSymbolRefExpr::VK_PLT;
8128+
if (Identifier == "gotpcrel")
8129+
Spec = MCSymbolRefExpr::VK_GOTPCREL;
8130+
else if (Identifier == "plt")
8131+
Spec = MCSymbolRefExpr::VK_PLT;
8132+
else if (Identifier == "got") // Mach-O specific
8133+
Spec = MCSymbolRefExpr::VK_GOT;
8134+
else
8135+
return Error(Loc, "invalid relocation specifier");
8136+
if (auto *SRE = dyn_cast<MCSymbolRefExpr>(Res))
8137+
Res = MCSymbolRefExpr::create(&SRE->getSymbol(), Spec, getContext(),
8138+
SRE->getLoc());
8139+
else
8140+
return Error(Loc, "this relocation specifier must follow a symbol");
8141+
8142+
for (;;) {
8143+
std::optional<MCBinaryExpr::Opcode> Opcode;
8144+
if (parseOptionalToken(AsmToken::Plus))
8145+
Opcode = MCBinaryExpr::Add;
8146+
else if (parseOptionalToken(AsmToken::Minus))
8147+
Opcode = MCBinaryExpr::Sub;
8148+
else
8149+
break;
8150+
const MCExpr *Term;
8151+
if (getParser().parsePrimaryExpr(Term, EndLoc, nullptr))
8152+
return true;
8153+
Res = MCBinaryExpr::create(*Opcode, Res, Term, getContext());
8154+
}
8155+
return false;
80948156
}
80958157

80968158
/// parseAuthExpr
@@ -8100,54 +8162,8 @@ bool AArch64AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
81008162
bool AArch64AsmParser::parseAuthExpr(const MCExpr *&Res, SMLoc &EndLoc) {
81018163
MCAsmParser &Parser = getParser();
81028164
MCContext &Ctx = getContext();
8103-
81048165
AsmToken Tok = Parser.getTok();
81058166

8106-
// Look for '_sym@AUTH' ...
8107-
if (Tok.is(AsmToken::Identifier) && Tok.getIdentifier().ends_with("@AUTH")) {
8108-
StringRef SymName = Tok.getIdentifier().drop_back(strlen("@AUTH"));
8109-
if (SymName.contains('@'))
8110-
return TokError(
8111-
"combination of @AUTH with other modifiers not supported");
8112-
Res = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(SymName), Ctx);
8113-
8114-
Parser.Lex(); // Eat the identifier.
8115-
} else {
8116-
// ... or look for a more complex symbol reference, such as ...
8117-
SmallVector<AsmToken, 6> Tokens;
8118-
8119-
// ... '"_long sym"@AUTH' ...
8120-
if (Tok.is(AsmToken::String))
8121-
Tokens.resize(2);
8122-
// ... or '(_sym + 5)@AUTH'.
8123-
else if (Tok.is(AsmToken::LParen))
8124-
Tokens.resize(6);
8125-
else
8126-
return true;
8127-
8128-
if (Parser.getLexer().peekTokens(Tokens) != Tokens.size())
8129-
return true;
8130-
8131-
// In either case, the expression ends with '@' 'AUTH'.
8132-
if (Tokens[Tokens.size() - 2].isNot(AsmToken::At) ||
8133-
Tokens[Tokens.size() - 1].isNot(AsmToken::Identifier) ||
8134-
Tokens[Tokens.size() - 1].getIdentifier() != "AUTH")
8135-
return true;
8136-
8137-
if (Tok.is(AsmToken::String)) {
8138-
StringRef SymName;
8139-
if (Parser.parseIdentifier(SymName))
8140-
return true;
8141-
Res = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(SymName), Ctx);
8142-
} else {
8143-
if (Parser.parsePrimaryExpr(Res, EndLoc, nullptr))
8144-
return true;
8145-
}
8146-
8147-
Parser.Lex(); // '@'
8148-
Parser.Lex(); // 'AUTH'
8149-
}
8150-
81518167
// At this point, we encountered "<id>@AUTH". There is no fallback anymore.
81528168
if (parseToken(AsmToken::LParen, "expected '('"))
81538169
return true;

llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin(bool IsILP32) {
6161
UsesELFSectionDirectiveForBSS = true;
6262
SupportsDebugInformation = true;
6363
UseDataRegionDirectives = true;
64+
UseAtForSpecifier = false;
6465

6566
ExceptionsType = ExceptionHandling::DwarfCFI;
6667

@@ -105,6 +106,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
105106
Data64bitsDirective = "\t.xword\t";
106107

107108
UseDataRegionDirectives = false;
109+
UseAtForSpecifier = false;
108110

109111
WeakRefDirective = "\t.weak\t";
110112

llvm/test/MC/AArch64/data-directive-specifier.s

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# RUN: llvm-mc -triple=aarch64 -filetype=obj %s | llvm-readobj -r - | FileCheck %s
2-
# RUN: not llvm-mc -triple=aarch64 -filetype=obj %s --defsym ERR=1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
2+
# RUN: not llvm-mc -triple=aarch64 %s --defsym ERR=1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
3+
# RUN: not llvm-mc -triple=aarch64 -filetype=obj %s --defsym OBJERR=1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=OBJERR --implicit-check-not=error:
34

45
.globl g
56
g:
@@ -32,13 +33,21 @@ data1:
3233
.word extern@GOTPCREL-5
3334

3435
.ifdef ERR
35-
# ERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
36-
.word extern@plt - und
36+
# ERR: [[#@LINE+1]]:9: error: this relocation specifier must follow a symbol
37+
.quad 3@plt - .
38+
39+
# ERR: [[#@LINE+1]]:9: error: expected ')'
40+
.quad (l@plt - .)
41+
.endif
3742

43+
.ifdef OBJERR
3844
.quad g@plt - .
3945

4046
.word extern@gotpcrel - .
4147

42-
# ERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
48+
# OBJERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
49+
.word extern@plt - und
50+
51+
# OBJERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
4352
.word extern@gotpcrel - und
4453
.endif

0 commit comments

Comments
 (0)