Skip to content

Commit 0cf6688

Browse files
authored
MC: Better handle backslash-escaped symbols (#158780)
The MCContext::getOrCreateSymbol change in #138817 was a workaround. With #158106, we can replace the `getOrCreateSymbol` calls in llvm/lib/MC/MCParser with `parseSymbol` to handle backslash-escaped symbols.
1 parent e75e28a commit 0cf6688

File tree

12 files changed

+64
-47
lines changed

12 files changed

+64
-47
lines changed

llvm/include/llvm/MC/MCContext.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,10 @@ class MCContext {
484484
/// \param Name - The symbol name, which must be unique across all symbols.
485485
LLVM_ABI MCSymbol *getOrCreateSymbol(const Twine &Name);
486486

487+
/// Variant of getOrCreateSymbol that handles backslash-escaped symbols.
488+
/// For example, parse "a\"b\\" as a"b\.
489+
LLVM_ABI MCSymbol *parseSymbol(const Twine &Name);
490+
487491
/// Gets a symbol that will be defined to the final stack offset of a local
488492
/// variable after codegen.
489493
///

llvm/lib/MC/MCContext.cpp

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -203,27 +203,6 @@ MCInst *MCContext::createMCInst() {
203203
MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
204204
SmallString<128> NameSV;
205205
StringRef NameRef = Name.toStringRef(NameSV);
206-
if (NameRef.contains('\\')) {
207-
NameSV = NameRef;
208-
size_t S = 0;
209-
// Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
210-
// other characters following \\, which we do not implement due to code
211-
// structure.
212-
for (size_t I = 0, E = NameSV.size(); I != E; ++I) {
213-
char C = NameSV[I];
214-
if (C == '\\' && I + 1 != E) {
215-
switch (NameSV[I + 1]) {
216-
case '"':
217-
case '\\':
218-
C = NameSV[++I];
219-
break;
220-
}
221-
}
222-
NameSV[S++] = C;
223-
}
224-
NameSV.resize(S);
225-
NameRef = NameSV;
226-
}
227206

228207
assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
229208

@@ -244,6 +223,34 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
244223
return Entry.second.Symbol;
245224
}
246225

226+
MCSymbol *MCContext::parseSymbol(const Twine &Name) {
227+
SmallString<128> SV;
228+
StringRef NameRef = Name.toStringRef(SV);
229+
if (NameRef.contains('\\')) {
230+
SV = NameRef;
231+
size_t S = 0;
232+
// Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
233+
// other characters following \\, which we do not implement due to code
234+
// structure.
235+
for (size_t I = 0, E = SV.size(); I != E; ++I) {
236+
char C = SV[I];
237+
if (C == '\\' && I + 1 != E) {
238+
switch (SV[I + 1]) {
239+
case '"':
240+
case '\\':
241+
C = SV[++I];
242+
break;
243+
}
244+
}
245+
SV[S++] = C;
246+
}
247+
SV.resize(S);
248+
NameRef = SV;
249+
}
250+
251+
return getOrCreateSymbol(NameRef);
252+
}
253+
247254
MCSymbol *MCContext::getOrCreateFrameAllocSymbol(const Twine &FuncName,
248255
unsigned Idx) {
249256
return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName +

llvm/lib/MC/MCParser/AsmParser.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,8 +1213,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
12131213

12141214
MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
12151215
if (!Sym)
1216-
Sym = getContext().getOrCreateSymbol(MAI.isHLASM() ? SymbolName.upper()
1217-
: SymbolName);
1216+
Sym = getContext().parseSymbol(MAI.isHLASM() ? SymbolName.upper()
1217+
: SymbolName);
12181218

12191219
// If this is an absolute variable reference, substitute it now to preserve
12201220
// semantics in the face of reassignment.
@@ -1845,7 +1845,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
18451845
RewrittenLabel);
18461846
IDVal = RewrittenLabel;
18471847
}
1848-
Sym = getContext().getOrCreateSymbol(IDVal);
1848+
Sym = getContext().parseSymbol(IDVal);
18491849
} else
18501850
Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
18511851
// End of Labels should be treated as end of line for lexing
@@ -4885,7 +4885,7 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
48854885
if (discardLTOSymbol(Name))
48864886
return false;
48874887

4888-
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4888+
MCSymbol *Sym = getContext().parseSymbol(Name);
48894889

48904890
// Assembler local symbols don't make any sense here, except for directives
48914891
// that the symbol should be tagged.
@@ -6142,7 +6142,7 @@ bool HLASMAsmParser::parseAsHLASMLabel(ParseStatementInfo &Info,
61426142
return Error(LabelLoc,
61436143
"Cannot have just a label for an HLASM inline asm statement");
61446144

6145-
MCSymbol *Sym = getContext().getOrCreateSymbol(
6145+
MCSymbol *Sym = getContext().parseSymbol(
61466146
getContext().getAsmInfo()->isHLASM() ? LabelVal.upper() : LabelVal);
61476147

61486148
// Emit the label.
@@ -6270,7 +6270,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
62706270
Parser.getStreamer().emitValueToOffset(Value, 0, EqualLoc);
62716271
return false;
62726272
} else
6273-
Sym = Parser.getContext().getOrCreateSymbol(Name);
6273+
Sym = Parser.getContext().parseSymbol(Name);
62746274

62756275
Sym->setRedefinable(allow_redef);
62766276

llvm/lib/MC/MCParser/COFFMasmParser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,8 +510,8 @@ bool COFFMasmParser::parseDirectiveAlias(StringRef Directive, SMLoc Loc) {
510510
getParser().parseAngleBracketString(ActualName))
511511
return Error(getTok().getLoc(), "expected <actualName>");
512512

513-
MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName);
514-
MCSymbol *Actual = getContext().getOrCreateSymbol(ActualName);
513+
MCSymbol *Alias = getContext().parseSymbol(AliasName);
514+
MCSymbol *Actual = getContext().parseSymbol(ActualName);
515515

516516
getStreamer().emitWeakReference(Alias, Actual);
517517

llvm/lib/MC/MCParser/ELFAsmParser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ bool ELFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
163163
continue;
164164
}
165165

166-
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
166+
MCSymbol *Sym = getContext().parseSymbol(Name);
167167

168168
getStreamer().emitSymbolAttribute(Sym, Attr);
169169

llvm/lib/MC/MCParser/MCAsmParser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ bool MCAsmParser::parseSymbol(MCSymbol *&Res) {
168168
if (parseIdentifier(Name))
169169
return true;
170170

171-
Res = getContext().getOrCreateSymbol(Name);
171+
Res = getContext().parseSymbol(Name);
172172
return false;
173173
}
174174

llvm/lib/MC/MCParser/MCAsmParserExtension.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ bool MCAsmParserExtension::parseDirectiveCGProfile(StringRef, SMLoc) {
5050
if (getLexer().isNot(AsmToken::EndOfStatement))
5151
return TokError("unexpected token in directive");
5252

53-
MCSymbol *FromSym = getContext().getOrCreateSymbol(From);
54-
MCSymbol *ToSym = getContext().getOrCreateSymbol(To);
53+
MCSymbol *FromSym = getContext().parseSymbol(From);
54+
MCSymbol *ToSym = getContext().parseSymbol(To);
5555

5656
getStreamer().emitCGProfileEntry(
5757
MCSymbolRefExpr::create(FromSym, getContext(), FromLoc),

llvm/lib/MC/MCParser/MasmParser.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,7 +1480,7 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
14801480
auto VarIt = Variables.find(SymbolName.lower());
14811481
if (VarIt != Variables.end())
14821482
SymbolName = VarIt->second.Name;
1483-
Sym = getContext().getOrCreateSymbol(SymbolName);
1483+
Sym = getContext().parseSymbol(SymbolName);
14841484
}
14851485

14861486
// If this is an absolute variable reference, substitute it now to preserve
@@ -1965,7 +1965,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
19651965
if (IDVal == "@@") {
19661966
Sym = Ctx.createDirectionalLocalSymbol(0);
19671967
} else {
1968-
Sym = getContext().getOrCreateSymbol(IDVal);
1968+
Sym = getContext().parseSymbol(IDVal);
19691969
}
19701970

19711971
// End of Labels should be treated as end of line for lexing
@@ -3009,8 +3009,7 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
30093009
return false;
30103010
}
30113011

3012-
auto *Sym =
3013-
static_cast<MCSymbolCOFF *>(getContext().getOrCreateSymbol(Var.Name));
3012+
auto *Sym = static_cast<MCSymbolCOFF *>(getContext().parseSymbol(Var.Name));
30143013
const MCConstantExpr *PrevValue =
30153014
Sym->isVariable()
30163015
? dyn_cast_or_null<MCConstantExpr>(Sym->getVariableValue())
@@ -3318,7 +3317,7 @@ bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
33183317
StringRef Name, SMLoc NameLoc) {
33193318
if (StructInProgress.empty()) {
33203319
// Initialize named data value.
3321-
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3320+
MCSymbol *Sym = getContext().parseSymbol(Name);
33223321
getStreamer().emitLabel(Sym);
33233322
unsigned Count;
33243323
if (emitIntegralValues(Size, &Count))
@@ -3509,7 +3508,7 @@ bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
35093508
SMLoc NameLoc) {
35103509
if (StructInProgress.empty()) {
35113510
// Initialize named data value.
3512-
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3511+
MCSymbol *Sym = getContext().parseSymbol(Name);
35133512
getStreamer().emitLabel(Sym);
35143513
unsigned Count;
35153514
if (emitRealValues(Semantics, &Count))
@@ -4003,7 +4002,7 @@ bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
40034002
SMLoc DirLoc, StringRef Name) {
40044003
if (StructInProgress.empty()) {
40054004
// Initialize named data value.
4006-
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4005+
MCSymbol *Sym = getContext().parseSymbol(Name);
40074006
getStreamer().emitLabel(Sym);
40084007
unsigned Count;
40094008
if (emitStructValues(Structure, &Count))

llvm/lib/MC/MCParser/WasmAsmParser.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,7 @@ class WasmAsmParser : public MCAsmParserExtension {
240240
return error("Expected label after .type directive, got: ",
241241
Lexer->getTok());
242242
auto *WasmSym = static_cast<MCSymbolWasm *>(
243-
getStreamer().getContext().getOrCreateSymbol(
244-
Lexer->getTok().getString()));
243+
getStreamer().getContext().parseSymbol(Lexer->getTok().getString()));
245244
Lex();
246245
if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
247246
Lexer->is(AsmToken::Identifier)))
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
; RUN: llc < %s -mtriple=x86_64 -relocation-model=pic | FileCheck %s
2+
3+
; CHECK: .globl "\\\""
4+
; CHECK-NEXT: "\\\"":
5+
@"\\\22" = constant i8 0

0 commit comments

Comments
 (0)