Skip to content

Commit 3333c28

Browse files
alanzhao1ericastor
authored andcommitted
[llvm-ml] Improve indirect call parsing
In MASM, if a QWORD symbol is passed to a jmp or call instruction in 64-bit mode or a DWORD or WORD symbol is passed in 32-bit mode, then MSVC's assembler recognizes that as an indirect call. Additionally, if the operand is qualified as a ptr, then that should also be an indirect call. Furthermore, in 64-bit mode, such operands are implicitly rip-relative (in fact, MSVC's assembler ml64.exe does not allow explicitly specifying rip as a base register.) To keep this patch managable, this patch does not include: * error messages for wrong operand types (e.g. passing a QWORD in 32-bit mode) * resolving indirect calls if the symbol is declared after it's first use (llvm-ml currently only runs a single pass). * imlementing the extern keyword (required to resolve https://crbug.com/762167.) This patch is likely missing a bunch of edge cases, so please do point them out in the review. Reviewed By: epastor, hans, MaskRay Committed By: epastor (on behalf of ayzhao) Differential Revision: https://reviews.llvm.org/D124413
1 parent a9215ed commit 3333c28

File tree

3 files changed

+249
-24
lines changed

3 files changed

+249
-24
lines changed

llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,9 +1107,9 @@ class X86AsmParser : public MCTargetAsmParser {
11071107
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
11081108
bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
11091109
OperandVector &FinalOperands);
1110-
bool ParseOperand(OperandVector &Operands);
1111-
bool ParseATTOperand(OperandVector &Operands);
1112-
bool ParseIntelOperand(OperandVector &Operands);
1110+
bool parseOperand(OperandVector &Operands, StringRef Name);
1111+
bool parseATTOperand(OperandVector &Operands);
1112+
bool parseIntelOperand(OperandVector &Operands, StringRef Name);
11131113
bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
11141114
InlineAsmIdentifierInfo &Info, SMLoc &End);
11151115
bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
@@ -1736,11 +1736,11 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
17361736
return false;
17371737
}
17381738

1739-
bool X86AsmParser::ParseOperand(OperandVector &Operands) {
1739+
bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
17401740
if (isParsingIntelSyntax())
1741-
return ParseIntelOperand(Operands);
1741+
return parseIntelOperand(Operands, Name);
17421742

1743-
return ParseATTOperand(Operands);
1743+
return parseATTOperand(Operands);
17441744
}
17451745

17461746
bool X86AsmParser::CreateMemForMSInlineAsm(
@@ -2513,7 +2513,7 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
25132513
return false;
25142514
}
25152515

2516-
bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
2516+
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
25172517
MCAsmParser &Parser = getParser();
25182518
const AsmToken &Tok = Parser.getTok();
25192519
SMLoc Start, End;
@@ -2635,25 +2635,49 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
26352635

26362636
// When parsing x64 MS-style assembly, all non-absolute references to a named
26372637
// variable default to RIP-relative.
2638-
if (Parser.isParsingMasm() && is64BitMode() && SM.getElementSize() > 0) {
2639-
Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
2640-
BaseReg, IndexReg, Scale, Start,
2641-
End, Size,
2642-
/*DefaultBaseReg=*/X86::RIP));
2643-
return false;
2638+
unsigned DefaultBaseReg = X86::NoRegister;
2639+
bool MaybeDirectBranchDest = true;
2640+
2641+
if (Parser.isParsingMasm()) {
2642+
bool IsUnconditionalBranch =
2643+
Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
2644+
if (is64BitMode() && SM.getElementSize() > 0) {
2645+
DefaultBaseReg = X86::RIP;
2646+
}
2647+
if (IsUnconditionalBranch) {
2648+
if (PtrInOperand) {
2649+
MaybeDirectBranchDest = false;
2650+
if (is64BitMode())
2651+
DefaultBaseReg = X86::RIP;
2652+
} else if (!BaseReg && !IndexReg && Disp &&
2653+
Disp->getKind() == MCExpr::SymbolRef) {
2654+
if (is64BitMode()) {
2655+
if (SM.getSize() == 8) {
2656+
MaybeDirectBranchDest = false;
2657+
DefaultBaseReg = X86::RIP;
2658+
}
2659+
} else {
2660+
if (SM.getSize() == 4 || SM.getSize() == 2)
2661+
MaybeDirectBranchDest = false;
2662+
}
2663+
}
2664+
}
26442665
}
26452666

2646-
if ((BaseReg || IndexReg || RegNo))
2647-
Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
2648-
BaseReg, IndexReg, Scale, Start,
2649-
End, Size));
2667+
if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
2668+
Operands.push_back(X86Operand::CreateMem(
2669+
getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
2670+
Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2671+
/*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
26502672
else
2651-
Operands.push_back(
2652-
X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size));
2673+
Operands.push_back(X86Operand::CreateMem(
2674+
getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
2675+
/*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2676+
MaybeDirectBranchDest));
26532677
return false;
26542678
}
26552679

2656-
bool X86AsmParser::ParseATTOperand(OperandVector &Operands) {
2680+
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
26572681
MCAsmParser &Parser = getParser();
26582682
switch (getLexer().getKind()) {
26592683
case AsmToken::Dollar: {
@@ -3409,7 +3433,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
34093433

34103434
// Read the operands.
34113435
while (true) {
3412-
if (ParseOperand(Operands))
3436+
if (parseOperand(Operands, Name))
34133437
return true;
34143438
if (HandleAVX512Operand(Operands))
34153439
return true;

llvm/lib/Target/X86/AsmParser/X86Operand.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ struct X86Operand final : public MCParsedAsmOperand {
7272
/// If the memory operand is unsized and there are multiple instruction
7373
/// matches, prefer the one with this size.
7474
unsigned FrontendSize;
75+
76+
/// If false, then this operand must be a memory operand for an indirect
77+
/// branch instruction. Otherwise, this operand may belong to either a
78+
/// direct or indirect branch instruction.
79+
bool MaybeDirectBranchDest;
7580
};
7681

7782
union {
@@ -209,6 +214,10 @@ struct X86Operand final : public MCParsedAsmOperand {
209214
assert(Kind == Memory && "Invalid access!");
210215
return Mem.FrontendSize;
211216
}
217+
bool isMaybeDirectBranchDest() const {
218+
assert(Kind == Memory && "Invalid access!");
219+
return Mem.MaybeDirectBranchDest;
220+
}
212221

213222
bool isToken() const override {return Kind == Token; }
214223

@@ -374,8 +383,9 @@ struct X86Operand final : public MCParsedAsmOperand {
374383

375384
bool isAbsMem() const {
376385
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
377-
!getMemIndexReg() && getMemScale() == 1;
386+
!getMemIndexReg() && getMemScale() == 1 && isMaybeDirectBranchDest();
378387
}
388+
379389
bool isAVX512RC() const{
380390
return isImm();
381391
}
@@ -672,7 +682,7 @@ struct X86Operand final : public MCParsedAsmOperand {
672682
CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
673683
unsigned Size = 0, StringRef SymName = StringRef(),
674684
void *OpDecl = nullptr, unsigned FrontendSize = 0,
675-
bool UseUpRegs = false) {
685+
bool UseUpRegs = false, bool MaybeDirectBranchDest = true) {
676686
auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
677687
Res->Mem.SegReg = 0;
678688
Res->Mem.Disp = Disp;
@@ -683,6 +693,7 @@ struct X86Operand final : public MCParsedAsmOperand {
683693
Res->Mem.Size = Size;
684694
Res->Mem.ModeSize = ModeSize;
685695
Res->Mem.FrontendSize = FrontendSize;
696+
Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
686697
Res->UseUpRegs = UseUpRegs;
687698
Res->SymName = SymName;
688699
Res->OpDecl = OpDecl;
@@ -697,7 +708,8 @@ struct X86Operand final : public MCParsedAsmOperand {
697708
SMLoc EndLoc, unsigned Size = 0,
698709
unsigned DefaultBaseReg = X86::NoRegister,
699710
StringRef SymName = StringRef(), void *OpDecl = nullptr,
700-
unsigned FrontendSize = 0, bool UseUpRegs = false) {
711+
unsigned FrontendSize = 0, bool UseUpRegs = false,
712+
bool MaybeDirectBranchDest = true) {
701713
// We should never just have a displacement, that should be parsed as an
702714
// absolute memory operand.
703715
assert((SegReg || BaseReg || IndexReg || DefaultBaseReg) &&
@@ -716,6 +728,7 @@ struct X86Operand final : public MCParsedAsmOperand {
716728
Res->Mem.Size = Size;
717729
Res->Mem.ModeSize = ModeSize;
718730
Res->Mem.FrontendSize = FrontendSize;
731+
Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
719732
Res->UseUpRegs = UseUpRegs;
720733
Res->SymName = SymName;
721734
Res->OpDecl = OpDecl;
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
; RUN: llvm-ml -m64 -filetype=s %s /Fo - | FileCheck %s --check-prefixes=CHECK-64,CHECK
2+
; RUN: llvm-ml -m32 -filetype=s %s /Fo - | FileCheck %s --check-prefixes=CHECK-32,CHECK
3+
4+
.data
5+
6+
ifdef rax
7+
fn_ref qword 1
8+
else
9+
fn_ref dword 1
10+
endif
11+
12+
fn_ref_word word 2
13+
fn PROC
14+
15+
BranchTargetStruc struc
16+
member0 dword ?
17+
ifdef rax
18+
member1 dword ?
19+
endif
20+
BranchTargetStruc ends
21+
22+
23+
ifdef rax
24+
fn_ref_struc BranchTargetStruc {3, 3}
25+
else
26+
fn_ref_struc BranchTargetStruc {3}
27+
endif
28+
29+
.code
30+
31+
t0:
32+
call fn_ref
33+
jmp fn_ref
34+
; CHECK-LABEL: t0:
35+
; CHECK-64: call qword ptr [rip + fn_ref]
36+
; CHECK-64: jmp qword ptr [rip + fn_ref]
37+
; CHECK-32: call dword ptr [fn_ref]
38+
; CHECK-32: jmp dword ptr [fn_ref]
39+
40+
t1:
41+
call [fn_ref]
42+
jmp [fn_ref]
43+
; CHECK-LABEL: t1:
44+
; CHECK-64: call qword ptr [rip + fn_ref]
45+
; CHECK-64: jmp qword ptr [rip + fn_ref]
46+
; CHECK-32: call dword ptr [fn_ref]
47+
; CHECK-32: jmp dword ptr [fn_ref]
48+
49+
ifdef rax
50+
t2:
51+
call qword ptr [fn_ref]
52+
jmp qword ptr [fn_ref]
53+
; CHECK-64-LABEL: t2:
54+
; CHECK-64: call qword ptr [rip + fn_ref]
55+
; CHECK-64: jmp qword ptr [rip + fn_ref]
56+
else
57+
t2:
58+
call dword ptr [fn_ref]
59+
jmp dword ptr [fn_ref]
60+
; CHECK-32-LABEL: t2:
61+
; CHECK-32: call dword ptr [fn_ref]
62+
; CHECK-32: jmp dword ptr [fn_ref]
63+
64+
t3:
65+
call fn_ref_word
66+
jmp fn_ref_word
67+
; CHECK-32-LABEL: t3:
68+
; CHECK-32: call word ptr [fn_ref_word]
69+
; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
70+
71+
t4:
72+
call [fn_ref_word]
73+
jmp [fn_ref_word]
74+
; CHECK-32-LABEL: t4:
75+
; CHECK-32: call word ptr [fn_ref_word]
76+
; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
77+
78+
t5:
79+
call word ptr [fn_ref_word]
80+
jmp word ptr [fn_ref_word]
81+
; CHECK-32-LABEL: t5:
82+
; CHECK-32: call word ptr [fn_ref_word]
83+
; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
84+
endif
85+
86+
t6:
87+
call t6
88+
jmp t6
89+
; CHECK-LABEL: t6:
90+
; CHECK: call t6
91+
; CHECK-NEXT: jmp t6
92+
93+
t7:
94+
call [t7]
95+
jmp [t7]
96+
; CHECK-LABEL: t7:
97+
; CHECK: call t7
98+
; CHECK-NEXT: jmp t7
99+
100+
ifdef rax
101+
t8:
102+
call qword ptr [t8]
103+
jmp qword ptr [t8]
104+
; CHECK-64-LABEL: t8:
105+
; CHECK-64: call qword ptr [rip + t8]
106+
; CHECK-64-NEXT: jmp qword ptr [rip + t8]
107+
else
108+
t8:
109+
call dword ptr [t8]
110+
jmp dword ptr [t8]
111+
; CHECK-32-LABEL: t8:
112+
; CHECK-32: call dword ptr [t8]
113+
; CHECK-32-NEXT: jmp dword ptr [t8]
114+
endif
115+
116+
t9:
117+
call fn
118+
jmp fn
119+
; CHECK-LABEL: t9:
120+
; CHECK: call fn
121+
; CHECK-NEXT: jmp fn
122+
123+
t10:
124+
call [fn]
125+
jmp [fn]
126+
; CHECK-LABEL: t10:
127+
; CHECK: call fn
128+
; CHECK-NEXT: jmp fn
129+
130+
ifdef rax
131+
t11:
132+
call qword ptr [fn]
133+
jmp qword ptr [fn]
134+
; CHECK-64-LABEL: t11:
135+
; CHECK-64: call qword ptr [rip + fn]
136+
; CHECK-64-NEXT: jmp qword ptr [rip + fn]
137+
else
138+
t11:
139+
call dword ptr [fn]
140+
jmp dword ptr [fn]
141+
; CHECK-32-LABEL: t11:
142+
; CHECK-32: call dword ptr [fn]
143+
; CHECK-32-NEXT: jmp dword ptr [fn]
144+
endif
145+
146+
t12:
147+
call fn_ref_struc
148+
jmp fn_ref_struc
149+
; CHECK-LABEL: t12:
150+
; CHECK-64: call qword ptr [rip + fn_ref_struc]
151+
; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
152+
; CHECK-32: call dword ptr [fn_ref_struc]
153+
; CHECK-32: jmp dword ptr [fn_ref_struc]
154+
155+
t13:
156+
call [fn_ref_struc]
157+
jmp [fn_ref_struc]
158+
; CHECK-LABEL: t13:
159+
; CHECK-64: call qword ptr [rip + fn_ref_struc]
160+
; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
161+
; CHECK-32: call dword ptr [fn_ref_struc]
162+
; CHECK-32: jmp dword ptr [fn_ref_struc]
163+
164+
ifdef rax
165+
t14:
166+
call qword ptr [fn_ref_struc]
167+
jmp qword ptr [fn_ref_struc]
168+
; CHECK-64-LABEL: t14:
169+
; CHECK-64: call qword ptr [rip + fn_ref_struc]
170+
; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
171+
else
172+
t14:
173+
call dword ptr [fn_ref_struc]
174+
jmp dword ptr [fn_ref_struc]
175+
; CHECK-32-LABEL: t14:
176+
; CHECK-32: call dword ptr [fn_ref_struc]
177+
; CHECK-32: jmp dword ptr [fn_ref_struc]
178+
endif
179+
180+
t15:
181+
je t15
182+
; CHECK-LABEL: t15:
183+
; CHECK: je t15
184+
185+
t16:
186+
je [t16];
187+
; CHECK-LABEL: t16:
188+
; CHECK: je t16

0 commit comments

Comments
 (0)