Skip to content

Commit eb3b7dd

Browse files
authored
X86: Fix win64 tail call regression for tail call to loaded pointer (#158055)
Fix regression after 62f2641. The previous patch handled the register case, but the memory case still contained another use of ptr_rc_tailcall, hidden inside the i64mem_TC operand.
1 parent 05a705e commit eb3b7dd

File tree

9 files changed

+76
-11
lines changed

9 files changed

+76
-11
lines changed

llvm/lib/Target/X86/X86AsmPrinter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -478,9 +478,9 @@ static bool isIndirectBranchOrTailCall(const MachineInstr &MI) {
478478
Opc == X86::TAILJMPr64 || Opc == X86::TAILJMPm64 ||
479479
Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri ||
480480
Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNmi ||
481-
Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNmi64 ||
482-
Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TAILJMPr64_REX ||
483-
Opc == X86::TAILJMPm64_REX;
481+
Opc == X86::TCRETURN_WINmi64 || Opc == X86::TCRETURNri64 ||
482+
Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURNri64_ImpCall ||
483+
Opc == X86::TAILJMPr64_REX || Opc == X86::TAILJMPm64_REX;
484484
}
485485

486486
void X86AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {

llvm/lib/Target/X86/X86ExpandPseudo.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
276276
case X86::TCRETURNdi64cc:
277277
case X86::TCRETURNri64:
278278
case X86::TCRETURNri64_ImpCall:
279-
case X86::TCRETURNmi64: {
280-
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
279+
case X86::TCRETURNmi64:
280+
case X86::TCRETURN_WINmi64: {
281+
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64 ||
282+
Opcode == X86::TCRETURN_WINmi64;
281283
MachineOperand &JumpTarget = MBBI->getOperand(0);
282284
MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands
283285
: 1);
@@ -341,7 +343,8 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
341343
MIB.addImm(MBBI->getOperand(2).getImm());
342344
}
343345

344-
} else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
346+
} else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64 ||
347+
Opcode == X86::TCRETURN_WINmi64) {
345348
unsigned Op = (Opcode == X86::TCRETURNmi)
346349
? X86::TAILJMPm
347350
: (IsX64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2402,7 +2402,7 @@ static bool isTailCallOpcode(unsigned Opc) {
24022402
Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi ||
24032403
Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
24042404
Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
2405-
Opc == X86::TCRETURNmi64;
2405+
Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURN_WINmi64;
24062406
}
24072407

24082408
void X86FrameLowering::emitEpilogue(MachineFunction &MF,

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,15 +1364,19 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
13641364
// There wouldn't be enough scratch registers for base+index.
13651365
def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
13661366
(TCRETURNmi64 addr:$dst, timm:$off)>,
1367-
Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
1367+
Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls]>;
1368+
1369+
def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
1370+
(TCRETURN_WINmi64 addr:$dst, timm:$off)>,
1371+
Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls]>;
13681372

13691373
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
13701374
(INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>,
1371-
Requires<[In64BitMode, UseIndirectThunkCalls]>;
1375+
Requires<[In64BitMode, IsNotWin64CCFunc, UseIndirectThunkCalls]>;
13721376

13731377
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
13741378
(INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, timm:$off)>,
1375-
Requires<[Not64BitMode, UseIndirectThunkCalls]>;
1379+
Requires<[Not64BitMode, IsNotWin64CCFunc, UseIndirectThunkCalls]>;
13761380

13771381
def : Pat<(X86tcret (i64 tglobaladdr:$dst), timm:$off),
13781382
(TCRETURNdi64 tglobaladdr:$dst, timm:$off)>,
@@ -2215,7 +2219,7 @@ let Predicates = [HasZU] in {
22152219
def : Pat<(i64 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
22162220
(SUBREG_TO_REG (i64 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>;
22172221
}
2218-
2222+
22192223
// mul reg, imm
22202224
def : Pat<(mul GR16:$src1, imm:$src2),
22212225
(IMUL16rri GR16:$src1, imm:$src2)>;

llvm/lib/Target/X86/X86InstrControl.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
372372
def TCRETURNmi64 : PseudoI<(outs),
373373
(ins i64mem_TC:$dst, i32imm:$offset),
374374
[]>, Sched<[WriteJumpLd]>;
375+
def TCRETURN_WINmi64 : PseudoI<(outs),
376+
(ins i64mem_w64TC:$dst, i32imm:$offset),
377+
[]>, Sched<[WriteJumpLd]>;
375378

376379
def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_brtarget:$dst),
377380
[]>, Sched<[WriteJump]>;

llvm/lib/Target/X86/X86InstrOperands.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,11 @@ def i64mem_TC : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64> {
141141
ptr_rc_tailcall, i32imm, SEGMENT_REG);
142142
}
143143

144+
def i64mem_w64TC : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64> {
145+
let MIOperandInfo = (ops GR64_TCW64, i8imm,
146+
GR64_TCW64, i32imm, SEGMENT_REG);
147+
}
148+
144149
// Special parser to detect 16-bit mode to select 16-bit displacement.
145150
def X86AbsMemMode16AsmOperand : AsmOperandClass {
146151
let Name = "AbsMemMode16";

llvm/lib/Target/X86/X86RegisterInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,7 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg(
10101010
case X86::TCRETURNri64:
10111011
case X86::TCRETURNri64_ImpCall:
10121012
case X86::TCRETURNmi64:
1013+
case X86::TCRETURN_WINmi64:
10131014
case X86::EH_RETURN:
10141015
case X86::EH_RETURN64: {
10151016
LiveRegUnits LRU(*this);

llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,7 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
893893
case X86::TAILJMPm64_REX:
894894
case X86::TAILJMPm:
895895
case X86::TCRETURNmi64:
896+
case X86::TCRETURN_WINmi64:
896897
case X86::TCRETURNmi: {
897898
// Use the generic unfold logic now that we know we're dealing with
898899
// expected instructions.
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=x86_64-unknown-windows-gnu < %s | FileCheck %s
3+
4+
; Check calling convention is correct for win64 when doing a tailcall
5+
; for a pointer loaded from memory.
6+
7+
declare void @foo(i64, ptr)
8+
9+
define void @do_tailcall(ptr %objp) nounwind {
10+
; CHECK-LABEL: do_tailcall:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: pushq %rsi
13+
; CHECK-NEXT: subq $32, %rsp
14+
; CHECK-NEXT: movq %rcx, %rsi
15+
; CHECK-NEXT: xorl %ecx, %ecx
16+
; CHECK-NEXT: xorl %edx, %edx
17+
; CHECK-NEXT: callq foo
18+
; CHECK-NEXT: xorl %ecx, %ecx
19+
; CHECK-NEXT: movq %rsi, %rax
20+
; CHECK-NEXT: addq $32, %rsp
21+
; CHECK-NEXT: popq %rsi
22+
; CHECK-NEXT: rex64 jmpq *(%rax) # TAILCALL
23+
tail call void @foo(i64 0, ptr null)
24+
%fptr = load ptr, ptr %objp, align 8
25+
tail call void %fptr(ptr null)
26+
ret void
27+
}
28+
29+
; Make sure aliases of ccc are also treated as win64 functions
30+
define fastcc void @do_tailcall_fastcc(ptr %objp) nounwind {
31+
; CHECK-LABEL: do_tailcall_fastcc:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: pushq %rsi
34+
; CHECK-NEXT: subq $32, %rsp
35+
; CHECK-NEXT: movq %rcx, %rsi
36+
; CHECK-NEXT: xorl %ecx, %ecx
37+
; CHECK-NEXT: xorl %edx, %edx
38+
; CHECK-NEXT: callq foo
39+
; CHECK-NEXT: xorl %ecx, %ecx
40+
; CHECK-NEXT: movq %rsi, %rax
41+
; CHECK-NEXT: addq $32, %rsp
42+
; CHECK-NEXT: popq %rsi
43+
; CHECK-NEXT: rex64 jmpq *(%rax) # TAILCALL
44+
tail call void @foo(i64 0, ptr null)
45+
%fptr = load ptr, ptr %objp, align 8
46+
tail call fastcc void %fptr(ptr null)
47+
ret void
48+
}

0 commit comments

Comments
 (0)