Skip to content

Commit 74b8d74

Browse files
committed
[X86] Fix ABI for passing after i128
If we're passing an i128 value and we no longer have enough argument registers (only r9 unallocated), the value gets passed via the stack. However, r9 is still allocated as a shadow register, which means that a following i64 argument will not use it. This doesn't match the x86-64 psABI. Fix this by marking i128 arguments as requiring consecutive registers, and then adding a custom CC lowering that will allocate both parts of the i128 at the same time, either to registers or to the stack, without reserving a shadow register. Fixes #123935.
1 parent 6fe0fc6 commit 74b8d74

File tree

14 files changed

+132
-88
lines changed

14 files changed

+132
-88
lines changed

llvm/lib/Target/X86/X86CallingConv.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,5 +340,41 @@ static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
340340
return false;
341341
}
342342

343+
/// Special handling for i128: Either allocate the value to two consecutive
344+
/// i64 registers, or to the stack. Do not partially allocate in registers,
345+
/// and do not reserve any registers when allocating to the stack.
346+
static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
347+
CCValAssign::LocInfo &LocInfo,
348+
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
349+
assert(ValVT == MVT::i64 && "Should have i64 parts");
350+
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
351+
PendingMembers.push_back(
352+
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
353+
354+
if (!ArgFlags.isInConsecutiveRegsLast())
355+
return true;
356+
357+
unsigned NumRegs = PendingMembers.size();
358+
assert(NumRegs == 2 && "Should have two parts");
359+
360+
MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9};
361+
ArrayRef<MCPhysReg> Allocated = State.AllocateRegBlock(Regs, NumRegs);
362+
if (!Allocated.empty()) {
363+
for (const auto &[Pending, Reg] : zip(PendingMembers, Allocated)) {
364+
Pending.convertToReg(Reg);
365+
State.addLoc(Pending);
366+
}
367+
} else {
368+
int64_t Offset = State.AllocateStack(8, Align(16));
369+
for (auto &Pending : PendingMembers) {
370+
Pending.convertToMem(Offset);
371+
State.addLoc(Pending);
372+
Offset += 8;
373+
}
374+
}
375+
PendingMembers.clear();
376+
return true;
377+
}
378+
343379
// Provides entry points of CC_X86 and RetCC_X86.
344380
#include "X86GenCallingConv.inc"

llvm/lib/Target/X86/X86CallingConv.td

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -548,11 +548,9 @@ def CC_X86_64_C : CallingConv<[
548548
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
549549

550550
// i128 can be either passed in two i64 registers, or on the stack, but
551-
// not split across register and stack. As such, do not allow using R9
552-
// for a split i64.
551+
// not split across register and stack. Handle this with a custom function.
553552
CCIfType<[i64],
554-
CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
555-
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>,
553+
CCIfConsecutiveRegs<CCCustom<"CC_X86_64_I128">>>,
556554

557555
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
558556

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,6 +1604,10 @@ namespace llvm {
16041604
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
16051605
unsigned &NumIntermediates, MVT &RegisterVT) const override;
16061606

1607+
bool functionArgumentNeedsConsecutiveRegisters(
1608+
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
1609+
const DataLayout &DL) const override;
1610+
16071611
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
16081612

16091613
bool supportSwiftError() const override;

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,14 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
233233
return VT.changeVectorElementTypeToInteger();
234234
}
235235

236+
bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
    const DataLayout &DL) const {
  // Only a 128-bit integer asks for the consecutive-register treatment: its
  // two i64 halves must either land in adjacent registers or go to the stack
  // together. The calling convention, variadic flag and data layout do not
  // influence the answer.
  const bool IsI128 = Ty->isIntegerTy(128);
  return IsI128;
}
243+
236244
/// Helper for getByValTypeAlignment to determine
237245
/// the desired ByVal argument alignment.
238246
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {

llvm/test/CodeGen/X86/addcarry.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind {
4949
; CHECK-LABEL: add256:
5050
; CHECK: # %bb.0: # %entry
5151
; CHECK-NEXT: movq %rdi, %rax
52-
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
52+
; CHECK-NEXT: addq %r9, %rsi
5353
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
5454
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
5555
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8

llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,15 @@ define <2 x i128> @flag_copy_2(<2 x i128> %x, <2 x i128> %y) nounwind {
3131
; CHECK-NEXT: movq %r8, %rdi
3232
; CHECK-NEXT: {nf} sarq $63, %rdi
3333
; CHECK-NEXT: cmovoq %rdi, %rcx
34-
; CHECK-NEXT: movabsq $-9223372036854775808, %r9 # imm = 0x8000000000000000
35-
; CHECK-NEXT: {nf} xorq %r9, %rdi
34+
; CHECK-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
35+
; CHECK-NEXT: {nf} xorq %r10, %rdi
3636
; CHECK-NEXT: cmovnoq %r8, %rdi
37-
; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi
37+
; CHECK-NEXT: subq %r9, %rsi
3838
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
3939
; CHECK-NEXT: movq %rdx, %r8
4040
; CHECK-NEXT: {nf} sarq $63, %r8
4141
; CHECK-NEXT: cmovoq %r8, %rsi
42-
; CHECK-NEXT: {nf} xorq %r9, %r8
42+
; CHECK-NEXT: {nf} xorq %r10, %r8
4343
; CHECK-NEXT: cmovnoq %rdx, %r8
4444
; CHECK-NEXT: movq %rcx, 16(%rax)
4545
; CHECK-NEXT: movq %rsi, (%rax)

llvm/test/CodeGen/X86/avgflooru-i128.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
119119
; CHECK-LABEL: avgflooru_i128_vec:
120120
; CHECK: # %bb.0: # %start
121121
; CHECK-NEXT: movq %rdi, %rax
122-
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
122+
; CHECK-NEXT: addq %r9, %rsi
123123
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
124124
; CHECK-NEXT: setb %dil
125125
; CHECK-NEXT: movzbl %dil, %edi

llvm/test/CodeGen/X86/fmuladd-soft-float.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,30 +1555,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
15551555
; SOFT-FLOAT-64-NEXT: .cfi_offset %r14, -32
15561556
; SOFT-FLOAT-64-NEXT: .cfi_offset %r15, -24
15571557
; SOFT-FLOAT-64-NEXT: .cfi_offset %rbp, -16
1558+
; SOFT-FLOAT-64-NEXT: movq %r9, %rbp
15581559
; SOFT-FLOAT-64-NEXT: movq %rcx, %r14
15591560
; SOFT-FLOAT-64-NEXT: movq %rdx, %r15
1560-
; SOFT-FLOAT-64-NEXT: movq %rsi, %r12
1561+
; SOFT-FLOAT-64-NEXT: movq %rsi, %r13
15611562
; SOFT-FLOAT-64-NEXT: movq %rdi, %rbx
1562-
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
15631563
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15641564
; SOFT-FLOAT-64-NEXT: movq %r8, %rdi
15651565
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
1566-
; SOFT-FLOAT-64-NEXT: movq %rax, %r13
1566+
; SOFT-FLOAT-64-NEXT: movq %rax, %r12
15671567
; SOFT-FLOAT-64-NEXT: movq %r14, %rdi
1568-
; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
1568+
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15691569
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
15701570
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
15711571
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
15721572
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15731573
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
15741574
; SOFT-FLOAT-64-NEXT: movq %rax, %r15
1575-
; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
1576-
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
1575+
; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
1576+
; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
15771577
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
15781578
; SOFT-FLOAT-64-NEXT: movq %rax, %rdi
15791579
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15801580
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
1581-
; SOFT-FLOAT-64-NEXT: movq %rax, %r12
1581+
; SOFT-FLOAT-64-NEXT: movq %rax, %r13
15821582
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
15831583
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15841584
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
@@ -1587,13 +1587,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
15871587
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15881588
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
15891589
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
1590-
; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
1590+
; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
15911591
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15921592
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
15931593
; SOFT-FLOAT-64-NEXT: movq %rax, 24(%rbx)
15941594
; SOFT-FLOAT-64-NEXT: movq %r14, 16(%rbx)
15951595
; SOFT-FLOAT-64-NEXT: movq %r15, 8(%rbx)
1596-
; SOFT-FLOAT-64-NEXT: movq %r12, (%rbx)
1596+
; SOFT-FLOAT-64-NEXT: movq %r13, (%rbx)
15971597
; SOFT-FLOAT-64-NEXT: movq %rbx, %rax
15981598
; SOFT-FLOAT-64-NEXT: addq $8, %rsp
15991599
; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56
@@ -1633,30 +1633,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
16331633
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r14, -32
16341634
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r15, -24
16351635
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbp, -16
1636+
; SOFT-FLOAT-64-FMA-NEXT: movq %r9, %rbp
16361637
; SOFT-FLOAT-64-FMA-NEXT: movq %rcx, %r14
16371638
; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %r15
1638-
; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r12
1639+
; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r13
16391640
; SOFT-FLOAT-64-FMA-NEXT: movq %rdi, %rbx
1640-
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rbp
16411641
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16421642
; SOFT-FLOAT-64-FMA-NEXT: movq %r8, %rdi
16431643
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
1644-
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
1644+
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
16451645
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, %rdi
1646-
; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
1646+
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16471647
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
16481648
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
16491649
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
16501650
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16511651
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
16521652
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r15
1653-
; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
1654-
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
1653+
; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
1654+
; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
16551655
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
16561656
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi
16571657
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16581658
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
1659-
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
1659+
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
16601660
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
16611661
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16621662
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
@@ -1665,13 +1665,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
16651665
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16661666
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
16671667
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
1668-
; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
1668+
; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
16691669
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16701670
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
16711671
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, 24(%rbx)
16721672
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, 16(%rbx)
16731673
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, 8(%rbx)
1674-
; SOFT-FLOAT-64-FMA-NEXT: movq %r12, (%rbx)
1674+
; SOFT-FLOAT-64-FMA-NEXT: movq %r13, (%rbx)
16751675
; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rax
16761676
; SOFT-FLOAT-64-FMA-NEXT: addq $8, %rsp
16771677
; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56
@@ -1711,30 +1711,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
17111711
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r14, -32
17121712
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r15, -24
17131713
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbp, -16
1714+
; SOFT-FLOAT-64-FMA4-NEXT: movq %r9, %rbp
17141715
; SOFT-FLOAT-64-FMA4-NEXT: movq %rcx, %r14
17151716
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %r15
1716-
; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r12
1717+
; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r13
17171718
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdi, %rbx
1718-
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rbp
17191719
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17201720
; SOFT-FLOAT-64-FMA4-NEXT: movq %r8, %rdi
17211721
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
1722-
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
1722+
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
17231723
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, %rdi
1724-
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
1724+
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17251725
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
17261726
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
17271727
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
17281728
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17291729
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
17301730
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r15
1731-
; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
1732-
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
1731+
; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
1732+
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
17331733
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
17341734
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi
17351735
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17361736
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
1737-
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
1737+
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
17381738
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
17391739
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17401740
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
@@ -1743,13 +1743,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
17431743
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17441744
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
17451745
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
1746-
; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
1746+
; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
17471747
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17481748
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
17491749
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, 24(%rbx)
17501750
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, 16(%rbx)
17511751
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, 8(%rbx)
1752-
; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, (%rbx)
1752+
; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, (%rbx)
17531753
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rax
17541754
; SOFT-FLOAT-64-FMA4-NEXT: addq $8, %rsp
17551755
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56

llvm/test/CodeGen/X86/i128-abi.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ define i128 @on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5) {
2222
define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) {
2323
; CHECK-LABEL: trailing_arg_on_stack:
2424
; CHECK: # %bb.0:
25-
; CHECK-NEXT: movq 24(%rsp), %rax
25+
; CHECK-NEXT: movq %r9, %rax
2626
; CHECK-NEXT: retq
2727
ret i64 %a6
2828
}
@@ -69,20 +69,18 @@ define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind {
6969
; CHECK-LABEL: call_trailing_arg_on_stack:
7070
; CHECK: # %bb.0:
7171
; CHECK-NEXT: pushq %rax
72-
; CHECK-NEXT: movq %rdx, %rax
73-
; CHECK-NEXT: movq %rsi, %r9
72+
; CHECK-NEXT: movq %rdx, %r9
73+
; CHECK-NEXT: movq %rsi, %rax
7474
; CHECK-NEXT: movq %rdi, %r10
75-
; CHECK-NEXT: subq $8, %rsp
7675
; CHECK-NEXT: movl $1, %esi
7776
; CHECK-NEXT: movl $2, %edx
7877
; CHECK-NEXT: movl $3, %ecx
7978
; CHECK-NEXT: movl $4, %r8d
8079
; CHECK-NEXT: xorl %edi, %edi
8180
; CHECK-NEXT: pushq %rax
82-
; CHECK-NEXT: pushq %r9
8381
; CHECK-NEXT: pushq %r10
8482
; CHECK-NEXT: callq trailing_arg_on_stack@PLT
85-
; CHECK-NEXT: addq $32, %rsp
83+
; CHECK-NEXT: addq $16, %rsp
8684
; CHECK-NEXT: popq %rax
8785
; CHECK-NEXT: retq
8886
call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y)

llvm/test/CodeGen/X86/sadd_sat_vec.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1795,27 +1795,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
17951795
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
17961796
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
17971797
; SSE-NEXT: seto %dil
1798-
; SSE-NEXT: movq %r8, %r9
1799-
; SSE-NEXT: sarq $63, %r9
1798+
; SSE-NEXT: movq %r8, %r10
1799+
; SSE-NEXT: sarq $63, %r10
18001800
; SSE-NEXT: testb %dil, %dil
1801-
; SSE-NEXT: cmovneq %r9, %rcx
1802-
; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
1803-
; SSE-NEXT: xorq %r10, %r9
1801+
; SSE-NEXT: cmovneq %r10, %rcx
1802+
; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
1803+
; SSE-NEXT: xorq %r11, %r10
18041804
; SSE-NEXT: testb %dil, %dil
1805-
; SSE-NEXT: cmoveq %r8, %r9
1806-
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi
1805+
; SSE-NEXT: cmoveq %r8, %r10
1806+
; SSE-NEXT: addq %r9, %rsi
18071807
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
18081808
; SSE-NEXT: seto %dil
18091809
; SSE-NEXT: movq %rdx, %r8
18101810
; SSE-NEXT: sarq $63, %r8
18111811
; SSE-NEXT: testb %dil, %dil
18121812
; SSE-NEXT: cmovneq %r8, %rsi
1813-
; SSE-NEXT: xorq %r10, %r8
1813+
; SSE-NEXT: xorq %r11, %r8
18141814
; SSE-NEXT: testb %dil, %dil
18151815
; SSE-NEXT: cmoveq %rdx, %r8
18161816
; SSE-NEXT: movq %rcx, 16(%rax)
18171817
; SSE-NEXT: movq %rsi, (%rax)
1818-
; SSE-NEXT: movq %r9, 24(%rax)
1818+
; SSE-NEXT: movq %r10, 24(%rax)
18191819
; SSE-NEXT: movq %r8, 8(%rax)
18201820
; SSE-NEXT: retq
18211821
;
@@ -1825,27 +1825,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
18251825
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
18261826
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
18271827
; AVX-NEXT: seto %dil
1828-
; AVX-NEXT: movq %r8, %r9
1829-
; AVX-NEXT: sarq $63, %r9
1828+
; AVX-NEXT: movq %r8, %r10
1829+
; AVX-NEXT: sarq $63, %r10
18301830
; AVX-NEXT: testb %dil, %dil
1831-
; AVX-NEXT: cmovneq %r9, %rcx
1832-
; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
1833-
; AVX-NEXT: xorq %r10, %r9
1831+
; AVX-NEXT: cmovneq %r10, %rcx
1832+
; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
1833+
; AVX-NEXT: xorq %r11, %r10
18341834
; AVX-NEXT: testb %dil, %dil
1835-
; AVX-NEXT: cmoveq %r8, %r9
1836-
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi
1835+
; AVX-NEXT: cmoveq %r8, %r10
1836+
; AVX-NEXT: addq %r9, %rsi
18371837
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
18381838
; AVX-NEXT: seto %dil
18391839
; AVX-NEXT: movq %rdx, %r8
18401840
; AVX-NEXT: sarq $63, %r8
18411841
; AVX-NEXT: testb %dil, %dil
18421842
; AVX-NEXT: cmovneq %r8, %rsi
1843-
; AVX-NEXT: xorq %r10, %r8
1843+
; AVX-NEXT: xorq %r11, %r8
18441844
; AVX-NEXT: testb %dil, %dil
18451845
; AVX-NEXT: cmoveq %rdx, %r8
18461846
; AVX-NEXT: movq %rcx, 16(%rax)
18471847
; AVX-NEXT: movq %rsi, (%rax)
1848-
; AVX-NEXT: movq %r9, 24(%rax)
1848+
; AVX-NEXT: movq %r10, 24(%rax)
18491849
; AVX-NEXT: movq %r8, 8(%rax)
18501850
; AVX-NEXT: retq
18511851
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)

0 commit comments

Comments
 (0)