36 changes: 36 additions & 0 deletions llvm/lib/Target/X86/X86CallingConv.cpp
@@ -340,5 +340,41 @@ static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return false;
}

/// Special handling for i128: Either allocate the value to two consecutive
/// i64 registers, or to the stack. Do not partially allocate in registers,
/// and do not reserve any registers when allocating to the stack.
static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
assert(ValVT == MVT::i64 && "Should have i64 parts");
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
PendingMembers.push_back(
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));

if (!ArgFlags.isInConsecutiveRegsLast())
return true;

unsigned NumRegs = PendingMembers.size();
assert(NumRegs == 2 && "Should have two parts");

MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9};
Contributor: static const?

Contributor Author: Done!

ArrayRef<MCPhysReg> Allocated = State.AllocateRegBlock(Regs, NumRegs);
if (!Allocated.empty()) {
for (const auto &[Pending, Reg] : zip(PendingMembers, Allocated)) {
Pending.convertToReg(Reg);
State.addLoc(Pending);
Contributor: PendingMembers[0/1] and Allocated[0/1]?

}
} else {
int64_t Offset = State.AllocateStack(16, Align(16));
for (auto &Pending : PendingMembers) {
Contributor: Nit: maybe better to use PendingMembers[0/1] directly.

Contributor Author: Done. I kept this a bit more generic originally in case we want to handle cases like <2 x i128> in the future, but for now we don't need the loops...

Pending.convertToMem(Offset);
State.addLoc(Pending);
Offset += 8;
}
Contributor: This is not shown in a test case, but I have a concern: would we get two discontinuous i64s due to the alignment?

Contributor: Oh, I saw the pre-committed case now.

}
PendingMembers.clear();
return true;
}

// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"
6 changes: 2 additions & 4 deletions llvm/lib/Target/X86/X86CallingConv.td
@@ -548,11 +548,9 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,

// i128 can be either passed in two i64 registers, or on the stack, but
// not split across register and stack. As such, do not allow using R9
// for a split i64.
// not split across register and stack. Handle this with a custom function.
CCIfType<[i64],
CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>,
CCIfConsecutiveRegs<CCCustom<"CC_X86_64_I128">>>,

CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,

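For reference, the CCCustom entry above should expand in the generated X86GenCallingConv.inc into a dispatch of roughly this shape (a paraphrase for illustration; the exact emitted code may differ). Note the inverted convention: the custom handler returns true to report "assigned, stop searching", which the generated calling-convention function surfaces as return false, false meaning no error:

// Inside the generated CC_X86_64_C (sketch, not the literal .inc output):
if (LocVT == MVT::i64) {                      // CCIfType<[i64], ...>
  if (ArgFlags.isInConsecutiveRegs()) {       // CCIfConsecutiveRegs<...>
    if (CC_X86_64_I128(ValNo, ValVT, LocVT,   // CCCustom<"CC_X86_64_I128">
                       LocInfo, ArgFlags, State))
      return false;                           // assigned successfully
  }
}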
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -1604,6 +1604,10 @@ namespace llvm {
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const override;

bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const override;

bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

bool supportSwiftError() const override;
8 changes: 8 additions & 0 deletions llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -233,6 +233,14 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
return VT.changeVectorElementTypeToInteger();
}

bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const {
// i128 split into i64 needs to be allocated to two consecutive registers,
// or spilled to the stack as a whole.
return Ty->isIntegerTy(128);
}

/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
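A hedged sketch of the flag plumbing this hook drives (simplified stand-in types; the real logic lives in SelectionDAG's argument lowering): when the hook returns true, every i64 part of the split i128 is tagged as belonging to a consecutive-register group, and the last part additionally carries the InConsecutiveRegsLast marker that CC_X86_64_I128 waits for before allocating the whole pending block.

#include <vector>

// Simplified stand-in for ISD::ArgFlagsTy (illustration only).
struct ArgFlags {
  bool InConsecutiveRegs = false;
  bool InConsecutiveRegsLast = false;
};

// Sketch: split one i128 argument into its two i64 parts and tag them the
// way the generic lowering does when the target hook returns true.
std::vector<ArgFlags> splitI128(bool NeedsConsecutiveRegs) {
  std::vector<ArgFlags> Parts(2); // i128 -> lo64, hi64 on x86-64
  if (NeedsConsecutiveRegs) {
    for (ArgFlags &F : Parts)
      F.InConsecutiveRegs = true;
    // CC_X86_64_I128 buffers parts as "pending" until it sees this marker,
    // then assigns both at once: two registers or one 16-byte stack slot.
    Parts.back().InConsecutiveRegsLast = true;
  }
  return Parts;
}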
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/addcarry.ll
@@ -49,7 +49,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind {
; CHECK-LABEL: add256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: addq %r9, %rsi
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll
@@ -31,15 +31,15 @@ define <2 x i128> @flag_copy_2(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK-NEXT: movq %r8, %rdi
; CHECK-NEXT: {nf} sarq $63, %rdi
; CHECK-NEXT: cmovoq %rdi, %rcx
; CHECK-NEXT: movabsq $-9223372036854775808, %r9 # imm = 0x8000000000000000
; CHECK-NEXT: {nf} xorq %r9, %rdi
; CHECK-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
; CHECK-NEXT: {nf} xorq %r10, %rdi
; CHECK-NEXT: cmovnoq %r8, %rdi
; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: subq %r9, %rsi
Contributor: Is this correct? The fourth i128 seems to be split between R9 and the stack.

Contributor: Ok, so you mean either this, or four individual i128s, is not a legal argument list, because even for the latter, the fourth i128 should be turned into memory by the FE. In that case, I think we may not need to handle the problem here.

Contributor Author: Yeah, I'm assuming that the frontend will handle <2 x i128> by directly using byval (https://clang.godbolt.org/z/bznzTKohz -- interestingly, clang still directly returns the vector; I would have expected it to use sret rather than relying on sret demotion in the backend...)

I could extend this code to also handle vectors of i128 and require the whole vector argument to be in consecutive registers. I'm just not sure it makes sense to handle this, as there is no defined psABI for <2 x i128> in the first place, and if there were, the frontend would handle that part.

Contributor: Returning is another story; we emit warnings sometimes: https://clang.godbolt.org/z/fYo937x3K
I think we can leave it as is.

; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: {nf} sarq $63, %r8
; CHECK-NEXT: cmovoq %r8, %rsi
; CHECK-NEXT: {nf} xorq %r9, %r8
; CHECK-NEXT: {nf} xorq %r10, %r8
; CHECK-NEXT: cmovnoq %rdx, %r8
; CHECK-NEXT: movq %rcx, 16(%rax)
; CHECK-NEXT: movq %rsi, (%rax)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avgflooru-i128.ll
@@ -119,7 +119,7 @@ define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
; CHECK-LABEL: avgflooru_i128_vec:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: addq %r9, %rsi
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: setb %dil
; CHECK-NEXT: movzbl %dil, %edi
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/X86/fmuladd-soft-float.ll
@@ -1555,30 +1555,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-NEXT: .cfi_offset %r14, -32
; SOFT-FLOAT-64-NEXT: .cfi_offset %r15, -24
; SOFT-FLOAT-64-NEXT: .cfi_offset %rbp, -16
; SOFT-FLOAT-64-NEXT: movq %r9, %rbp
Contributor: There's no i128 here, only <4 x double>. This test should not be affected, right?

Contributor: Alright, soft-float, another case where we can ignore the ABI..

Contributor Author: Right. The previous implementation also (unintentionally) affected cases like an illegal <4 x i64> vector (or <4 x double> for soft-float) and wouldn't allow the first element of the vector to use r9. So the whole value could still be split across registers and the stack, just not between the 1st and 2nd element...

Contributor: Why doesn't it trigger assert(NumRegs == 2 && "Should have two parts");? I assume there will be 4 consecutive i64s to handle.

Contributor Author: Because functionArgumentNeedsConsecutiveRegisters only returns true for i128, so <4 x double> gets the default behavior.

Contributor: I see, thanks!

; SOFT-FLOAT-64-NEXT: movq %rcx, %r14
; SOFT-FLOAT-64-NEXT: movq %rdx, %r15
; SOFT-FLOAT-64-NEXT: movq %rsi, %r12
; SOFT-FLOAT-64-NEXT: movq %rsi, %r13
; SOFT-FLOAT-64-NEXT: movq %rdi, %rbx
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: movq %r8, %rdi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-NEXT: movq %r14, %rdi
; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r15
; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
@@ -1587,13 +1587,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, 24(%rbx)
; SOFT-FLOAT-64-NEXT: movq %r14, 16(%rbx)
; SOFT-FLOAT-64-NEXT: movq %r15, 8(%rbx)
; SOFT-FLOAT-64-NEXT: movq %r12, (%rbx)
; SOFT-FLOAT-64-NEXT: movq %r13, (%rbx)
; SOFT-FLOAT-64-NEXT: movq %rbx, %rax
; SOFT-FLOAT-64-NEXT: addq $8, %rsp
; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56
@@ -1633,30 +1633,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r14, -32
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r15, -24
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbp, -16
; SOFT-FLOAT-64-FMA-NEXT: movq %r9, %rbp
; SOFT-FLOAT-64-FMA-NEXT: movq %rcx, %r14
; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %r15
; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r12
; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r13
; SOFT-FLOAT-64-FMA-NEXT: movq %rdi, %rbx
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: movq %r8, %rdi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r15
; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
@@ -1665,13 +1665,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, 24(%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, 16(%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, 8(%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %r12, (%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %r13, (%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rax
; SOFT-FLOAT-64-FMA-NEXT: addq $8, %rsp
; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56
@@ -1711,30 +1711,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r14, -32
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r15, -24
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbp, -16
; SOFT-FLOAT-64-FMA4-NEXT: movq %r9, %rbp
; SOFT-FLOAT-64-FMA4-NEXT: movq %rcx, %r14
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %r15
; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r12
; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r13
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdi, %rbx
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: movq %r8, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r15
; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
@@ -1743,13 +1743,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, 24(%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, 16(%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, 8(%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, (%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, (%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rax
; SOFT-FLOAT-64-FMA4-NEXT: addq $8, %rsp
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/X86/i128-abi.ll
@@ -31,7 +31,7 @@ define i128 @on_stack2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i1
define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) {
; CHECK-LABEL: trailing_arg_on_stack:
; CHECK: # %bb.0:
; CHECK-NEXT: movq 24(%rsp), %rax
; CHECK-NEXT: movq %r9, %rax
Contributor Author: This is the most relevant test diff.

; CHECK-NEXT: retq
ret i64 %a6
}
@@ -78,20 +78,18 @@ define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind {
; CHECK-LABEL: call_trailing_arg_on_stack:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: movq %rsi, %r9
; CHECK-NEXT: movq %rdx, %r9
Contributor: This is relevant too. %y is now passed in R9 for trailing_arg_on_stack. (So the function name is not accurate anymore :)

; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movq %rdi, %r10
; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: movl $2, %edx
; CHECK-NEXT: movl $3, %ecx
; CHECK-NEXT: movl $4, %r8d
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %r9
; CHECK-NEXT: pushq %r10
; CHECK-NEXT: callq trailing_arg_on_stack@PLT
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y)
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -1795,27 +1795,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: seto %dil
; SSE-NEXT: movq %r8, %r9
; SSE-NEXT: sarq $63, %r9
; SSE-NEXT: movq %r8, %r10
; SSE-NEXT: sarq $63, %r10
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmovneq %r9, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
; SSE-NEXT: xorq %r10, %r9
; SSE-NEXT: cmovneq %r10, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
; SSE-NEXT: xorq %r11, %r10
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmoveq %r8, %r9
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi
; SSE-NEXT: cmoveq %r8, %r10
; SSE-NEXT: addq %r9, %rsi
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: seto %dil
; SSE-NEXT: movq %rdx, %r8
; SSE-NEXT: sarq $63, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmovneq %r8, %rsi
; SSE-NEXT: xorq %r10, %r8
; SSE-NEXT: xorq %r11, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmoveq %rdx, %r8
; SSE-NEXT: movq %rcx, 16(%rax)
; SSE-NEXT: movq %rsi, (%rax)
; SSE-NEXT: movq %r9, 24(%rax)
; SSE-NEXT: movq %r10, 24(%rax)
; SSE-NEXT: movq %r8, 8(%rax)
; SSE-NEXT: retq
;
Expand All @@ -1825,27 +1825,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: seto %dil
; AVX-NEXT: movq %r8, %r9
; AVX-NEXT: sarq $63, %r9
; AVX-NEXT: movq %r8, %r10
; AVX-NEXT: sarq $63, %r10
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmovneq %r9, %rcx
; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
; AVX-NEXT: xorq %r10, %r9
; AVX-NEXT: cmovneq %r10, %rcx
; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
; AVX-NEXT: xorq %r11, %r10
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmoveq %r8, %r9
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi
; AVX-NEXT: cmoveq %r8, %r10
; AVX-NEXT: addq %r9, %rsi
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: seto %dil
; AVX-NEXT: movq %rdx, %r8
; AVX-NEXT: sarq $63, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmovneq %r8, %rsi
; AVX-NEXT: xorq %r10, %r8
; AVX-NEXT: xorq %r11, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmoveq %rdx, %r8
; AVX-NEXT: movq %rcx, 16(%rax)
; AVX-NEXT: movq %rsi, (%rax)
; AVX-NEXT: movq %r9, 24(%rax)
; AVX-NEXT: movq %r10, 24(%rax)
; AVX-NEXT: movq %r8, 8(%rax)
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)