diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index fefdaba7f8bf5..6852de8e6cf61 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6433,14 +6433,16 @@ experimental at this time. def PreserveNoneDocs : Documentation { let Category = DocCatCallingConvs; let Content = [{ -On X86-64 and AArch64 targets, this attribute changes the calling convention of a function. +On X86, X86-64, and AArch64 targets, this attribute changes the calling convention of a function. The ``preserve_none`` calling convention tries to preserve as few general registers as possible. So all general registers are caller saved registers. It also uses more general registers to pass arguments. This attribute doesn't impact floating-point registers. ``preserve_none``'s ABI is still unstable, and may be changed in the future. -- On X86-64, only RSP and RBP are preserved by the callee. +- On X86, only ESP, EBP, and ESI are preserved by the callee. Registers EDI, + EDX, and EAX now can be used to pass function arguments. +- On X86-64, only RSP, RBP, and RBX are preserved by the callee. Registers R12, R13, R14, R15, RDI, RSI, RDX, RCX, R8, R9, R11, and RAX now can be used to pass function arguments. Floating-point registers (XMMs/YMMs) still follow the C calling convention. diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index ebc59c92f4c24..1d36f56b65a55 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -406,6 +406,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { case CC_X86RegCall: case CC_C: case CC_PreserveMost: + case CC_PreserveNone: case CC_Swift: case CC_X86Pascal: case CC_IntelOclBicc: diff --git a/clang/test/Sema/preserve-none-call-conv.c b/clang/test/Sema/preserve-none-call-conv.c index fc9463726e3f5..6b6c9957c2ba4 100644 --- a/clang/test/Sema/preserve-none-call-conv.c +++ b/clang/test/Sema/preserve-none-call-conv.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 %s -fsyntax-only -triple x86_64-unknown-unknown -verify // RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-unknown-unknown -verify +// RUN: %clang_cc1 %s -fsyntax-only -triple i686-unknown-unknown -verify typedef void typedef_fun_t(int); diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 822e761444db7..fc9e37f1bfbc0 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -463,7 +463,7 @@ added in the future: registers to pass arguments. This attribute doesn't impact non-general purpose registers (e.g. floating point registers, on X86 XMMs/YMMs). Non-general purpose registers still follow the standard C calling - convention. Currently it is for x86_64 and AArch64 only. + convention. Currently it is for x86, x86_64, and AArch64 only. "``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions Clang generates an access function to access C++-style Thread Local Storage (TLS). The access function generally has an entry block, an exit block and an diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index f020e0b55141c..9e5aaeb44334d 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -1051,6 +1051,16 @@ def CC_X86_64_Preserve_None : CallingConv<[ CCDelegateTo<CC_X86_64_C> ]>; +def CC_X86_32_Preserve_None : CallingConv<[ + // 32-bit variant of CC_X86_64_Preserve_None, above.
Use everything except: + // - EBP frame pointer + // - ECX 'nest' parameter + // - ESI base pointer + // - EBX GOT pointer for PLT calls + CCIfType<[i32], CCAssignToReg<[EDI, EDX, EAX]>>, + CCDelegateTo<CC_X86_32_C> +]>; + //===----------------------------------------------------------------------===// // X86 Root Argument Calling Conventions //===----------------------------------------------------------------------===// @@ -1072,6 +1082,7 @@ def CC_X86_32 : CallingConv<[ CCIfCC<"CallingConv::X86_RegCall", CCIfSubtarget<"isTargetWin32()", CCIfRegCallv4<CCDelegateTo<CC_X86_Win32_RegCallv4>>>>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>, + CCIfCC<"CallingConv::PreserveNone", CCDelegateTo<CC_X86_32_Preserve_None>>, // Otherwise, drop to normal X86-32 CC CCDelegateTo<CC_X86_32_C> @@ -1186,7 +1197,8 @@ def CSR_64_AllRegs_AVX512 : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX, (sequence "ZMM%u", 0, 31), (sequence "K%u", 0, 7)), (sequence "XMM%u", 0, 15))>; -def CSR_64_NoneRegs : CalleeSavedRegs<(add RBP)>; +def CSR_64_NoneRegs : CalleeSavedRegs<(add RBP, RBX)>; +def CSR_32_NoneRegs : CalleeSavedRegs<(add EBP, ESI)>; // Standard C + YMM6-15 def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 83b11eede829e..facad368c66d9 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -316,7 +316,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_64_RT_AllRegs_AVX_SaveList; return CSR_64_RT_AllRegs_SaveList; case CallingConv::PreserveNone: - return CSR_64_NoneRegs_SaveList; + return Is64Bit ? CSR_64_NoneRegs_SaveList : CSR_32_NoneRegs_SaveList; case CallingConv::CXX_FAST_TLS: if (Is64Bit) return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ? @@ -444,7 +444,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_64_RT_AllRegs_AVX_RegMask; return CSR_64_RT_AllRegs_RegMask; case CallingConv::PreserveNone: - return CSR_64_NoneRegs_RegMask; + return Is64Bit ? CSR_64_NoneRegs_RegMask : CSR_32_NoneRegs_RegMask; case CallingConv::CXX_FAST_TLS: if (Is64Bit) return CSR_64_TLS_Darwin_RegMask; diff --git a/llvm/test/CodeGen/X86/preserve_none_swift.ll b/llvm/test/CodeGen/X86/preserve_none_swift.ll index 9a1c15190c6a2..bc64ee3b54f60 100644 --- a/llvm/test/CodeGen/X86/preserve_none_swift.ll +++ b/llvm/test/CodeGen/X86/preserve_none_swift.ll @@ -1,4 +1,5 @@ ; RUN: not llc -mtriple=x86_64 %s -o - 2>&1 | FileCheck %s +; RUN: not llc -mtriple=i686 %s -o - 2>&1 | FileCheck %s ; Swift attributes should not be used with preserve_none.
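For reference, the expected effect of the new 32-bit convention can be sketched in a few lines of LLVM IR (illustrative only, not part of the patch; the function names @pn_add3 and @call_pn_add3 are made up). Under CC_X86_32_Preserve_None the first three i32 arguments of a preserve_nonecc function travel in EDI, EDX, and EAX, any further arguments go on the stack, and a caller may only assume that ESP, EBP, and ESI survive the call, per CSR_32_NoneRegs above.

; Hypothetical example for an i686 target, mirroring the tests below.
declare preserve_nonecc i32 @pn_add3(i32, i32, i32)

define i32 @call_pn_add3() {
  ; 1, 2, and 3 are passed in %edi, %edx, and %eax; every other GPR except
  ; %esp, %ebp, and %esi must be treated as clobbered by the call.
  %r = call preserve_nonecc i32 @pn_add3(i32 1, i32 2, i32 3)
  ret i32 %r
}

Keeping EBP, ESI, EBX, and ECX out of the argument registers follows the comment in CC_X86_32_Preserve_None: they may still be needed as the frame pointer, the base pointer, the GOT pointer for PLT calls, and the 'nest' parameter, respectively.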
diff --git a/llvm/test/CodeGen/X86/preserve_nonecc64.ll b/llvm/test/CodeGen/X86/preserve_nonecc64.ll index 9526b4b939f8f..8ab6d15f86de6 100644 --- a/llvm/test/CodeGen/X86/preserve_nonecc64.ll +++ b/llvm/test/CodeGen/X86/preserve_nonecc64.ll @@ -10,10 +10,12 @@ define preserve_nonecc RETTYPE @preserve_nonecc1(i64, i64, double, double) nounwind { entry: ;ALL-LABEL: preserve_nonecc1 -;ALL: pushq %rbp +;ALL: pushq %rbx +;ALL-NEXT: pushq %rbp ;ALL-NEXT: InlineAsm Start ;ALL-NEXT: InlineAsm End ;ALL-NEXT: popq %rbp +;ALL-NEXT: popq %rbx ;ALL-NEXT: retq call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"() ret RETTYPE RETVAL diff --git a/llvm/test/CodeGen/X86/preserve_nonecc_call.ll b/llvm/test/CodeGen/X86/preserve_nonecc_call.ll index 500ebb139811a..b51be69847eb6 100644 --- a/llvm/test/CodeGen/X86/preserve_nonecc_call.ll +++ b/llvm/test/CodeGen/X86/preserve_nonecc_call.ll @@ -1,5 +1,6 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s --check-prefixes=X64 +; RUN: llc -mtriple=i686-unknown-unknown < %s | FileCheck %s --check-prefixes=X86 ; This test checks various function call behaviors between preserve_none and ; normal calling conventions. @@ -10,36 +11,51 @@ declare preserve_nonecc void @callee(ptr) ; of incompatible calling convention. Callee saved registers are saved/restored ; around the call. 
define void @caller1(ptr %a) { -; CHECK-LABEL: caller1: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %r15 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: pushq %r13 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: pushq %r12 -; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset %rbx, -48 -; CHECK-NEXT: .cfi_offset %r12, -40 -; CHECK-NEXT: .cfi_offset %r13, -32 -; CHECK-NEXT: .cfi_offset %r14, -24 -; CHECK-NEXT: .cfi_offset %r15, -16 -; CHECK-NEXT: movq %rdi, %r12 -; CHECK-NEXT: callq callee@PLT -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: popq %r12 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: popq %r13 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: popq %r15 -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; X64-LABEL: caller1: +; X64: # %bb.0: +; X64-NEXT: pushq %r15 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %r14 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: pushq %r13 +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: pushq %r12 +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: .cfi_offset %r12, -40 +; X64-NEXT: .cfi_offset %r13, -32 +; X64-NEXT: .cfi_offset %r14, -24 +; X64-NEXT: .cfi_offset %r15, -16 +; X64-NEXT: movq %rdi, %r12 +; X64-NEXT: callq callee@PLT +; X64-NEXT: addq $8, %rsp +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: popq %r12 +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: popq %r13 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %r14 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %r15 +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; X86-LABEL: caller1: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: calll callee@PLT +; X86-NEXT: popl %edi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl tail call preserve_nonecc void @callee(ptr %a) ret void } @@ -48,98 +64,346 @@ define void @caller1(ptr %a) { ; The tail call is preserved. No registers are saved/restored around the call. ; Actually a simple jmp instruction is generated. define preserve_nonecc void @caller2(ptr %a) { -; CHECK-LABEL: caller2: -; CHECK: # %bb.0: -; CHECK-NEXT: jmp callee@PLT # TAILCALL +; X64-LABEL: caller2: +; X64: # %bb.0: +; X64-NEXT: jmp callee@PLT # TAILCALL +; +; X86-LABEL: caller2: +; X86: # %bb.0: +; X86-NEXT: jmp callee@PLT # TAILCALL tail call preserve_nonecc void @callee(ptr %a) ret void } ; Preserve_none function can use more registers to pass parameters. 
-declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11) -define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12) { -; CHECK-LABEL: callee_with_many_param: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movq %r13, %r12 -; CHECK-NEXT: movq %r14, %r13 -; CHECK-NEXT: movq %r15, %r14 -; CHECK-NEXT: movq %rdi, %r15 -; CHECK-NEXT: movq %rsi, %rdi -; CHECK-NEXT: movq %rdx, %rsi -; CHECK-NEXT: movq %rcx, %rdx -; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: movq %r9, %r8 -; CHECK-NEXT: movq %r11, %r9 -; CHECK-NEXT: movq %rax, %r11 -; CHECK-NEXT: callq callee_with_many_param2@PLT -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq - %ret = call preserve_nonecc i64 @callee_with_many_param2(i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12) +declare preserve_nonecc i64 @callee_with_11_params(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11) +define preserve_nonecc i64 @callee_with_12_params(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12) { +; X64-LABEL: callee_with_12_params: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: movq %r13, %r12 +; X64-NEXT: movq %r14, %r13 +; X64-NEXT: movq %r15, %r14 +; X64-NEXT: movq %rdi, %r15 +; X64-NEXT: movq %rsi, %rdi +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: movq %r8, %rcx +; X64-NEXT: movq %r9, %r8 +; X64-NEXT: movq %r11, %r9 +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: callq callee_with_11_params@PLT +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; X86-LABEL: callee_with_12_params: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: 
calll callee_with_11_params@PLT +; X86-NEXT: addl $76, %esp +; X86-NEXT: .cfi_adjust_cfa_offset -76 +; X86-NEXT: retl + %ret = call preserve_nonecc i64 @callee_with_11_params(i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12) ret i64 %ret } define i64 @caller3() { -; CHECK-LABEL: caller3: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %r15 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: pushq %r13 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: pushq %r12 -; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset %rbx, -48 -; CHECK-NEXT: .cfi_offset %r12, -40 -; CHECK-NEXT: .cfi_offset %r13, -32 -; CHECK-NEXT: .cfi_offset %r14, -24 -; CHECK-NEXT: .cfi_offset %r15, -16 -; CHECK-NEXT: movl $1, %r12d -; CHECK-NEXT: movl $2, %r13d -; CHECK-NEXT: movl $3, %r14d -; CHECK-NEXT: movl $4, %r15d -; CHECK-NEXT: movl $5, %edi -; CHECK-NEXT: movl $6, %esi -; CHECK-NEXT: movl $7, %edx -; CHECK-NEXT: movl $8, %ecx -; CHECK-NEXT: movl $9, %r8d -; CHECK-NEXT: movl $10, %r9d -; CHECK-NEXT: movl $11, %r11d -; CHECK-NEXT: movl $12, %eax -; CHECK-NEXT: callq callee_with_many_param@PLT -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: popq %r12 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: popq %r13 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: popq %r15 -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq - %ret = call preserve_nonecc i64 @callee_with_many_param(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12) +; X64-LABEL: caller3: +; X64: # %bb.0: +; X64-NEXT: pushq %r15 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %r14 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: pushq %r13 +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: pushq %r12 +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: .cfi_offset %r12, -40 +; X64-NEXT: .cfi_offset %r13, -32 +; X64-NEXT: .cfi_offset %r14, -24 +; X64-NEXT: .cfi_offset %r15, -16 +; X64-NEXT: movl $1, %r12d +; X64-NEXT: movl $2, %r13d +; X64-NEXT: movl $3, %r14d +; X64-NEXT: movl $4, %r15d +; X64-NEXT: movl $5, %edi +; X64-NEXT: movl $6, %esi +; X64-NEXT: movl $7, %edx +; X64-NEXT: movl $8, %ecx +; X64-NEXT: movl $9, %r8d +; X64-NEXT: movl $10, %r9d +; X64-NEXT: movl $11, %r11d +; X64-NEXT: movl $12, %eax +; X64-NEXT: callq callee_with_12_params@PLT +; X64-NEXT: addq $8, %rsp +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: popq %r12 +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: popq %r13 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %r14 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %r15 +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; X86-LABEL: caller3: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: movl $1, %edi +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl $2, %eax +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $12 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $11 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: 
pushl $10 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $9 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $8 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $7 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $6 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $5 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $4 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $3 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll callee_with_12_params@PLT +; X86-NEXT: addl $84, %esp +; X86-NEXT: .cfi_adjust_cfa_offset -84 +; X86-NEXT: popl %edi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl + %ret = call preserve_nonecc i64 @callee_with_12_params(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12) ret i64 %ret } +declare preserve_nonecc i32 @callee_with_4_params(i32 %a1, i32 %a2, i32 %a3, i32 %a4) +define preserve_nonecc i32 @callee_with_5_params(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) { +; X64-LABEL: callee_with_5_params: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: movl %r13d, %r12d +; X64-NEXT: movl %r14d, %r13d +; X64-NEXT: movl %r15d, %r14d +; X64-NEXT: movl %edi, %r15d +; X64-NEXT: callq callee_with_4_params@PLT +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; X86-LABEL: callee_with_5_params: +; X86: # %bb.0: +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll callee_with_4_params@PLT +; X86-NEXT: addl $4, %esp +; X86-NEXT: .cfi_adjust_cfa_offset -4 +; X86-NEXT: retl + %ret = call preserve_nonecc i32 @callee_with_4_params(i32 %a2, i32 %a3, i32 %a4, i32 %a5) + ret i32 %ret +} + +define i32 @caller4() { +; X64-LABEL: caller4: +; X64: # %bb.0: +; X64-NEXT: pushq %r15 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %r14 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: pushq %r13 +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: pushq %r12 +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: .cfi_offset %r12, -40 +; X64-NEXT: .cfi_offset %r13, -32 +; X64-NEXT: .cfi_offset %r14, -24 +; X64-NEXT: .cfi_offset %r15, -16 +; X64-NEXT: movl $1, %r12d +; X64-NEXT: movl $2, %r13d +; X64-NEXT: movl $3, %r14d +; X64-NEXT: movl $4, %r15d +; X64-NEXT: movl $5, %edi +; X64-NEXT: callq callee_with_5_params@PLT +; X64-NEXT: addq $8, %rsp +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: popq %r12 +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: popq %r13 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %r14 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %r15 +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; X86-LABEL: caller4: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi +; X86-NEXT: .cfi_def_cfa_offset 
12 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: movl $1, %edi +; X86-NEXT: movl $2, %edx +; X86-NEXT: movl $3, %eax +; X86-NEXT: pushl $5 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $4 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll callee_with_5_params@PLT +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: popl %edi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl + %ret = call preserve_nonecc i32 @callee_with_5_params(i32 1, i32 2, i32 3, i32 4, i32 5) + ret i32 %ret +} + ; Non-volatile registers are used to pass the first few parameters. declare void @boring() -declare preserve_nonecc void @continuation(ptr, ptr, ptr, ptr) -define preserve_nonecc void @entry(ptr %r12, ptr %r13, ptr %r14, ptr %r15) { -; CHECK-LABEL: entry: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq boring@PLT -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: jmp continuation@PLT # TAILCALL +declare preserve_nonecc void @continuation4(ptr, ptr, ptr, ptr) +define preserve_nonecc void @entry4(ptr %a, ptr %b, ptr %c, ptr %d) { +; X64-LABEL: entry4: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: callq boring@PLT +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: jmp continuation4@PLT # TAILCALL +; +; X86-LABEL: entry4: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_offset %ebp, -12 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: calll boring@PLT +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, %edx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: jmp continuation4@PLT # TAILCALL + call void @boring() + musttail call preserve_nonecc void @continuation4(ptr %a, ptr %b, ptr %c, ptr %d) + ret void +} + +declare preserve_nonecc void @continuation1(ptr, ptr) +define preserve_nonecc void @entry1(ptr %a) { +; X64-LABEL: entry1: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: callq boring@PLT +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: jmp continuation1@PLT # TAILCALL +; +; X86-LABEL: entry1: +; X86: # %bb.0: +; X86-NEXT: calll boring@PLT +; X86-NEXT: jmp continuation1@PLT # TAILCALL call void @boring() - musttail call preserve_nonecc void @continuation(ptr %r12, ptr %r13, ptr %r14, ptr %r15) + musttail call preserve_nonecc void @continuation1(ptr %a) ret void } diff --git a/llvm/test/CodeGen/X86/preserve_nonecc_call_win.ll b/llvm/test/CodeGen/X86/preserve_nonecc_call_win.ll index 8f933fbfd0568..0b0a048bf4d66 100644 --- a/llvm/test/CodeGen/X86/preserve_nonecc_call_win.ll +++ b/llvm/test/CodeGen/X86/preserve_nonecc_call_win.ll @@ -1,23 +1,69 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=x86_64-pc-windows-msvc -mcpu=corei7 < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-pc-windows-msvc -mcpu=corei7 < %s | FileCheck %s --check-prefixes=X64 +; RUN: llc 
-mtriple=i686-pc-windows-msvc < %s | FileCheck %s --check-prefixes=X86 ; Non-volatile registers are used to pass the first few parameters. declare void @boring() -declare preserve_nonecc void @continuation(ptr, ptr, ptr, ptr, ptr, ptr) -define preserve_nonecc void @entry(ptr %r12, ptr %r13, ptr %r14, ptr %r15, ptr %rdi, ptr %rsi) { -; CHECK-LABEL: entry: -; CHECK: # %bb.0: -; CHECK-NEXT: subq $40, %rsp -; CHECK-NEXT: .seh_stackalloc 40 -; CHECK-NEXT: .seh_endprologue -; CHECK-NEXT: callq boring -; CHECK-NEXT: nop -; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: addq $40, %rsp -; CHECK-NEXT: .seh_endepilogue -; CHECK-NEXT: jmp continuation # TAILCALL -; CHECK-NEXT: .seh_endproc +declare preserve_nonecc void @continuation6(ptr, ptr, ptr, ptr, ptr, ptr) +define preserve_nonecc void @entry6(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f) { +; X64-LABEL: entry6: +; X64: # %bb.0: +; X64-NEXT: subq $40, %rsp +; X64-NEXT: .seh_stackalloc 40 +; X64-NEXT: .seh_endprologue +; X64-NEXT: callq boring +; X64-NEXT: nop +; X64-NEXT: .seh_startepilogue +; X64-NEXT: addq $40, %rsp +; X64-NEXT: .seh_endepilogue +; X64-NEXT: jmp continuation6 # TAILCALL +; X64-NEXT: .seh_endproc +; +; X86-LABEL: entry6: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ebp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: calll _boring +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: addl $8, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: popl %esi +; X86-NEXT: jmp _continuation6 # TAILCALL call void @boring() - musttail call preserve_nonecc void @continuation(ptr %r12, ptr %r13, ptr %r14, ptr %r15, ptr %rdi, ptr %rsi) + musttail call preserve_nonecc void @continuation6(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f) + ret void +} + +declare preserve_nonecc void @continuation1(ptr) +define preserve_nonecc void @entry1(ptr %a) { +; X64-LABEL: entry1: +; X64: # %bb.0: +; X64-NEXT: subq $40, %rsp +; X64-NEXT: .seh_stackalloc 40 +; X64-NEXT: .seh_endprologue +; X64-NEXT: callq boring +; X64-NEXT: nop +; X64-NEXT: .seh_startepilogue +; X64-NEXT: addq $40, %rsp +; X64-NEXT: .seh_endepilogue +; X64-NEXT: jmp continuation1 # TAILCALL +; X64-NEXT: .seh_endproc +; +; X86-LABEL: entry1: +; X86: # %bb.0: +; X86-NEXT: calll _boring +; X86-NEXT: jmp _continuation1 # TAILCALL + call void @boring() + musttail call preserve_nonecc void @continuation1(ptr %a) ret void } diff --git a/llvm/test/CodeGen/X86/preserve_nonecc_musttail.ll b/llvm/test/CodeGen/X86/preserve_nonecc_musttail.ll index 77f5a8bd75ac8..1fc9efd7738d2 100644 --- a/llvm/test/CodeGen/X86/preserve_nonecc_musttail.ll +++ b/llvm/test/CodeGen/X86/preserve_nonecc_musttail.ll @@ -1,4 +1,5 @@ ; RUN: not llc -mtriple=x86_64-unknown-unknown -mcpu=corei7 %s -o - 2>&1 | FileCheck %s +; RUN: not llc -mtriple=i686-unknown-unknown %s -o - 2>&1 | FileCheck %s ; Incompatible calling convention causes following error message.