From c1570e959de48e876202d8a6e679abd20a554176 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 15 Apr 2024 09:04:52 -0700 Subject: [PATCH] [DAGCombiner][X86][WIP] Combine (build_vector (load X))->(vecty (load X)) This is an alternative to #88261. This gets the mmx cases from that patch without affecting RISC-V. The code here is largely lifted from the (bitcast (load))->(load) code. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 + llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll | 15 +- llvm/test/CodeGen/X86/mmx-intrinsics.ll | 1040 ++++------------- llvm/test/CodeGen/X86/pr35982.ll | 4 +- 4 files changed, 251 insertions(+), 834 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dbbe8d9193c25..beab5a052a048 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -23501,6 +23501,32 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); } + if (N->getNumOperands() == 1) { + SDValue N0 = N->getOperand(0); + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + // Do not remove the cast if the types differ in endian layout. + TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == + TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && + // If the load is volatile, we only want to change the load type if the + // resulting load is legal. Otherwise we might increase the number of + // memory accesses. We don't care if the original type was legal or not + // as we assume software couldn't rely on the number of accesses of an + // illegal type. 
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) || + TLI.isOperationLegal(ISD::LOAD, VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + + if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG, + *LN0->getMemOperand())) { + SDValue Load = + DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), + LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); + return Load; + } + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll index 69f733461efc7..3ced6c9483a6c 100644 --- a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll +++ b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll @@ -6,22 +6,11 @@ define void @test(<1 x i64> %c64, <1 x i64> %mask1, ptr %P) { ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: .cfi_offset %edi, -8 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl %eax, (%esp) -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-NEXT: movq (%esp), %mm0 +; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0 ; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm1 -; CHECK-NEXT: maskmovq %mm0, %mm1 -; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: maskmovq %mm1, %mm0 ; CHECK-NEXT: popl %edi ; CHECK-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/mmx-intrinsics.ll b/llvm/test/CodeGen/X86/mmx-intrinsics.ll index a43d9400cde6c..a7917d58cde5d 100644 --- a/llvm/test/CodeGen/X86/mmx-intrinsics.ll +++ b/llvm/test/CodeGen/X86/mmx-intrinsics.ll @@ -12,17 +12,9 @@ define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, 
%esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phaddw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: phaddw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -57,17 +49,9 @@ define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpgtd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pcmpgtd 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -102,17 +86,9 @@ define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpgtw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; 
X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pcmpgtw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -147,17 +123,9 @@ define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpgtb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pcmpgtb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -192,17 +160,9 @@ define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpeqd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pcmpeqd 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -237,17 +197,9 @@ define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx 
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpeqw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pcmpeqw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -282,17 +234,9 @@ define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpeqb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pcmpeqb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -327,17 +271,9 @@ define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpckldq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0] +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: punpckldq 16(%ebp), %mm0 # 
mm0 = mm0[0],mem[0] ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -372,17 +308,9 @@ define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpcklwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: punpcklwd 16(%ebp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -417,17 +345,9 @@ define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpcklbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: punpcklbw 16(%ebp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -462,17 +382,9 @@ define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; 
X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpckhdq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[1],mem[1] +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: punpckhdq 16(%ebp), %mm0 # mm0 = mm0[1],mem[1] ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -507,17 +419,9 @@ define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpckhwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: punpckhwd 16(%ebp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -552,17 +456,9 @@ define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), 
%ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpckhbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: punpckhbw 16(%ebp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -597,17 +493,9 @@ define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: packuswb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: packuswb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -642,17 +530,9 @@ define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: packssdw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: packssdw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; 
X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -687,17 +567,9 @@ define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: packsswb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: packsswb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -732,12 +604,8 @@ define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psrad $3, %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -770,12 +638,8 @@ define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psraw $3, %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -806,12 +670,8 @@ define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp { ; 
X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -873,12 +733,8 @@ define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psrld $3, %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -909,12 +765,8 @@ define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -944,12 +796,8 @@ define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psrlw $3, %mm0 ; X86-NEXT: 
movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -1014,12 +862,8 @@ define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: pslld $3, %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -1052,12 +896,8 @@ define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psllw $3, %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -1088,12 +928,8 @@ define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1123,12 +959,8 @@ define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) 
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psrad 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -1164,12 +996,8 @@ define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psraw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -1240,12 +1068,8 @@ define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psrld 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -1281,12 +1105,8 @@ define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psrlw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -1357,12 +1177,8 @@ define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; 
X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: pslld 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -1398,12 +1214,8 @@ define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: psllw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -1439,17 +1251,9 @@ define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pxor {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pxor 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1484,17 +1288,9 @@ define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; 
X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: por {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: por 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1529,17 +1325,9 @@ define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pandn {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pandn 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1574,17 +1362,9 @@ define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pand {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pand 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: 
movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1619,17 +1399,9 @@ define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmullw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1662,17 +1434,9 @@ define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmullw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1707,17 +1471,9 @@ define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 
16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmulhw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmulhw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1752,17 +1508,9 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmaddwd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmaddwd 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1797,17 +1545,9 @@ define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubusw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psubusw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1842,17 +1582,9 @@ 
define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubusb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psubusb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1887,17 +1619,9 @@ define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psubsw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -1932,17 +1656,9 @@ define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubsb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psubsb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2012,17 +1728,9 @@ define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psubd 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2057,17 +1765,9 @@ define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psubw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2102,17 +1802,9 @@ define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; 
X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psubb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2147,17 +1839,9 @@ define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddusw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: paddusw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2192,17 +1876,9 @@ define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), 
%mm0 -; X86-NEXT: paddusb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: paddusb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2237,17 +1913,9 @@ define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: paddsw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2282,17 +1950,9 @@ define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddsb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: paddsb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2362,17 +2022,9 @@ define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: 
subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: paddd 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2407,17 +2059,9 @@ define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: paddw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2452,17 +2096,9 @@ define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 
8(%ebp), %mm0 +; X86-NEXT: paddb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2497,17 +2133,9 @@ define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psadbw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psadbw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2540,17 +2168,9 @@ define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pminsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pminsw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2585,17 +2205,9 @@ define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: 
movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pminub {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pminub 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2630,17 +2242,9 @@ define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmaxsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmaxsw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2675,17 +2279,9 @@ define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmaxub {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmaxub 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: 
movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2720,17 +2316,9 @@ define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pavgw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pavgw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2765,17 +2353,9 @@ define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pavgb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pavgb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2829,18 +2409,8 @@ declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test24: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: 
movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: movq (%esp), %mm0 +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 ; X86-NEXT: pmovmskb %mm0, %eax -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: test24: @@ -2860,26 +2430,12 @@ declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp { ; X86-LABEL: test23: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq (%esp), %mm1 -; X86-NEXT: maskmovq %mm0, %mm1 -; X86-NEXT: leal -4(%ebp), %esp +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm1 +; X86-NEXT: maskmovq %mm1, %mm0 ; X86-NEXT: popl %edi -; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: test23: @@ -2906,17 +2462,9 @@ define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmulhuw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmulhuw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), 
%eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2951,12 +2499,8 @@ define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: pshufw $3, {{[0-9]+}}(%esp), %mm0 # mm0 = mem[3,0,0,0] +; X86-NEXT: subl $8, %esp +; X86-NEXT: pshufw $3, 8(%ebp), %mm0 # mm0 = mem[3,0,0,0] ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2983,18 +2527,8 @@ entry: define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test21_2: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: pshufw $3, (%esp), %mm0 # mm0 = mem[3,0,0,0] +; X86-NEXT: pshufw $3, {{[0-9]+}}(%esp), %mm0 # mm0 = mem[3,0,0,0] ; X86-NEXT: movd %mm0, %eax -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: test21_2: @@ -3021,17 +2555,9 @@ define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmuludq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmuludq 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; 
X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3061,17 +2587,7 @@ declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-LABEL: test19: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: cvtpi2pd (%esp), %xmm0 -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: cvtpi2pd {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: retl ; ; X64-LABEL: test19: @@ -3189,12 +2705,8 @@ define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: pabsd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: pabsd 8(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3226,12 +2738,8 @@ define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: pabsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: pabsw 8(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3263,12 +2771,8 @@ define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 
8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: pabsb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: pabsb 8(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3300,17 +2804,9 @@ define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psignd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psignd 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3345,17 +2841,9 @@ define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psignw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psignw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3390,17 +2878,9 @@ define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; 
X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psignb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psignb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3435,17 +2915,9 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pshufb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pshufb 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3480,17 +2952,9 @@ define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 
-; X86-NEXT: pmulhrsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmulhrsw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3525,17 +2989,9 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmaddubsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: pmaddubsw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3570,17 +3026,9 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phsubsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: phsubsw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3615,17 +3063,9 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; 
X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phsubd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: phsubd 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3660,17 +3100,9 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phsubw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: phsubw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3705,17 +3137,9 @@ define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phaddsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; 
X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: phaddsw 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3750,17 +3174,9 @@ define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phaddd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: phaddd 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -3815,12 +3231,8 @@ define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind { ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 ; X86-NEXT: pinsrw $2, 16(%ebp), %mm0 ; X86-NEXT: movq %mm0, (%esp) ; X86-NEXT: movl (%esp), %eax @@ -3847,18 +3259,8 @@ declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg) define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind { ; X86-LABEL: test_mm_extract_pi16: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: movq (%esp), %mm0 +; X86-NEXT: 
movq {{[0-9]+}}(%esp), %mm0 ; X86-NEXT: pextrw $2, %mm0, %eax -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: test_mm_extract_pi16: diff --git a/llvm/test/CodeGen/X86/pr35982.ll b/llvm/test/CodeGen/X86/pr35982.ll index 4a79a109f8b60..9a700d1475e73 100644 --- a/llvm/test/CodeGen/X86/pr35982.ll +++ b/llvm/test/CodeGen/X86/pr35982.ll @@ -6,8 +6,8 @@ define float @PR35982_emms(<1 x i64>) nounwind { ; NO-POSTRA-LABEL: PR35982_emms: ; NO-POSTRA: # %bb.0: ; NO-POSTRA-NEXT: subl $8, %esp -; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax ; NO-POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax ; NO-POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] ; NO-POSTRA-NEXT: movd %mm0, %ecx ; NO-POSTRA-NEXT: emms @@ -50,8 +50,8 @@ define float @PR35982_femms(<1 x i64>) nounwind { ; NO-POSTRA-LABEL: PR35982_femms: ; NO-POSTRA: # %bb.0: ; NO-POSTRA-NEXT: subl $8, %esp -; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax ; NO-POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax ; NO-POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] ; NO-POSTRA-NEXT: movd %mm0, %ecx ; NO-POSTRA-NEXT: femms