diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index b563f6ebce34e..11327ee69a554 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -54,6 +54,8 @@ static cl::opt X86EarlyIfConv("x86-early-ifcvt", cl::Hidden, cl::desc("Enable early if-conversion on X86")); +static cl::opt UseAA("x86-use-aa", cl::init(true), + cl::desc("Enable the use of AA during codegen.")); /// Classify a blockaddress reference for the current subtarget according to how /// we should reference it in a non-pcrel context. @@ -320,6 +322,8 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, PreferVectorWidth = 256; } +bool X86Subtarget::useAA() const { return UseAA; } + X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef TuneCPU, StringRef FS) { diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index e3cb9ee8ce190..e2169c6b8d5e0 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -155,6 +155,8 @@ class X86Subtarget final : public X86GenSubtargetInfo { const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; + bool useAA() const override; + private: /// Initialize the full set of dependencies so we can use an initializer /// list for X86Subtarget. diff --git a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll index d77d4352f8336..8ecf9e7a8fccd 100644 --- a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll +++ b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll @@ -4,32 +4,23 @@ define fastcc void @fht(ptr %fz, i16 signext %n) { ; CHECK-LABEL: fht: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: subss %xmm3, %xmm1 -; CHECK-NEXT: movaps %xmm3, %xmm4 -; CHECK-NEXT: mulss %xmm0, %xmm4 -; CHECK-NEXT: addss %xmm3, %xmm4 -; CHECK-NEXT: movaps %xmm3, %xmm2 -; CHECK-NEXT: subss %xmm4, %xmm2 -; CHECK-NEXT: addss %xmm3, %xmm4 -; CHECK-NEXT: xorps %xmm5, %xmm5 -; CHECK-NEXT: subss %xmm1, %xmm5 +; CHECK-NEXT: subss %xmm2, %xmm1 +; CHECK-NEXT: movaps %xmm2, %xmm3 +; CHECK-NEXT: mulss %xmm0, %xmm3 +; CHECK-NEXT: addss %xmm2, %xmm3 +; CHECK-NEXT: movaps %xmm2, %xmm4 +; CHECK-NEXT: subss %xmm3, %xmm4 ; CHECK-NEXT: addss %xmm0, %xmm1 -; CHECK-NEXT: mulss %xmm0, %xmm4 -; CHECK-NEXT: mulss %xmm0, %xmm5 -; CHECK-NEXT: addss %xmm4, %xmm5 -; CHECK-NEXT: addss %xmm0, %xmm5 -; CHECK-NEXT: movss %xmm5, 0 -; CHECK-NEXT: movss %xmm3, (%ecx) -; CHECK-NEXT: addss %xmm0, %xmm3 -; CHECK-NEXT: movss %xmm3, 0 -; CHECK-NEXT: mulss %xmm0, %xmm1 -; CHECK-NEXT: mulss %xmm0, %xmm2 -; CHECK-NEXT: addss %xmm1, %xmm2 ; CHECK-NEXT: addss %xmm0, %xmm2 -; CHECK-NEXT: movss %xmm2, (%ecx) +; CHECK-NEXT: movss %xmm2, 0 +; CHECK-NEXT: mulss %xmm0, %xmm1 +; CHECK-NEXT: mulss %xmm0, %xmm4 +; CHECK-NEXT: addss %xmm1, %xmm4 +; CHECK-NEXT: addss %xmm0, %xmm4 +; CHECK-NEXT: movss %xmm4, (%ecx) ; CHECK-NEXT: retl entry: br i1 true, label %bb171.preheader, label %bb431 diff --git a/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll b/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll index 180d6719837b2..c4afa2ae393ca 100644 --- a/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll +++ b/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll @@ -34,7 +34,6 @@ define void 
@_GLOBAL__I__ZN5Pooma5pinfoE() nounwind { ; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll __ZNSt8ios_baseC2Ev -; CHECK-NEXT: movl $0, 0 ; CHECK-NEXT: addl $12, %ebx ; CHECK-NEXT: movl %ebx, (%esi) ; CHECK-NEXT: movl L__ZTVSt15basic_streambufIcSt11char_traitsIcEE$non_lazy_ptr-L0$pb(%edi), %eax diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll index 0103d2bf3cc2c..d2fe4897c1845 100644 --- a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -402,9 +402,9 @@ define void @merge_loads_i16(i32 %count, ptr noalias nocapture %q, ptr noalias n define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p) nounwind uwtable noinline ssp { ; X86-BWON-LABEL: no_merge_loads: ; X86-BWON: # %bb.0: -; X86-BWON-NEXT: pushl %ebx +; X86-BWON-NEXT: pushl %esi ; X86-BWON-NEXT: .cfi_def_cfa_offset 8 -; X86-BWON-NEXT: .cfi_offset %ebx, -8 +; X86-BWON-NEXT: .cfi_offset %esi, -8 ; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BWON-NEXT: testl %eax, %eax ; X86-BWON-NEXT: jle .LBB5_3 @@ -414,23 +414,21 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no ; X86-BWON-NEXT: .p2align 4 ; X86-BWON-NEXT: .LBB5_2: # %a4 ; X86-BWON-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BWON-NEXT: movzbl (%edx), %ebx -; X86-BWON-NEXT: movb %bl, (%ecx) -; X86-BWON-NEXT: movzbl 1(%edx), %ebx -; X86-BWON-NEXT: movb %bl, 1(%ecx) +; X86-BWON-NEXT: movzwl (%edx), %esi +; X86-BWON-NEXT: movw %si, (%ecx) ; X86-BWON-NEXT: addl $8, %ecx ; X86-BWON-NEXT: decl %eax ; X86-BWON-NEXT: jne .LBB5_2 ; X86-BWON-NEXT: .LBB5_3: # %._crit_edge -; X86-BWON-NEXT: popl %ebx +; X86-BWON-NEXT: popl %esi ; X86-BWON-NEXT: .cfi_def_cfa_offset 4 ; X86-BWON-NEXT: retl ; ; X86-BWOFF-LABEL: no_merge_loads: ; X86-BWOFF: # %bb.0: -; X86-BWOFF-NEXT: pushl %ebx +; X86-BWOFF-NEXT: pushl %esi ; X86-BWOFF-NEXT: .cfi_def_cfa_offset 8 -; X86-BWOFF-NEXT: .cfi_offset %ebx, -8 +; X86-BWOFF-NEXT: .cfi_offset %esi, -8 ; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BWOFF-NEXT: testl %eax, %eax ; X86-BWOFF-NEXT: jle .LBB5_3 @@ -440,15 +438,13 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no ; X86-BWOFF-NEXT: .p2align 4 ; X86-BWOFF-NEXT: .LBB5_2: # %a4 ; X86-BWOFF-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BWOFF-NEXT: movb (%edx), %bl -; X86-BWOFF-NEXT: movb %bl, (%ecx) -; X86-BWOFF-NEXT: movb 1(%edx), %bl -; X86-BWOFF-NEXT: movb %bl, 1(%ecx) +; X86-BWOFF-NEXT: movw (%edx), %si +; X86-BWOFF-NEXT: movw %si, (%ecx) ; X86-BWOFF-NEXT: addl $8, %ecx ; X86-BWOFF-NEXT: decl %eax ; X86-BWOFF-NEXT: jne .LBB5_2 ; X86-BWOFF-NEXT: .LBB5_3: # %._crit_edge -; X86-BWOFF-NEXT: popl %ebx +; X86-BWOFF-NEXT: popl %esi ; X86-BWOFF-NEXT: .cfi_def_cfa_offset 4 ; X86-BWOFF-NEXT: retl ; @@ -459,10 +455,8 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no ; X64-BWON-NEXT: .p2align 4 ; X64-BWON-NEXT: .LBB5_1: # %a4 ; X64-BWON-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BWON-NEXT: movzbl (%rsi), %eax -; X64-BWON-NEXT: movb %al, (%rdx) -; X64-BWON-NEXT: movzbl 1(%rsi), %eax -; X64-BWON-NEXT: movb %al, 1(%rdx) +; X64-BWON-NEXT: movzwl (%rsi), %eax +; X64-BWON-NEXT: movw %ax, (%rdx) ; X64-BWON-NEXT: addq $8, %rdx ; X64-BWON-NEXT: decl %edi ; X64-BWON-NEXT: jne .LBB5_1 @@ -476,10 +470,8 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no ; X64-BWOFF-NEXT: .p2align 4 ; X64-BWOFF-NEXT: .LBB5_1: 
# %a4 ; X64-BWOFF-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BWOFF-NEXT: movb (%rsi), %al -; X64-BWOFF-NEXT: movb %al, (%rdx) -; X64-BWOFF-NEXT: movb 1(%rsi), %al -; X64-BWOFF-NEXT: movb %al, 1(%rdx) +; X64-BWOFF-NEXT: movw (%rsi), %ax +; X64-BWOFF-NEXT: movw %ax, (%rdx) ; X64-BWOFF-NEXT: addq $8, %rdx ; X64-BWOFF-NEXT: decl %edi ; X64-BWOFF-NEXT: jne .LBB5_1 @@ -858,26 +850,26 @@ define void @MergeLoadStoreBaseIndexOffsetComplicated(ptr %a, ptr %b, ptr %c, i6 ; X86-BWON-NEXT: .cfi_offset %edi, -16 ; X86-BWON-NEXT: .cfi_offset %ebx, -12 ; X86-BWON-NEXT: .cfi_offset %ebp, -8 -; X86-BWON-NEXT: xorl %eax, %eax -; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BWON-NEXT: xorl %esi, %esi ; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-BWON-NEXT: xorl %ebp, %ebp ; X86-BWON-NEXT: .p2align 4 ; X86-BWON-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; X86-BWON-NEXT: movsbl (%edi), %ecx -; X86-BWON-NEXT: movzbl (%esi,%ecx), %edx -; X86-BWON-NEXT: movzbl 1(%esi,%ecx), %ecx -; X86-BWON-NEXT: movb %dl, (%ebx,%eax) -; X86-BWON-NEXT: movl %eax, %edx -; X86-BWON-NEXT: orl $1, %edx -; X86-BWON-NEXT: movb %cl, (%ebx,%edx) +; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BWON-NEXT: movzbl (%eax,%ecx), %edx +; X86-BWON-NEXT: movzbl 1(%eax,%ecx), %ecx +; X86-BWON-NEXT: movl %esi, %eax +; X86-BWON-NEXT: orl $1, %eax +; X86-BWON-NEXT: movb %cl, (%ebx,%eax) +; X86-BWON-NEXT: movb %dl, (%ebx,%esi) ; X86-BWON-NEXT: incl %edi -; X86-BWON-NEXT: addl $2, %eax +; X86-BWON-NEXT: addl $2, %esi ; X86-BWON-NEXT: adcl $0, %ebp -; X86-BWON-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-BWON-NEXT: movl %ebp, %ecx -; X86-BWON-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; X86-BWON-NEXT: cmpl {{[0-9]+}}(%esp), %esi +; X86-BWON-NEXT: movl %ebp, %eax +; X86-BWON-NEXT: sbbl {{[0-9]+}}(%esp), %eax ; X86-BWON-NEXT: jl .LBB10_1 ; X86-BWON-NEXT: # %bb.2: ; X86-BWON-NEXT: popl %esi @@ -904,26 +896,26 @@ define void @MergeLoadStoreBaseIndexOffsetComplicated(ptr %a, ptr %b, ptr %c, i6 ; X86-BWOFF-NEXT: .cfi_offset %edi, -16 ; X86-BWOFF-NEXT: .cfi_offset %ebx, -12 ; X86-BWOFF-NEXT: .cfi_offset %ebp, -8 -; X86-BWOFF-NEXT: xorl %eax, %eax -; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BWOFF-NEXT: xorl %esi, %esi ; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-BWOFF-NEXT: xorl %ebp, %ebp ; X86-BWOFF-NEXT: .p2align 4 ; X86-BWOFF-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; X86-BWOFF-NEXT: movsbl (%edi), %ecx -; X86-BWOFF-NEXT: movb (%esi,%ecx), %dl -; X86-BWOFF-NEXT: movb 1(%esi,%ecx), %cl -; X86-BWOFF-NEXT: movb %dl, (%ebx,%eax) -; X86-BWOFF-NEXT: movl %eax, %edx -; X86-BWOFF-NEXT: orl $1, %edx -; X86-BWOFF-NEXT: movb %cl, (%ebx,%edx) +; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BWOFF-NEXT: movb (%eax,%ecx), %dl +; X86-BWOFF-NEXT: movb 1(%eax,%ecx), %cl +; X86-BWOFF-NEXT: movl %esi, %eax +; X86-BWOFF-NEXT: orl $1, %eax +; X86-BWOFF-NEXT: movb %cl, (%ebx,%eax) +; X86-BWOFF-NEXT: movb %dl, (%ebx,%esi) ; X86-BWOFF-NEXT: incl %edi -; X86-BWOFF-NEXT: addl $2, %eax +; X86-BWOFF-NEXT: addl $2, %esi ; X86-BWOFF-NEXT: adcl $0, %ebp -; X86-BWOFF-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-BWOFF-NEXT: movl %ebp, %ecx -; X86-BWOFF-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; X86-BWOFF-NEXT: cmpl {{[0-9]+}}(%esp), %esi +; X86-BWOFF-NEXT: movl %ebp, %eax +; X86-BWOFF-NEXT: sbbl {{[0-9]+}}(%esp), %eax ; X86-BWOFF-NEXT: jl .LBB10_1 ; X86-BWOFF-NEXT: # %bb.2: ; X86-BWOFF-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/addcarry.ll 
b/llvm/test/CodeGen/X86/addcarry.ll index f8d32fc2d2925..ce1bf72d70a73 100644 --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -1155,14 +1155,14 @@ define void @PR39464(ptr noalias nocapture sret(%struct.U192) %0, ptr nocapture ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movq (%rsi), %rcx +; CHECK-NEXT: movq 8(%rsi), %rdi ; CHECK-NEXT: addq (%rdx), %rcx -; CHECK-NEXT: movq %rcx, (%rdi) -; CHECK-NEXT: movq 8(%rsi), %rcx -; CHECK-NEXT: adcq 8(%rdx), %rcx -; CHECK-NEXT: movq %rcx, 8(%rdi) +; CHECK-NEXT: movq %rcx, (%rax) +; CHECK-NEXT: adcq 8(%rdx), %rdi +; CHECK-NEXT: movq %rdi, 8(%rax) ; CHECK-NEXT: movq 16(%rsi), %rcx ; CHECK-NEXT: adcq 16(%rdx), %rcx -; CHECK-NEXT: movq %rcx, 16(%rdi) +; CHECK-NEXT: movq %rcx, 16(%rax) ; CHECK-NEXT: retq %4 = load i64, ptr %1, align 8 %5 = load i64, ptr %2, align 8 diff --git a/llvm/test/CodeGen/X86/avoid-sfb.ll b/llvm/test/CodeGen/X86/avoid-sfb.ll index 22b4fddf88e45..29de9b6e68b22 100644 --- a/llvm/test/CodeGen/X86/avoid-sfb.ll +++ b/llvm/test/CodeGen/X86/avoid-sfb.ll @@ -418,18 +418,18 @@ define void @test_multiple_blocks(ptr nocapture noalias %s1, ptr nocapture %s2) ; CHECK-NEXT: movl $0, 36(%rdi) ; CHECK-NEXT: movups 16(%rdi), %xmm0 ; CHECK-NEXT: movups %xmm0, 16(%rsi) -; CHECK-NEXT: movl 32(%rdi), %eax -; CHECK-NEXT: movl %eax, 32(%rsi) -; CHECK-NEXT: movl 36(%rdi), %eax -; CHECK-NEXT: movl %eax, 36(%rsi) -; CHECK-NEXT: movq 40(%rdi), %rax -; CHECK-NEXT: movq %rax, 40(%rsi) ; CHECK-NEXT: movl (%rdi), %eax ; CHECK-NEXT: movl %eax, (%rsi) ; CHECK-NEXT: movl 4(%rdi), %eax ; CHECK-NEXT: movl %eax, 4(%rsi) ; CHECK-NEXT: movq 8(%rdi), %rax ; CHECK-NEXT: movq %rax, 8(%rsi) +; CHECK-NEXT: movl 32(%rdi), %eax +; CHECK-NEXT: movl %eax, 32(%rsi) +; CHECK-NEXT: movl 36(%rdi), %eax +; CHECK-NEXT: movl %eax, 36(%rsi) +; CHECK-NEXT: movq 40(%rdi), %rax +; CHECK-NEXT: movq %rax, 40(%rsi) ; CHECK-NEXT: retq ; ; DISABLED-LABEL: test_multiple_blocks: @@ -438,33 +438,11 @@ define void @test_multiple_blocks(ptr nocapture noalias %s1, ptr nocapture %s2) ; DISABLED-NEXT: movl $0, 36(%rdi) ; DISABLED-NEXT: movups 16(%rdi), %xmm0 ; DISABLED-NEXT: movups %xmm0, 16(%rsi) -; DISABLED-NEXT: movups 32(%rdi), %xmm0 -; DISABLED-NEXT: movups %xmm0, 32(%rsi) ; DISABLED-NEXT: movups (%rdi), %xmm0 ; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: movups 32(%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, 32(%rsi) ; DISABLED-NEXT: retq -; -; AVX-LABEL: test_multiple_blocks: -; AVX: # %bb.0: # %entry -; AVX-NEXT: movl $0, 4(%rdi) -; AVX-NEXT: movl $0, 36(%rdi) -; AVX-NEXT: vmovups 16(%rdi), %xmm0 -; AVX-NEXT: vmovups %xmm0, 16(%rsi) -; AVX-NEXT: movl 32(%rdi), %eax -; AVX-NEXT: movl %eax, 32(%rsi) -; AVX-NEXT: movl 36(%rdi), %eax -; AVX-NEXT: movl %eax, 36(%rsi) -; AVX-NEXT: movq 40(%rdi), %rax -; AVX-NEXT: movq %rax, 40(%rsi) -; AVX-NEXT: movl (%rdi), %eax -; AVX-NEXT: movl %eax, (%rsi) -; AVX-NEXT: movl 4(%rdi), %eax -; AVX-NEXT: movl %eax, 4(%rsi) -; AVX-NEXT: vmovups 8(%rdi), %xmm0 -; AVX-NEXT: vmovups %xmm0, 8(%rsi) -; AVX-NEXT: movq 24(%rdi), %rax -; AVX-NEXT: movq %rax, 24(%rsi) -; AVX-NEXT: retq entry: %b = getelementptr inbounds %struct.S4, ptr %s1, i64 0, i32 1 store i32 0, ptr %b, align 4 @@ -547,62 +525,26 @@ if.end: ; preds = %if.then, %entry ; Function Attrs: nounwind uwtable define void @test_stack(ptr noalias nocapture sret(%struct.S6) %agg.result, ptr byval(%struct.S6) nocapture readnone align 8 %s1, ptr byval(%struct.S6) nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 { -; CHECK-LABEL: test_stack: -; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 -; CHECK-NEXT: movups %xmm0, (%rdi) -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx -; CHECK-NEXT: movq %rcx, 16(%rdi) -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; CHECK-NEXT: movl %ecx, 24(%rdi) -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; CHECK-NEXT: movl %ecx, 28(%rdi) -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %esi -; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movl %edx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: retq -; -; DISABLED-LABEL: test_stack: -; DISABLED: # %bb.0: # %entry -; DISABLED-NEXT: movq %rdi, %rax -; DISABLED-NEXT: movl %esi, {{[0-9]+}}(%rsp) -; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 -; DISABLED-NEXT: movups %xmm0, (%rdi) -; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 -; DISABLED-NEXT: movups %xmm0, 16(%rdi) -; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 -; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; DISABLED-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; DISABLED-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; DISABLED-NEXT: retq +; SSE-LABEL: test_stack: +; SSE: # %bb.0: # %entry +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movl %esi, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movups %xmm0, (%rdi) +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: movups %xmm1, 16(%rdi) +; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) +; SSE-NEXT: retq ; ; AVX-LABEL: test_stack: ; AVX: # %bb.0: # %entry ; AVX-NEXT: movq %rdi, %rax ; AVX-NEXT: movl %esi, {{[0-9]+}}(%rsp) -; AVX-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0 -; AVX-NEXT: vmovups %xmm0, (%rdi) -; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rcx -; AVX-NEXT: movq %rcx, 16(%rdi) -; AVX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; AVX-NEXT: movl %ecx, 24(%rdi) -; AVX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; AVX-NEXT: movl %ecx, 28(%rdi) -; AVX-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0 -; AVX-NEXT: vmovups %xmm0, {{[0-9]+}}(%rsp) -; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rcx -; AVX-NEXT: movq %rcx, {{[0-9]+}}(%rsp) -; AVX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; AVX-NEXT: movl %ecx, {{[0-9]+}}(%rsp) -; AVX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; AVX-NEXT: movl %ecx, {{[0-9]+}}(%rsp) +; AVX-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 +; AVX-NEXT: vmovups %ymm0, (%rdi) +; AVX-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) +; AVX-NEXT: vzeroupper ; AVX-NEXT: retq entry: %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, ptr %s2, i64 0, i32 3 diff --git a/llvm/test/CodeGen/X86/cet_endbr_imm_enhance.ll b/llvm/test/CodeGen/X86/cet_endbr_imm_enhance.ll index 98d315ad14e68..ea8c1b6386983 100644 --- a/llvm/test/CodeGen/X86/cet_endbr_imm_enhance.ll +++ b/llvm/test/CodeGen/X86/cet_endbr_imm_enhance.ll @@ -29,9 +29,8 @@ define dso_local i64 @foo(ptr %azx) #0 { ; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movabsq $-321002333478651, %rax # imm = 0xFFFEDC0CD1F0E105 ; CHECK-NEXT: notq %rax -; CHECK-NEXT: andq %rax, (%rdi) -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: andq (%rdi), %rax +; CHECK-NEXT: movq %rax, (%rdi) ; CHECK-NEXT: retq entry: %azx.addr = alloca ptr, align 8 diff --git a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll index a75973310d15c..8e63e19cc3d1c 
100644 --- a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll +++ b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll @@ -15,12 +15,12 @@ entry: ; X86-LABEL: func_cf_vector_x86 ; X86: movl 12(%ebp), %eax ; X86: movl 8(%ebp), %ecx - ; X86: movsd 24(%eax), %xmm4 # xmm4 = mem[0],zero + ; X86: movsd 24(%eax), %xmm4 # xmm4 = mem[0],zero + ; X86: movsd 16(%eax), %xmm5 # xmm5 = mem[0],zero + ; X86: movsd (%eax), %xmm6 # xmm6 = mem[0],zero + ; X86: movsd 8(%eax), %xmm7 # xmm7 = mem[0],zero ; X86: movsd %xmm4, 24(%esp) - ; X86: movsd 16(%eax), %xmm5 # xmm5 = mem[0],zero ; X86: movsd %xmm5, 16(%esp) - ; X86: movsd (%eax), %xmm6 # xmm6 = mem[0],zero - ; X86: movsd 8(%eax), %xmm7 # xmm7 = mem[0],zero ; X86: movsd %xmm7, 8(%esp) ; X86: movsd %xmm6, (%esp) ; X86: calll *___guard_check_icall_fptr @@ -29,6 +29,7 @@ entry: ; X86: movaps %xmm5, %xmm2 ; X86: movaps %xmm4, %xmm3 ; X86: calll *%ecx + } attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } diff --git a/llvm/test/CodeGen/X86/fixup-bw-inst.ll b/llvm/test/CodeGen/X86/fixup-bw-inst.ll index 4301498912003..3e28fb0e91bcc 100644 --- a/llvm/test/CodeGen/X86/fixup-bw-inst.ll +++ b/llvm/test/CodeGen/X86/fixup-bw-inst.ll @@ -18,10 +18,8 @@ define void @foo1(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p ; BWON-NEXT: .p2align 4 ; BWON-NEXT: LBB0_1: ## %a4 ; BWON-NEXT: ## =>This Inner Loop Header: Depth=1 -; BWON-NEXT: movzbl (%rsi), %eax -; BWON-NEXT: movb %al, (%rdx) -; BWON-NEXT: movzbl 1(%rsi), %eax -; BWON-NEXT: movb %al, 1(%rdx) +; BWON-NEXT: movzwl (%rsi), %eax +; BWON-NEXT: movw %ax, (%rdx) ; BWON-NEXT: addq $8, %rdx ; BWON-NEXT: decl %edi ; BWON-NEXT: jne LBB0_1 @@ -35,10 +33,8 @@ define void @foo1(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p ; BWOFF-NEXT: .p2align 4 ; BWOFF-NEXT: LBB0_1: ## %a4 ; BWOFF-NEXT: ## =>This Inner Loop Header: Depth=1 -; BWOFF-NEXT: movb (%rsi), %al -; BWOFF-NEXT: movb %al, (%rdx) -; BWOFF-NEXT: movb 1(%rsi), %al -; BWOFF-NEXT: movb %al, 1(%rdx) +; BWOFF-NEXT: movw (%rsi), %ax +; BWOFF-NEXT: movw %ax, (%rdx) ; BWOFF-NEXT: addq $8, %rdx ; BWOFF-NEXT: decl %edi ; BWOFF-NEXT: jne LBB0_1 @@ -81,10 +77,8 @@ define void @foo2(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p ; BWON-NEXT: .p2align 4 ; BWON-NEXT: LBB1_1: ## %a4 ; BWON-NEXT: ## =>This Inner Loop Header: Depth=1 -; BWON-NEXT: movzwl (%rsi), %eax -; BWON-NEXT: movw %ax, (%rdx) -; BWON-NEXT: movzwl 2(%rsi), %eax -; BWON-NEXT: movw %ax, 2(%rdx) +; BWON-NEXT: movl (%rsi), %eax +; BWON-NEXT: movl %eax, (%rdx) ; BWON-NEXT: addq $16, %rdx ; BWON-NEXT: decl %edi ; BWON-NEXT: jne LBB1_1 @@ -98,10 +92,8 @@ define void @foo2(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p ; BWOFF-NEXT: .p2align 4 ; BWOFF-NEXT: LBB1_1: ## %a4 ; BWOFF-NEXT: ## =>This Inner Loop Header: Depth=1 -; BWOFF-NEXT: movw (%rsi), %ax -; BWOFF-NEXT: movw %ax, (%rdx) -; BWOFF-NEXT: movw 2(%rsi), %ax -; BWOFF-NEXT: movw %ax, 2(%rdx) +; BWOFF-NEXT: movl (%rsi), %eax +; BWOFF-NEXT: movl %eax, (%rdx) ; BWOFF-NEXT: addq $16, %rdx ; BWOFF-NEXT: decl %edi ; BWOFF-NEXT: jne LBB1_1 diff --git a/llvm/test/CodeGen/X86/fold-load-vec.ll b/llvm/test/CodeGen/X86/fold-load-vec.ll index 0bf846a0930bb..b3e2c2f24d1c9 100644 --- a/llvm/test/CodeGen/X86/fold-load-vec.ll +++ b/llvm/test/CodeGen/X86/fold-load-vec.ll @@ -10,14 +10,12 @@ define void @sample_test(ptr %source, ptr %dest) nounwind { ; CHECK-NEXT: subq $24, %rsp ; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq 
$0, (%rsp) +; CHECK-NEXT: movaps (%rdi), %xmm1 ; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: movlps %xmm0, (%rsp) ; CHECK-NEXT: movlps %xmm0, (%rsi) -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: callq ext@PLT ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index 6e7f109a5da5c..2582c5c4d7f0f 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -2029,8 +2029,8 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { define void @pr63114() { ; CHECK-LIBCALL-LABEL: pr63114: ; CHECK-LIBCALL: # %bb.0: -; CHECK-LIBCALL-NEXT: movdqu (%rax), %xmm4 -; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,1,3,3,4,5,6,7] +; CHECK-LIBCALL-NEXT: movdqu (%rax), %xmm5 +; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm5[0,1,3,3,4,5,6,7] ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1] ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535] ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm0 @@ -2038,28 +2038,28 @@ define void @pr63114() { ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm0 ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0] ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm0 -; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60] -; CHECK-LIBCALL-NEXT: por %xmm5, %xmm0 -; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm6 = xmm4[0,1,2,3,4,5,7,7] +; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60] +; CHECK-LIBCALL-NEXT: por %xmm4, %xmm0 +; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm6 = xmm5[0,1,2,3,4,5,7,7] +; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm7 = xmm5[0,1,2,3,5,5,5,5] +; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3,0,3] +; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,5,5,5,5] +; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm5 +; CHECK-LIBCALL-NEXT: por %xmm2, %xmm5 +; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm5 +; CHECK-LIBCALL-NEXT: por %xmm4, %xmm5 ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3] ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm6 ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm6 ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm6 -; CHECK-LIBCALL-NEXT: por %xmm5, %xmm6 -; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm7 = xmm4[0,1,2,3,5,5,5,5] -; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3,0,3] -; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,5,5,5] -; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm4 -; CHECK-LIBCALL-NEXT: por %xmm2, %xmm4 -; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm4 -; CHECK-LIBCALL-NEXT: por %xmm5, %xmm4 +; CHECK-LIBCALL-NEXT: por %xmm4, %xmm6 ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm7 ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm7 ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm7 -; CHECK-LIBCALL-NEXT: por %xmm5, %xmm7 +; CHECK-LIBCALL-NEXT: por %xmm4, %xmm7 ; CHECK-LIBCALL-NEXT: movdqu %xmm7, 0 -; CHECK-LIBCALL-NEXT: movdqu %xmm4, 32 ; CHECK-LIBCALL-NEXT: movdqu %xmm6, 48 +; CHECK-LIBCALL-NEXT: movdqu %xmm5, 32 ; CHECK-LIBCALL-NEXT: movdqu %xmm0, 16 ; CHECK-LIBCALL-NEXT: retq ; @@ -2070,32 +2070,32 @@ define void @pr63114() { ; BWON-F16C-NEXT: vbroadcastss (%rax), %xmm2 ; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; 
BWON-F16C-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm3[0,0] -; BWON-F16C-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 -; BWON-F16C-NEXT: vpsllq $48, %xmm3, %xmm4 -; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7] -; BWON-F16C-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] -; BWON-F16C-NEXT: vpor %xmm3, %xmm2, %xmm2 -; BWON-F16C-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,3],xmm1[2,0] -; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3],xmm1[4,5,6,7] -; BWON-F16C-NEXT: vpor %xmm3, %xmm1, %xmm1 -; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,1,3,3,4,5,6,7] -; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,1] -; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3],xmm2[4,5,6,7] -; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm3[7] -; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] -; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[3],xmm0[4,5,6,7] -; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm3[7] +; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[0,1,3,3,4,5,6,7] +; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,1] +; BWON-F16C-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 +; BWON-F16C-NEXT: vpsllq $48, %xmm4, %xmm5 +; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2],xmm5[3],xmm3[4,5,6,7] +; BWON-F16C-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1] +; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7] +; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm6 = xmm0[0,1,2,3,5,5,5,5] +; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm6 = xmm6[0,1,2],xmm5[3],xmm6[4,5,6,7] +; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm6 = xmm6[0,1,2,3,4,5,6],xmm4[7] +; BWON-F16C-NEXT: vinsertf128 $1, %xmm3, %ymm6, %ymm3 +; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm5[3],xmm2[4,5,6,7] +; BWON-F16C-NEXT: vpor %xmm4, %xmm2, %xmm2 +; BWON-F16C-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[2,0] +; BWON-F16C-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm5[3],xmm0[4,5,6,7] +; BWON-F16C-NEXT: vpor %xmm4, %xmm0, %xmm0 ; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; BWON-F16C-NEXT: vmovups %ymm0, 0 -; BWON-F16C-NEXT: vmovups %ymm1, 32 +; BWON-F16C-NEXT: vmovups %ymm0, 32 +; BWON-F16C-NEXT: vmovups %ymm3, 0 ; BWON-F16C-NEXT: vzeroupper ; BWON-F16C-NEXT: retq ; ; CHECK-I686-LABEL: pr63114: ; CHECK-I686: # %bb.0: -; CHECK-I686-NEXT: movdqu (%eax), %xmm6 -; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[0,1,3,3,4,5,6,7] +; CHECK-I686-NEXT: movdqu (%eax), %xmm5 +; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm5[0,1,3,3,4,5,6,7] ; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1] ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535] ; CHECK-I686-NEXT: pand %xmm1, %xmm0 @@ -2105,26 +2105,26 @@ define void @pr63114() { ; CHECK-I686-NEXT: pand %xmm3, %xmm0 ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60] ; CHECK-I686-NEXT: por %xmm4, %xmm0 -; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm5 = xmm6[0,1,2,3,4,5,7,7] -; CHECK-I686-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] +; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm7 = xmm5[0,1,2,3,4,5,7,7] +; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm6 = xmm5[0,1,2,3,5,5,5,5] +; CHECK-I686-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3,0,3] +; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,5,5,5,5] ; CHECK-I686-NEXT: pand 
%xmm1, %xmm5 ; CHECK-I686-NEXT: por %xmm2, %xmm5 ; CHECK-I686-NEXT: pand %xmm3, %xmm5 ; CHECK-I686-NEXT: por %xmm4, %xmm5 -; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm7 = xmm6[0,1,2,3,5,5,5,5] -; CHECK-I686-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,3,0,3] -; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5] -; CHECK-I686-NEXT: pand %xmm1, %xmm6 -; CHECK-I686-NEXT: por %xmm2, %xmm6 -; CHECK-I686-NEXT: pand %xmm3, %xmm6 -; CHECK-I686-NEXT: por %xmm4, %xmm6 +; CHECK-I686-NEXT: pshufd {{.*#+}} xmm7 = xmm7[0,2,2,3] ; CHECK-I686-NEXT: pand %xmm1, %xmm7 ; CHECK-I686-NEXT: por %xmm2, %xmm7 ; CHECK-I686-NEXT: pand %xmm3, %xmm7 ; CHECK-I686-NEXT: por %xmm4, %xmm7 -; CHECK-I686-NEXT: movdqu %xmm7, 0 -; CHECK-I686-NEXT: movdqu %xmm6, 32 -; CHECK-I686-NEXT: movdqu %xmm5, 48 +; CHECK-I686-NEXT: pand %xmm1, %xmm6 +; CHECK-I686-NEXT: por %xmm2, %xmm6 +; CHECK-I686-NEXT: pand %xmm3, %xmm6 +; CHECK-I686-NEXT: por %xmm4, %xmm6 +; CHECK-I686-NEXT: movdqu %xmm6, 0 +; CHECK-I686-NEXT: movdqu %xmm7, 48 +; CHECK-I686-NEXT: movdqu %xmm5, 32 ; CHECK-I686-NEXT: movdqu %xmm0, 16 ; CHECK-I686-NEXT: retl %1 = load <24 x half>, ptr poison, align 2 diff --git a/llvm/test/CodeGen/X86/llvm.sincos.ll b/llvm/test/CodeGen/X86/llvm.sincos.ll index a429314630e56..ae3c6eafa4598 100644 --- a/llvm/test/CodeGen/X86/llvm.sincos.ll +++ b/llvm/test/CodeGen/X86/llvm.sincos.ll @@ -8,54 +8,45 @@ define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: subl $52, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: subl $36, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset %esi, -12 ; CHECK-NEXT: .cfi_offset %edi, -8 -; CHECK-NEXT: movl 84(%esp), %esi -; CHECK-NEXT: flds 76(%esp) +; CHECK-NEXT: flds 48(%esp) ; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: flds 64(%esp) +; CHECK-NEXT: flds 60(%esp) ; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: flds 72(%esp) +; CHECK-NEXT: flds 52(%esp) ; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: flds 68(%esp) -; CHECK-NEXT: movl 80(%esp), %edi -; CHECK-NEXT: leal 40(%esp), %eax +; CHECK-NEXT: flds 56(%esp) +; CHECK-NEXT: movl 64(%esp), %esi +; CHECK-NEXT: movl 68(%esp), %edi +; CHECK-NEXT: leal 8(%edi), %eax ; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: leal 4(%edi), %eax +; CHECK-NEXT: leal 8(%esi), %eax ; CHECK-NEXT: movl %eax, 4(%esp) ; CHECK-NEXT: fstps (%esp) ; CHECK-NEXT: calll sincosf -; CHECK-NEXT: leal 44(%esp), %eax +; CHECK-NEXT: leal 4(%edi), %eax ; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: leal 8(%edi), %eax +; CHECK-NEXT: leal 4(%esi), %eax ; CHECK-NEXT: movl %eax, 4(%esp) ; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; CHECK-NEXT: fstps (%esp) ; CHECK-NEXT: calll sincosf -; CHECK-NEXT: leal 36(%esp), %eax +; CHECK-NEXT: leal 12(%edi), %eax ; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: movl %edi, 4(%esp) +; CHECK-NEXT: leal 12(%esi), %eax +; CHECK-NEXT: movl %eax, 4(%esp) ; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; CHECK-NEXT: fstps (%esp) ; CHECK-NEXT: calll sincosf -; CHECK-NEXT: leal 48(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: addl $12, %edi -; CHECK-NEXT: movl %edi, 4(%esp) +; CHECK-NEXT: movl %edi, 8(%esp) +; CHECK-NEXT: movl %esi, 4(%esp) ; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; CHECK-NEXT: fstps (%esp) ; CHECK-NEXT: 
calll sincosf -; CHECK-NEXT: flds 36(%esp) -; CHECK-NEXT: flds 40(%esp) -; CHECK-NEXT: flds 44(%esp) -; CHECK-NEXT: flds 48(%esp) -; CHECK-NEXT: fstps 12(%esi) -; CHECK-NEXT: fstps 8(%esi) -; CHECK-NEXT: fstps 4(%esi) -; CHECK-NEXT: fstps (%esi) -; CHECK-NEXT: addl $52, %esp +; CHECK-NEXT: addl $36, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: popl %esi ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -77,32 +68,27 @@ define void @test_sincos_v2f64(<2 x double> %x, ptr noalias %out_sin, ptr noalia ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: subl $52, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: subl $36, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset %esi, -12 ; CHECK-NEXT: .cfi_offset %edi, -8 -; CHECK-NEXT: movl 84(%esp), %esi -; CHECK-NEXT: fldl 72(%esp) +; CHECK-NEXT: fldl 48(%esp) ; CHECK-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; CHECK-NEXT: fldl 64(%esp) -; CHECK-NEXT: movl 80(%esp), %edi -; CHECK-NEXT: leal 24(%esp), %eax +; CHECK-NEXT: fldl 56(%esp) +; CHECK-NEXT: movl 64(%esp), %esi +; CHECK-NEXT: movl 68(%esp), %edi +; CHECK-NEXT: leal 8(%edi), %eax ; CHECK-NEXT: movl %eax, 12(%esp) -; CHECK-NEXT: movl %edi, 8(%esp) +; CHECK-NEXT: leal 8(%esi), %eax +; CHECK-NEXT: movl %eax, 8(%esp) ; CHECK-NEXT: fstpl (%esp) ; CHECK-NEXT: calll sincos -; CHECK-NEXT: leal 32(%esp), %eax -; CHECK-NEXT: movl %eax, 12(%esp) -; CHECK-NEXT: addl $8, %edi -; CHECK-NEXT: movl %edi, 8(%esp) +; CHECK-NEXT: movl %edi, 12(%esp) +; CHECK-NEXT: movl %esi, 8(%esp) ; CHECK-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload ; CHECK-NEXT: fstpl (%esp) ; CHECK-NEXT: calll sincos -; CHECK-NEXT: fldl 24(%esp) -; CHECK-NEXT: fldl 32(%esp) -; CHECK-NEXT: fstpl 8(%esi) -; CHECK-NEXT: fstpl (%esi) -; CHECK-NEXT: addl $52, %esp +; CHECK-NEXT: addl $36, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: popl %esi ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -132,19 +118,16 @@ define void @can_fold_with_call_in_chain(float %x, ptr noalias %a, ptr noalias % ; CHECK-NEXT: .cfi_offset %edi, -8 ; CHECK-NEXT: flds 32(%esp) ; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: movl 36(%esp), %edi -; CHECK-NEXT: movl 40(%esp), %esi -; CHECK-NEXT: movl %esi, 4(%esp) -; CHECK-NEXT: movl %edi, (%esp) -; CHECK-NEXT: calll foo@PLT -; CHECK-NEXT: leal 16(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) +; CHECK-NEXT: movl 36(%esp), %esi +; CHECK-NEXT: movl 40(%esp), %edi ; CHECK-NEXT: movl %edi, 4(%esp) +; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: movl %edi, 8(%esp) +; CHECK-NEXT: movl %esi, 4(%esp) ; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; CHECK-NEXT: fstps (%esp) ; CHECK-NEXT: calll sincosf -; CHECK-NEXT: flds 16(%esp) -; CHECK-NEXT: fstps (%esi) ; CHECK-NEXT: addl $20, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll index 4cd206adc31de..dde531bfbf508 100644 --- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -161,13 +161,13 @@ define void @t(ptr nocapture %in, ptr nocapture %out, ptr nocapture %rk, i32 %r) ; ATOM-NEXT: shrl $16, %r9d ; ATOM-NEXT: xorl 20(%rcx,%rdx), %eax ; ATOM-NEXT: shrl $24, %edi +; ATOM-NEXT: movb %r9b, 1(%rsi) ; ATOM-NEXT: movl %eax, %ecx ; ATOM-NEXT: shrl $16, %eax ; ATOM-NEXT: movb %dil, (%rsi) -; ATOM-NEXT: movb %r9b, 1(%rsi) ; 
ATOM-NEXT: shrl $24, %ecx -; ATOM-NEXT: movb %cl, 4(%rsi) ; ATOM-NEXT: movb %al, 5(%rsi) +; ATOM-NEXT: movb %cl, 4(%rsi) ; ATOM-NEXT: popq %rbx ; ATOM-NEXT: popq %r14 ; ATOM-NEXT: popq %r15 diff --git a/llvm/test/CodeGen/X86/misched-matrix.ll b/llvm/test/CodeGen/X86/misched-matrix.ll index f44bf39e76f6f..35877f5704fad 100644 --- a/llvm/test/CodeGen/X86/misched-matrix.ll +++ b/llvm/test/CodeGen/X86/misched-matrix.ll @@ -36,7 +36,7 @@ ; ILPMIN: imull ; ILPMIN: addl ; ILPMIN: addl -; ILPMIN: movl %{{.*}}, 4( +; ILPMIN: movl %{{.*}}, 8( ; ILPMIN: imull ; ILPMIN: imull ; ILPMIN: addl @@ -44,7 +44,7 @@ ; ILPMIN: imull ; ILPMIN: addl ; ILPMIN: addl -; ILPMIN: movl %{{.*}}, 8( +; ILPMIN: movl %{{.*}}, 4( ; ILPMIN: imull ; ILPMIN: imull ; ILPMIN: addl @@ -59,23 +59,15 @@ ; scheduled independently, and that the imull/adds are clustered. ; ; ILPMAX-LABEL: %for.body -; ILPMAX: movl %{{.*}}, ( +; ILPMAX: movl 4({{%[a-z]+}}), ; ILPMAX: imull ; ILPMAX: imull ; ILPMAX: imull ; ILPMAX: imull -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: movl %{{.*}}, 4( ; ILPMAX: imull ; ILPMAX: imull ; ILPMAX: imull ; ILPMAX: imull -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: movl %{{.*}}, 8( ; ILPMAX: imull ; ILPMAX: imull ; ILPMAX: imull @@ -83,7 +75,14 @@ ; ILPMAX: addl ; ILPMAX: addl ; ILPMAX: addl -; ILPMAX: movl %{{.*}}, 12( +; ILPMAX: movl %{{.*}}, 8( +; ILPMAX: addl +; ILPMAX: addl +; ILPMAX: addl +; ILPMAX: movl %{{.*}}, 4( +; ILPMAX: addl +; ILPMAX: addl +; ILPMAX: addl ; ILPMAX-LABEL: %for.end define void @mmult(ptr noalias nocapture %m1, ptr noalias nocapture %m2, diff --git a/llvm/test/CodeGen/X86/negate-add-zero.ll b/llvm/test/CodeGen/X86/negate-add-zero.ll index eb4e2d312af20..cbf8e6441237e 100644 --- a/llvm/test/CodeGen/X86/negate-add-zero.ll +++ b/llvm/test/CodeGen/X86/negate-add-zero.ll @@ -843,18 +843,17 @@ define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS1 ; CHECK-NEXT: fmul %st, %st(0) ; CHECK-NEXT: fadd %st, %st(5) ; CHECK-NEXT: fsubr %st, %st(5) -; CHECK-NEXT: fxch %st(4) -; CHECK-NEXT: fmull -8 ; CHECK-NEXT: fxch %st(5) ; CHECK-NEXT: fstl 8 -; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: fsubp %st, %st(5) ; CHECK-NEXT: fxch %st(4) -; CHECK-NEXT: fsubp %st, %st(2) +; CHECK-NEXT: fmull -8 +; CHECK-NEXT: fsubrp %st, %st(2) ; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fadd %st(2), %st -; CHECK-NEXT: faddp %st, %st(2) +; CHECK-NEXT: fsubp %st, %st(2) ; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fadd %st(3), %st +; CHECK-NEXT: faddp %st, %st(3) +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: fstl 16 ; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: fadd %st, %st(0) diff --git a/llvm/test/CodeGen/X86/pr30290.ll b/llvm/test/CodeGen/X86/pr30290.ll index 478cb142475da..a6d04c6845d71 100644 --- a/llvm/test/CodeGen/X86/pr30290.ll +++ b/llvm/test/CodeGen/X86/pr30290.ll @@ -21,8 +21,8 @@ define void @foo(ptr byval(%struct.face) nocapture align 8) local_unnamed_addr { ; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1] -; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: vmovups %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 diff --git a/llvm/test/CodeGen/X86/pr36312.ll b/llvm/test/CodeGen/X86/pr36312.ll index c643888b699e5..2ec772343226b 100644 --- a/llvm/test/CodeGen/X86/pr36312.ll +++ b/llvm/test/CodeGen/X86/pr36312.ll @@ -13,11 +13,10 @@ define void @g() 
local_unnamed_addr #0 { ; CHECK-LABEL: g: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq d(%rip), %rax -; CHECK-NEXT: movl 4(%rax), %eax ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: incl b(%rip) ; CHECK-NEXT: setne %cl -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: addl 4(%rax), %ecx ; CHECK-NEXT: movl %ecx, a(%rip) ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/pr42565.ll b/llvm/test/CodeGen/X86/pr42565.ll index 10071064057b8..19ca68aee4809 100644 --- a/llvm/test/CodeGen/X86/pr42565.ll +++ b/llvm/test/CodeGen/X86/pr42565.ll @@ -11,10 +11,10 @@ define void @HUF_writeCTable_wksp() { ; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: leal 1(%rcx), %edx -; CHECK-NEXT: movb %dl, (%rax) ; CHECK-NEXT: movb %cl, (%rax) -; CHECK-NEXT: leaq 2(%rax), %rax ; CHECK-NEXT: addb $-2, %cl +; CHECK-NEXT: movb %dl, (%rax) +; CHECK-NEXT: leaq 2(%rax), %rax ; CHECK-NEXT: jmp .LBB0_1 entry: br label %for.body diff --git a/llvm/test/CodeGen/X86/pr51878_computeAliasing.ll b/llvm/test/CodeGen/X86/pr51878_computeAliasing.ll index cf50d0fd5ceef..587d8c1c17c23 100644 --- a/llvm/test/CodeGen/X86/pr51878_computeAliasing.ll +++ b/llvm/test/CodeGen/X86/pr51878_computeAliasing.ll @@ -20,7 +20,8 @@ define i16 @main() { ; CHECK-NEXT: movw $2, bar ; CHECK-NEXT: movw $4, aliasFoo ; CHECK-NEXT: movzwl foo, %eax -; CHECK-NEXT: addw bar, %ax +; CHECK-NEXT: addl $2, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retl entry: store i16 1, ptr @foo diff --git a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll index 50422a867dc32..33945b6513b52 100644 --- a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll +++ b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686 -regalloc=greedy --debug-only=regalloc 2>&1 | FileCheck %s +; RUN: llc < %s -x86-use-aa=false -mtriple=i686 -regalloc=greedy --debug-only=regalloc 2>&1 | FileCheck %s ; REQUIRES: asserts @@ -15,14 +15,14 @@ ; Make sure the split behaves as expected ; CHECK: RS_Split Cascade 1 -; CHECK-NOT: $eax static = +; CHECK-NOT: $eax static = ; CHECK: $eax no positive bundles ; CHECK-NEXT: $ecx no positive bundles ; CHECK-NEXT: $edx no positive bundles -; CHECK-NEXT: $esi static = +; CHECK-NEXT: $esi static = ; CHECK-NEXT: $edi no positive bundles ; CHECK-NEXT: $ebx no positive bundles -; CHECK-NEXT: $ebp static = +; CHECK-NEXT: $ebp static = ; CHECK: Split for $ebp ; Function Attrs: nounwind diff --git a/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll b/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll index cfc493af66709..e8c4cece53738 100644 --- a/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll +++ b/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll @@ -67,12 +67,12 @@ define void @cleanup_result(ptr %0) #1 { ; DEBUG: A @ dot.c:32 ; STRIPPED-NOT: A @ dot.c:32 ; BOTH: Offset: [SP-4], Type: Spill, Align: 8, Size: 4 +; BOTH: Offset: [SP-8], Type: Variable, Align: 4, Size: 4 +; DEBUG: i @ dot.c:55 +; STRIPPED-NOT: i @ dot.c:55 ; BOTH: Offset: [SP-12], Type: Variable, Align: 8, Size: 4 ; DEBUG: AB @ dot.c:38 ; STRIPPED-NOT: AB @ dot.c:38 -; BOTH: Offset: [SP-16], Type: Variable, Align: 4, Size: 4 -; DEBUG: i @ dot.c:55 -; STRIPPED-NOT: i @ dot.c:55 ; BOTH: Offset: [SP-20], Type: Variable, Align: 8, Size: 4 ; DEBUG: B @ dot.c:32 ; STRIPPED-NOT: B @ dot.c:32 diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll index 
31f8d08ca2f17..ccf6b37b6d648 100644 --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -627,10 +627,9 @@ define void @merge_hole(i32 %x, ptr %p) { define void @merge_hole2(i32 %x, ptr %p) { ; CHECK-LABEL: merge_hole2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: movw %ax, 2(%rsi) ; CHECK-NEXT: movb %dil, (%rsi) +; CHECK-NEXT: shrl $16, %edi +; CHECK-NEXT: movw %di, 2(%rsi) ; CHECK-NEXT: retq %p2 = getelementptr inbounds i16, ptr %p, i64 1 %sh = lshr i32 %x, 16 diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll index 1e9db9f55a8d5..6cb9bc39a8517 100644 --- a/llvm/test/CodeGen/X86/subcarry.ll +++ b/llvm/test/CodeGen/X86/subcarry.ll @@ -542,14 +542,14 @@ define void @PR39464(ptr noalias nocapture sret(%struct.U192) %0, ptr nocapture ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movq (%rsi), %rcx +; CHECK-NEXT: movq 8(%rsi), %rdi ; CHECK-NEXT: subq (%rdx), %rcx -; CHECK-NEXT: movq %rcx, (%rdi) -; CHECK-NEXT: movq 8(%rsi), %rcx -; CHECK-NEXT: sbbq 8(%rdx), %rcx -; CHECK-NEXT: movq %rcx, 8(%rdi) +; CHECK-NEXT: movq %rcx, (%rax) +; CHECK-NEXT: sbbq 8(%rdx), %rdi +; CHECK-NEXT: movq %rdi, 8(%rax) ; CHECK-NEXT: movq 16(%rsi), %rcx ; CHECK-NEXT: sbbq 16(%rdx), %rcx -; CHECK-NEXT: movq %rcx, 16(%rdi) +; CHECK-NEXT: movq %rcx, 16(%rax) ; CHECK-NEXT: retq %4 = load i64, ptr %1, align 8 %5 = load i64, ptr %2, align 8 diff --git a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll index df8a85fd07258..610ef46ad97ca 100644 --- a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll +++ b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll @@ -6910,114 +6910,113 @@ define void @vec512_v16i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec ; SCALAR-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SCALAR-NEXT: movzwl 26(%rdi), %eax ; SCALAR-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: movl 24(%rdi), %r13d -; SCALAR-NEXT: movzwl 22(%rdi), %r12d -; SCALAR-NEXT: movl 20(%rdi), %r15d -; SCALAR-NEXT: movzwl 18(%rdi), %r14d -; SCALAR-NEXT: movl 16(%rdi), %ebx -; SCALAR-NEXT: movzwl 14(%rdi), %r11d -; SCALAR-NEXT: movl 12(%rdi), %r10d -; SCALAR-NEXT: movzwl 10(%rdi), %r9d -; SCALAR-NEXT: movl 8(%rdi), %r8d -; SCALAR-NEXT: movzwl 6(%rdi), %ecx -; SCALAR-NEXT: movzwl 2(%rdi), %ebp +; SCALAR-NEXT: movl 24(%rdi), %eax +; SCALAR-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SCALAR-NEXT: movzwl 22(%rdi), %eax +; SCALAR-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SCALAR-NEXT: movl 20(%rdi), %r10d +; SCALAR-NEXT: movzwl 18(%rdi), %r11d +; SCALAR-NEXT: movl 16(%rdi), %eax +; SCALAR-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SCALAR-NEXT: movzwl 14(%rdi), %ebp +; SCALAR-NEXT: movl 12(%rdi), %r14d +; SCALAR-NEXT: movzwl 10(%rdi), %r15d +; SCALAR-NEXT: movl 8(%rdi), %r12d +; SCALAR-NEXT: movzwl 6(%rdi), %r13d +; SCALAR-NEXT: movzwl 2(%rdi), %r8d ; SCALAR-NEXT: movl (%rdi), %eax -; SCALAR-NEXT: movl 4(%rdi), %edi +; SCALAR-NEXT: movl 4(%rdi), %ecx ; SCALAR-NEXT: notl %eax ; SCALAR-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: notl %ebp -; SCALAR-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: notl %edi -; SCALAR-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: notl %ecx -; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; 
SCALAR-NEXT: notl %r8d ; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: notl %r9d -; SCALAR-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: movl %r10d, %edi -; SCALAR-NEXT: notl %edi -; SCALAR-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: notl %r11d -; SCALAR-NEXT: movl %r11d, %r9d -; SCALAR-NEXT: notl %ebx -; SCALAR-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: notl %r14d -; SCALAR-NEXT: notl %r15d -; SCALAR-NEXT: notl %r12d +; SCALAR-NEXT: notl %ecx +; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SCALAR-NEXT: notl %r13d -; SCALAR-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Reload -; SCALAR-NEXT: notl %r10d -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Reload +; SCALAR-NEXT: notl %r12d +; SCALAR-NEXT: notl %r15d +; SCALAR-NEXT: notl %r14d +; SCALAR-NEXT: notl %ebp +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Reload +; SCALAR-NEXT: notl %ebx ; SCALAR-NEXT: notl %r11d +; SCALAR-NEXT: notl %r10d +; SCALAR-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload +; SCALAR-NEXT: notl %r9d +; SCALAR-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload ; SCALAR-NEXT: notl %r8d ; SCALAR-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: movw %r8w, 30(%rsi) -; SCALAR-NEXT: movw %r11w, 28(%rsi) -; SCALAR-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: movw %r10w, 26(%rsi) -; SCALAR-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SCALAR-NEXT: movw %r13w, 24(%rsi) -; SCALAR-NEXT: movw %r12w, 22(%rsi) -; SCALAR-NEXT: movw %r15w, 20(%rsi) -; SCALAR-NEXT: movw %r14w, 18(%rsi) -; SCALAR-NEXT: movw %bx, 16(%rsi) -; SCALAR-NEXT: movw %r9w, 14(%rsi) -; SCALAR-NEXT: movw %di, 12(%rsi) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Reload -; SCALAR-NEXT: movw %bp, 10(%rsi) ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload -; SCALAR-NEXT: movw %di, 8(%rsi) +; SCALAR-NEXT: notl %edi +; SCALAR-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload -; SCALAR-NEXT: movw %cx, 6(%rsi) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload -; SCALAR-NEXT: movw %r8w, 4(%rsi) +; SCALAR-NEXT: notl %ecx +; SCALAR-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; SCALAR-NEXT: notl %eax +; SCALAR-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SCALAR-NEXT: movw %ax, 30(%rsi) +; SCALAR-NEXT: movw %cx, 28(%rsi) +; SCALAR-NEXT: movw %di, 26(%rsi) +; SCALAR-NEXT: movw %r8w, 24(%rsi) +; SCALAR-NEXT: movw %r9w, 22(%rsi) +; SCALAR-NEXT: movw %r10w, 20(%rsi) +; SCALAR-NEXT: movw %r11w, 18(%rsi) +; SCALAR-NEXT: movw %bx, 16(%rsi) +; SCALAR-NEXT: movw %bp, 14(%rsi) +; SCALAR-NEXT: movw %r14w, 12(%rsi) +; SCALAR-NEXT: movw %r15w, 10(%rsi) +; SCALAR-NEXT: movw %r12w, 8(%rsi) +; SCALAR-NEXT: movw %r13w, 6(%rsi) +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; SCALAR-NEXT: movw %di, 4(%rsi) ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; SCALAR-NEXT: movw %ax, 2(%rsi) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Reload -; SCALAR-NEXT: movw %bx, 
(%rsi) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Reload -; SCALAR-NEXT: movw %r13w, 30(%rdx) -; SCALAR-NEXT: movw %r11w, 28(%rdx) -; SCALAR-NEXT: movw %r10w, 26(%rdx) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload -; SCALAR-NEXT: movw %si, 24(%rdx) -; SCALAR-NEXT: movw %r12w, 22(%rdx) -; SCALAR-NEXT: movw %r15w, 20(%rdx) -; SCALAR-NEXT: movw %r14w, 18(%rdx) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Reload -; SCALAR-NEXT: movw %r11w, 16(%rdx) -; SCALAR-NEXT: movw %r9w, 14(%rdx) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 4-byte Reload -; SCALAR-NEXT: movw %r10w, 12(%rdx) -; SCALAR-NEXT: movw %bp, 10(%rdx) -; SCALAR-NEXT: movw %di, 8(%rdx) -; SCALAR-NEXT: movw %cx, 6(%rdx) -; SCALAR-NEXT: movw %r8w, 4(%rdx) +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; SCALAR-NEXT: movw %cx, (%rsi) +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; SCALAR-NEXT: movw %cx, 30(%rdx) +; SCALAR-NEXT: movw %cx, 62(%rdx) +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; SCALAR-NEXT: movw %cx, 28(%rdx) +; SCALAR-NEXT: movw %cx, 60(%rdx) +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; SCALAR-NEXT: movw %cx, 26(%rdx) +; SCALAR-NEXT: movw %cx, 58(%rdx) +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; SCALAR-NEXT: movw %cx, 24(%rdx) +; SCALAR-NEXT: movw %cx, 56(%rdx) +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; SCALAR-NEXT: movw %cx, 22(%rdx) +; SCALAR-NEXT: movw %cx, 54(%rdx) +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; SCALAR-NEXT: movw %cx, 20(%rdx) +; SCALAR-NEXT: movw %cx, 52(%rdx) +; SCALAR-NEXT: movw %r11w, 18(%rdx) +; SCALAR-NEXT: movw %r11w, 50(%rdx) +; SCALAR-NEXT: movl %ebx, %ecx +; SCALAR-NEXT: movw %cx, 16(%rdx) +; SCALAR-NEXT: movw %cx, 48(%rdx) +; SCALAR-NEXT: movw %bp, 14(%rdx) +; SCALAR-NEXT: movw %bp, 46(%rdx) +; SCALAR-NEXT: movw %r14w, 12(%rdx) +; SCALAR-NEXT: movw %r14w, 44(%rdx) +; SCALAR-NEXT: movw %r15w, 10(%rdx) +; SCALAR-NEXT: movw %r15w, 42(%rdx) +; SCALAR-NEXT: movw %r12w, 8(%rdx) +; SCALAR-NEXT: movw %r12w, 40(%rdx) +; SCALAR-NEXT: movw %r13w, 6(%rdx) +; SCALAR-NEXT: movw %r13w, 38(%rdx) +; SCALAR-NEXT: movl %edi, %ecx +; SCALAR-NEXT: movw %cx, 4(%rdx) +; SCALAR-NEXT: movw %cx, 36(%rdx) ; SCALAR-NEXT: movw %ax, 2(%rdx) -; SCALAR-NEXT: movl %ebx, %esi -; SCALAR-NEXT: movw %si, (%rdx) -; SCALAR-NEXT: movw %r13w, 62(%rdx) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Reload -; SCALAR-NEXT: movw %bx, 60(%rdx) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Reload -; SCALAR-NEXT: movw %bx, 58(%rdx) -; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Reload -; SCALAR-NEXT: movw %bx, 56(%rdx) -; SCALAR-NEXT: movw %r12w, 54(%rdx) -; SCALAR-NEXT: movw %r15w, 52(%rdx) -; SCALAR-NEXT: movw %r14w, 50(%rdx) -; SCALAR-NEXT: movw %r11w, 48(%rdx) -; SCALAR-NEXT: movw %r9w, 46(%rdx) -; SCALAR-NEXT: movw %r10w, 44(%rdx) -; SCALAR-NEXT: movw %bp, 42(%rdx) -; SCALAR-NEXT: movw %di, 40(%rdx) -; SCALAR-NEXT: movw %cx, 38(%rdx) -; SCALAR-NEXT: movw %r8w, 36(%rdx) ; SCALAR-NEXT: movw %ax, 34(%rdx) -; SCALAR-NEXT: movw %si, 32(%rdx) +; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; SCALAR-NEXT: movw %ax, (%rdx) +; SCALAR-NEXT: movw %ax, 32(%rdx) ; SCALAR-NEXT: popq %rbx ; SCALAR-NEXT: popq %r12 ; SCALAR-NEXT: popq %r13 @@ -7087,9 +7086,9 @@ define void @vec512_v32i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec. 
; SCALAR-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; SCALAR-NEXT: movzbl 13(%rdi), %eax ; SCALAR-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 12(%rdi), %r13d -; SCALAR-NEXT: movzbl 11(%rdi), %eax +; SCALAR-NEXT: movzbl 12(%rdi), %eax ; SCALAR-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; SCALAR-NEXT: movzbl 11(%rdi), %r13d ; SCALAR-NEXT: movzbl 10(%rdi), %r12d ; SCALAR-NEXT: movzbl 9(%rdi), %r15d ; SCALAR-NEXT: movzbl 8(%rdi), %r14d @@ -7098,15 +7097,15 @@ define void @vec512_v32i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec. ; SCALAR-NEXT: movzbl 5(%rdi), %r11d ; SCALAR-NEXT: movzbl 4(%rdi), %r10d ; SCALAR-NEXT: movzbl 3(%rdi), %r9d -; SCALAR-NEXT: movzbl 2(%rdi), %r8d +; SCALAR-NEXT: movzbl 2(%rdi), %ecx ; SCALAR-NEXT: movzbl (%rdi), %eax -; SCALAR-NEXT: movzbl 1(%rdi), %ecx +; SCALAR-NEXT: movzbl 1(%rdi), %r8d ; SCALAR-NEXT: notb %al ; SCALAR-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: notb %cl -; SCALAR-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; SCALAR-NEXT: notb %r8b ; SCALAR-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; SCALAR-NEXT: notb %cl +; SCALAR-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; SCALAR-NEXT: notb %r9b ; SCALAR-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; SCALAR-NEXT: notb %r10b @@ -7123,16 +7122,18 @@ define void @vec512_v32i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec. ; SCALAR-NEXT: movb %r15b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; SCALAR-NEXT: notb %r12b ; SCALAR-NEXT: movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 1-byte Folded Reload -; SCALAR-NEXT: notb %r11b -; SCALAR-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; SCALAR-NEXT: notb %r13b ; SCALAR-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 1-byte Folded Reload +; SCALAR-NEXT: notb %r13b ; SCALAR-NEXT: notb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill -; SCALAR-NEXT: notb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill -; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 1-byte Folded Reload -; SCALAR-NEXT: notb %r8b -; SCALAR-NEXT: notb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill +; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload +; SCALAR-NEXT: notb %r12b +; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 1-byte Folded Reload +; SCALAR-NEXT: notb %r11b +; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 1-byte Folded Reload +; SCALAR-NEXT: notb %r10b +; SCALAR-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; SCALAR-NEXT: movzbl 17(%rdi), %eax ; SCALAR-NEXT: notb %al ; SCALAR-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill @@ -7145,194 +7146,173 @@ define void @vec512_v32i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec. 
; SCALAR-NEXT: movzbl 20(%rdi), %eax ; SCALAR-NEXT: notb %al ; SCALAR-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 21(%rdi), %ebp -; SCALAR-NEXT: notb %bpl -; SCALAR-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 22(%rdi), %ebx -; SCALAR-NEXT: notb %bl -; SCALAR-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 23(%rdi), %r10d -; SCALAR-NEXT: notb %r10b -; SCALAR-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 24(%rdi), %r9d -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 25(%rdi), %ecx -; SCALAR-NEXT: notb %cl -; SCALAR-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 26(%rdi), %r14d -; SCALAR-NEXT: notb %r14b -; SCALAR-NEXT: movb %r14b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 27(%rdi), %r15d +; SCALAR-NEXT: movzbl 21(%rdi), %eax +; SCALAR-NEXT: notb %al +; SCALAR-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; SCALAR-NEXT: movzbl 22(%rdi), %eax +; SCALAR-NEXT: notb %al +; SCALAR-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; SCALAR-NEXT: movzbl 23(%rdi), %r15d ; SCALAR-NEXT: notb %r15b ; SCALAR-NEXT: movb %r15b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 28(%rdi), %r12d -; SCALAR-NEXT: notb %r12b -; SCALAR-NEXT: movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 29(%rdi), %r13d -; SCALAR-NEXT: notb %r13b -; SCALAR-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; SCALAR-NEXT: movzbl 30(%rdi), %eax +; SCALAR-NEXT: movzbl 24(%rdi), %ebx +; SCALAR-NEXT: notb %bl +; SCALAR-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; SCALAR-NEXT: movzbl 25(%rdi), %r8d +; SCALAR-NEXT: notb %r8b +; SCALAR-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; SCALAR-NEXT: movzbl 26(%rdi), %eax ; SCALAR-NEXT: notb %al ; SCALAR-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; SCALAR-NEXT: movzbl 27(%rdi), %r14d +; SCALAR-NEXT: notb %r14b +; SCALAR-NEXT: movzbl 28(%rdi), %ebp +; SCALAR-NEXT: notb %bpl +; SCALAR-NEXT: movzbl 29(%rdi), %r9d +; SCALAR-NEXT: notb %r9b +; SCALAR-NEXT: movzbl 30(%rdi), %ecx +; SCALAR-NEXT: notb %cl ; SCALAR-NEXT: movzbl 31(%rdi), %edi ; SCALAR-NEXT: notb %dil -; SCALAR-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; SCALAR-NEXT: movb %dil, 31(%rsi) -; SCALAR-NEXT: movb %al, 30(%rsi) -; SCALAR-NEXT: movb %r13b, 29(%rsi) -; SCALAR-NEXT: movb %r12b, 28(%rsi) -; SCALAR-NEXT: movb %r15b, 27(%rsi) -; SCALAR-NEXT: movb %r14b, 26(%rsi) -; SCALAR-NEXT: movb %cl, 25(%rsi) -; SCALAR-NEXT: movb %r9b, 24(%rsi) -; SCALAR-NEXT: movb %r10b, 23(%rsi) -; SCALAR-NEXT: movb %bl, 22(%rsi) -; SCALAR-NEXT: movb %bpl, 21(%rsi) -; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 1-byte Folded Reload -; SCALAR-NEXT: movb %bpl, 20(%rsi) -; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload -; SCALAR-NEXT: movb %al, 19(%rsi) -; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload -; SCALAR-NEXT: movb %al, 18(%rsi) +; SCALAR-NEXT: movb %cl, 30(%rsi) +; SCALAR-NEXT: movb %r9b, 29(%rsi) +; SCALAR-NEXT: movb %bpl, 28(%rsi) +; SCALAR-NEXT: movb %r14b, 27(%rsi) +; SCALAR-NEXT: movb %al, 26(%rsi) +; SCALAR-NEXT: movb %r8b, 25(%rsi) +; SCALAR-NEXT: movb %bl, 24(%rsi) +; SCALAR-NEXT: movb %r15b, 23(%rsi) ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload -; SCALAR-NEXT: movb %al, 17(%rsi) 
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
-; SCALAR-NEXT: movb %cl, 16(%rsi)
-; SCALAR-NEXT: movb %r8b, 15(%rsi)
-; SCALAR-NEXT: movl %r8d, %r14d
-; SCALAR-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; SCALAR-NEXT: movb %al, 22(%rsi)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 1-byte Folded Reload
-; SCALAR-NEXT: movb %bl, 14(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 13(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 12(%rsi)
-; SCALAR-NEXT: movb %r11b, 11(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %dil, 10(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %dil, 9(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %dil, 8(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 1-byte Folded Reload
-; SCALAR-NEXT: movb %r11b, 7(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 1-byte Folded Reload
-; SCALAR-NEXT: movb %r13b, 6(%rsi)
+; SCALAR-NEXT: movb %bl, 21(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r15b, 20(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r15b, 19(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r15b, 18(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r15b, 17(%rsi)
+; SCALAR-NEXT: movb %r10b, 16(%rsi)
+; SCALAR-NEXT: movb %r11b, 15(%rsi)
+; SCALAR-NEXT: movl %r11d, %r15d
+; SCALAR-NEXT: movb %r12b, 14(%rsi)
+; SCALAR-NEXT: movl %r12d, %ebx
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r12b, 13(%rsi)
+; SCALAR-NEXT: movb %r13b, 12(%rsi)
+; SCALAR-NEXT: movl %r13d, %r11d
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r10d # 1-byte Folded Reload
-; SCALAR-NEXT: movb %r10b, 5(%rsi)
+; SCALAR-NEXT: movb %r10b, 11(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r8b, 10(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r8b, 9(%rsi)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 1-byte Folded Reload
-; SCALAR-NEXT: movb %r12b, 4(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 1-byte Folded Reload
-; SCALAR-NEXT: movb %r9b, 3(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 1-byte Folded Reload
-; SCALAR-NEXT: movb %r15b, 2(%rsi)
+; SCALAR-NEXT: movb %r12b, 8(%rsi)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 1-byte Folded Reload
-; SCALAR-NEXT: movb %r8b, 1(%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %dil, (%rsi)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 31(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 30(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 29(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 28(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 27(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 26(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 25(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 24(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 23(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 22(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 21(%rdx)
-; SCALAR-NEXT: movb %bpl, 20(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 19(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 18(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 17(%rdx)
-; SCALAR-NEXT: movb %cl, 16(%rdx)
-; SCALAR-NEXT: movb %r14b, 15(%rdx)
-; SCALAR-NEXT: movb %bl, 14(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
-; SCALAR-NEXT: movb %cl, 13(%rdx)
-; SCALAR-NEXT: movb %al, 12(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 1-byte Folded Reload
-; SCALAR-NEXT: movb %sil, 11(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 1-byte Folded Reload
-; SCALAR-NEXT: movb %bl, 10(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 1-byte Folded Reload
-; SCALAR-NEXT: movb %r14b, 9(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 1-byte Folded Reload
-; SCALAR-NEXT: movb %bpl, 8(%rdx)
-; SCALAR-NEXT: movb %r11b, 7(%rdx)
-; SCALAR-NEXT: movb %r13b, 6(%rdx)
-; SCALAR-NEXT: movb %r10b, 5(%rdx)
-; SCALAR-NEXT: movb %r12b, 4(%rdx)
-; SCALAR-NEXT: movb %r9b, 3(%rdx)
-; SCALAR-NEXT: movb %r15b, 2(%rdx)
-; SCALAR-NEXT: movb %r8b, 1(%rdx)
-; SCALAR-NEXT: movb %dil, (%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 63(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 62(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 61(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 60(%rdx)
-; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 59(%rdx)
+; SCALAR-NEXT: movb %r8b, 7(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r8b, 6(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r8b, 5(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r13b, 4(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r13b, 3(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r13b, 2(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r13b, 1(%rsi)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 1-byte Folded Reload
+; SCALAR-NEXT: movb %r13b, (%rsi)
+; SCALAR-NEXT: movb %dil, 31(%rdx)
+; SCALAR-NEXT: movb %dil, 63(%rdx)
+; SCALAR-NEXT: movb %cl, 30(%rdx)
+; SCALAR-NEXT: movb %cl, 62(%rdx)
+; SCALAR-NEXT: movb %r9b, 29(%rdx)
+; SCALAR-NEXT: movb %r9b, 61(%rdx)
+; SCALAR-NEXT: movb %bpl, 28(%rdx)
+; SCALAR-NEXT: movb %bpl, 60(%rdx)
+; SCALAR-NEXT: movb %r14b, 27(%rdx)
+; SCALAR-NEXT: movb %r14b, 59(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 26(%rdx)
 ; SCALAR-NEXT: movb %al, 58(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 25(%rdx)
 ; SCALAR-NEXT: movb %al, 57(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 24(%rdx)
 ; SCALAR-NEXT: movb %al, 56(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 23(%rdx)
 ; SCALAR-NEXT: movb %al, 55(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 22(%rdx)
 ; SCALAR-NEXT: movb %al, 54(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 21(%rdx)
 ; SCALAR-NEXT: movb %al, 53(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 20(%rdx)
 ; SCALAR-NEXT: movb %al, 52(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 19(%rdx)
 ; SCALAR-NEXT: movb %al, 51(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 18(%rdx)
 ; SCALAR-NEXT: movb %al, 50(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 17(%rdx)
 ; SCALAR-NEXT: movb %al, 49(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 16(%rdx)
 ; SCALAR-NEXT: movb %al, 48(%rdx)
+; SCALAR-NEXT: movb %r15b, 15(%rdx)
+; SCALAR-NEXT: movb %r15b, 47(%rdx)
+; SCALAR-NEXT: movb %bl, 14(%rdx)
+; SCALAR-NEXT: movb %bl, 46(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 47(%rdx)
+; SCALAR-NEXT: movb %al, 13(%rdx)
+; SCALAR-NEXT: movb %al, 45(%rdx)
+; SCALAR-NEXT: movb %r11b, 12(%rdx)
+; SCALAR-NEXT: movb %r11b, 44(%rdx)
+; SCALAR-NEXT: movb %r10b, 11(%rdx)
+; SCALAR-NEXT: movb %r10b, 43(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 46(%rdx)
-; SCALAR-NEXT: movb %cl, 45(%rdx)
+; SCALAR-NEXT: movb %al, 10(%rdx)
+; SCALAR-NEXT: movb %al, 42(%rdx)
 ; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; SCALAR-NEXT: movb %al, 44(%rdx)
-; SCALAR-NEXT: movb %sil, 43(%rdx)
-; SCALAR-NEXT: movb %bl, 42(%rdx)
-; SCALAR-NEXT: movb %r14b, 41(%rdx)
-; SCALAR-NEXT: movb %bpl, 40(%rdx)
-; SCALAR-NEXT: movb %r11b, 39(%rdx)
-; SCALAR-NEXT: movb %r13b, 38(%rdx)
-; SCALAR-NEXT: movb %r10b, 37(%rdx)
-; SCALAR-NEXT: movb %r12b, 36(%rdx)
-; SCALAR-NEXT: movb %r9b, 35(%rdx)
-; SCALAR-NEXT: movb %r15b, 34(%rdx)
-; SCALAR-NEXT: movb %r8b, 33(%rdx)
-; SCALAR-NEXT: movb %dil, 32(%rdx)
+; SCALAR-NEXT: movb %al, 9(%rdx)
+; SCALAR-NEXT: movb %al, 41(%rdx)
+; SCALAR-NEXT: movb %r12b, 8(%rdx)
+; SCALAR-NEXT: movb %r12b, 40(%rdx)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 7(%rdx)
+; SCALAR-NEXT: movb %al, 39(%rdx)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 6(%rdx)
+; SCALAR-NEXT: movb %al, 38(%rdx)
+; SCALAR-NEXT: movb %r8b, 5(%rdx)
+; SCALAR-NEXT: movb %r8b, 37(%rdx)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 4(%rdx)
+; SCALAR-NEXT: movb %al, 36(%rdx)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 3(%rdx)
+; SCALAR-NEXT: movb %al, 35(%rdx)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 2(%rdx)
+; SCALAR-NEXT: movb %al, 34(%rdx)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, 1(%rdx)
+; SCALAR-NEXT: movb %al, 33(%rdx)
+; SCALAR-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; SCALAR-NEXT: movb %al, (%rdx)
+; SCALAR-NEXT: movb %al, 32(%rdx)
 ; SCALAR-NEXT: popq %rbx
 ; SCALAR-NEXT: popq %r12
 ; SCALAR-NEXT: popq %r13
diff --git a/llvm/test/CodeGen/X86/swap.ll b/llvm/test/CodeGen/X86/swap.ll
index e556900767afd..98815fd1416b5 100644
--- a/llvm/test/CodeGen/X86/swap.ll
+++ b/llvm/test/CodeGen/X86/swap.ll
@@ -12,10 +12,8 @@ define dso_local void @_Z4SwapP1SS0_(ptr nocapture %a, ptr nocapture %b) local_u
 ; NOAA-LABEL: _Z4SwapP1SS0_:
 ; NOAA: # %bb.0: # %entry
 ; NOAA-NEXT: vmovups (%rdi), %xmm0
-; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; NOAA-NEXT: vmovups (%rsi), %xmm0
-; NOAA-NEXT: vmovups %xmm0, (%rdi)
-; NOAA-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; NOAA-NEXT: vmovups (%rsi), %xmm1
+; NOAA-NEXT: vmovups %xmm1, (%rdi)
 ; NOAA-NEXT: vmovups %xmm0, (%rsi)
 ; NOAA-NEXT: retq
 ;
@@ -106,8 +104,6 @@ entry:
 define dso_local void @onealloc_readback_1(ptr nocapture %a, ptr nocapture %b) local_unnamed_addr {
 ; NOAA-LABEL: onealloc_readback_1:
 ; NOAA: # %bb.0: # %entry
-; NOAA-NEXT: vmovups (%rdi), %xmm0
-; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; NOAA-NEXT: vmovups (%rsi), %xmm0
 ; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; NOAA-NEXT: vmovups %xmm0, (%rdi)
@@ -135,8 +131,6 @@ entry:
 define dso_local void @onealloc_readback_2(ptr nocapture %a, ptr nocapture %b) local_unnamed_addr {
 ; NOAA-LABEL: onealloc_readback_2:
 ; NOAA: # %bb.0: # %entry
-; NOAA-NEXT: vmovups (%rdi), %xmm0
-; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; NOAA-NEXT: vmovups (%rsi), %xmm0
 ; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; NOAA-NEXT: vmovups %xmm0, (%rdi)
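The swap.ll hunks above are the most direct illustration of what codegen-level alias analysis buys here: the value loaded from the first buffer now stays live in %xmm0 while the second buffer is copied, instead of taking a round trip through a stack slot. The pattern is roughly the IR below (a hypothetical reduction, not copied from the test file; the real test goes through a struct copy, but the effect is the same): the source-level temporary is a non-escaping alloca, so AA can prove the intervening store leaves it untouched, forward the stored value straight to the reload, and let the stack traffic die.

define void @swap_sketch(ptr %a, ptr %b) {
entry:
  ; source-level temporary; a non-escaping alloca cannot alias %a or %b
  %tmp = alloca <4 x float>
  %va = load <4 x float>, ptr %a, align 1
  store <4 x float> %va, ptr %tmp
  %vb = load <4 x float>, ptr %b, align 1
  store <4 x float> %vb, ptr %a, align 1
  ; with AA, the store to %a above provably does not clobber %tmp, so this
  ; reload folds to %va and the spill/reload pair disappears
  %t = load <4 x float>, ptr %tmp
  store <4 x float> %t, ptr %b, align 1
  ret void
}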
diff --git a/llvm/test/CodeGen/X86/vectorcall.ll b/llvm/test/CodeGen/X86/vectorcall.ll
index 07446c6a7bfa4..4a31991e535cb 100644
--- a/llvm/test/CodeGen/X86/vectorcall.ll
+++ b/llvm/test/CodeGen/X86/vectorcall.ll
@@ -171,11 +171,11 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture reado
 define x86_vectorcallcc void @test_mixed_7(ptr noalias sret(%struct.HVA5) %agg.result) {
 ; CHECK-LABEL: test_mixed_7@@0
 ; X64: mov{{[ql]}} %rcx, %rax
-; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}})
-; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}})
-; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rcx|eax}})
-; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rcx|eax}})
 ; CHECK: movaps %xmm{{[0-9]}}, (%{{rcx|eax}})
+; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rcx|eax}})
+; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rcx|eax}})
+; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}})
+; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}})
 ; CHECK: ret{{[ql]}}
 entry:
 %a = alloca %struct.HVA5, align 16
diff --git a/llvm/test/CodeGen/X86/widen_arith-3.ll b/llvm/test/CodeGen/X86/widen_arith-3.ll
index 9031588f2690d..d783c7b442420 100644
--- a/llvm/test/CodeGen/X86/widen_arith-3.ll
+++ b/llvm/test/CodeGen/X86/widen_arith-3.ll
@@ -28,12 +28,12 @@ define void @update(ptr %dst, ptr %src, i32 %n) nounwind {
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: movl 12(%ebp), %edx
 ; CHECK-NEXT: movl 8(%ebp), %ecx
+; CHECK-NEXT: incl {{[0-9]+}}(%esp)
 ; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT: pinsrw $2, 4(%edx,%eax,8), %xmm1
 ; CHECK-NEXT: psubw %xmm0, %xmm1
 ; CHECK-NEXT: pextrw $2, %xmm1, 4(%ecx,%eax,8)
 ; CHECK-NEXT: movd %xmm1, (%ecx,%eax,8)
-; CHECK-NEXT: incl {{[0-9]+}}(%esp)
 ; CHECK-NEXT: jmp .LBB0_1
 ; CHECK-NEXT: .LBB0_3: # %afterfor
 ; CHECK-NEXT: movl %ebp, %esp
diff --git a/llvm/test/CodeGen/X86/widen_cast-1.ll b/llvm/test/CodeGen/X86/widen_cast-1.ll
index 566dde0ca13d3..72387f8a73edd 100644
--- a/llvm/test/CodeGen/X86/widen_cast-1.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-1.ll
@@ -42,11 +42,11 @@ define void @convert(ptr %dst, ptr %src) nounwind {
 ; ATOM-NEXT: # =>This Inner Loop Header: Depth=1
 ; ATOM-NEXT: movl (%esp), %eax
 ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; ATOM-NEXT: incl (%esp)
 ; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
 ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; ATOM-NEXT: psubw %xmm0, %xmm1
 ; ATOM-NEXT: movq %xmm1, (%ecx,%eax,8)
-; ATOM-NEXT: incl (%esp)
 ; ATOM-NEXT: cmpl $3, (%esp)
 ; ATOM-NEXT: jle .LBB0_2
 ; ATOM-NEXT: .LBB0_3: # %afterfor
diff --git a/llvm/test/CodeGen/X86/widen_cast-2.ll b/llvm/test/CodeGen/X86/widen_cast-2.ll
index cd06f27dcc55c..1c55f40bd388b 100644
--- a/llvm/test/CodeGen/X86/widen_cast-2.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-2.ll
@@ -22,9 +22,9 @@ define void @convert(ptr %dst, ptr %src) nounwind {
 ; CHECK-NEXT: psubw %xmm0, %xmm2
 ; CHECK-NEXT: psubw %xmm0, %xmm1
 ; CHECK-NEXT: movdqa %xmm1, (%ecx,%eax)
-; CHECK-NEXT: movd %xmm2, 16(%ecx,%eax)
-; CHECK-NEXT: pextrd $1, %xmm2, 20(%ecx,%eax)
 ; CHECK-NEXT: pextrd $2, %xmm2, 24(%ecx,%eax)
+; CHECK-NEXT: pextrd $1, %xmm2, 20(%ecx,%eax)
+; CHECK-NEXT: movd %xmm2, 16(%ecx,%eax)
 ; CHECK-NEXT: incl (%esp)
 ; CHECK-NEXT: cmpl $3, (%esp)
 ; CHECK-NEXT: jle .LBB0_2
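The widen_arith-3.ll, widen_cast-1.ll, and widen_cast-2.ll updates above are pure scheduling effects: no instruction is added or removed, but the increment of the stack-resident loop counter may now be placed across the vector loads and stores (and the element stores may retire in either offset order), because AA proves the counter slot and the destination buffer are disjoint. A hypothetical reduced loop of the same shape, with assumed names:

define void @counter_sketch(ptr %dst, <2 x i32> %v) {
entry:
  ; the loop counter lives in a stack slot, as in these unoptimized tests
  %i = alloca i32
  store i32 0, ptr %i
  br label %forbody

forbody:
  %c = load i32, ptr %i
  store <2 x i32> %v, ptr %dst
  ; once AA knows %i and %dst cannot overlap, the scheduler is free to move
  ; this update before or after the vector store above
  %c.next = add i32 %c, 1
  store i32 %c.next, ptr %i
  %done = icmp sgt i32 %c.next, 3
  br i1 %done, label %afterfor, label %forbody

afterfor:
  ret void
}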
diff --git a/llvm/test/CodeGen/X86/win32-eh.ll b/llvm/test/CodeGen/X86/win32-eh.ll
index 82dc4beaf972b..64a986246c35e 100644
--- a/llvm/test/CodeGen/X86/win32-eh.ll
+++ b/llvm/test/CodeGen/X86/win32-eh.ll
@@ -73,12 +73,12 @@ catch:
 ; CHECK-NEXT: pushl %esi
 ; CHECK-NEXT: subl ${{[0-9]+}}, %esp
 ; CHECK-NEXT: movl %ebp, %eax
-; CHECK-NEXT: movl %esp, -36(%ebp)
-; CHECK-NEXT: movl $-2, -16(%ebp)
 ; CHECK-NEXT: movl $L__ehtable$use_except_handler4, %[[lsda:[^ ,]*]]
 ; CHECK-NEXT: movl ___security_cookie, %[[seccookie:[^ ,]*]]
 ; CHECK-NEXT: xorl %[[seccookie]], %[[lsda]]
 ; CHECK-NEXT: movl %[[lsda]], -20(%ebp)
+; CHECK-NEXT: movl %esp, -36(%ebp)
+; CHECK-NEXT: movl $-2, -16(%ebp)
 ; CHECK-NEXT: xorl %[[seccookie]], %[[tmp1:[^ ,]*]]
 ; CHECK-NEXT: movl %[[tmp1]], -40(%ebp)
 ; CHECK-NEXT: leal -28(%ebp), %[[node:[^ ,]*]]
@@ -130,12 +130,12 @@ catch:
 ; CHECK-NEXT: pushl %esi
 ; CHECK-NEXT: subl ${{[0-9]+}}, %esp
 ; CHECK-NEXT: movl %ebp, %[[ehguard:[^ ,]*]]
-; CHECK-NEXT: movl %esp, -36(%ebp)
-; CHECK-NEXT: movl $-2, -16(%ebp)
 ; CHECK-NEXT: movl $L__ehtable$use_except_handler4_ssp, %[[lsda:[^ ,]*]]
 ; CHECK-NEXT: movl ___security_cookie, %[[seccookie:[^ ,]*]]
 ; CHECK-NEXT: xorl %[[seccookie]], %[[lsda]]
 ; CHECK-NEXT: movl %[[lsda]], -20(%ebp)
+; CHECK-NEXT: movl %esp, -36(%ebp)
+; CHECK-NEXT: movl $-2, -16(%ebp)
 ; CHECK-NEXT: xorl %[[seccookie]], %[[ehguard]]
 ; CHECK-NEXT: movl %[[ehguard]], -40(%ebp)
 ; CHECK-NEXT: leal -28(%ebp), %[[node:[^ ,]*]]
@@ -146,7 +146,7 @@ catch:
 ; CHECK-NEXT: movl $0, -16(%ebp)
 ; CHECK-NEXT: calll _may_throw_or_crash
 ; CHECK: movl -28(%ebp), %[[next:[^ ,]*]]
-; CHECK-NEXT: movl %[[next]], %fs:0
+; CHECK-NEXT: movl %[[next]], %fs:0
 ; CHECK: retl
 ; CHECK-NEXT: [[catch:[^ ,]*]]: # %catch{{$}}
@@ -156,7 +156,7 @@ catch:
 ; CHECK-LABEL: L__ehtable$use_except_handler4_ssp:
 ; CHECK-NEXT: .long -2
 ; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long -40
+; CHECK-NEXT: .long -40
 ; CHECK-NEXT: .long 0
 ; CHECK-NEXT: .long -2
 ; CHECK-NEXT: .long _catchall_filt
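The win32-eh.ll hunks are the same effect during frame setup: the saves of %esp and the -2 state value are merely sunk past the LSDA/security-cookie computation, with no net change in the instructions emitted. If any of these expectations needs to be revisited in isolation, the behavior should be comparable per test by flipping the new toggle, e.g. something like

llc -x86-use-aa=false path/to/test.ll

(flag spelling assumed from the usual cl::opt boolean form), and the autogenerated CHECK lines can be refreshed with llvm/utils/update_llc_test_checks.py for the files that carry them.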