; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f -hoist-const-loads=false | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s

define void @eggs(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, ptr %arg13, ptr %arg14) nounwind {
; CHECK-LABEL: eggs:
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r14
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r14
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r12
-; CHECK-NEXT: leaq (%r12,%r14,8), %r14
-; CHECK-NEXT: leaq (%r12,%r15,8), %r15
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; CHECK-NEXT: leaq (%rbx,%r10,8), %r10
+; CHECK-NEXT: leaq (%rbx,%r11,8), %r11
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: xorl %r12d, %r12d
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r13
-; CHECK-NEXT: addq %rbx, %r13
-; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rbx
-; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: vmovupd (%r14,%r15,8), %zmm1
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; CHECK-NEXT: addq %r12, %r15
+; CHECK-NEXT: vmovupd (%r14,%r15,8), %zmm2
+; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %r12
+; CHECK-NEXT: vmovupd (%r14,%r12,8), %zmm8
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vxorpd %xmm5, %xmm5, %xmm5
+; CHECK-NEXT: vxorpd %xmm6, %xmm6, %xmm6
+; CHECK-NEXT: vxorpd %xmm7, %xmm7, %xmm7
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_1: ## %bb15
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmovupd (%rax,%r11,8), %zmm6
-; CHECK-NEXT: vmovupd (%rax,%r13,8), %zmm7
-; CHECK-NEXT: vmovupd (%rax,%rbx,8), %zmm8
-; CHECK-NEXT: vbroadcastsd (%r15,%r12,8), %zmm9
-; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm0 = (zmm6 * zmm9) + zmm0
-; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm1 = (zmm7 * zmm9) + zmm1
-; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm2 = (zmm8 * zmm9) + zmm2
-; CHECK-NEXT: vbroadcastsd (%r14,%r12,8), %zmm9
-; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm3 = (zmm9 * zmm6) + zmm3
-; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm4 = (zmm9 * zmm7) + zmm4
-; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm5 = (zmm8 * zmm9) + zmm5
-; CHECK-NEXT: incq %r12
-; CHECK-NEXT: cmpq %r12, %r10
+; CHECK-NEXT: vbroadcastsd (%r11,%rbx,8), %zmm9
+; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm0 = (zmm1 * zmm9) + zmm0
+; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm3 = (zmm2 * zmm9) + zmm3
+; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm4 = (zmm8 * zmm9) + zmm4
+; CHECK-NEXT: vbroadcastsd (%r10,%rbx,8), %zmm9
+; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm5 = (zmm1 * zmm9) + zmm5
+; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm6 = (zmm2 * zmm9) + zmm6
+; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm7 = (zmm8 * zmm9) + zmm7
+; CHECK-NEXT: incq %rbx
+; CHECK-NEXT: cmpq %rbx, %rax
; CHECK-NEXT: jne LBB0_1
; CHECK-NEXT: ## %bb.2: ## %bb51
; CHECK-NEXT: vmovapd %zmm0, (%rdi)
-; CHECK-NEXT: vmovapd %zmm1, (%rsi)
-; CHECK-NEXT: vmovapd %zmm2, (%rdx)
-; CHECK-NEXT: vmovapd %zmm3, (%rcx)
-; CHECK-NEXT: vmovapd %zmm4, (%r8)
-; CHECK-NEXT: vmovapd %zmm5, (%r9)
+; CHECK-NEXT: vmovapd %zmm3, (%rsi)
+; CHECK-NEXT: vmovapd %zmm4, (%rdx)
+; CHECK-NEXT: vmovapd %zmm5, (%rcx)
+; CHECK-NEXT: vmovapd %zmm6, (%r8)
+; CHECK-NEXT: vmovapd %zmm7, (%r9)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
-; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: vzeroupper