@@ -56,14 +56,9 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind {
56
56
; CHECK-LABEL: PR90954:
57
57
; CHECK: # %bb.0:
58
58
; CHECK-NEXT: pushq %rbp
59
- ; CHECK-NEXT: movq %rsp, %rbp
60
- ; CHECK-NEXT: pushq %r15
61
59
; CHECK-NEXT: pushq %r14
62
- ; CHECK-NEXT: pushq %r13
63
- ; CHECK-NEXT: pushq %r12
64
60
; CHECK-NEXT: pushq %rbx
65
- ; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00
66
- ; CHECK-NEXT: subq $5120, %rsp # imm = 0x1400
61
+ ; CHECK-NEXT: subq $2912, %rsp # imm = 0xB60
67
62
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
68
63
; CHECK-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
69
64
; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp)
@@ -79,29 +74,26 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind {
79
74
; CHECK-NEXT: movw $64, %cx
80
75
; CHECK-NEXT: movw $16, %di
81
76
; CHECK-NEXT: movb $1, %r8b
82
- ; CHECK-NEXT: movl $64, %r9d
83
- ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10
84
- ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r11
85
- ; CHECK-NEXT: xorl %ebx, %ebx
86
- ; CHECK-NEXT: xorl %r14d, %r14d
77
+ ; CHECK-NEXT: xorl %r9d, %r9d
78
+ ; CHECK-NEXT: xorl %r10d, %r10d
87
79
; CHECK-NEXT: jmp .LBB1_1
88
80
; CHECK-NEXT: .p2align 4
89
81
; CHECK-NEXT: .LBB1_5: # in Loop: Header=BB1_1 Depth=1
90
- ; CHECK-NEXT: incq %r14
91
- ; CHECK-NEXT: addl %edx, %ebx
82
+ ; CHECK-NEXT: incq %r10
83
+ ; CHECK-NEXT: addl %edx, %r9d
92
84
; CHECK-NEXT: .LBB1_1: # =>This Loop Header: Depth=1
93
85
; CHECK-NEXT: # Child Loop BB1_2 Depth 2
94
- ; CHECK-NEXT: movslq %ebx , %r15
95
- ; CHECK-NEXT: leaq (%rsi,%r15 ,4), %r15
96
- ; CHECK-NEXT: xorl %r12d , %r12d
97
- ; CHECK-NEXT: xorl %r13d , %r13d
86
+ ; CHECK-NEXT: movslq %r9d , %r11
87
+ ; CHECK-NEXT: leaq (%rsi,%r11 ,4), %r11
88
+ ; CHECK-NEXT: xorl %ebx , %ebx
89
+ ; CHECK-NEXT: xorl %r14d , %r14d
98
90
; CHECK-NEXT: jmp .LBB1_2
99
91
; CHECK-NEXT: .p2align 4
100
92
; CHECK-NEXT: .LBB1_4: # in Loop: Header=BB1_2 Depth=2
101
- ; CHECK-NEXT: tilestored %tmm1, (%r15 ,%rax)
102
- ; CHECK-NEXT: incq %r13
103
- ; CHECK-NEXT: addq $64, %r15
104
- ; CHECK-NEXT: decq %r12
93
+ ; CHECK-NEXT: tilestored %tmm1, (%r11 ,%rax)
94
+ ; CHECK-NEXT: incq %r14
95
+ ; CHECK-NEXT: addq $64, %r11
96
+ ; CHECK-NEXT: decq %rbx
105
97
; CHECK-NEXT: je .LBB1_5
106
98
; CHECK-NEXT: .LBB1_2: # Parent Loop BB1_1 Depth=1
107
99
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
@@ -110,46 +102,12 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind {
110
102
; CHECK-NEXT: testb %r8b, %r8b
111
103
; CHECK-NEXT: jne .LBB1_4
112
104
; CHECK-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=2
113
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
114
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
115
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
116
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
117
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
118
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
119
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
120
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
121
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
122
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
123
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
124
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
125
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
126
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
127
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
128
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
129
- ; CHECK-NEXT: tileloadd (%r10,%r9), %tmm1
130
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
131
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
132
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
133
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
134
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
135
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
136
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
137
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
138
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
139
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
140
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
141
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
142
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
143
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
144
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
145
- ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
146
- ; CHECK-NEXT: tileloadd (%r11,%r9), %tmm2
105
+ ; CHECK-NEXT: tilezero %tmm1
106
+ ; CHECK-NEXT: tilezero %tmm2
147
107
; CHECK-NEXT: tdpbf16ps %tmm2, %tmm1, %tmm0
148
- ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
149
- ; CHECK-NEXT: movabsq $64, %rax
150
- ; CHECK-NEXT: tilestored %tmm0, 3072(%rsp,%rax) # 1024-byte Folded Spill
151
- ; CHECK-NEXT: tileloadd 3072(%rsp,%rax), %tmm1 # 1024-byte Folded Reload
152
- ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
108
+ ; CHECK-NEXT: movabsq $64, %rbp
109
+ ; CHECK-NEXT: tilestored %tmm0, 896(%rsp,%rbp) # 1024-byte Folded Spill
110
+ ; CHECK-NEXT: tileloadd 896(%rsp,%rbp), %tmm1 # 1024-byte Folded Reload
153
111
; CHECK-NEXT: jmp .LBB1_4
154
112
%4 = shl i32 %2 , 4
155
113
%5 = icmp eq i64 0 , 0
0 commit comments