|
1 | 1 | # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
2 | | -# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs --run-pass si-fold-operands %s -o - | FileCheck %s |
3 | | -# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=COALESCE |
4 | | -# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=GFX908-COALESCE |
| 2 | +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -run-pass si-fold-operands %s -o - | FileCheck %s |
| 3 | +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=COALESCE |
| 4 | +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=GFX908-COALESCE |
5 | 5 |
|
| 6 | +... |
6 | 7 | --- |
7 | 8 | name: test |
8 | 9 | tracksRegLiveness: true |
@@ -132,50 +133,50 @@ body: | |
132 | 133 | ; GFX908-COALESCE-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[V_PACK_B32_F16_e64_]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8) |
133 | 134 | ; GFX908-COALESCE-NEXT: S_ENDPGM 0 |
134 | 135 | bb.0: |
135 | | - successors: %bb.1, %bb.3 |
| 136 | + successors: %bb.2, %bb.1 |
136 | 137 | liveins: $sgpr4_sgpr5 |
137 | 138 |
|
138 | | - %259:sgpr_64(p4) = COPY $sgpr4_sgpr5 |
139 | | - %392:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %259(p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) |
140 | | - %264:sgpr_32 = S_MOV_B32 0 |
141 | | - S_BITCMP0_B32 killed %392, 0, implicit-def $scc |
142 | | - S_CBRANCH_SCC0 %bb.1, implicit $scc |
143 | | -
|
144 | | - bb.3: |
145 | | - successors: %bb.2 |
146 | | -
|
147 | | - %316:sgpr_32 = COPY %264 |
148 | | - %484:vgpr_32 = COPY %316, implicit $exec |
149 | | - S_BRANCH %bb.2 |
| 139 | + %0:sgpr_64(p4) = COPY $sgpr4_sgpr5 |
| 140 | + %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) |
| 141 | + %2:sgpr_32 = S_MOV_B32 0 |
| 142 | + S_BITCMP0_B32 killed %1, 0, implicit-def $scc |
| 143 | + S_CBRANCH_SCC0 %bb.2, implicit $scc |
150 | 144 |
|
151 | 145 | bb.1: |
152 | | - successors: %bb.2 |
| 146 | + successors: %bb.3 |
153 | 147 |
|
154 | | - %396:sgpr_32 = S_MOV_B32 0 |
155 | | - %424:vgpr_32 = COPY %396 |
156 | | - %425:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %424, implicit $exec |
157 | | - %427:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %424, implicit $exec |
158 | | - %429:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %424, implicit $exec |
159 | | - %431:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %424, implicit $exec |
160 | | - %403:areg_128_align2 = REG_SEQUENCE %425, %subreg.sub0, %427, %subreg.sub1, %429, %subreg.sub2, %431, %subreg.sub3 |
161 | | - %399:sreg_64 = REG_SEQUENCE %396, %subreg.sub0, %396, %subreg.sub1 |
162 | | - %401:vreg_64_align2 = COPY %399 |
163 | | - %400:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %401, %401, killed %403, 0, 0, 0, implicit $mode, implicit $exec |
164 | | - %404:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %401, %401, killed %400, 0, 0, 0, implicit $mode, implicit $exec |
165 | | - %407:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %401, %401, killed %404, 0, 0, 0, implicit $mode, implicit $exec |
166 | | - %410:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %401, %401, killed %407, 0, 0, 0, implicit $mode, implicit $exec |
167 | | - %48:vgpr_32 = COPY %410.sub0 |
168 | | - %52:vgpr_32 = COPY %48 |
| 148 | + %3:sgpr_32 = COPY %2 |
| 149 | + %4:vgpr_32 = COPY %3, implicit $exec |
| 150 | + S_BRANCH %bb.3 |
169 | 151 |
|
170 | 152 | bb.2: |
171 | | - %180:vgpr_32 = PHI %484, %bb.3, %52, %bb.1 |
172 | | - %413:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, %180, 0, 0, implicit $mode, implicit $exec |
173 | | - %415:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, killed %413, 0, %264, 0, 0, implicit $mode, implicit $exec |
174 | | - %423:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
175 | | - %422:vreg_64_align2 = REG_SEQUENCE %415, %subreg.sub0, killed %423, %subreg.sub1 |
176 | | - %419:sgpr_128 = REG_SEQUENCE %264, %subreg.sub0, %264, %subreg.sub1, %264, %subreg.sub2, %264, %subreg.sub3 |
177 | | - %420:vreg_64_align2 = COPY %422 |
178 | | - BUFFER_STORE_DWORDX2_OFFSET_exact killed %420, killed %419, %264, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8) |
| 153 | + successors: %bb.3 |
| 154 | +
|
| 155 | + %5:sgpr_32 = S_MOV_B32 0 |
| 156 | + %6:vgpr_32 = COPY %5 |
| 157 | + %7:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec |
| 158 | + %8:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec |
| 159 | + %9:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec |
| 160 | + %10:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec |
| 161 | + %11:areg_128_align2 = REG_SEQUENCE %7, %subreg.sub0, %8, %subreg.sub1, %9, %subreg.sub2, %10, %subreg.sub3 |
| 162 | + %12:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %5, %subreg.sub1 |
| 163 | + %13:vreg_64_align2 = COPY %12 |
| 164 | + %14:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %11, 0, 0, 0, implicit $mode, implicit $exec |
| 165 | + %15:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %14, 0, 0, 0, implicit $mode, implicit $exec |
| 166 | + %16:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %15, 0, 0, 0, implicit $mode, implicit $exec |
| 167 | + %17:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %16, 0, 0, 0, implicit $mode, implicit $exec |
| 168 | + %18:vgpr_32 = COPY %17.sub0 |
| 169 | + %19:vgpr_32 = COPY %18 |
| 170 | +
|
| 171 | + bb.3: |
| 172 | + %20:vgpr_32 = PHI %4, %bb.1, %19, %bb.2 |
| 173 | + %21:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, %20, 0, 0, implicit $mode, implicit $exec |
| 174 | + %22:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, killed %21, 0, %2, 0, 0, implicit $mode, implicit $exec |
| 175 | + %23:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| 176 | + %24:vreg_64_align2 = REG_SEQUENCE %22, %subreg.sub0, killed %23, %subreg.sub1 |
| 177 | + %25:sgpr_128 = REG_SEQUENCE %2, %subreg.sub0, %2, %subreg.sub1, %2, %subreg.sub2, %2, %subreg.sub3 |
| 178 | + %26:vreg_64_align2 = COPY %24 |
| 179 | + BUFFER_STORE_DWORDX2_OFFSET_exact killed %26, killed %25, %2, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8) |
179 | 180 | S_ENDPGM 0 |
180 | 181 |
|
181 | 182 | ... |
0 commit comments