|
| 1 | +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| 2 | +# RUN: llc -march=amdgcn -mcpu=gfx1250 -run-pass=postmisched,post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s |
| 3 | + |
| 4 | +# Bring all independent V_LSHL_ADD_U32_e64 instructions into the shadow |
| 5 | +# of the WMMA so then hazard recognizer only need to insert 4 V_NOP_e32 |
| 6 | +# instructions instead of 8. |
| 7 | + |
| 8 | +--- |
| 9 | +name: test_wmma_scale_f32_16x16x128_f8f6f4_shadow_sched |
| 10 | +tracksRegLiveness: true |
| 11 | +body: | |
| 12 | + bb.0: |
| 13 | + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45 |
| 14 | +
|
| 15 | + ; GCN-LABEL: name: test_wmma_scale_f32_16x16x128_f8f6f4_shadow_sched |
| 16 | + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45 |
| 17 | + ; GCN-NEXT: {{ $}} |
| 18 | + ; GCN-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec |
| 19 | + ; GCN-NEXT: $vgpr46 = V_LSHL_ADD_U32_e64 killed $vgpr43, 1, $vgpr43, implicit $exec |
| 20 | + ; GCN-NEXT: $vgpr47 = V_LSHL_ADD_U32_e64 killed $vgpr42, 1, $vgpr42, implicit $exec |
| 21 | + ; GCN-NEXT: $vgpr48 = V_LSHL_ADD_U32_e64 killed $vgpr41, 1, $vgpr41, implicit $exec |
| 22 | + ; GCN-NEXT: $vgpr49 = V_LSHL_ADD_U32_e64 killed $vgpr40, 1, $vgpr40, implicit $exec |
| 23 | + ; GCN-NEXT: V_NOP_e32 implicit $exec |
| 24 | + ; GCN-NEXT: V_NOP_e32 implicit $exec |
| 25 | + ; GCN-NEXT: V_NOP_e32 implicit $exec |
| 26 | + ; GCN-NEXT: V_NOP_e32 implicit $exec |
| 27 | + ; GCN-NEXT: $vgpr4 = V_ADD_F32_e32 1065353216, killed $vgpr4, implicit $mode, implicit $exec |
| 28 | + ; GCN-NEXT: $vgpr5 = V_ADD_F32_e32 1065353216, killed $vgpr5, implicit $mode, implicit $exec |
| 29 | + ; GCN-NEXT: $vgpr6 = V_ADD_F32_e32 1065353216, killed $vgpr6, implicit $mode, implicit $exec |
| 30 | + ; GCN-NEXT: $vgpr7 = V_ADD_F32_e32 1065353216, killed $vgpr7, implicit $mode, implicit $exec |
| 31 | + ; GCN-NEXT: $vgpr8 = V_ADD_F32_e32 1065353216, killed $vgpr8, implicit $mode, implicit $exec |
| 32 | + ; GCN-NEXT: $vgpr9 = V_ADD_F32_e32 1065353216, killed $vgpr9, implicit $mode, implicit $exec |
| 33 | + ; GCN-NEXT: $vgpr10 = V_ADD_F32_e32 1065353216, killed $vgpr10, implicit $mode, implicit $exec |
| 34 | + ; GCN-NEXT: $vgpr11 = V_ADD_F32_e32 1065353216, killed $vgpr11, implicit $mode, implicit $exec |
| 35 | + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec |
| 36 | + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr8_vgpr9_vgpr10_vgpr11, 32, 0, implicit $exec |
| 37 | + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 killed renamable $vgpr44_vgpr45, killed renamable $vgpr46_vgpr47_vgpr48_vgpr49, 64, 0, implicit $exec |
| 38 | + ; GCN-NEXT: S_ENDPGM 0 |
| 39 | + early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec |
| 40 | + $vgpr4 = V_ADD_F32_e32 1065353216, $vgpr4, implicit $mode, implicit $exec |
| 41 | + $vgpr5 = V_ADD_F32_e32 1065353216, $vgpr5, implicit $mode, implicit $exec |
| 42 | + $vgpr6 = V_ADD_F32_e32 1065353216, $vgpr6, implicit $mode, implicit $exec |
| 43 | + $vgpr7 = V_ADD_F32_e32 1065353216, $vgpr7, implicit $mode, implicit $exec |
| 44 | + $vgpr8 = V_ADD_F32_e32 1065353216, $vgpr8, implicit $mode, implicit $exec |
| 45 | + $vgpr9 = V_ADD_F32_e32 1065353216, $vgpr9, implicit $mode, implicit $exec |
| 46 | + $vgpr10 = V_ADD_F32_e32 1065353216, $vgpr10, implicit $mode, implicit $exec |
| 47 | + $vgpr11 = V_ADD_F32_e32 1065353216, $vgpr11, implicit $mode, implicit $exec |
| 48 | + $vgpr46 = V_LSHL_ADD_U32_e64 $vgpr43, 1, $vgpr43, implicit $exec |
| 49 | + $vgpr47 = V_LSHL_ADD_U32_e64 $vgpr42, 1, $vgpr42, implicit $exec |
| 50 | + $vgpr48 = V_LSHL_ADD_U32_e64 $vgpr41, 1, $vgpr41, implicit $exec |
| 51 | + $vgpr49 = V_LSHL_ADD_U32_e64 $vgpr40, 1, $vgpr40, implicit $exec |
| 52 | + GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec |
| 53 | + GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr8_vgpr9_vgpr10_vgpr11, 32, 0, implicit $exec |
| 54 | + GLOBAL_STORE_DWORDX4 renamable $vgpr44_vgpr45, killed renamable $vgpr46_vgpr47_vgpr48_vgpr49, 64, 0, implicit $exec |
| 55 | + S_ENDPGM 0 |
| 56 | +... |
0 commit comments