|
| 1 | +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -start-before=amdgpu-insert-delay-alu %s -o - | FileCheck %s |
| 2 | + |
| 3 | +--- |
| 4 | +name: wmma_xdl_twoaddr_trans # WMMA whose accumulator input and result are both v[8:15], followed by V_EXP and a V_ADD that reads $vgpr8 |
| 5 | +tracksRegLiveness: true |
| 6 | +body: | |
| 7 | + bb.0: |
| 8 | + ; CHECK-LABEL: {{^}}wmma_xdl_twoaddr_trans: |
| 9 | + ; CHECK: %bb.0: |
| 10 | + ; CHECK-NEXT: v_wmma_f32_16x16x64_fp8_fp8 v[8:15], v[0:7], v[0:7], v[8:15] |
| 11 | + ; CHECK-NEXT: v_exp_f32_e32 v16, v16 |
| 12 | + ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2) |
| 13 | + ; CHECK-NEXT: v_add_nc_u32_e32 v17, v17, v8 |
| 14 | + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16, $vgpr17 |
| 15 | + $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, 0, implicit $exec ; defines $vgpr8..$vgpr15. NOTE(review): the TRANS32 dependency expected below suggests this WMMA is tracked together with TRANS instructions - confirm in the delay-alu pass |
| 16 | + $vgpr16 = V_EXP_F32_e32 $vgpr16, implicit $exec, implicit $mode ; only touches $vgpr16, so it does not depend on the WMMA result |
| 17 | + $vgpr17 = V_ADD_U32_e32 $vgpr17, $vgpr8, implicit $exec ; reads $vgpr8 defined by the WMMA; the expected output places s_delay_alu instid0(TRANS32_DEP_2) directly before this instruction |
| 18 | +... |
| 19 | + |
| 20 | +--- |
| 21 | +name: wmma_xdl_threeaddr_trans # same sequence as the two-address case, but the accumulator input v[16:23] is distinct from the result v[8:15] |
| 22 | +tracksRegLiveness: true |
| 23 | +body: | |
| 24 | + bb.0: |
| 25 | + ; CHECK-LABEL: {{^}}wmma_xdl_threeaddr_trans: |
| 26 | + ; CHECK: %bb.0: |
| 27 | + ; CHECK-NEXT: v_wmma_f32_16x16x64_fp8_fp8 v[8:15], v[0:7], v[0:7], v[16:23] |
| 28 | + ; CHECK-NEXT: v_exp_f32_e32 v24, v24 |
| 29 | + ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2) |
| 30 | + ; CHECK-NEXT: v_add_nc_u32_e32 v25, v25, v8 |
| 31 | + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24, $vgpr25 |
| 32 | + $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_WMMA_F32_16X16X64_FP8_FP8_w32_threeaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec ; defines $vgpr8..$vgpr15 from a separate accumulator tuple $vgpr16..$vgpr23 |
| 33 | + $vgpr24 = V_EXP_F32_e32 $vgpr24, implicit $exec, implicit $mode ; only touches $vgpr24, so it does not depend on the WMMA result |
| 34 | + $vgpr25 = V_ADD_U32_e32 $vgpr25, $vgpr8, implicit $exec ; reads $vgpr8 defined by the WMMA; the expected output places s_delay_alu instid0(TRANS32_DEP_2) directly before this instruction |
| 35 | +... |
| 36 | +--- |
| 37 | +name: swmmac_xdl_twoaddr_trans # SWMMAC whose result v[24:27] is also its third source operand, followed by V_EXP and a V_ADD that reads $vgpr24 |
| 38 | +tracksRegLiveness: true |
| 39 | +body: | |
| 40 | + bb.0: |
| 41 | + ; CHECK-LABEL: {{^}}swmmac_xdl_twoaddr_trans: |
| 42 | + ; CHECK: %bb.0: |
| 43 | + ; CHECK-NEXT: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29] |
| 44 | + ; CHECK-NEXT: v_exp_f32_e32 v30, v30 |
| 45 | + ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2) |
| 46 | + ; CHECK-NEXT: v_add_nc_u32_e32 v31, v31, v24 |
| 47 | + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 |
| 48 | + $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27, $vgpr28_vgpr29, 0, 0, 0, implicit $exec ; defines $vgpr24..$vgpr27 |
| 49 | + $vgpr30 = V_EXP_F32_e32 $vgpr30, implicit $exec, implicit $mode ; only touches $vgpr30, so it does not depend on the SWMMAC result |
| 50 | + $vgpr31 = V_ADD_U32_e32 $vgpr31, $vgpr24, implicit $exec ; reads $vgpr24 defined by the SWMMAC; the expected output places s_delay_alu instid0(TRANS32_DEP_2) directly before this instruction |
| 51 | +... |
| 52 | +--- |
| 53 | +name: wmma_non_xdl_large_data_valu # WMMA (two-address form, result v[4:11]) followed by V_EXP and a V_ADD that reads $vgpr8; here a VALU dependency is expected instead of a TRANS32 one |
| 54 | +tracksRegLiveness: true |
| 55 | +body: | |
| 56 | + bb.0: |
| 57 | + ; CHECK-LABEL: {{^}}wmma_non_xdl_large_data_valu: |
| 58 | + ; CHECK: %bb.0: |
| 59 | + ; CHECK-NEXT: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse |
| 60 | + ; CHECK-NEXT: v_exp_f32_e32 v12, v12 |
| 61 | + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 62 | + ; CHECK-NEXT: v_add_nc_u32_e32 v13, v13, v8 |
| 63 | + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12, $vgpr13 |
| 64 | + $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, $vgpr0_vgpr1, 8, $vgpr2_vgpr3, 8, $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 0, -1, 0, 0, implicit $exec ; defines $vgpr4..$vgpr11 |
| 65 | + $vgpr12 = V_EXP_F32_e32 $vgpr12, implicit $exec, implicit $mode ; only touches $vgpr12. NOTE(review): VALU_DEP_1 below implies this V_EXP is not counted as a VALU instruction - confirm in the delay-alu pass |
| 66 | + $vgpr13 = V_ADD_U32_e32 $vgpr13, $vgpr8, implicit $exec ; reads $vgpr8 defined by the WMMA; the expected output places s_delay_alu instid0(VALU_DEP_1) directly before this instruction |
| 67 | +... |
| 68 | + |
| 69 | +--- |
| 70 | +name: dot_xdl_dep_2 # two V_DOT4 instructions followed by a V_ADD that reads the result of the first one |
| 71 | +tracksRegLiveness: true |
| 72 | +body: | |
| 73 | + bb.0: |
| 74 | + ; CHECK-LABEL: {{^}}dot_xdl_dep_2: |
| 75 | + ; CHECK: %bb.0: |
| 76 | + ; CHECK-NEXT: v_dot4_i32_iu8 v0, s2, s3, v0 neg_lo:[1,1,0] |
| 77 | + ; CHECK-NEXT: v_dot4_i32_iu8 v1, s2, s3, v2 neg_lo:[1,1,0] |
| 78 | + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| 79 | + ; CHECK-NEXT: v_add_nc_u32_e32 v2, v0, v0 |
| 80 | + liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 |
| 81 | + $vgpr0 = V_DOT4_I32_IU8 9, $sgpr2, 9, $sgpr3, 8, $vgpr0, 0, 0, 0, implicit $exec ; defines $vgpr0, which the V_ADD below reads |
| 82 | + $vgpr1 = V_DOT4_I32_IU8 9, $sgpr2, 9, $sgpr3, 8, $vgpr2, 0, 0, 0, implicit $exec ; defines $vgpr1, which the V_ADD does not read |
| 83 | + $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec ; depends on the first V_DOT4, two instructions back; the expected output places s_delay_alu instid0(VALU_DEP_2) directly before this instruction |
| 84 | +... |
0 commit comments