|
| 1 | +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| 2 | +# RUN: llc -mtriple=amdgcn -verify-machineinstrs --run-pass=postrapseudos -o - %s | FileCheck -check-prefix=GCN %s |
| 3 | + |
| 4 | +# This testcase shows the necessity of removing the killed flag from the $sgpr source when |
| 5 | +# creating a V_WRITELANE_B32. The SI_SPILL_S32_TO_VGPR will be converted in postrapseudos from: |
| 6 | +# |
| 7 | +# $X = SI_SPILL_S32_TO_VGPR killed $sgpr0, 6, $X, implicit-def $sgpr0_sgpr1 |
| 8 | +# |
| 9 | +# to: |
| 10 | +# |
| 11 | +# $X = V_WRITELANE_B32 $sgpr0, 6, $X(tied-def 0) |
| 12 | +# |
| 13 | +# The killed flag must be removed since the implicit-def of $sgpr0_sgpr1 is |
| 14 | +# dropped and $sgpr0 is subsequently read (as part of $sgpr0_sgpr1) by the S_MOV_B64. |
| 15 | + |
| 16 | +--- |
| 17 | +name: non_uniform_loop |
| 18 | +tracksRegLiveness: true |
| 19 | +machineFunctionInfo: |
| 20 | + wwmReservedRegs: |
| 21 | + - '$vgpr63' |
| 22 | +body: | |
| 23 | + bb.0: |
| 24 | + liveins: $vgpr0 |
| 25 | + ; GCN-LABEL: name: non_uniform_loop |
| 26 | + ; GCN: liveins: $vgpr0 |
| 27 | + ; GCN-NEXT: {{ $}} |
| 28 | + ; GCN-NEXT: $sgpr0 = V_READLANE_B32 $vgpr63, 2 |
| 29 | + ; GCN-NEXT: $vgpr63 = V_WRITELANE_B32 undef $sgpr0, 6, $vgpr63 |
| 30 | + ; GCN-NEXT: $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1 |
| 31 | + ; GCN-NEXT: S_ENDPGM 0 |
| 32 | + $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 2, implicit-def $sgpr0_sgpr1 |
| 33 | + $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr0, 6, $vgpr63, implicit-def $sgpr0_sgpr1 |
| 34 | + $exec = S_MOV_B64_term killed renamable $sgpr0_sgpr1 |
| 35 | + S_ENDPGM 0 |
| 36 | +... |
| 37 | + |
| 38 | +# This test shows the necessity of adding an undef flag to the $sgpr source when |
| 39 | +# creating a V_WRITELANE_B32. |
| 40 | +# |
| 41 | +# The prologepilog pass creates: |
| 42 | +# |
| 43 | +# $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| 44 | +# |
| 45 | +# but $sgpr5 will be spilled by a: |
| 46 | +# |
| 47 | +# $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr1(tied-def 0), implicit $sgpr4_sgpr5 |
| 48 | +# |
| 49 | +# The SI_SPILL_S32_TO_VGPR will be converted in postrapseudos to: |
| 50 | +# |
| 51 | +# $vgpr1 = V_WRITELANE_B32 undef $sgpr5, 1, killed $vgpr1(tied-def 0) |
| 52 | +# |
| 53 | +# The undef flag is necessary to satisfy the MachineVerifier since $sgpr5 is read after $sgpr4_sgpr5 was killed by the S_MOV_B64. |
| 54 | +# This testcase was derived from function bitcast_v32i16_to_v64i8_scalar in amdgcn.bitcast.512bit.ll. |
| 55 | + |
| 56 | +--- |
| 57 | +name: bitcast_v32i16_to_v64i8_scalar |
| 58 | +tracksRegLiveness: true |
| 59 | +machineFunctionInfo: |
| 60 | + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| 61 | + stackPtrOffsetReg: '$sgpr32' |
| 62 | + wwmReservedRegs: |
| 63 | + - '$vgpr62' |
| 64 | +body: | |
| 65 | + bb.0: |
| 66 | + ; GCN-LABEL: name: bitcast_v32i16_to_v64i8_scalar |
| 67 | + ; GCN: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| 68 | + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| 69 | + ; GCN-NEXT: renamable $sgpr4 = IMPLICIT_DEF |
| 70 | + ; GCN-NEXT: $vgpr62 = V_WRITELANE_B32 undef $sgpr5, 1, killed $vgpr62 |
| 71 | + ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31 |
| 72 | + $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| 73 | + $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| 74 | + renamable $sgpr4 = IMPLICIT_DEF |
| 75 | + $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr62, implicit $sgpr4_sgpr5 |
| 76 | + SI_RETURN |
| 77 | +... |
| 78 | + |
0 commit comments