|
| 1 | +# RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -start-before=prologepilog -stop-after=postrapseudos -o - %s | FileCheck -check-prefix=GCN %s |
| 2 | + |
| 3 | +# This testcase shows the necessity of adding an undef flag to the $sgpr source when |
| 4 | +# creating a V_WRITELANE_B32. |
| 5 | +# |
| 6 | +# The prologepilog pass will create a: |
| 7 | +# |
| 8 | +# $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| 9 | +# |
| 10 | +# but $sgpr5 will be spilled by a: |
| 11 | +# |
| 12 | +# $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr1(tied-def 0), implicit $sgpr4_sgpr5 |
| 13 | +# |
| 14 | +# The SI_SPILL_S32_TO_VGPR will be converted in postrapseudos to: |
| 15 | +# |
| 16 | +# $vgpr1 = V_WRITELANE_B32 undef $sgpr5, 1, killed $vgpr1(tied-def 0) |
| 17 | +# |
| 18 | +# The undef flag is necessary to satisfy the MachineVerifier since $sgpr5 is used after a kill. |
| 19 | +# This testcase was derived from function bitcast_v32i16_to_v64i8_scalar in amdgcn.bitcast.512bit.ll. |
| 20 | + |
| 21 | +# GCN-LABEL: name: bitcast_v32i16_to_v64i8_scalar |
| 22 | +--- |
| 23 | +name: bitcast_v32i16_to_v64i8_scalar |
| 24 | +alignment: 1 |
| 25 | +exposesReturnsTwice: false |
| 26 | +legalized: false |
| 27 | +regBankSelected: false |
| 28 | +selected: false |
| 29 | +failedISel: false |
| 30 | +tracksRegLiveness: true |
| 31 | +hasWinCFI: false |
| 32 | +noPhis: true |
| 33 | +isSSA: false |
| 34 | +noVRegs: true |
| 35 | +hasFakeUses: false |
| 36 | +callsEHReturn: false |
| 37 | +callsUnwindInit: false |
| 38 | +hasEHContTarget: false |
| 39 | +hasEHScopes: false |
| 40 | +hasEHFunclets: false |
| 41 | +isOutlined: false |
| 42 | +debugInstrRef: false |
| 43 | +failsVerification: false |
| 44 | +tracksDebugUserValues: true |
| 45 | +registers: [] |
| 46 | +liveins: [] |
| 47 | +frameInfo: |
| 48 | + isFrameAddressTaken: false |
| 49 | + isReturnAddressTaken: false |
| 50 | + hasStackMap: false |
| 51 | + hasPatchPoint: false |
| 52 | + stackSize: 0 |
| 53 | + offsetAdjustment: 0 |
| 54 | + maxAlignment: 4 |
| 55 | + adjustsStack: false |
| 56 | + hasCalls: false |
| 57 | + stackProtector: '' |
| 58 | + functionContext: '' |
| 59 | + maxCallFrameSize: 4294967295 |
| 60 | + cvBytesOfCalleeSavedRegisters: 0 |
| 61 | + hasOpaqueSPAdjustment: false |
| 62 | + hasVAStart: false |
| 63 | + hasMustTailInVarArgFunc: false |
| 64 | + hasTailCall: false |
| 65 | + isCalleeSavedInfoValid: false |
| 66 | + localFrameSize: 0 |
| 67 | +fixedStack: [] |
| 68 | +stack: |
| 69 | + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, |
| 70 | + stack-id: default, callee-saved-register: '', callee-saved-restored: true, |
| 71 | + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } |
| 72 | + - { id: 1, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, |
| 73 | + stack-id: default, callee-saved-register: '', callee-saved-restored: true, |
| 74 | + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } |
| 75 | + - { id: 2, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, |
| 76 | + stack-id: default, callee-saved-register: '', callee-saved-restored: true, |
| 77 | + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } |
| 78 | +entry_values: [] |
| 79 | +callSites: [] |
| 80 | +debugValueSubstitutions: [] |
| 81 | +constants: [] |
| 82 | +machineFunctionInfo: |
| 83 | + explicitKernArgSize: 0 |
| 84 | + maxKernArgAlign: 1 |
| 85 | + ldsSize: 0 |
| 86 | + gdsSize: 0 |
| 87 | + dynLDSAlign: 1 |
| 88 | + isEntryFunction: false |
| 89 | + isChainFunction: false |
| 90 | + noSignedZerosFPMath: false |
| 91 | + memoryBound: false |
| 92 | + waveLimiter: false |
| 93 | + hasSpilledSGPRs: true |
| 94 | + hasSpilledVGPRs: true |
| 95 | + numWaveDispatchSGPRs: 30 |
| 96 | + numWaveDispatchVGPRs: 20 |
| 97 | + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| 98 | + frameOffsetReg: '$sgpr33' |
| 99 | + stackPtrOffsetReg: '$sgpr32' |
| 100 | + bytesInStackArgArea: 0 |
| 101 | + returnsVoid: true |
| 102 | + argumentInfo: |
| 103 | + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| 104 | + dispatchPtr: { reg: '$sgpr4_sgpr5' } |
| 105 | + queuePtr: { reg: '$sgpr6_sgpr7' } |
| 106 | + dispatchID: { reg: '$sgpr10_sgpr11' } |
| 107 | + workGroupIDX: { reg: '$sgpr12' } |
| 108 | + workGroupIDY: { reg: '$sgpr13' } |
| 109 | + workGroupIDZ: { reg: '$sgpr14' } |
| 110 | + LDSKernelId: { reg: '$sgpr15' } |
| 111 | + implicitArgPtr: { reg: '$sgpr8_sgpr9' } |
| 112 | + workItemIDX: { reg: '$vgpr31', mask: 1023 } |
| 113 | + workItemIDY: { reg: '$vgpr31', mask: 1047552 } |
| 114 | + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } |
| 115 | + psInputAddr: 0 |
| 116 | + psInputEnable: 0 |
| 117 | + maxMemoryClusterDWords: 8 |
| 118 | + mode: |
| 119 | + ieee: true |
| 120 | + dx10-clamp: true |
| 121 | + fp32-input-denormals: true |
| 122 | + fp32-output-denormals: true |
| 123 | + fp64-fp16-input-denormals: true |
| 124 | + fp64-fp16-output-denormals: true |
| 125 | + highBitsOf32BitAddress: 0 |
| 126 | + occupancy: 3 |
| 127 | + spillPhysVGPRs: |
| 128 | + - '$vgpr63' |
| 129 | + wwmReservedRegs: |
| 130 | + - '$vgpr63' |
| 131 | + - '$vgpr62' |
| 132 | + vgprForAGPRCopy: '' |
| 133 | + sgprForEXECCopy: '$sgpr100_sgpr101' |
| 134 | + longBranchReservedReg: '' |
| 135 | + hasInitWholeWave: false |
| 136 | + dynamicVGPRBlockSize: 0 |
| 137 | + scratchReservedForDynamicVGPRs: 0 |
| 138 | + numKernargPreloadSGPRs: 0 |
| 139 | + isWholeWaveFunction: false |
| 140 | +body: | |
| 141 | + bb.0: |
| 142 | + successors: %bb.3(0x40000000), %bb.1(0x40000000) |
| 143 | + |
| 144 | + renamable $sgpr4 = IMPLICIT_DEF ; only $sgpr4 is defined; nothing in this input body writes $sgpr5 |
| 145 | + $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr62, implicit $sgpr4_sgpr5 ; spill under test: reads $sgpr5 plus implicit $sgpr4_sgpr5; the header comment shows the expected V_WRITELANE_B32 form using $vgpr1, this body uses $vgpr62 |
| 146 | + S_CBRANCH_EXECZ %bb.1, implicit $exec |
| 147 | + S_BRANCH %bb.3 |
| 148 | + |
| 149 | + bb.1: |
| 150 | + successors: %bb.2(0x40000000), %bb.4(0x40000000) |
| 151 | + |
| 152 | + S_CBRANCH_EXECZ %bb.4, implicit $exec |
| 153 | + S_BRANCH %bb.2 |
| 154 | + |
| 155 | + bb.2: |
| 156 | + successors: %bb.4(0x80000000) |
| 157 | + |
| 158 | + S_BRANCH %bb.4 |
| 159 | + |
| 160 | + bb.3: |
| 161 | + successors: %bb.1(0x80000000) |
| 162 | + |
| 163 | + S_BRANCH %bb.1 |
| 164 | + |
| 165 | + bb.4: |
| 166 | + |
| 167 | + SI_RETURN |
| 168 | +... |
0 commit comments