Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8909,16 +8909,19 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
// needed by the prolog. However, the insertions for scalar registers can
// always be placed at the BB top as they are independent of the exec mask
// value.
const MachineFunction *MF = MI.getParent()->getParent();
bool IsNullOrVectorRegister = true;
if (Reg) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
const MachineRegisterInfo &MRI = MF->getRegInfo();
IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
}

uint16_t Opcode = MI.getOpcode();
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
return IsNullOrVectorRegister &&
(isSGPRSpill(Opcode) || isWWMRegSpillOpcode(Opcode) ||
Opcode == AMDGPU::IMPLICIT_DEF ||
(Opcode == AMDGPU::IMPLICIT_DEF &&
MFI->isWWMReg(MI.getOperand(0).getReg())) ||
(!MI.isTerminator() && Opcode != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
SMDiagnostic &Error, SMRange &SourceRange);

/// Record \p Reg by inserting it into WWMReservedRegs.
void reserveWWMRegister(Register Reg) {
  WWMReservedRegs.insert(Reg);
}
/// Query whether \p Reg is considered a WWM register.
///
/// A virtual register is answered by testing its WWM_REG flag through
/// checkFlag(); any other register is answered by membership in
/// WWMReservedRegs.
bool isWWMReg(Register Reg) const {
  if (Reg.isVirtual())
    return checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);

  // Not virtual: consult the set populated by reserveWWMRegister().
  return WWMReservedRegs.contains(Reg);
}

/// Overwrite the stored NonWWMRegMask with a copy of \p RegMask.
void updateNonWWMRegMask(BitVector &RegMask) {
  NonWWMRegMask = RegMask;
}
/// Return the stored NonWWMRegMask by value.
BitVector getNonWWMRegMask() const {
  return NonWWMRegMask;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ body: |
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
Expand Down Expand Up @@ -122,8 +122,8 @@ body: |
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1)
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
Expand Down Expand Up @@ -790,8 +790,8 @@ body: |
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]]
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
; CHECK-NEXT: v_readlane_b32 s4, v16, 4
; CHECK-NEXT: s_mov_b32 exec_lo, s4
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: ; implicit-def: $sgpr4
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
; CHECK-NEXT: ; implicit-def: $sgpr4
; CHECK-NEXT: v_mov_b32_e32 v1, s4
; CHECK-NEXT: v_mov_b32_e32 v2, s4
; CHECK-NEXT: v_mov_b32_e32 v3, s4
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1135,11 +1135,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-O0-NEXT: s_cbranch_execz .LBB5_5
; GCN-O0-NEXT: ; %bb.3: ; %bb4
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: v_mov_b32_e32 v0, s4
; GCN-O0-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GCN-O0-NEXT: s_mov_b32 s4, 0
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5370,9 +5370,9 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: v_readlane_b32 s4, v18, 25
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s7, s1
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
Expand Down Expand Up @@ -6223,8 +6223,8 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1)
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execz .LBB17_8
; NOOPT-NEXT: ; %bb.7: ; %bb1
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:68 ; 4-byte Folded Reload
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s6, s1
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
Expand Down Expand Up @@ -7286,10 +7286,10 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) {
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[8:9]
; NOOPT-NEXT: ; implicit-def: $sgpr2
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: v_readlane_b32 s0, v4, 0
; NOOPT-NEXT: v_readlane_b32 s1, v4, 1
; NOOPT-NEXT: ; implicit-def: $sgpr2
; NOOPT-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; NOOPT-NEXT: s_mov_b32 s0, 1
; NOOPT-NEXT: ; implicit-def: $sgpr1
Expand All @@ -7316,11 +7316,11 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) {
; NOOPT-NEXT: ;;#ASMEND
; NOOPT-NEXT: s_branch .LBB19_4
; NOOPT-NEXT: .LBB19_3: ; %bb4
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[8:9]
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s6, s1
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
Expand All @@ -7345,8 +7345,8 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) {
; NOOPT-NEXT: s_mov_b64 exec, s[8:9]
; NOOPT-NEXT: s_branch .LBB19_1
; NOOPT-NEXT: .LBB19_4: ; %bb7
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s7, s1
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
Expand Down Expand Up @@ -7529,10 +7529,10 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: v_readlane_b32 s0, v4, 0
; NOOPT-NEXT: v_readlane_b32 s1, v4, 1
; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; NOOPT-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; NOOPT-NEXT: s_mov_b32 s0, 1
; NOOPT-NEXT: ; implicit-def: $sgpr1
Expand Down Expand Up @@ -7560,11 +7560,11 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
; NOOPT-NEXT: ;;#ASMEND
; NOOPT-NEXT: s_branch .LBB20_4
; NOOPT-NEXT: .LBB20_3: ; %bb4
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s6, s1
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
Expand All @@ -7590,8 +7590,8 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: s_branch .LBB20_1
; NOOPT-NEXT: .LBB20_4: ; %bb7
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s10, s1
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
Expand Down Expand Up @@ -9105,9 +9105,9 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) {
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: ; implicit-def: $sgpr2_sgpr3
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: v_readlane_b32 s0, v18, 1
; NOOPT-NEXT: ; implicit-def: $sgpr2_sgpr3
; NOOPT-NEXT: ; kill: def $sgpr3 killed $sgpr3 killed $sgpr2_sgpr3
; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5
; NOOPT-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
Expand Down
Loading
Loading