Skip to content

Commit 3a64b74

Browse files
committed
Merge amd-gfx13 and amd-gfx into amd-gfx-gfx13
3 parents a8a26d4 + d2208c9 + 70bfbfa commit 3a64b74

File tree

8 files changed

+11225
-7926
lines changed

8 files changed

+11225
-7926
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5707,6 +5707,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
57075707
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, SizeSrc2);
57085708
break;
57095709
}
5710+
case Intrinsic::amdgcn_waterfall_last_use_vgpr: {
5711+
unsigned SizeDst = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
5712+
unsigned SizeSrc1 = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
5713+
unsigned SizeSrc2 = getSizeInBits(MI.getOperand(3).getReg(), MRI, *TRI);
5714+
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeDst);
5715+
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, SizeSrc1);
5716+
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeSrc2);
5717+
break;
5718+
}
57105719
case Intrinsic::amdgcn_ds_gws_init:
57115720
case Intrinsic::amdgcn_ds_gws_barrier:
57125721
case Intrinsic::amdgcn_ds_gws_sema_br: {

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -924,9 +924,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
924924
PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
925925
});
926926

927-
PB.registerOptimizerEarlyEPCallback([this](ModulePassManager &MPM,
928-
OptimizationLevel Level,
929-
ThinOrFullLTOPhase Phase) {
927+
PB.registerOptimizerEarlyEPCallback([](ModulePassManager &MPM,
928+
OptimizationLevel Level,
929+
ThinOrFullLTOPhase Phase) {
930930
MPM.addPass(AMDGPURankSpecializationPass());
931931
});
932932

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6861,15 +6861,19 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
68616861
.addReg(WaveIDInWaveGroup)
68626862
.addImm(Rank);
68636863
BuildMI(*BB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0)).addMBB(SplitBB);
6864+
68646865
// Call inside the conditional branch.
68656866
Register CalleeAddrReg = MI.getOperand(1).getReg();
6866-
BuildMI(*RankCallBB, RankCallBB->end(), DL, TII->get(AMDGPU::S_SETPC_B64))
6867-
.addReg(CalleeAddrReg);
6867+
auto CalleeAddrDef = MRI.getVRegDef(CalleeAddrReg);
6868+
assert(CalleeAddrDef->getOpcode() == AMDGPU::SI_PC_ADD_REL_OFFSET64);
6869+
// Use s_add_pc_i64, bypass the address computation.
6870+
BuildMI(*RankCallBB, RankCallBB->end(), DL, TII->get(AMDGPU::S_ADD_PC_I64))
6871+
.addGlobalAddress(CalleeAddrDef->getOperand(1).getGlobal(), 0,
6872+
SIInstrInfo::MO_REL64);
6873+
68686874
// Update IDX0 for the next rank-call. Use the global address of the rank
68696875
// callee as the source. In AsmPrinter, it will be replaced with the
68706876
// MCSymbol representing the number of VGPRs of that callee.
6871-
auto CalleeAddrDef = MRI.getVRegDef(CalleeAddrReg);
6872-
assert(CalleeAddrDef->getOpcode() == AMDGPU::SI_PC_ADD_REL_OFFSET64);
68736877
BuildMI(*SplitBB, SplitBB->begin(), DL, TII->get(AMDGPU::S_ADD_GPR_IDX_U32),
68746878
AMDGPU::IDX0)
68756879
.addGlobalAddress(CalleeAddrDef->getOperand(1).getGlobal(), 0,

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 36 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2551,57 +2551,44 @@ def : GCNPat <
25512551
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
25522552
>;
25532553

2554-
multiclass SI_WATERFALL_Pattern < ValueType dvt, ValueType svt, string VecSize> {
2555-
def : GCNPat<
2556-
(i32 (int_amdgcn_waterfall_begin i32:$tok, svt:$idx)),
2557-
(!cast<Instruction>("SI_WATERFALL_BEGIN_"#VecSize) i32:$tok, svt:$idx)
2558-
>;
2559-
2560-
def : GCNPat<
2561-
(dvt (int_amdgcn_waterfall_readfirstlane i32:$tok, svt:$src)),
2562-
(!cast<Instruction>("SI_WATERFALL_READFIRSTLANE_"#VecSize) i32:$tok, svt:$src)
2563-
>;
2564-
2565-
def : GCNPat<
2566-
(svt (int_amdgcn_waterfall_end i32:$tok, svt:$src)),
2567-
(!cast<Instruction>("SI_WATERFALL_END_"#VecSize) i32:$tok, svt:$src)
2568-
>;
2569-
}
2570-
2571-
multiclass SI_WATERFALL_S_Pattern < ValueType svt, string VecSize> {
2572-
def : GCNPat<
2573-
(i32 (int_amdgcn_waterfall_begin i32:$tok, svt:$idx)),
2574-
(!cast<Instruction>("SI_WATERFALL_BEGIN_"#VecSize) i32:$tok, svt:$idx)
2575-
>;
2576-
2577-
def : GCNPat<
2578-
(svt (int_amdgcn_waterfall_readfirstlane i32:$tok, svt:$src)),
2579-
(!cast<Instruction>("SI_WATERFALL_READFIRSTLANE_"#VecSize) i32:$tok, svt:$src)
2580-
>;
2581-
2582-
def : GCNPat<
2583-
(svt (int_amdgcn_waterfall_end i32:$tok, svt:$src)),
2584-
(!cast<Instruction>("SI_WATERFALL_END_"#VecSize) i32:$tok, svt:$src)
2585-
>;
2586-
2587-
def : GCNPat<
2588-
(svt (int_amdgcn_waterfall_last_use i32:$tok, svt:$src)),
2589-
(!cast<Instruction>("SI_WATERFALL_LAST_USE_"#VecSize) i32:$tok, svt:$src)
2590-
>;
2591-
def : GCNPat<
2592-
(svt (int_amdgcn_waterfall_last_use_vgpr i32:$tok, svt:$src)),
2593-
(!cast<Instruction>("SI_WATERFALL_LAST_USE_"#VecSize#"_V") i32:$tok, svt:$src)
2594-
>;
2554+
// Selection pattern for int_amdgcn_waterfall_readfirstlane with separately
// specified result (dvt) and source (svt) value types. The intrinsic is mapped
// to the instruction named "SI_WATERFALL_READFIRSTLANE_" followed by VecSize,
// and the token and source operands are forwarded unchanged.
multiclass SI_WATERFALL_Pattern<ValueType dvt, ValueType svt, string VecSize> {
2555+
def : GCNPat<(dvt(int_amdgcn_waterfall_readfirstlane i32:$tok, svt:$src)),
2556+
(!cast<Instruction>("SI_WATERFALL_READFIRSTLANE_"#VecSize)
2557+
i32:$tok,
2558+
svt:$src)>;
2559+
}
2560+
2561+
// For every value type in `vts`, emits the selection patterns that map the
// waterfall intrinsics (begin, readfirstlane, end, last_use, last_use_vgpr)
// onto the SI_WATERFALL_* instructions whose names end in the VecSize suffix.
// Except for waterfall_begin, whose result is i32, each pattern uses the same
// type `vt` for both the result and the source operand.
multiclass SI_WATERFALL_S_Pattern<list<ValueType> vts, string VecSize> {
2562+
foreach vt = vts in {
2563+
// waterfall_begin yields an i32 value regardless of the index type.
def : GCNPat<(i32(int_amdgcn_waterfall_begin i32:$tok, vt:$idx)),
2564+
(!cast<Instruction>("SI_WATERFALL_BEGIN_"#VecSize) i32:$tok,
2565+
vt:$idx)>;
2566+
2567+
def : GCNPat<(vt(int_amdgcn_waterfall_readfirstlane i32:$tok, vt:$src)),
2568+
(!cast<Instruction>("SI_WATERFALL_READFIRSTLANE_"#VecSize)
2569+
i32:$tok,
2570+
vt:$src)>;
2571+
2572+
def : GCNPat<(vt(int_amdgcn_waterfall_end i32:$tok, vt:$src)),
2573+
(!cast<Instruction>("SI_WATERFALL_END_"#VecSize) i32:$tok,
2574+
vt:$src)>;
2575+
2576+
def : GCNPat<(vt(int_amdgcn_waterfall_last_use i32:$tok, vt:$src)),
2577+
(!cast<Instruction>("SI_WATERFALL_LAST_USE_"#VecSize) i32:$tok,
2578+
vt:$src)>;
2579+
2580+
// The last_use_vgpr intrinsic selects the instruction name that carries an
// additional "_V" suffix after VecSize.
def : GCNPat<(vt(int_amdgcn_waterfall_last_use_vgpr i32:$tok, vt:$src)),
2581+
(!cast<Instruction>("SI_WATERFALL_LAST_USE_"#VecSize#"_V")
2582+
i32:$tok,
2583+
vt:$src)>;
2584+
}
25952585
}
25962586

2597-
defm : SI_WATERFALL_S_Pattern <i16, "V1">;
2598-
defm : SI_WATERFALL_S_Pattern <v2i16, "V1">;
2599-
defm : SI_WATERFALL_S_Pattern <v4i16, "V2">;
2600-
2601-
defm : SI_WATERFALL_S_Pattern <i32, "V1">;
2602-
defm : SI_WATERFALL_S_Pattern <v2i32, "V2">;
2603-
defm : SI_WATERFALL_S_Pattern <v4i32, "V4">;
2604-
defm : SI_WATERFALL_S_Pattern <v8i32, "V8">;
2587+
defm : SI_WATERFALL_S_Pattern<Reg16Types.types, "V1">;
2588+
defm : SI_WATERFALL_S_Pattern<Reg32Types.types, "V1">;
2589+
defm : SI_WATERFALL_S_Pattern<Reg64Types.types, "V2">;
2590+
defm : SI_WATERFALL_S_Pattern<Reg128Types.types, "V4">;
2591+
defm : SI_WATERFALL_S_Pattern<Reg256Types.types, "V8">;
26052592

26062593
defm : SI_WATERFALL_Pattern <i16, f16, "V1">;
26072594
defm : SI_WATERFALL_Pattern <v2i16, v2f16, "V1">;

0 commit comments

Comments
 (0)