Skip to content

Commit c256966

Browse files
authored
[AMDGPU]: Unpack packed instructions overlapped by MFMAs post-RA scheduling (llvm#157968)
This is a cleaned-up version of PR llvm#151704. These optimizations are now performed post-RA scheduling.
1 parent 250a92f commit c256966

File tree

5 files changed

+1587
-13
lines changed

5 files changed

+1587
-13
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6332,6 +6332,66 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
63326332
return isImmOperandLegal(MI, OpIdx, *MO);
63336333
}
63346334

6335+
/// Report whether \p MI falls into the set of instructions this hook
/// classifies as "never co-issue".
///
/// Only subtargets that report GFX940 or GFX950 instructions are considered,
/// and only VALU instructions can qualify. Within that set, every TRANS, DOT
/// and MFMA instruction is reported, together with the explicitly listed
/// opcodes below.
///
/// \returns true if \p MI is in that set, false otherwise.
bool SIInstrInfo::isNeverCoissue(MachineInstr &MI) const {
  const bool HasGFX950Insts = ST.hasGFX950Insts();
  const bool HasGFX940Insts = ST.hasGFX940Insts();

  // Nothing is reported on other subtargets, nor for non-VALU instructions.
  if (!(HasGFX950Insts || HasGFX940Insts) || !isVALU(MI))
    return false;

  // Whole instruction classes:
  //   TRANS - V_COS, V_EXP, V_RCP, etc.
  //   DOT   - DOT2, DOT2C, DOT4, etc.
  //   MFMA  - MFMA, SMFMA
  if (isTRANS(MI) || isDOT(MI) || isMFMA(MI))
    return true;

  // Individually listed opcodes; anything else is not in the set.
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_CVT_PK_BF8_F32_e64:
  case AMDGPU::V_CVT_PK_FP8_F32_e64:
  case AMDGPU::V_MQSAD_PK_U16_U8_e64:
  case AMDGPU::V_MQSAD_U32_U8_e64:
  case AMDGPU::V_PK_ADD_F16:
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_I16:
  case AMDGPU::V_PK_ADD_U16:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_FMA_F16:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMAC_F16_e32:
  case AMDGPU::V_PK_FMAC_F16_e64:
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_MAD_I16:
  case AMDGPU::V_PK_MAD_U16:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_PK_MAX_I16:
  case AMDGPU::V_PK_MAX_U16:
  case AMDGPU::V_PK_MIN_F16:
  case AMDGPU::V_PK_MIN_I16:
  case AMDGPU::V_PK_MIN_U16:
  case AMDGPU::V_PK_MOV_B32:
  case AMDGPU::V_PK_MUL_F16:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_LO_U16:
  case AMDGPU::V_PK_SUB_I16:
  case AMDGPU::V_PK_SUB_U16:
  case AMDGPU::V_QSAD_PK_U16_U8_e64:
    return true;
  }
}
6394+
63356395
void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
63366396
MachineInstr &MI) const {
63376397
unsigned Opc = MI.getOpcode();

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,6 +1203,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
12031203
return isImmOperandLegal(MI.getDesc(), OpNo, MO);
12041204
}
12051205

1206+
/// Returns true if \p MI is a VALU instruction that is classified as
/// "never co-issue": a TRANS, DOT or MFMA instruction, or one of a fixed list
/// of opcodes. Always returns false on subtargets without GFX940 or GFX950
/// instructions.
bool isNeverCoissue(MachineInstr &MI) const;
1207+
12061208
/// Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
12071209
bool isLegalAV64PseudoImm(uint64_t Imm) const;
12081210

0 commit comments

Comments
 (0)