Skip to content

Commit 55b02e0

Browse files
committed
Provide and use new TII method
Change-Id: I4cbb79b168f451e2decd0775657dafba0243faab
1 parent debc65d commit 55b02e0

File tree

6 files changed

+378
-192
lines changed

6 files changed

+378
-192
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1347,7 +1347,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
13471347
- 0x0100: All DS read instructions may be scheduled across sched_barrier.
13481348
- 0x0200: All DS write instructions may be scheduled across sched_barrier.
13491349
- 0x0400: All Transcendental (e.g. V_EXP) instructions may be scheduled across sched_barrier.
1350-
- 0x0800: All Packed Arithmetic (e.g. V_PK_MOV, V_DOT, etc) instructions may be scheduled across sched_barrier.
1350+
- 0x0800: All "Never-Coissuable" (e.g. V_PK_ADD, V_DOT, etc) instructions may be scheduled across sched_barrier.
13511351

13521352
llvm.amdgcn.sched.group.barrier Creates schedule groups with specific properties to create custom scheduling
13531353
pipelines. The ordering between groups is enforced by the instruction scheduler.

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,9 @@ enum class SchedGroupMask {
7575
DS_READ = 1u << 8,
7676
DS_WRITE = 1u << 9,
7777
TRANS = 1u << 10,
78-
PACK = 1u << 11,
78+
NONCOISSUE = 1u << 11,
7979
ALL = ALU | VALU | SALU | MFMA | VMEM | VMEM_READ | VMEM_WRITE | DS |
80-
DS_READ | DS_WRITE | TRANS | PACK,
80+
DS_READ | DS_WRITE | TRANS | NONCOISSUE,
8181
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
8282
};
8383

@@ -2457,8 +2457,9 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
24572457
TII->isTRANS(MI))
24582458
Result = true;
24592459

2460-
else if (((SGMask & SchedGroupMask::PACK) != SchedGroupMask::NONE) &&
2461-
TII->isVOP3P(MI) && !TII->isMFMAorWMMA(MI))
2460+
else if (((SGMask & SchedGroupMask::NONCOISSUE) != SchedGroupMask::NONE) &&
2461+
TII->isNeverCoissue(MI) && !TII->isMFMAorWMMA(MI) &&
2462+
!TII->isTRANS(MI))
24622463
Result = true;
24632464

24642465
LLVM_DEBUG(
@@ -2640,17 +2641,17 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
26402641
// allowed past the SCHED_BARRIER.
26412642
SchedGroupMask InvertedMask = ~Mask;
26422643

2643-
// ALU implies VALU, SALU, MFMA, TRANS, PACK.
2644+
// ALU implies VALU, SALU, MFMA, TRANS, NONCOISSUE.
26442645
if ((InvertedMask & SchedGroupMask::ALU) == SchedGroupMask::NONE)
26452646
InvertedMask &= ~SchedGroupMask::VALU & ~SchedGroupMask::SALU &
26462647
~SchedGroupMask::MFMA & ~SchedGroupMask::TRANS &
2647-
~SchedGroupMask::PACK;
2648-
// VALU, SALU, MFMA, TRANS, PACK implies ALU.
2648+
~SchedGroupMask::NONCOISSUE;
2649+
// VALU, SALU, MFMA, TRANS, NONCOISSUE implies ALU.
26492650
else if ((InvertedMask & SchedGroupMask::VALU) == SchedGroupMask::NONE ||
26502651
(InvertedMask & SchedGroupMask::SALU) == SchedGroupMask::NONE ||
26512652
(InvertedMask & SchedGroupMask::MFMA) == SchedGroupMask::NONE ||
26522653
(InvertedMask & SchedGroupMask::TRANS) == SchedGroupMask::NONE ||
2653-
(InvertedMask & SchedGroupMask::PACK) == SchedGroupMask::NONE)
2654+
(InvertedMask & SchedGroupMask::NONCOISSUE) == SchedGroupMask::NONE)
26542655
InvertedMask &= ~SchedGroupMask::ALU;
26552656

26562657
// VMEM implies VMEM_READ, VMEM_WRITE.

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8957,6 +8957,64 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
89578957
return Rsrc23;
89588958
}
89598959

8960+
/// Returns true if \p MI belongs to the "never co-issue" instruction class.
///
/// The query only ever answers true on subtargets that report GFX940
/// instructions but not GFX950 instructions, and only for VALU instructions.
/// Within that set it accepts TRANS, DOT and MFMA instructions plus an explicit
/// list of opcodes (packed conversions, (M)QSAD and V_PK_* operations).
///
/// NOTE(review): presumably "never co-issue" means the hardware cannot issue
/// these together with another instruction -- confirm against the ISA
/// documentation for the affected subtarget.
bool SIInstrInfo::isNeverCoissue(const MachineInstr &MI) const {
8961+
// Restrict the classification to subtargets with GFX940 but not GFX950 insts.
bool IsGFX942Only = ST.hasGFX940Insts() && !ST.hasGFX950Insts();
8962+
if (!IsGFX942Only)
8963+
return false;
8964+
8965+
// Only VALU instructions are considered at all.
if (!isVALU(MI))
8966+
return false;
8967+
8968+
// Transcendental instructions: V_COS, V_EXP, V_RCP, etc.
if (isTRANS(MI))
8970+
return true;
8971+
8972+
// Dot-product instructions: DOT2, DOT2C, DOT4, etc.
if (isDOT(MI))
8974+
return true;
8975+
8976+
// Matrix instructions: MFMA, SMFMA
if (isMFMA(MI))
8978+
return true;
8979+
// Everything else is decided by an explicit opcode list.
8980+
unsigned Opcode = MI.getOpcode();
8981+
switch (Opcode) {
8982+
case AMDGPU::V_CVT_PK_BF8_F32_e64:
8983+
case AMDGPU::V_CVT_PK_FP8_F32_e64:
8984+
case AMDGPU::V_MQSAD_PK_U16_U8_e64:
8985+
case AMDGPU::V_MQSAD_U32_U8_e64:
8986+
case AMDGPU::V_PK_ADD_F16:
8987+
case AMDGPU::V_PK_ADD_F32:
8988+
case AMDGPU::V_PK_ADD_I16:
8989+
case AMDGPU::V_PK_ADD_U16:
8990+
case AMDGPU::V_PK_ASHRREV_I16:
8991+
case AMDGPU::V_PK_FMA_F16:
8992+
case AMDGPU::V_PK_FMA_F32:
8993+
case AMDGPU::V_PK_FMAC_F16_e32:
8994+
case AMDGPU::V_PK_FMAC_F16_e64:
8995+
case AMDGPU::V_PK_LSHLREV_B16:
8996+
case AMDGPU::V_PK_LSHRREV_B16:
8997+
case AMDGPU::V_PK_MAD_I16:
8998+
case AMDGPU::V_PK_MAD_U16:
8999+
case AMDGPU::V_PK_MAX_F16:
9000+
case AMDGPU::V_PK_MAX_I16:
9001+
case AMDGPU::V_PK_MAX_U16:
9002+
case AMDGPU::V_PK_MIN_F16:
9003+
case AMDGPU::V_PK_MIN_I16:
9004+
case AMDGPU::V_PK_MIN_U16:
9005+
case AMDGPU::V_PK_MOV_B32:
9006+
case AMDGPU::V_PK_MUL_F16:
9007+
case AMDGPU::V_PK_MUL_F32:
9008+
case AMDGPU::V_PK_MUL_LO_U16:
9009+
case AMDGPU::V_PK_SUB_I16:
9010+
case AMDGPU::V_PK_SUB_U16:
9011+
case AMDGPU::V_QSAD_PK_U16_U8_e64:
9012+
return true;
9013+
default:
9014+
// Any other VALU opcode is not in this class.
return false;
9015+
}
9016+
}
9017+
89609018
bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
89619019
unsigned Opc = MI.getOpcode();
89629020

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
10311031
}
10321032
}
10331033

1034+
/// Returns true if \p MI is classified as a "never co-issue" instruction.
/// The definition lives in SIInstrInfo.cpp.
bool isNeverCoissue(const MachineInstr &MI) const;
1035+
10341036
bool isVGPRCopy(const MachineInstr &MI) const {
10351037
assert(isCopyInstr(MI));
10361038
Register Dest = MI.getOperand(0).getReg();

0 commit comments

Comments
 (0)