Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2394,15 +2394,18 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
(TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) ||
TII->isTRANS(MI)))
Result = true;
Result = !(MI.mayLoad() || MI.mayStore());

else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) &&
TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI))
Result = true;
// Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS).
// For our purposes, these shall not be classified as VALU as this results
// in unexpected behavior.
Result = !(MI.mayLoad() || MI.mayStore());

else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) &&
TII->isSALU(MI))
Result = true;
Result = !(MI.mayLoad() || MI.mayStore());

else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) &&
TII->isMFMAorWMMA(MI))
Expand Down
59 changes: 59 additions & 0 deletions llvm/test/CodeGen/AMDGPU/sched.group.classification.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s

# Checks how the machine scheduler fills SCHED_GROUP_BARRIER groups when
# BUFFER_LOAD_DWORDX4_LDS_OFFEN instructions are present. In the expected
# schedule the mask-2 groups are filled only with V_ADD_U32_e32, and each
# buffer load stays directly after the S_MOV_B32 that writes $m0 (the
# instruction picked for the mask-4 group), i.e. the buffer loads are never
# pulled into a mask-2 group.
# NOTE(review): mask 2 = VALU and mask 4 = SALU is assumed from the AMDGPU
# sched_group_barrier mask encoding - confirm against the backend docs.
---
name: buffer_load_lds_not_valu
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: buffer_load_lds_not_valu
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = IMPLICIT_DEF
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[V_ADD_U32_e32_]], implicit $exec
; CHECK-NEXT: $m0 = S_MOV_B32 0
; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], implicit $exec
; CHECK-NEXT: $m0 = S_MOV_B32 1
; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0
; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_3]], [[V_ADD_U32_e32_4]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]], implicit $exec
; CHECK-NEXT: dead [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_5]], [[V_ADD_U32_e32_6]], implicit $exec
; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0
; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0
; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0
; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0
; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 4, 0
; CHECK-NEXT: S_ENDPGM 0
$exec = IMPLICIT_DEF
%0:vgpr_32 = IMPLICIT_DEF
%1:sgpr_128 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %3, %4, implicit $exec
$m0 = S_MOV_B32 0
BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0
$m0 = S_MOV_B32 1
BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0
%6:vgpr_32 = V_ADD_U32_e32 %4, %5, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %5, %6, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %6, %7, implicit $exec
%9:vgpr_32 = V_ADD_U32_e32 %7, %8, implicit $exec
%10:vgpr_32 = V_ADD_U32_e32 %8, %9, implicit $exec
%11:vgpr_32 = V_ADD_U32_e32 %9, %10, implicit $exec
SCHED_GROUP_BARRIER 2, 2, 0
SCHED_GROUP_BARRIER 4, 1 ,0
SCHED_GROUP_BARRIER 2, 2, 0
SCHED_GROUP_BARRIER 4, 1 ,0
SCHED_GROUP_BARRIER 2, 4, 0
S_ENDPGM 0
...
Loading