Skip to content

Commit 6ea495f

Browse files
committed
[AMDGPU] Do not put memory instructions in *ALU SchedGroups
Change-Id: I35069ec1bc6bd47984b288f727cf8df6d6797e47
1 parent 3aaa58f commit 6ea495f

File tree

2 files changed

+65
-3
lines changed

2 files changed

+65
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2394,15 +2394,18 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
23942394
else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
23952395
(TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) ||
23962396
TII->isTRANS(MI)))
2397-
Result = true;
2397+
// Memory instructions (loads and stores) are never placed in the ALU group.
Result = !MI.mayLoadOrStore();
23982398

23992399
else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) &&
24002400
TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI))
2401-
Result = true;
2401+
// Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS).
2402+
// For our purposes, these shall not be classified as VALU as this results
2403+
// in unexpected behavior.
2404+
Result = !MI.mayLoadOrStore();
24022405

24032406
else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) &&
24042407
TII->isSALU(MI))
2405-
Result = true;
2408+
// Memory instructions (loads and stores) are never placed in the SALU group.
Result = !MI.mayLoadOrStore();
24062409

24072410
else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) &&
24082411
TII->isMFMAorWMMA(MI))
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s
3+
4+
---
5+
name: buffer_load_lds_not_valu
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $vgpr0_vgpr1
10+
; CHECK-LABEL: name: buffer_load_lds_not_valu
11+
; CHECK: liveins: $vgpr0_vgpr1
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: $exec = IMPLICIT_DEF
14+
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
15+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
16+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
17+
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
18+
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
19+
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[V_ADD_U32_e32_]], implicit $exec
20+
; CHECK-NEXT: $m0 = S_MOV_B32 0
21+
; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0
22+
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]], implicit $exec
23+
; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], implicit $exec
24+
; CHECK-NEXT: $m0 = S_MOV_B32 1
25+
; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0
26+
; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]], implicit $exec
27+
; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_3]], [[V_ADD_U32_e32_4]], implicit $exec
28+
; CHECK-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]], implicit $exec
29+
; CHECK-NEXT: dead [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_5]], [[V_ADD_U32_e32_6]], implicit $exec
30+
; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0
31+
; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0
32+
; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0
33+
; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0
34+
; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 4, 0
35+
; CHECK-NEXT: S_ENDPGM 0
36+
$exec = IMPLICIT_DEF
37+
%0:vgpr_32 = IMPLICIT_DEF
38+
%1:sgpr_128 = IMPLICIT_DEF
39+
%2:vgpr_32 = IMPLICIT_DEF
40+
%3:vgpr_32 = IMPLICIT_DEF
41+
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
42+
%5:vgpr_32 = V_ADD_U32_e32 %3, %4, implicit $exec
43+
$m0 = S_MOV_B32 0
44+
BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0
45+
$m0 = S_MOV_B32 1
46+
BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0
47+
%6:vgpr_32 = V_ADD_U32_e32 %4, %5, implicit $exec
48+
%7:vgpr_32 = V_ADD_U32_e32 %5, %6, implicit $exec
49+
%8:vgpr_32 = V_ADD_U32_e32 %6, %7, implicit $exec
50+
%9:vgpr_32 = V_ADD_U32_e32 %7, %8, implicit $exec
51+
%10:vgpr_32 = V_ADD_U32_e32 %8, %9, implicit $exec
52+
%11:vgpr_32 = V_ADD_U32_e32 %9, %10, implicit $exec
53+
SCHED_GROUP_BARRIER 2, 2, 0
54+
SCHED_GROUP_BARRIER 4, 1 ,0
55+
SCHED_GROUP_BARRIER 2, 2, 0
56+
SCHED_GROUP_BARRIER 4, 1 ,0
57+
SCHED_GROUP_BARRIER 2, 4, 0
58+
S_ENDPGM 0
59+
...

0 commit comments

Comments
 (0)