Skip to content

Commit 73e20c7

Browse files
committed
review comments:
- try renaming to S_WAITCNT_lds_direct - be consistent (even at the cost of brevity?)
1 parent a997d32 commit 73e20c7

File tree

5 files changed

+29
-27
lines changed

5 files changed

+29
-27
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,9 +1381,9 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
13811381
Modified = true;
13821382
} else
13831383
WaitcntInstr = &II;
1384-
} else if (Opcode == AMDGPU::S_WAITCNT_LDS_DIRECT) {
1384+
} else if (Opcode == AMDGPU::S_WAITCNT_lds_direct) {
13851385
assert(ST->hasVMemToLDSLoad());
1386-
LLVM_DEBUG(dbgs() << "Processing S_WAITCNT_LDS_DIRECT: " << II
1386+
LLVM_DEBUG(dbgs() << "Processing S_WAITCNT_lds_direct: " << II
13871387
<< "Before: " << Wait.LoadCnt << '\n';);
13881388
ScoreBrackets.determineWait(LOAD_CNT, FIRST_LDS_VGPR, Wait);
13891389
LLVM_DEBUG(dbgs() << "After: " << Wait.LoadCnt << '\n';);
@@ -1566,7 +1566,7 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
15661566
ScoreBrackets.simplifyWaitcnt(OldWait);
15671567
Wait = Wait.combined(OldWait);
15681568
UpdatableInstr = &CombinedStoreDsCntInstr;
1569-
} else if (Opcode == AMDGPU::S_WAITCNT_LDS_DIRECT) {
1569+
} else if (Opcode == AMDGPU::S_WAITCNT_lds_direct) {
15701570
// Architectures higher than GFX10 do not have direct loads to
15711571
// LDS, so no work required here yet.
15721572
II.eraseFromParent();
@@ -2461,7 +2461,7 @@ static bool isWaitInstr(MachineInstr &Inst) {
24612461
Inst.getOperand(0).getReg() == AMDGPU::SGPR_NULL) ||
24622462
Opcode == AMDGPU::S_WAIT_LOADCNT_DSCNT ||
24632463
Opcode == AMDGPU::S_WAIT_STORECNT_DSCNT ||
2464-
Opcode == AMDGPU::S_WAITCNT_LDS_DIRECT ||
2464+
Opcode == AMDGPU::S_WAITCNT_lds_direct ||
24652465
counterTypeForInstr(Opcode).has_value();
24662466
}
24672467

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,8 +1175,9 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
11751175
// SIInsertWaitcnts will later replace this with a vmcnt().
11761176
if (ST.hasVMemToLDSLoad() && isReleaseOrStronger(Order) &&
11771177
Scope == SIAtomicScope::WORKGROUP &&
1178-
any((SIAtomicAddrSpace)AddrSpace & SIAtomicAddrSpace::LDS)) {
1179-
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_LDS_DIRECT));
1178+
((SIAtomicAddrSpace)AddrSpace & SIAtomicAddrSpace::LDS) !=
1179+
SIAtomicAddrSpace::NONE) {
1180+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_lds_direct));
11801181
Changed = true;
11811182
}
11821183

@@ -2093,8 +2094,9 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
20932094
// SIInsertWaitcnts will later replace this with a vmcnt().
20942095
if (ST.hasVMemToLDSLoad() && isReleaseOrStronger(Order) &&
20952096
Scope == SIAtomicScope::WORKGROUP &&
2096-
any((SIAtomicAddrSpace)AddrSpace & SIAtomicAddrSpace::LDS)) {
2097-
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_LDS_DIRECT));
2097+
((SIAtomicAddrSpace)AddrSpace & SIAtomicAddrSpace::LDS) !=
2098+
SIAtomicAddrSpace::NONE) {
2099+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_lds_direct));
20982100
Changed = true;
20992101
}
21002102

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1624,7 +1624,7 @@ let OtherPredicates = [HasImageInsts] in {
16241624
// Represents the point at which a wave must wait for all outstanding direct loads to LDS.
16251625
// Typically inserted by the memory legalizer and consumed by SIInsertWaitcnts.
16261626

1627-
def S_WAITCNT_LDS_DIRECT : SPseudoInstSI<(outs), (ins)> {
1627+
def S_WAITCNT_lds_direct : SPseudoInstSI<(outs), (ins)> {
16281628
let hasSideEffects = 0;
16291629
}
16301630

llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -545,13 +545,13 @@ define amdgpu_kernel void @workgroup_one_as_release() #0 {
545545
; GFX10WGP-LABEL: name: workgroup_one_as_release
546546
; GFX10WGP: bb.0.entry:
547547
; GFX10WGP-NEXT: S_WAITCNT_soft 16240
548-
; GFX10WGP-NEXT: S_WAITCNT_LDS_DIRECT
548+
; GFX10WGP-NEXT: S_WAITCNT_lds_direct
549549
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
550550
; GFX10WGP-NEXT: S_ENDPGM 0
551551
;
552552
; GFX10CU-LABEL: name: workgroup_one_as_release
553553
; GFX10CU: bb.0.entry:
554-
; GFX10CU-NEXT: S_WAITCNT_LDS_DIRECT
554+
; GFX10CU-NEXT: S_WAITCNT_lds_direct
555555
; GFX10CU-NEXT: S_ENDPGM 0
556556
;
557557
; GFX11WGP-LABEL: name: workgroup_one_as_release
@@ -580,14 +580,14 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 {
580580
; GFX10WGP-LABEL: name: workgroup_one_as_acq_rel
581581
; GFX10WGP: bb.0.entry:
582582
; GFX10WGP-NEXT: S_WAITCNT_soft 16240
583-
; GFX10WGP-NEXT: S_WAITCNT_LDS_DIRECT
583+
; GFX10WGP-NEXT: S_WAITCNT_lds_direct
584584
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
585585
; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec
586586
; GFX10WGP-NEXT: S_ENDPGM 0
587587
;
588588
; GFX10CU-LABEL: name: workgroup_one_as_acq_rel
589589
; GFX10CU: bb.0.entry:
590-
; GFX10CU-NEXT: S_WAITCNT_LDS_DIRECT
590+
; GFX10CU-NEXT: S_WAITCNT_lds_direct
591591
; GFX10CU-NEXT: S_ENDPGM 0
592592
;
593593
; GFX11WGP-LABEL: name: workgroup_one_as_acq_rel
@@ -617,14 +617,14 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 {
617617
; GFX10WGP-LABEL: name: workgroup_one_as_seq_cst
618618
; GFX10WGP: bb.0.entry:
619619
; GFX10WGP-NEXT: S_WAITCNT_soft 16240
620-
; GFX10WGP-NEXT: S_WAITCNT_LDS_DIRECT
620+
; GFX10WGP-NEXT: S_WAITCNT_lds_direct
621621
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
622622
; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec
623623
; GFX10WGP-NEXT: S_ENDPGM 0
624624
;
625625
; GFX10CU-LABEL: name: workgroup_one_as_seq_cst
626626
; GFX10CU: bb.0.entry:
627-
; GFX10CU-NEXT: S_WAITCNT_LDS_DIRECT
627+
; GFX10CU-NEXT: S_WAITCNT_lds_direct
628628
; GFX10CU-NEXT: S_ENDPGM 0
629629
;
630630
; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst
@@ -1299,14 +1299,14 @@ define amdgpu_kernel void @workgroup_release() #0 {
12991299
; GFX10WGP-LABEL: name: workgroup_release
13001300
; GFX10WGP: bb.0.entry:
13011301
; GFX10WGP-NEXT: S_WAITCNT_soft 112
1302-
; GFX10WGP-NEXT: S_WAITCNT_LDS_DIRECT
1302+
; GFX10WGP-NEXT: S_WAITCNT_lds_direct
13031303
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
13041304
; GFX10WGP-NEXT: S_ENDPGM 0
13051305
;
13061306
; GFX10CU-LABEL: name: workgroup_release
13071307
; GFX10CU: bb.0.entry:
13081308
; GFX10CU-NEXT: S_WAITCNT_soft 49279
1309-
; GFX10CU-NEXT: S_WAITCNT_LDS_DIRECT
1309+
; GFX10CU-NEXT: S_WAITCNT_lds_direct
13101310
; GFX10CU-NEXT: S_ENDPGM 0
13111311
;
13121312
; GFX11WGP-LABEL: name: workgroup_release
@@ -1338,15 +1338,15 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
13381338
; GFX10WGP-LABEL: name: workgroup_acq_rel
13391339
; GFX10WGP: bb.0.entry:
13401340
; GFX10WGP-NEXT: S_WAITCNT_soft 112
1341-
; GFX10WGP-NEXT: S_WAITCNT_LDS_DIRECT
1341+
; GFX10WGP-NEXT: S_WAITCNT_lds_direct
13421342
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
13431343
; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec
13441344
; GFX10WGP-NEXT: S_ENDPGM 0
13451345
;
13461346
; GFX10CU-LABEL: name: workgroup_acq_rel
13471347
; GFX10CU: bb.0.entry:
13481348
; GFX10CU-NEXT: S_WAITCNT_soft 49279
1349-
; GFX10CU-NEXT: S_WAITCNT_LDS_DIRECT
1349+
; GFX10CU-NEXT: S_WAITCNT_lds_direct
13501350
; GFX10CU-NEXT: S_ENDPGM 0
13511351
;
13521352
; GFX11WGP-LABEL: name: workgroup_acq_rel
@@ -1379,15 +1379,15 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 {
13791379
; GFX10WGP-LABEL: name: workgroup_seq_cst
13801380
; GFX10WGP: bb.0.entry:
13811381
; GFX10WGP-NEXT: S_WAITCNT_soft 112
1382-
; GFX10WGP-NEXT: S_WAITCNT_LDS_DIRECT
1382+
; GFX10WGP-NEXT: S_WAITCNT_lds_direct
13831383
; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
13841384
; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec
13851385
; GFX10WGP-NEXT: S_ENDPGM 0
13861386
;
13871387
; GFX10CU-LABEL: name: workgroup_seq_cst
13881388
; GFX10CU: bb.0.entry:
13891389
; GFX10CU-NEXT: S_WAITCNT_soft 49279
1390-
; GFX10CU-NEXT: S_WAITCNT_LDS_DIRECT
1390+
; GFX10CU-NEXT: S_WAITCNT_lds_direct
13911391
; GFX10CU-NEXT: S_ENDPGM 0
13921392
;
13931393
; GFX11WGP-LABEL: name: workgroup_seq_cst

llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ body: |
1616
; GCN-NEXT: S_ENDPGM 0
1717
$m0 = S_MOV_B32 0
1818
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
19-
S_WAITCNT_LDS_DIRECT
19+
S_WAITCNT_lds_direct
2020
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
2121
S_ENDPGM 0
2222
@@ -39,7 +39,7 @@ body: |
3939
$m0 = S_MOV_B32 0
4040
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
4141
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
42-
S_WAITCNT_LDS_DIRECT
42+
S_WAITCNT_lds_direct
4343
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
4444
S_ENDPGM 0
4545
@@ -57,7 +57,7 @@ body: |
5757
; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
5858
; GCN-NEXT: S_ENDPGM 0
5959
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
60-
S_WAITCNT_LDS_DIRECT
60+
S_WAITCNT_lds_direct
6161
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
6262
S_ENDPGM 0
6363
@@ -78,7 +78,7 @@ body: |
7878
; GCN-NEXT: S_ENDPGM 0
7979
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
8080
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
81-
S_WAITCNT_LDS_DIRECT
81+
S_WAITCNT_lds_direct
8282
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
8383
S_ENDPGM 0
8484
@@ -102,7 +102,7 @@ body: |
102102
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
103103
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
104104
S_WAITCNT 3952
105-
S_WAITCNT_LDS_DIRECT
105+
S_WAITCNT_lds_direct
106106
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
107107
S_ENDPGM 0
108108
@@ -125,7 +125,7 @@ body: |
125125
$m0 = S_MOV_B32 0
126126
BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
127127
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
128-
S_WAITCNT_LDS_DIRECT
128+
S_WAITCNT_lds_direct
129129
S_WAITCNT 3952
130130
$vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
131131
S_ENDPGM 0

0 commit comments

Comments
 (0)