Skip to content

Commit c4f164e

Browse files
committed
Address review comments
1 parent e749e58 commit c4f164e

10 files changed

+28
-17
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
554554
unsigned NumBytes) const {
555555
// If the mem ops (to be clustered) do not have the same base ptr, then they
556556
// should not be clustered.
557-
unsigned MaxMemoryClusterDWords = 8;
557+
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
558558
if (!BaseOps1.empty() && !BaseOps2.empty()) {
559559
const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
560560
const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
@@ -563,8 +563,7 @@ bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
563563

564564
const SIMachineFunctionInfo *MFI =
565565
FirstLdSt.getMF()->getInfo<SIMachineFunctionInfo>();
566-
if (MFI->getMaxMemoryClusterDWords())
567-
MaxMemoryClusterDWords = MFI->getMaxMemoryClusterDWords();
566+
MaxMemoryClusterDWords = MFI->getMaxMemoryClusterDWords();
568567
} else if (!BaseOps1.empty() || !BaseOps2.empty()) {
569568
// If only one base op is empty, they do not have the same base ptr
570569
return false;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ class RegScavenger;
3636
class TargetRegisterClass;
3737
class ScheduleHazardRecognizer;
3838

39+
constexpr unsigned DefaultMemoryClusterDWordsLimit = 8;
40+
3941
/// Mark the MMO of a uniform load if there are no potentially clobbering stores
4042
/// on any path from the start of an entry function to this load.
4143
static const MachineMemOperand::Flags MONoClobber =

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
163163
if (!S.empty())
164164
S.consumeInteger(0, HighBitsOf32BitAddress);
165165

166-
A = F.getFnAttribute("amdgpu-max-memory-cluster-dwords");
167-
S = A.getValueAsString();
168-
if (!S.empty())
169-
S.consumeInteger(0, MaxMemoryClusterDWords);
166+
MaxMemoryClusterDWords = F.getFnAttributeAsParsedInteger(
167+
"amdgpu-max-memory-cluster-dwords", DefaultMemoryClusterDWordsLimit);
170168

171169
// On GFX908, in order to guarantee copying between AGPRs, we need a scratch
172170
// VGPR available at all times. For now, reserve highest available VGPR. After

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
492492

493493
// Maximum number of dwords that can be clustered during instruction
494494
// scheduler stage.
495-
unsigned MaxMemoryClusterDWords = 0;
495+
unsigned MaxMemoryClusterDWords;
496496

497497
mutable std::optional<bool> UsesAGPRs;
498498

llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' }
3030
; CHECK-NEXT: psInputAddr: 0
3131
; CHECK-NEXT: psInputEnable: 0
32-
; CHECK-NEXT: maxMemoryClusterDWords: 0
32+
; CHECK-NEXT: maxMemoryClusterDWords: 8
3333
; CHECK-NEXT: mode:
3434
; CHECK-NEXT: ieee: true
3535
; CHECK-NEXT: dx10-clamp: true
@@ -296,7 +296,7 @@
296296
; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' }
297297
; CHECK-NEXT: psInputAddr: 0
298298
; CHECK-NEXT: psInputEnable: 0
299-
; CHECK-NEXT: maxMemoryClusterDWords: 0
299+
; CHECK-NEXT: maxMemoryClusterDWords: 8
300300
; CHECK-NEXT: mode:
301301
; CHECK-NEXT: ieee: true
302302
; CHECK-NEXT: dx10-clamp: true

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
; AFTER-PEI-NEXT: workItemIDX: { reg: '$vgpr0' }
3030
; AFTER-PEI-NEXT: psInputAddr: 0
3131
; AFTER-PEI-NEXT: psInputEnable: 0
32-
; AFTER-PEI-NEXT: maxMemoryClusterDWords: 0
32+
; AFTER-PEI-NEXT: maxMemoryClusterDWords: 8
3333
; AFTER-PEI-NEXT: mode:
3434
; AFTER-PEI-NEXT: ieee: true
3535
; AFTER-PEI-NEXT: dx10-clamp: true

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' }
3131
; CHECK-NEXT: psInputAddr: 0
3232
; CHECK-NEXT: psInputEnable: 0
33-
; CHECK-NEXT: maxMemoryClusterDWords: 0
33+
; CHECK-NEXT: maxMemoryClusterDWords: 8
3434
; CHECK-NEXT: mode:
3535
; CHECK-NEXT: ieee: true
3636
; CHECK-NEXT: dx10-clamp: true

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' }
3131
; CHECK-NEXT: psInputAddr: 0
3232
; CHECK-NEXT: psInputEnable: 0
33-
; CHECK-NEXT: maxMemoryClusterDWords: 0
33+
; CHECK-NEXT: maxMemoryClusterDWords: 8
3434
; CHECK-NEXT: mode:
3535
; CHECK-NEXT: ieee: true
3636
; CHECK-NEXT: dx10-clamp: true

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,3 +597,15 @@ body: |
597597
%2:sgpr_64 = COPY %1
598598
%1:sgpr_64 = COPY %0
599599
...
600+
601+
---
602+
# ALL-LABEL: name: max_memory_cluster_dwords
603+
# ALL: maxMemoryClusterDWords: 16
604+
name: max_memory_cluster_dwords
605+
machineFunctionInfo:
606+
maxMemoryClusterDWords: 16
607+
body: |
608+
bb.0:
609+
SI_RETURN
610+
611+
...

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
; CHECK-NEXT: workItemIDZ: { reg: '$vgpr2' }
4141
; CHECK-NEXT: psInputAddr: 0
4242
; CHECK-NEXT: psInputEnable: 0
43-
; CHECK-NEXT: maxMemoryClusterDWords: 0
43+
; CHECK-NEXT: maxMemoryClusterDWords: 8
4444
; CHECK-NEXT: mode:
4545
; CHECK-NEXT: ieee: true
4646
; CHECK-NEXT: dx10-clamp: true
@@ -87,7 +87,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
8787
; CHECK-NEXT: implicitBufferPtr: { reg: '$sgpr0_sgpr1' }
8888
; CHECK-NEXT: psInputAddr: 1
8989
; CHECK-NEXT: psInputEnable: 1
90-
; CHECK-NEXT: maxMemoryClusterDWords: 0
90+
; CHECK-NEXT: maxMemoryClusterDWords: 8
9191
; CHECK-NEXT: mode:
9292
; CHECK-NEXT: ieee: false
9393
; CHECK-NEXT: dx10-clamp: true
@@ -158,7 +158,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
158158
; CHECK-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
159159
; CHECK-NEXT: psInputAddr: 0
160160
; CHECK-NEXT: psInputEnable: 0
161-
; CHECK-NEXT: maxMemoryClusterDWords: 0
161+
; CHECK-NEXT: maxMemoryClusterDWords: 8
162162
; CHECK-NEXT: mode:
163163
; CHECK-NEXT: ieee: true
164164
; CHECK-NEXT: dx10-clamp: true
@@ -211,7 +211,7 @@ define void @function() {
211211
; CHECK-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
212212
; CHECK-NEXT: psInputAddr: 0
213213
; CHECK-NEXT: psInputEnable: 0
214-
; CHECK-NEXT: maxMemoryClusterDWords: 0
214+
; CHECK-NEXT: maxMemoryClusterDWords: 8
215215
; CHECK-NEXT: mode:
216216
; CHECK-NEXT: ieee: true
217217
; CHECK-NEXT: dx10-clamp: true

0 commit comments

Comments
 (0)