Skip to content

Conversation

@optimisan
Copy link
Contributor

No description provided.

@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from a4bb6d9 to 646d2d1 Compare September 25, 2024 09:57
@optimisan optimisan force-pushed the users/Akshat-Oke/port-lrm branch from 8c58562 to 22bb8f0 Compare September 25, 2024 11:16
@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch 2 times, most recently from 4c5184a to 0d0cd3f Compare September 25, 2024 11:25
@optimisan optimisan changed the base branch from users/Akshat-Oke/port-lrm to users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs September 25, 2024 11:25
@optimisan optimisan requested a review from cdevadas September 25, 2024 11:34
@optimisan optimisan marked this pull request as ready for review September 25, 2024 11:35
@llvmbot
Copy link
Member

llvmbot commented Sep 25, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (Akshat-Oke)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/109939.diff

7 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+3-3)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+4-3)
  • (modified) llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp (+39-21)
  • (added) llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h (+30)
  • (modified) llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir (+1)
  • (modified) llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir (+1)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b2dd354e496a2e..c0fd5e4625895a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass();
 FunctionPass *createLowerWWMCopiesPass();
 FunctionPass *createSIMemoryLegalizerPass();
 FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createSIPreAllocateWWMRegsPass();
+FunctionPass *createSIPreAllocateWWMRegsLegacyPass();
 FunctionPass *createSIFormMemoryClausesPass();
 
 FunctionPass *createSIPostRABundlerPass();
@@ -208,8 +208,8 @@ extern char &SILateBranchLoweringPassID;
 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
 extern char &SIOptimizeExecMaskingID;
 
-void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
-extern char &SIPreAllocateWWMRegsID;
+void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
+extern char &SIPreAllocateWWMRegsLegacyID;
 
 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
 extern char &AMDGPUImageIntrinsicOptimizerID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 0ebf34c901c142..174a90f0aa419d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -102,5 +102,6 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
+MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
 #undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 04fdee0819b502..9a28c648e2c4ed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -41,6 +41,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "SIPeepholeSDWA.h"
+#include "SIPreAllocateWWMRegs.h"
 #include "SIShrinkInstructions.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -461,7 +462,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSILateBranchLoweringPass(*PR);
   initializeSIMemoryLegalizerPass(*PR);
   initializeSIOptimizeExecMaskingPass(*PR);
-  initializeSIPreAllocateWWMRegsPass(*PR);
+  initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesPass(*PR);
   initializeSIPostRABundlerPass(*PR);
   initializeGCNCreateVOPDPass(*PR);
@@ -1443,7 +1444,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
 
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsLegacyID);
-  addPass(&SIPreAllocateWWMRegsID);
+  addPass(&SIPreAllocateWWMRegsLegacyID);
 
   addPass(createVGPRAllocPass(false));
 
@@ -1467,7 +1468,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
 
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsLegacyID);
-  addPass(&SIPreAllocateWWMRegsID);
+  addPass(&SIPreAllocateWWMRegsLegacyID);
 
   addPass(createVGPRAllocPass(true));
 
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index 0635cab7b872e2..c1d7a464a81537 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SIPreAllocateWWMRegs.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,7 +35,7 @@ static cl::opt<bool>
 
 namespace {
 
-class SIPreAllocateWWMRegs : public MachineFunctionPass {
+class SIPreAllocateWWMRegs {
 private:
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
@@ -48,13 +49,21 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
 #ifndef NDEBUG
   void printWWMInfo(const MachineInstr &MI);
 #endif
+  bool processDef(MachineOperand &MO);
+  void rewriteRegs(MachineFunction &MF);
+
+public:
+  SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
+                       VirtRegMap *VRM)
+      : LIS(LIS), Matrix(Matrix), VRM(VRM) {}
+  bool run(MachineFunction &MF);
+};
 
+class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
-    initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
-  }
+  SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -65,28 +74,24 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
-
-private:
-  bool processDef(MachineOperand &MO);
-  void rewriteRegs(MachineFunction &MF);
 };
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
-                "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
+                      "SI Pre-allocate WWM Registers", false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperPass)
-INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
-                "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
+                    "SI Pre-allocate WWM Registers", false, false)
 
-char SIPreAllocateWWMRegs::ID = 0;
+char SIPreAllocateWWMRegsLegacy::ID = 0;
 
-char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
+char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;
 
-FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
-  return new SIPreAllocateWWMRegs();
+FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() {
+  return new SIPreAllocateWWMRegsLegacy();
 }
 
 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
@@ -184,7 +189,14 @@ SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
 
 #endif
 
-bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
+bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
+  auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  auto *Matrix = &getAnalysis<LiveRegMatrixWrapperPass>().getLRM();
+  auto *VRM = &getAnalysis<VirtRegMapWrapperPass>().getVRM();
+  return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
+}
+
+bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -193,10 +205,6 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
   TRI = &TII->getRegisterInfo();
   MRI = &MF.getRegInfo();
 
-  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
-  Matrix = &getAnalysis<LiveRegMatrixWrapperPass>().getLRM();
-  VRM = &getAnalysis<VirtRegMapWrapperPass>().getVRM();
-
   RegClassInfo.runOnMachineFunction(MF);
 
   bool PreallocateSGPRSpillVGPRs =
@@ -254,3 +262,13 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
   rewriteRegs(MF);
   return true;
 }
+
+PreservedAnalyses
+SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
+                              MachineFunctionAnalysisManager &MFAM) {
+  auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
+  auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF);
+  auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF);
+  SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
new file mode 100644
index 00000000000000..b86f7fe9213af0
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
@@ -0,0 +1,30 @@
+//===--- SIPreAllocateWWMRegs.h -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
+#define LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class SIPreAllocateWWMRegsPass
+    : public PassInfoMixin<SIPreAllocateWWMRegsPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getRequiredProperties() {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
index f2db299f575f5e..27df8d0401e2e8 100644
--- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
+# RUN: llc -mtriple=amdgcn -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
 
 ---
 
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
index f0efe74878d831..d5508906519879 100644
--- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -amdgpu-prealloc-sgpr-spill-vgprs -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
 
 ---
 

@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from 0d0cd3f to 26682b7 Compare September 25, 2024 11:36
Comment on lines +269 to +268
auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF);
auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought this should be using getCachedResult, since I believed the pass supported running without LIS on the fast RA path. But I see now that the legacy path requires these analyses (although it probably shouldn't?)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, this looks odd. We shouldn't involve VRM for the fast regalloc case.

@optimisan optimisan force-pushed the users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs branch from 3d87209 to 8da3a2c Compare October 7, 2024 10:28
@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from 26682b7 to 02fafdd Compare October 7, 2024 10:28
@optimisan optimisan force-pushed the users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs branch from 8da3a2c to 131cedb Compare October 8, 2024 04:46
@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from 02fafdd to 5e2f02d Compare October 8, 2024 04:46
@optimisan optimisan force-pushed the users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs branch from 131cedb to 9bddae3 Compare October 14, 2024 05:44
@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from 5e2f02d to a1886d3 Compare October 14, 2024 05:45
@optimisan optimisan force-pushed the users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs branch from 9bddae3 to 604653e Compare October 14, 2024 08:06
@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from a1886d3 to 127ae26 Compare October 14, 2024 08:06
@optimisan optimisan force-pushed the users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs branch from 604653e to 43c0d2c Compare October 14, 2024 13:35
@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from 127ae26 to 1fc1184 Compare October 14, 2024 13:36
@optimisan optimisan force-pushed the users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs branch from 43c0d2c to 2ff11f5 Compare October 21, 2024 08:26
@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from 1fc1184 to 27c6dea Compare October 21, 2024 08:26
@optimisan optimisan force-pushed the users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs branch from 2ff11f5 to d2032b2 Compare October 22, 2024 09:39
@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from 27c6dea to 65f7dab Compare October 22, 2024 09:40
@optimisan
Copy link
Contributor Author

optimisan commented Oct 22, 2024

Merge activity

  • Oct 22, 5:41 AM EDT: A user started a stack merge that includes this pull request via Graphite.
  • Oct 22, 6:04 AM EDT: Graphite rebased this pull request as part of a merge.
  • Oct 22, 6:07 AM EDT: A user merged this pull request with Graphite.

@optimisan optimisan force-pushed the users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs branch 2 times, most recently from af68ad6 to 6169a44 Compare October 22, 2024 10:00
Base automatically changed from users/Akshat-Oke/09-25-_amdgpu_add_tests_for_sipreallocatewwmregs to main October 22, 2024 10:03
@optimisan optimisan force-pushed the users/Akshat-Oke/port-si-pre-allocate-wwm branch from 65f7dab to 9359d67 Compare October 22, 2024 10:04
@optimisan optimisan merged commit ca32bd6 into main Oct 22, 2024
4 of 6 checks passed
@optimisan optimisan deleted the users/Akshat-Oke/port-si-pre-allocate-wwm branch October 22, 2024 10:07
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants