Skip to content

Conversation

rampitec
Copy link
Collaborator

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Aug 15, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/153879.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+12)
  • (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h (+1)
  • (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+4)
  • (modified) llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir (+54)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index dd7c1914d3440..c1cca063aac6f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1206,6 +1206,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
     fixDsAtomicAsyncBarrierArriveB64(MI);
   if (ST.hasScratchBaseForwardingHazard())
     fixScratchBaseForwardingHazard(MI);
+  if (ST.setRegModeNeedsVNOPs())
+    fixSetRegMode(MI);
 }
 
 static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3546,3 +3548,13 @@ bool GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
           AMDGPU::DepCtr::encodeFieldSaSdst(0), 0));
   return true;
 }
+
+bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) {
+  if (!isSSetReg(MI->getOpcode()) ||
+      MI->getOperand(1).getImm() != AMDGPU::Hwreg::ID_MODE)
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index e0982b46424b9..67beffadc0913 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -113,6 +113,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixGetRegWaitIdle(MachineInstr *MI);
   bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
   bool fixScratchBaseForwardingHazard(MachineInstr *MI);
+  bool fixSetRegMode(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 404a476a3076a..2a8385df3f934 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1345,6 +1345,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
 
+  bool setRegModeNeedsVNOPs() const {
+    return GFX1250Insts && getGeneration() == GFX12;
+  }
+
   /// Return if operations acting on VGPR tuples require even alignment.
   bool needsAlignedVGPRs() const { return GFX90AInsts || GFX1250Insts; }
 
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
index f4596b0832d97..170478539d8a9 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -493,3 +493,57 @@ body: |
     liveins: $vgpr0
     $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
 ...
+
+---
+name: s_setreg_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GCN-LABEL: name: s_setreg_b32_hwreg_mode
+    ; GCN: liveins: $sgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+    S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_b32_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GCN-LABEL: name: s_setreg_b32_mode
+    ; GCN: liveins: $sgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+    S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: s_setreg_imm32_b32_hwreg_mode
+    ; GCN: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+    S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: s_setreg_imm32_b32_mode
+    ; GCN: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode
+    S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode
+...

Base automatically changed from users/rampitec/08-15-_amdgpu_w_a_hazard_with_writing_s102_103_and_reading_flat_scratch_base to main August 15, 2025 22:23
@rampitec rampitec force-pushed the users/rampitec/08-15-_amdgpu_w_a_for_s_setreg_b32_gfx1250_hazard_with_mode_register branch from 32fc495 to d9f7d86 Compare August 15, 2025 22:25
@rampitec rampitec merged commit 4f34c74 into main Aug 15, 2025
9 checks passed
@rampitec rampitec deleted the users/rampitec/08-15-_amdgpu_w_a_for_s_setreg_b32_gfx1250_hazard_with_mode_register branch August 15, 2025 23:08
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants