Skip to content

Conversation

@rampitec
Copy link
Collaborator

No description provided.

@rampitec rampitec requested review from changpeng and shiltian August 18, 2025 20:58
Copy link
Collaborator Author

This stack of pull requests is managed by Graphite. Learn more about stacking.

@rampitec rampitec marked this pull request as ready for review August 18, 2025 20:58
@llvmbot
Copy link
Member

llvmbot commented Aug 18, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

The patch is 28.22 KiB and is truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/154200.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (+62)
  • (added) llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir (+338)
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index b49c5a997af78..e204d6ba356b8 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -87,6 +87,8 @@ enum InstClassEnum {
   GLOBAL_STORE_SADDR,
   FLAT_LOAD,
   FLAT_STORE,
+  FLAT_LOAD_SADDR,
+  FLAT_STORE_SADDR,
   GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of
   GLOBAL_STORE // any CombineInfo, they are only ever returned by
                // getCommonInstClass.
@@ -354,6 +356,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
   case AMDGPU::FLAT_LOAD_DWORD:
   case AMDGPU::FLAT_STORE_DWORD:
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
     return 1;
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
@@ -367,6 +371,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
   case AMDGPU::FLAT_LOAD_DWORDX2:
   case AMDGPU::FLAT_STORE_DWORDX2:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
     return 2;
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
@@ -380,6 +386,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::FLAT_LOAD_DWORDX3:
   case AMDGPU::FLAT_STORE_DWORDX3:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
     return 3;
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
@@ -393,6 +401,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
   case AMDGPU::FLAT_LOAD_DWORDX4:
   case AMDGPU::FLAT_STORE_DWORDX4:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
     return 4;
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
@@ -575,6 +585,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
     return GLOBAL_STORE_SADDR;
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+    return FLAT_LOAD_SADDR;
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
+    return FLAT_STORE_SADDR;
   }
 }
 
@@ -661,6 +681,16 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
     return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+    return AMDGPU::FLAT_LOAD_DWORD_SADDR;
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
+    return AMDGPU::FLAT_STORE_DWORD_SADDR;
   }
 }
 
@@ -776,6 +806,14 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
     Result.SAddr = true;
     [[fallthrough]];
   case AMDGPU::GLOBAL_LOAD_DWORD:
@@ -1875,6 +1913,28 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
     case 4:
       return AMDGPU::FLAT_STORE_DWORDX4;
     }
+  case FLAT_LOAD_SADDR:
+    switch (Width) {
+    default:
+      return 0;
+    case 2:
+      return AMDGPU::FLAT_LOAD_DWORDX2_SADDR;
+    case 3:
+      return AMDGPU::FLAT_LOAD_DWORDX3_SADDR;
+    case 4:
+      return AMDGPU::FLAT_LOAD_DWORDX4_SADDR;
+    }
+  case FLAT_STORE_SADDR:
+    switch (Width) {
+    default:
+      return 0;
+    case 2:
+      return AMDGPU::FLAT_STORE_DWORDX2_SADDR;
+    case 3:
+      return AMDGPU::FLAT_STORE_DWORDX3_SADDR;
+    case 4:
+      return AMDGPU::FLAT_STORE_DWORDX4_SADDR;
+    }
   case MIMG:
     assert(((unsigned)llvm::popcount(CI.DMask | Paired.DMask) == Width) &&
            "No overlaps");
@@ -2508,12 +2568,14 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
       OptimizeListAgain |= CI.Width + Paired.Width < 4;
       break;
     case FLAT_LOAD:
+    case FLAT_LOAD_SADDR:
     case GLOBAL_LOAD:
     case GLOBAL_LOAD_SADDR:
       NewMI = mergeFlatLoadPair(CI, Paired, Where->I);
       OptimizeListAgain |= CI.Width + Paired.Width < 4;
       break;
     case FLAT_STORE:
+    case FLAT_STORE_SADDR:
     case GLOBAL_STORE:
     case GLOBAL_STORE_SADDR:
       NewMI = mergeFlatStorePair(CI, Paired, Where->I);
diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir
new file mode 100644
index 0000000000000..1c133c6114ec2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir
@@ -0,0 +1,338 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=si-load-store-opt -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name:            merge_flat_load_dword_saddr_2
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_flat_load_dword_saddr_2
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_SADDR]].sub0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_SADDR]].sub1
+    ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]]
+    %0:sreg_64_xexec_xnull = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    S_NOP 0, implicit %2, implicit %3
+...
+
+---
+name:            merge_flat_load_dword_saddr_3
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_flat_load_dword_saddr_3
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3_SADDR [[DEF]], [[DEF1]], 0, 1, implicit $exec, implicit $flat_scr :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX3_SADDR]].sub0_sub1
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX3_SADDR]].sub2
+    ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1
+    ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]]
+    %0:sreg_64_xexec_xnull = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 0, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 4, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 8, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    S_NOP 0, implicit %2, implicit %3, implicit %4
+...
+
+---
+name:            merge_flat_load_dword_saddr_4
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_flat_load_dword_saddr_4
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 2, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_SADDR]].sub3
+    ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1
+    ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2
+    ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
+    ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1
+    ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]]
+    %0:sreg_64_xexec_xnull = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 0, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 4, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 8, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %5:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 12, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    S_NOP 0, implicit %2, implicit %3, implicit %4, implicit %5
+...
+
+---
+name:            merge_flat_load_dword_saddr_6
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_flat_load_dword_saddr_6
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 4, 3, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_SADDR]].sub3
+    ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1
+    ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2
+    ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
+    ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1
+    ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 20, 3, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_SADDR]].sub0
+    ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_SADDR]].sub1
+    ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]]
+    %0:sreg_64_xexec_xnull = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 4, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 8, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 12, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %5:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 16, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %6:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 20, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %7:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 24, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    S_NOP 0, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
+...
+
+---
+name:            merge_flat_load_dwordx2_saddr
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_flat_load_dwordx2_saddr
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX4_SADDR]].sub0_sub1
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX4_SADDR]].sub2_sub3
+    ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]]
+    %0:sreg_64_xexec_xnull = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vreg_64_align2 = FLAT_LOAD_DWORDX2_SADDR %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %3:vreg_64_align2 = FLAT_LOAD_DWORDX2_SADDR %0, %1, 8, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    S_NOP 0, implicit %2, implicit %3
+...
+
+---
+name:            no_merge_flat_load_dword_and_flat_load_dword_saddr
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_flat_load_dword_and_flat_load_dword_saddr
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
+    ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
+    ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[FLAT_LOAD_DWORD_SADDR]]
+    %0:sreg_64_xexec_xnull = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1.sub0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    S_NOP 0, implicit %2, implicit %3
+...
+
+---
+name:            no_merge_flat_load_dword_saddr_different_saddr
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_flat_load_dword_saddr_different_saddr
+    ; GCN: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]].sub0_sub1, [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
+    ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]].sub2_sub3, [[DEF1]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
+    ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD_SADDR]], implicit [[FLAT_LOAD_DWORD_SADDR1]]
+    %0:sgpr_128 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0.sub0_sub1, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0.sub2_sub3, %1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    S_NOP 0, implicit %2, implicit %3
+...
+
+---
+name:            no_merge_flat_load_dword_saddr_different_vaddr
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: no_merge_flat_load_dword_saddr_different_vaddr
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
+    ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
+    ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD_SADDR]], implicit [[FLAT_LOAD_DWORD_SADDR1]]
+    %0:sreg_64_xexec_xnull = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1.sub0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1.sub1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
+    S_NOP 0, implicit %2, implicit %3
+...
+---
+name:            merge_flat_store_dword_saddr_2
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_flat_store_dword_saddr_2
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
+    ; GCN-NEXT: FLAT_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %0:sreg_64_xexec_xnull = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    FLAT_STORE_DWORD_SADDR %1, %2, %0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1)
+    FLAT_STORE_DWORD_SADDR %1, %3, %0, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1)
+...
+
+---
+name:            merge_flat_store_dword_saddr_3
+body:             |
+  bb.0.entry:
+
+    ; GCN-LABEL: name: merge_flat_store_dword_saddr_3
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2
+    ; GCN-NEXT: FLAT_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr addrspace(1) undef`, align 4, addrspace 1)
+    %0:sreg_64_xexec_xnull = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = IMPLICIT_DEF
+    FLAT_STORE_DWORD_SADDR %1, %2, %0, 4, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1)
+    FLAT_STORE_DWORD_SADDR %1, %3, %0, 8, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1)
+    FL...
[truncated]

@rampitec rampitec merged commit 668e649 into main Aug 18, 2025
13 checks passed
@rampitec rampitec deleted the users/rampitec/08-18-_amdgpu_support_merging_of_flat_gvs_ops branch August 18, 2025 21:31
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants