-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU] Select scale_offset for scratch instructions on gfx1250 #150111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
rampitec
merged 2 commits into
main
from
users/rampitec/07-22-_amdgpu_select_scale_offset_for_scratch_instructions_on_gfx1250
Jul 22, 2025
Merged
[AMDGPU] Select scale_offset for scratch instructions on gfx1250 #150111
rampitec
merged 2 commits into
main
from
users/rampitec/07-22-_amdgpu_select_scale_offset_for_scratch_instructions_on_gfx1250
Jul 22, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) ChangesPatch is 40.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150111.diff 7 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b15c946de16f9..5a2416debb417 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2116,7 +2116,8 @@ bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
SDValue &VAddr, SDValue &SAddr,
- SDValue &Offset) const {
+ SDValue &Offset,
+ SDValue &CPol) const {
int64_t ImmOffset = 0;
SDValue LHS, RHS;
@@ -2148,6 +2149,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
return false;
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
+ CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
return true;
}
}
@@ -2181,6 +2183,10 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return false;
SAddr = SelectSAddrFI(CurDAG, SAddr);
Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
+
+ bool ScaleOffset = SelectScaleOffset(N, VAddr, true /* IsSigned */);
+ CPol = CurDAG->getTargetConstant(ScaleOffset ? AMDGPU::CPol::SCAL : 0,
+ SDLoc(), MVT::i32);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index eb23e80943bb7..6123d75d7b616 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -175,7 +175,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
uint64_t ImmOffset) const;
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &SAddr, SDValue &Offset) const;
+ SDValue &SAddr, SDValue &Offset,
+ SDValue &CPol) const;
bool SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6b38beecea543..877c3ac34d555 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5895,22 +5895,32 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
return std::nullopt;
+ unsigned CPol = selectScaleOffset(Root, RHS, true /* IsSigned */)
+ ? AMDGPU::CPol::SCAL
+ : 0;
+
if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
int FI = LHSDef->MI->getOperand(1).getIndex();
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
[=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
- [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, // offset
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); } // cpol
}};
}
+ if (!isSGPR(LHS))
+ if (auto Def = getDefSrcRegIgnoringCopies(LHS, *MRI))
+ LHS = Def->Reg;
+
if (!isSGPR(LHS))
return std::nullopt;
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
- [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
- [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, // offset
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); } // cpol
}};
}
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 1cc717bb437de..679c55dd0ea48 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -14,7 +14,7 @@ let WantsRoot = true in {
def GlobalSAddr : ComplexPattern<iPTR, 4, "SelectGlobalSAddr", [], [], -10>;
def GlobalSAddrGLC : ComplexPattern<iPTR, 4, "SelectGlobalSAddrGLC", [], [], -10>;
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [], -10>;
- def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [], -10>;
+ def ScratchSVAddr : ComplexPattern<iPTR, 4, "SelectScratchSVAddr", [], [], -10>;
}
class True16D16Table <string hiOp, string loOp> {
@@ -1443,19 +1443,19 @@ class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
>;
class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
- (inst $vaddr, $saddr, $offset, 0)
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol))),
+ (inst $vaddr, $saddr, $offset, $cpol)
>;
class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
- (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)),
- (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
+ (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol)),
+ (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset, $cpol)
>;
class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
- (inst $vaddr, $saddr, $offset, 0, $in)
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol), vt:$in)),
+ (inst $vaddr, $saddr, $offset, $cpol, $in)
>;
class ScratchLoadSVaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index 8a80afd4a768f..fa0e4b9c23df3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -257,20 +257,16 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX12-NEXT: v_mov_b32_e32 v2, 15
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
+; GFX12-NEXT: v_mov_b32_e32 v2, 15
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
-; GFX12-NEXT: scratch_store_b32 v0, v2, off scope:SCOPE_SYS
+; GFX12-NEXT: scratch_store_b32 v0, v2, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
-; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_endpgm
;
@@ -357,20 +353,16 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
; UNALIGNED_GFX12: ; %bb.0: ; %bb
; UNALIGNED_GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0
; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
-; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
+; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0
; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 7
-; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
-; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
-; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off scope:SCOPE_SYS
+; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, s0 scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
-; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
+; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
; UNALIGNED_GFX12-NEXT: s_endpgm
bb:
@@ -937,19 +929,17 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v2, 15
; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
+; GFX12-NEXT: v_mov_b32_e32 v2, 15
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: scratch_store_b32 v0, v2, off offset:384 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
; GFX12-NEXT: s_add_co_u32 s0, 0x100, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
-; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_endpgm
;
@@ -1048,19 +1038,17 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
-; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
+; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off offset:384 scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 7
; UNALIGNED_GFX12-NEXT: s_add_co_u32 s0, 0x100, s0
-; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
-; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
+; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
; UNALIGNED_GFX12-NEXT: s_endpgm
bb:
@@ -1579,19 +1567,17 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v2, 15
; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
+; GFX12-NEXT: v_mov_b32_e32 v2, 15
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: scratch_store_b32 v0, v2, off offset:16512 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
; GFX12-NEXT: s_add_co_u32 s0, 0x4000, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
-; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_endpgm
;
@@ -1692,19 +1678,17 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
-; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
+; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off offset:16512 scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 7
; UNALIGNED_GFX12-NEXT: s_add_co_u32 s0, 0x4000, s0
-; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
-; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
+; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
; UNALIGNED_GFX12-NEXT: s_endpgm
bb:
@@ -4060,9 +4044,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS
+; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
;
@@ -4113,9 +4095,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
; UNALIGNED_GFX12: ; %bb.0: ; %bb
; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
-; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS
+; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
; UNALIGNED_GFX12-NEXT: s_endpgm
bb:
@@ -4172,9 +4152,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
+; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
;
@@ -4223,9 +4201,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
; UNALIGNED_GFX12: ; %bb.0: ; %bb
; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
-; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
+; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
; UNALIGNED_GFX12-NEXT: s_endpgm
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index a98df5c97293c..b0e6752386285 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -150,13 +150,11 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
+; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
-; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
+; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
-; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
+; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: s_endpgm
bb:
@@ -321,15 +319,14 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
+; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
-; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
+; GFX12-GISEL-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
-; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
+; GFX12-GISEL-NEXT: scratch_store_b8 v0, v3, s0 offset:4 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
; GFX12-GISEL-NEXT: s_endpgm
bb:
@@ -494,15 +491,14 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
+; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-GI...
[truncated]
|
shiltian
approved these changes
Jul 22, 2025
Base automatically changed from
users/rampitec/07-22-_amdgpu_select_scale_offset_for_global_instructions_on_gfx1250
to
main
July 22, 2025 22:04
mahesh-attarde
pushed a commit
to mahesh-attarde/llvm-project
that referenced
this pull request
Jul 28, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.

No description provided.