28 changes: 27 additions & 1 deletion llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5324,6 +5324,12 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
unsigned CPol = Inst.getOperand(CPolPos).getImm();

if (!isGFX1250()) {
if (CPol & CPol::SCAL) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
Error(S, "scale_offset is not supported on this GPU");
}
if (CPol & CPol::NV) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
@@ -5332,6 +5338,13 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
}
}

if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
Error(S, "scale_offset is not supported for this instruction");
}

if (isGFX12Plus())
return validateTHAndScopeBits(Inst, Operands, CPol);

@@ -7003,6 +7016,7 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
ParseStatus ResTH = ParseStatus::NoMatch;
ParseStatus ResScope = ParseStatus::NoMatch;
ParseStatus ResNV = ParseStatus::NoMatch;
ParseStatus ResScal = ParseStatus::NoMatch;

for (;;) {
if (ResTH.isNoMatch()) {
@@ -7041,10 +7055,22 @@ }
}
}

if (ResScal.isNoMatch()) {
if (trySkipId("scale_offset")) {
ResScal = ParseStatus::Success;
CPolVal |= CPol::SCAL;
continue;
} else if (trySkipId("no", "scale_offset")) {
ResScal = ParseStatus::Success;
continue;
}
}

break;
}

if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch())
if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
ResScal.isNoMatch())
return ParseStatus::NoMatch;

Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
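For context, the new modifier follows the existing cpol-parsing pattern: the loop tries each modifier class at most once, in any order, accumulates bits, and reports NoMatch only if nothing matched; noscale_offset is consumed but leaves the bit clear. A minimal, self-contained sketch of that pattern (illustrative names and token handling, not the LLVM API):

#include <optional>
#include <string>
#include <vector>

constexpr unsigned SCAL = 1u << 11; // mirrors CPol::SCAL from SIDefines.h

// Returns std::nullopt when no modifier matched (ParseStatus::NoMatch in the
// real parser), otherwise the accumulated cpol immediate.
std::optional<unsigned> parseCPolSketch(const std::vector<std::string> &Toks) {
  unsigned CPol = 0;
  bool SeenScal = false, SeenAny = false;
  for (size_t I = 0; I < Toks.size();) {
    if (!SeenScal && Toks[I] == "scale_offset") {
      CPol |= SCAL; // "scale_offset" sets the bit
      SeenScal = SeenAny = true;
      ++I;
      continue;
    }
    if (!SeenScal && Toks[I] == "noscale_offset") {
      SeenScal = SeenAny = true; // accepted, but the bit stays clear
      ++I;
      continue;
    }
    break; // no modifier recognized: stop scanning
  }
  return SeenAny ? std::optional<unsigned>(CPol) : std::nullopt;
}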
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -2941,6 +2941,7 @@ multiclass VFLAT_Real_gfx12 <bits<8> op, string name = get_FLAT_ps<NAME>.Mnemoni
let DecoderNamespace = "GFX12";

let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch};
let Inst{48} = cpol{CPolBit.SCAL}; // scale offset
}
}

@@ -3170,6 +3171,7 @@ multiclass VFLAT_Real_gfx1250<bits<8> op,
let DecoderNamespace = "GFX1250";

let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch};
let Inst{48} = cpol{CPolBit.SCAL}; // scale offset
}
}

3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1162,6 +1162,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }

// Scalar and global loads support scale_offset bit.
bool hasScaleOffset() const { return GFX1250Insts; }

bool hasFlatGVSMode() const { return FlatGVSMode; }

bool enableSIScheduler() const {
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -157,6 +157,9 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const int64_t TH = Imm & CPol::TH;
const int64_t Scope = Imm & CPol::SCOPE;

if (Imm & CPol::SCAL)
O << " scale_offset";

printTH(MI, TH, Scope, O);
printScope(Scope, O);

2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIDefines.h
@@ -402,6 +402,8 @@ enum CPol {

SWZ = 1 << 6, // Swizzle bit

SCAL = 1 << 11, // Scale offset bit

ALL = TH | SCOPE,

// Helper bits
14 changes: 10 additions & 4 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1209,18 +1209,24 @@ void SIFoldOperandsImpl::foldOperand(
return;
}

// A frame index will resolve to a positive constant, so it should always be
// safe to fold the addressing mode, even pre-GFX9.
UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getFI());

const unsigned Opc = UseMI->getOpcode();
if (TII->isFLATScratch(*UseMI) &&
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) &&
!AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::saddr)) {
unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc);
unsigned CPol =
TII->getNamedOperand(*UseMI, AMDGPU::OpName::cpol)->getImm();
if ((CPol & AMDGPU::CPol::SCAL) &&
!AMDGPU::supportsScaleOffset(*TII, NewOpc))
return;

UseMI->setDesc(TII->get(NewOpc));
}

// A frame index will resolve to a positive constant, so it should always be
// safe to fold the addressing mode, even pre-GFX9.
UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getFI());

return;
}

1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -318,6 +318,7 @@ def CPolBit {
int DLC = 2;
int SCC = 4;
int NV = 5;
int SCAL = 11;
}

class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5482,6 +5482,19 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}

if (const MachineOperand *CPol = getNamedOperand(MI, AMDGPU::OpName::cpol)) {
if (CPol->getImm() & AMDGPU::CPol::SCAL) {
if (!ST.hasScaleOffset()) {
ErrInfo = "Subtarget does not support offset scaling";
return false;
}
if (!AMDGPU::supportsScaleOffset(*this, MI.getOpcode())) {
ErrInfo = "Instruction does not support offset scaling";
return false;
}
}
}

return true;
}

5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -61,6 +61,7 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
Review comment (Contributor):
Honestly, I'd make several groups here and then leave blank lines to prevent clang-format from complaining about it but it's completely up to you.

// This is always the first one
#include "SILoadStoreOptimizer.h"

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

// then llvm headers

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
@@ -1078,7 +1079,9 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
if (EltOffset0 + CI.Width != EltOffset1 &&
EltOffset1 + Paired.Width != EltOffset0)
return false;
if (CI.CPol != Paired.CPol)
// Instructions with the scale_offset modifier cannot be combined unless we
// also generate code to scale the offset and reset that bit.
if (CI.CPol != Paired.CPol || (CI.CPol & AMDGPU::CPol::SCAL))
return false;
if (CI.InstClass == S_LOAD_IMM || CI.InstClass == S_BUFFER_LOAD_IMM ||
CI.InstClass == S_BUFFER_LOAD_SGPR_IMM) {
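Why the optimizer bails out rather than merging: under a scaled-offset access the hardware multiplies the register offset by the access size (a simplified model; the exact gfx1250 semantics live in the ISA documentation), so widening two dword loads into one dwordx2 load changes the effective address unless the offset is rescaled and the bit cleared, which the comment in the patch notes is not done yet. A toy calculation under that assumption:

#include <cassert>
#include <cstdint>

// Simplified scaled-offset addressing: the register offset is multiplied by
// the access size before the immediate is added.
uint64_t scaledAddr(uint64_t Base, uint64_t RegOfs, uint64_t Imm,
                    unsigned AccessSize) {
  return Base + RegOfs * AccessSize + Imm;
}

int main() {
  uint64_t Base = 0x1000, RegOfs = 3;
  // Two adjacent dword loads, each scaling RegOfs by 4:
  uint64_t Lo = scaledAddr(Base, RegOfs, /*Imm=*/0, /*AccessSize=*/4);
  uint64_t Hi = scaledAddr(Base, RegOfs, /*Imm=*/4, /*AccessSize=*/4);
  assert(Hi == Lo + 4); // contiguous, so normally mergeable
  // A merged dwordx2 load would scale the same RegOfs by 8 instead:
  uint64_t Merged = scaledAddr(Base, RegOfs, /*Imm=*/0, /*AccessSize=*/8);
  assert(Merged != Lo); // the merged access no longer starts at Lo
  return 0;
}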
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -1488,6 +1488,7 @@ class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offs
let Inst{20} = cpol{CPolBit.NV}; // non-volatile
let Inst{22-21} = cpol{4-3}; // scope
let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
let Inst{56} = cpol{CPolBit.SCAL}; // scale offset
}

multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
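The TableGen lines above (Inst{48} for VFLAT, Inst{56} for SMEM loads) each wire a single operand bit into a single encoding bit. A hedged C++ illustration of what that slice assignment amounts to at encoding time, with only the bit positions taken from the patch and the rest assumed scaffolding:

#include <cstdint>

constexpr unsigned CPolBitSCAL = 11; // int SCAL = 11; in SIInstrFormats.td

// Copy bit 11 of the cpol operand value into the given bit of the 64-bit
// instruction word, i.e. the effect of let Inst{48} = cpol{CPolBit.SCAL}
// (VFLAT) or let Inst{56} = cpol{CPolBit.SCAL} (SMEM).
uint64_t encodeScaleOffset(uint64_t Inst, uint64_t CPol, unsigned InstBit) {
  uint64_t Scal = (CPol >> CPolBitSCAL) & 1;
  return Inst | (Scal << InstBit);
}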
19 changes: 19 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3228,6 +3228,25 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
: getGfx9BufferFormatInfo(Format);
}

bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
uint64_t TSFlags = MII.get(Opcode).TSFlags;

if (TSFlags & SIInstrFlags::SMRD)
return !getSMEMIsBuffer(Opcode);
if (!(TSFlags & SIInstrFlags::FLAT))
return false;

// Only SV and SVS modes are supported.
if (TSFlags & SIInstrFlags::FlatScratch)
return hasNamedOperand(Opcode, OpName::vaddr);

// Only GVS mode is supported.
return hasNamedOperand(Opcode, OpName::vaddr) &&
hasNamedOperand(Opcode, OpName::saddr);
}

bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
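Restating the rules the new helper encodes, as a self-contained sketch in which plain booleans stand in for the TSFlags and named-operand queries: scalar (SMEM) loads qualify except the buffer forms, FLAT-scratch qualifies only when a vaddr is present (the SV and SVS forms), and global qualifies only when both vaddr and saddr are present (the GVS form). All names below are illustrative:

// Stand-ins for the MII.get(Opcode).TSFlags tests and hasNamedOperand
// queries the real helper performs.
struct InstProps {
  bool IsSMEM;        // SIInstrFlags::SMRD
  bool IsSMEMBuffer;  // getSMEMIsBuffer(Opcode)
  bool IsFLAT;        // SIInstrFlags::FLAT
  bool IsFlatScratch; // SIInstrFlags::FlatScratch
  bool HasVaddr;      // hasNamedOperand(Opcode, OpName::vaddr)
  bool HasSaddr;      // hasNamedOperand(Opcode, OpName::saddr)
};

bool supportsScaleOffsetSketch(const InstProps &P) {
  if (P.IsSMEM)
    return !P.IsSMEMBuffer;        // scalar loads, except s_buffer_load forms
  if (!P.IsFLAT)
    return false;                  // nothing outside SMEM/FLAT takes the bit
  if (P.IsFlatScratch)
    return P.HasVaddr;             // scratch: SV and SVS forms only
  return P.HasVaddr && P.HasSaddr; // global: GVS form only
}

int main() {
  // The SS form of a scratch load: FlatScratch with saddr but no vaddr.
  InstProps ScratchSS{false, false, true, true, /*HasVaddr=*/false,
                      /*HasSaddr=*/true};
  return supportsScaleOffsetSketch(ScratchSS) ? 1 : 0; // 0: unsupported
}

This is also why the SIFoldOperands change above refuses the SV-to-SS rewrite when SCAL is set: the SS form has no vaddr, so the helper returns false for it.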
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1757,6 +1757,9 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID);
/// \returns true if the intrinsic is uniform
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns true if a memory instruction supports scale_offset modifier.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);

/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
43 changes: 43 additions & 0 deletions llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi-gfx1250.mir
@@ -0,0 +1,43 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -start-before=si-fold-operands -stop-after=prologepilog -o - %s | FileCheck -check-prefix=GCN %s

---
name: test_fold_fi_scratch_load_vgpr
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: $sgpr32
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
body: |
bb.0.entry:
; GCN-LABEL: name: test_fold_fi_scratch_load_vgpr
; GCN: renamable $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
S_ENDPGM 0, implicit %1

...

# The SS form of SCRATCH_LOAD_DWORD does not support offset scaling.

---
name: test_no_fold_fi_scratch_load_vgpr_scale_offset
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: $sgpr32
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
body: |
bb.0.entry:
; GCN-LABEL: name: test_no_fold_fi_scratch_load_vgpr_scale_offset
; GCN: renamable $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
; GCN-NEXT: renamable $vgpr0 = SCRATCH_LOAD_DWORD killed renamable $vgpr0, 4, 2048, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 2048, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
S_ENDPGM 0, implicit %1

...
104 changes: 104 additions & 0 deletions llvm/test/CodeGen/AMDGPU/load-store-opt-scale-offset.mir
@@ -0,0 +1,104 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=si-load-store-opt -o - %s | FileCheck -check-prefix=GCN %s

---
name: merge_global_load_dword_2_no_scale_offset
body: |
bb.0.entry:

; GCN-LABEL: name: merge_global_load_dword_2_no_scale_offset
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 0, 1, implicit $exec :: (load (s64) from `ptr addrspace(1) undef` + 4, align 4, addrspace 1)
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
; GCN-NEXT: S_NOP 0, implicit [[DEF1]], implicit [[COPY]]
%0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 1, implicit $exec :: (load (s32) from `float addrspace(1)* undef` + 4, basealign 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, addrspace 1)
S_NOP 0, implicit %1, implicit %2
...

---
name: no_merge_global_load_dword_2_same_scale_offset
body: |
bb.0.entry:

; GCN-LABEL: name: no_merge_global_load_dword_2_same_scale_offset
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 2049, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 4, addrspace 1)
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 4, 2049, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 8, addrspace 1)
; GCN-NEXT: S_NOP 0, implicit [[DEF1]], implicit [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 2049, implicit $exec :: (load (s32) from `float addrspace(1)* undef` + 4, basealign 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 2049, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, addrspace 1)
S_NOP 0, implicit %1, implicit %2
...

---
name: no_merge_global_load_dword_2_different_scale_offset
body: |
bb.0.entry:

; GCN-LABEL: name: no_merge_global_load_dword_2_different_scale_offset
; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 4, addrspace 1)
; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 4, 2048, implicit $exec :: (load (s32) from `ptr addrspace(1) undef` + 8, addrspace 1)
; GCN-NEXT: S_NOP 0, implicit [[DEF1]], implicit [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sreg_64_xexec_xnull = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef` + 4, basealign 4, addrspace 1)
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 2048, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, addrspace 1)
S_NOP 0, implicit %1, implicit %2
...

# NB: We do not currently support merging SGPR offset and SGPR+Imm offset forms
# of S_LOAD, but the check stays the same: these cannot be merged with different
# scale offsets.
#
# We also do not currently merge flat scratch instructions, although a common
# check in the merge logic requires that CPol not be set for a merge to happen.

---
name: merge_s_load_x1_x1_imm_no_scale_offset
body: |
bb.0:
; GCN-LABEL: name: merge_s_load_x1_x1_imm_no_scale_offset
; GCN: [[DEF:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[DEF]], 0, 0 :: (dereferenceable invariant load (s64), align 4)
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_LOAD_DWORDX2_IMM]].sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1
%0:sgpr_64 = IMPLICIT_DEF
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 0, 0 :: (dereferenceable invariant load (s32))
%2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 4, 0 :: (dereferenceable invariant load (s32))
...

---
name: no_merge_s_load_x1_x1_imm_same_scale_offset
body: |
bb.0:
; GCN-LABEL: name: no_merge_s_load_x1_x1_imm_same_scale_offset
; GCN: [[DEF:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[DEF]], 0, 2048 :: (dereferenceable invariant load (s32))
; GCN-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[DEF]], 4, 2048 :: (dereferenceable invariant load (s32))
%0:sgpr_64 = IMPLICIT_DEF
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 0, 2048 :: (dereferenceable invariant load (s32))
%2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 4, 2048 :: (dereferenceable invariant load (s32))
...

---
name: no_merge_s_load_x1_x1_imm_different_scale_offset
body: |
bb.0:
; GCN-LABEL: name: no_merge_s_load_x1_x1_imm_different_scale_offset
; GCN: [[DEF:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[DEF]], 0, 0 :: (dereferenceable invariant load (s32))
; GCN-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[DEF]], 4, 2048 :: (dereferenceable invariant load (s32))
%0:sgpr_64 = IMPLICIT_DEF
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 0, 0 :: (dereferenceable invariant load (s32))
%2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0:sgpr_64, 4, 2048 :: (dereferenceable invariant load (s32))
...