9 changes: 1 addition & 8 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2293,16 +2293,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
assert((ApertureRegNo != AMDGPU::SRC_PRIVATE_BASE ||
!ST.hasGloballyAddressableScratch()) &&
"Cannot use src_private_base with globally addressable scratch!");
// FIXME: It would be more natural to emit a COPY here, but then copy
// coalescing would kick in and it would think it's okay to use the "HI"
// subregister (instead of extracting the HI 32 bits) which is an artificial
// (unusable) register.
// Register TableGen definitions would need an overhaul to get rid of the
// artificial "HI" aperture registers and prevent this kind of issue from
// happening.
Register Dst = MRI.createGenericVirtualRegister(S64);
MRI.setRegClass(Dst, &AMDGPU::SReg_64RegClass);
B.buildInstr(AMDGPU::S_MOV_B64, {Dst}, {Register(ApertureRegNo)});
B.buildCopy({Dst}, {Register(ApertureRegNo)});
return B.buildUnmerge(S32, Dst).getReg(1);
}

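For reference, a standalone sketch of the copy-then-unmerge pattern the hunk above switches to. It assumes the usual AMDGPU GlobalISel context (a MachineIRBuilder, its MachineRegisterInfo, and a 64-bit aperture register such as AMDGPU::SRC_SHARED_BASE); the helper name and the exact includes are illustrative, not part of the patch.

#include "SIRegisterInfo.h" // assumed to declare AMDGPU::SReg_64RegClass and the aperture registers
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Read the high 32 bits of a 64-bit aperture register without naming an
// artificial "HI" subregister: copy the whole register, then unmerge it.
static Register readApertureHigh32(MachineIRBuilder &B,
                                   MCRegister ApertureRegNo) {
  MachineRegisterInfo &MRI = *B.getMRI();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  // Generic 64-bit vreg constrained to a scalar register class, fed by a
  // plain COPY from the physical aperture register.
  Register Dst = MRI.createGenericVirtualRegister(S64);
  MRI.setRegClass(Dst, &AMDGPU::SReg_64RegClass);
  B.buildCopy(Dst, Register(ApertureRegNo));
  // The aperture value lives in the upper half, i.e. result #1 of the unmerge.
  return B.buildUnmerge(S32, Dst).getReg(1);
}

Emitting a plain COPY here (rather than a target S_MOV_B64) appears to be what the SIInstrInfo.cpp and SIRegisterInfo.td changes below enable, by letting copyPhysReg and SSrc_b64 accept the encodable but unallocatable aperture registers.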
19 changes: 4 additions & 15 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8159,25 +8159,14 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// it returns a wrong value (all zeroes?). The real value is in the upper 32
// bits.
//
// To work around the issue, directly emit a 64 bit mov from this register
// To work around the issue, emit a 64 bit copy from this register
// then extract the high bits. Note that this shouldn't even result in a
// shift being emitted and simply become a pair of registers (e.g.):
// s_mov_b64 s[6:7], src_shared_base
// v_mov_b32_e32 v1, s7
//
// FIXME: It would be more natural to emit a CopyFromReg here, but then copy
// coalescing would kick in and it would think it's okay to use the "HI"
// subregister directly (instead of extracting the HI 32 bits) which is an
// artificial (unusable) register.
// Register TableGen definitions would need an overhaul to get rid of the
// artificial "HI" aperture registers and prevent this kind of issue from
// happening.
SDNode *Mov = DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64,
DAG.getRegister(ApertureRegNo, MVT::i64));
return DAG.getNode(
ISD::TRUNCATE, DL, MVT::i32,
DAG.getNode(ISD::SRL, DL, MVT::i64,
{SDValue(Mov, 0), DAG.getConstant(32, DL, MVT::i64)}));
SDValue Copy =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, ApertureRegNo, MVT::v2i32);
return DAG.getExtractVectorElt(DL, MVT::i32, Copy, 1);
}

// For code object version 5, private_base and shared_base are passed through
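The same idea on the SelectionDAG path, as a standalone sketch: read the aperture register as a v2i32 CopyFromReg and take lane 1, instead of a 64-bit mov followed by a shift and truncate. It assumes a SelectionDAG and SDLoc in scope; the helper name is illustrative, not part of the patch.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Read the high 32 bits of a 64-bit aperture register on the SelectionDAG path.
static SDValue readApertureHigh32(SelectionDAG &DAG, const SDLoc &DL,
                                  MCRegister ApertureRegNo) {
  // Read the full 64-bit register as a pair of 32-bit lanes; no SRL/TRUNCATE
  // and no artificial "HI" subregister is needed.
  SDValue Copy =
      DAG.getCopyFromReg(DAG.getEntryNode(), DL, ApertureRegNo, MVT::v2i32);
  // The aperture value is the upper 32 bits: lane 1 of the v2i32 copy.
  return DAG.getExtractVectorElt(DL, MVT::i32, Copy, 1);
}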
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -913,7 +913,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}

if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
40 changes: 24 additions & 16 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -228,16 +228,12 @@ def SGPR_NULL64 :
// need them, we need to do a 64 bit load and extract the bits manually.
multiclass ApertureRegister<string name, bits<10> regIdx> {
let isConstant = true in {
// FIXME: We shouldn't need to define subregisters for these (nor add them to any 16 bit
// register classes), but if we don't it seems to confuse the TableGen
// backend and we end up with a lot of weird register pressure sets and classes.
defm _LO : SIRegLoHi16 <name, regIdx>;
defm _HI : SIRegLoHi16 <"", regIdx>;

def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO), !cast<Register>(NAME#_HI)]> {
def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO)]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let SubRegIndices = [sub0];
let HWEncoding = !cast<Register>(NAME#_LO).HWEncoding;
let CoveredBySubRegs = 0;
}
} // isConstant = true
}
@@ -790,8 +786,7 @@ let GeneratePressureSet = 0, HasSGPR = 1 in {
def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID,
SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_POPS_EXITING_WAVE_ID,
SRC_VCCZ, SRC_EXECZ, SRC_SCC, SRC_FLAT_SCRATCH_BASE_LO, SRC_FLAT_SCRATCH_BASE_HI)> {
let AllocationPriority = 0;
}
@@ -801,10 +796,9 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
SRC_SHARED_BASE_HI_LO16, SRC_SHARED_LIMIT_HI_LO16, SRC_PRIVATE_BASE_HI_LO16,
SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16,
SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16,
SRC_FLAT_SCRATCH_BASE_LO_LO16, SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16,
EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16,
SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
let Size = 16;
let isAllocatable = 0;
let BaseClassOrder = 16;
@@ -825,6 +819,13 @@ def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
let AllocationPriority = 0;
}

def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32,
(add SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
let isAllocatable = 0;
let Size = 64;
let BaseClassOrder = 10000;
}

} // End GeneratePressureSet = 0

// Register class for all scalar registers (SGPRs + Special Registers)
@@ -876,8 +877,7 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
}

def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE,
SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA,
SRC_FLAT_SCRATCH_BASE)> {
let CopyCost = 1;
let AllocationPriority = 1;
@@ -900,6 +900,14 @@ def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f1
let Size = 64;
}

def SReg_64_Encodable : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
(add SReg_64, APERTURE_Class)> {
let CopyCost = 1;
let isAllocatable = 0;
let HasSGPR = 1;
let Size = 64;
}

def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
(add SReg_64_XEXEC, SReg_32_XEXEC)> {
let CopyCost = 1;
@@ -1225,7 +1233,7 @@ def SSrc_bf16: SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_BF16">;
def SSrc_f16 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP16">;
def SSrc_b32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_INT32">;
def SSrc_f32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP32">;
def SSrc_b64 : SrcRegOrImm9 <SReg_64, "OPERAND_REG_IMM_INT64">;
def SSrc_b64 : SrcRegOrImm9 <SReg_64_Encodable, "OPERAND_REG_IMM_INT64">;

def SSrcOrLds_b32 : SrcRegOrImm9 <SRegOrLds_32, "OPERAND_REG_IMM_INT32">;

12 changes: 5 additions & 7 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
@@ -9,12 +9,11 @@
define amdgpu_ps void @amdgpu_ps() {
; MESA-LABEL: amdgpu_ps:
; MESA: ; %bb.0:
; MESA-NEXT: s_add_u32 flat_scratch_lo, s2, s4
; MESA-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; MESA-NEXT: s_mov_b64 s[0:1], src_private_base
; MESA-NEXT: s_mov_b32 s0, 0
; MESA-NEXT: s_mov_b64 s[2:3], src_private_base
; MESA-NEXT: s_mov_b32 s1, s3
; MESA-NEXT: s_add_u32 flat_scratch_lo, s2, s4
; MESA-NEXT: v_mov_b32_e32 v0, s0
; MESA-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; MESA-NEXT: v_mov_b32_e32 v2, 0
; MESA-NEXT: v_mov_b32_e32 v1, s1
; MESA-NEXT: flat_store_dword v[0:1], v2
@@ -30,11 +29,10 @@ define amdgpu_ps void @amdgpu_ps() {
; PAL-NEXT: s_waitcnt lgkmcnt(0)
; PAL-NEXT: s_and_b32 s3, s3, 0xffff
; PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
; PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; PAL-NEXT: s_mov_b64 s[0:1], src_private_base
; PAL-NEXT: s_mov_b32 s0, 0
; PAL-NEXT: s_mov_b64 s[2:3], src_private_base
; PAL-NEXT: s_mov_b32 s1, s3
; PAL-NEXT: v_mov_b32_e32 v0, s0
; PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; PAL-NEXT: v_mov_b32_e32 v1, s1
; PAL-NEXT: flat_store_dword v[0:1], v2
; PAL-NEXT: s_waitcnt vmcnt(0)
@@ -65,52 +65,52 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX9V4-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GFX9V4-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_mov_b32 s2, s0
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V4-NEXT: s_mov_b32 s0, s4
; GFX9V4-NEXT: s_cmp_lg_u32 s4, -1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[0:1], 0
; GFX9V4-NEXT: s_mov_b32 s2, s5
; GFX9V4-NEXT: s_cmp_lg_u32 s5, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX9V4-NEXT: s_mov_b32 s4, s1
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
; GFX9V4-NEXT: v_mov_b32_e32 v1, s1
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
; GFX9V4-NEXT: v_mov_b32_e32 v2, 2
; GFX9V4-NEXT: v_mov_b32_e32 v1, s1
; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: addrspacecast:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V5-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX9V5-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; GFX9V5-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_mov_b32 s2, s0
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V5-NEXT: s_mov_b32 s0, s4
; GFX9V5-NEXT: s_cmp_lg_u32 s4, -1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[0:1], 0
; GFX9V5-NEXT: s_mov_b32 s2, s5
; GFX9V5-NEXT: s_cmp_lg_u32 s5, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX9V5-NEXT: s_mov_b32 s4, s1
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
; GFX9V5-NEXT: v_mov_b32_e32 v1, s1
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
; GFX9V5-NEXT: v_mov_b32_e32 v2, 2
; GFX9V5-NEXT: v_mov_b32_e32 v1, s1
; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
@@ -150,10 +150,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 {
;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V4-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V4-NEXT: s_cmp_eq_u32 s3, s1
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@@ -162,10 +162,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) #0 {
;
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_shared_base
; GFX9V5-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V5-NEXT: s_cmp_eq_u32 s3, s1
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
@@ -206,10 +206,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 {
;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V4-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V4-NEXT: s_cmp_eq_u32 s3, s1
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
@@ -218,10 +218,10 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) #0 {
;
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V5-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
; GFX9V5-NEXT: s_cmp_eq_u32 s3, s1
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -158,8 +158,8 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY $src_private_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
Expand Down Expand Up @@ -227,8 +227,8 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY $src_shared_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
@@ -380,16 +380,16 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base
; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY $src_shared_base
; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV3]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
; GFX9-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
; GFX9-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
; GFX9-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_shared_base
; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_1]](s64)
; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_64(s64) = COPY $src_shared_base
; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[UV5]](s32)
; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]]
; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C1]]
@@ -517,8 +517,8 @@ body: |
; GFX9-LABEL: name: test_addrspacecast_p5_fi_to_p0
; GFX9: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5)
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $src_private_base
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
%0:_(p5) = G_FRAME_INDEX %stack.0