Skip to content

Commit cedc90b

Browse files
committed
tmp
1 parent f3a9523 commit cedc90b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+52507
-27539
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9115,6 +9115,63 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
91159115
MachineOperand &Src1 = Inst.getOperand(2);
91169116
const DebugLoc &DL = Inst.getDebugLoc();
91179117

9118+
if (ST.useRealTrue16Insts()) {
9119+
Register SrcReg0 = Src0.getReg();
9120+
Register SrcReg1 = Src1.getReg();
9121+
9122+
if (!RI.isVGPR(MRI, SrcReg0)) {
9123+
SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9124+
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg0).add(Src0);
9125+
}
9126+
if (!RI.isVGPR(MRI, SrcReg1)) {
9127+
SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9128+
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg1).add(Src1);
9129+
}
9130+
bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
9131+
bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
9132+
9133+
auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
9134+
switch (Inst.getOpcode()) {
9135+
case AMDGPU::S_PACK_LL_B32_B16: {
9136+
NewMI
9137+
.addReg(SrcReg0, 0,
9138+
isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9139+
.addImm(AMDGPU::lo16)
9140+
.addReg(SrcReg1, 0,
9141+
isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9142+
.addImm(AMDGPU::hi16);
9143+
} break;
9144+
case AMDGPU::S_PACK_LH_B32_B16: {
9145+
NewMI
9146+
.addReg(SrcReg0, 0,
9147+
isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9148+
.addImm(AMDGPU::lo16)
9149+
.addReg(SrcReg1, 0, AMDGPU::hi16)
9150+
.addImm(AMDGPU::hi16);
9151+
} break;
9152+
case AMDGPU::S_PACK_HL_B32_B16: {
9153+
NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9154+
.addImm(AMDGPU::lo16)
9155+
.addReg(SrcReg1, 0,
9156+
isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9157+
.addImm(AMDGPU::hi16);
9158+
} break;
9159+
case AMDGPU::S_PACK_HH_B32_B16: {
9160+
NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
9161+
.addImm(AMDGPU::lo16)
9162+
.addReg(SrcReg1, 0, AMDGPU::hi16)
9163+
.addImm(AMDGPU::hi16);
9164+
} break;
9165+
default:
9166+
llvm_unreachable("unhandled s_pack_* instruction");
9167+
}
9168+
9169+
MachineOperand &Dest = Inst.getOperand(0);
9170+
MRI.replaceRegWith(Dest.getReg(), ResultReg);
9171+
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9172+
return;
9173+
}
9174+
91189175
switch (Inst.getOpcode()) {
91199176
case AMDGPU::S_PACK_LL_B32_B16: {
91209177
Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

llvm/test/CodeGen/AMDGPU/add.v2i16.ll

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -780,7 +780,7 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(ptr addrspace(1) %out,
780780
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
781781
; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
782782
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
783-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
783+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, 0
784784
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
785785
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
786786
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -790,11 +790,9 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(ptr addrspace(1) %out,
790790
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
791791
; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v1, v0
792792
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
793-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
794-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
795-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
796-
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v2, 16, v3
797-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
793+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
794+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
795+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.h
798796
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, v1
799797
; GFX11-TRUE16-NEXT: global_store_b128 v1, v[0:3], s[0:1]
800798
; GFX11-TRUE16-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 14415 additions & 7927 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll

Lines changed: 1536 additions & 820 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll

Lines changed: 3865 additions & 2029 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)