Skip to content

Commit 97026e3

Browse files
committed
added imm case
1 parent 5209296 commit 97026e3

15 files changed

+11285
-17205
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9116,17 +9116,19 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
91169116
const DebugLoc &DL = Inst.getDebugLoc();
91179117

91189118
if (ST.useRealTrue16Insts()) {
9119-
Register SrcReg0 = Src0.getReg();
9120-
Register SrcReg1 = Src1.getReg();
9121-
9122-
if (!RI.isVGPR(MRI, SrcReg0)) {
9119+
Register SrcReg0, SrcReg1;
9120+
if (!Src0.isReg() || (Src0.isReg() && !RI.isVGPR(MRI, Src0.getReg()))) {
91239121
SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
91249122
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg0).add(Src0);
9125-
}
9126-
if (!RI.isVGPR(MRI, SrcReg1)) {
9123+
} else
9124+
SrcReg0 = Src0.getReg();
9125+
9126+
if (!Src1.isReg() || (Src1.isReg() && !RI.isVGPR(MRI, Src1.getReg()))) {
91279127
SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
91289128
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg1).add(Src1);
9129-
}
9129+
} else
9130+
SrcReg1 = Src1.getReg();
9131+
91309132
bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
91319133
bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
91329134

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll

Lines changed: 108 additions & 60 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll

Lines changed: 1196 additions & 2452 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll

Lines changed: 1496 additions & 2640 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll

Lines changed: 1716 additions & 2784 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll

Lines changed: 1974 additions & 2936 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll

Lines changed: 2216 additions & 3066 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll

Lines changed: 2454 additions & 3178 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -338,3 +338,16 @@ body: |
338338
%2:sreg_32 = S_FMAC_F16 %1:sreg_32, %1:sreg_32, %1:sreg_32, implicit $mode
339339
%3:sreg_32 = S_PACK_LL_B32_B16 %2:sreg_32, %1:sreg_32, implicit-def dead $scc
340340
...
341+
342+
---
343+
name: s_pack_ll_b32_b16_use_imm
344+
body: |
345+
bb.0:
346+
; GCN-LABEL: name: s_pack_ll_b32_b16_use_imm
347+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
348+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
349+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B32_e32_]].lo16, %subreg.lo16, [[DEF]].lo16, %subreg.hi16
350+
%0:vgpr_32 = IMPLICIT_DEF
351+
%1:sreg_32 = COPY %0:vgpr_32
352+
%2:sreg_32 = S_PACK_LL_B32_B16 1, %1:sreg_32, implicit-def dead $scc
353+
...

llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -850,14 +850,18 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_reghi(ptr addrspace(1) %out,
850850
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
851851
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
852852
; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x10
853-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
853+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
854854
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
855855
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
856856
; GFX11-TRUE16-NEXT: global_load_b32 v1, v0, s[2:3]
857857
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s4
858+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h
858859
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
859-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
860-
; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1]
860+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
861+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
862+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v1
863+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
864+
; GFX11-TRUE16-NEXT: global_store_b32 v0, v2, s[0:1]
861865
; GFX11-TRUE16-NEXT: s_endpgm
862866
;
863867
; GFX11-FAKE16-LABEL: v_insertelement_v2i16_0_reghi:

0 commit comments

Comments
 (0)