Skip to content

Commit bf5e288

Browse files
committed
handle G_UNMERGE_VALUES inst
1 parent 99e497a commit bf5e288

File tree

2 files changed

+35
-26
lines changed

2 files changed

+35
-26
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4377,6 +4377,14 @@ static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) {
43774377
return false;
43784378
}
43794379

4380+
static bool isUnmergeHalf(const MachineInstr *MI,
4381+
const MachineRegisterInfo &MRI) {
4382+
if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4383+
return false;
4384+
return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
4385+
MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();
4386+
}
4387+
43804388
static std::optional<std::pair<const MachineOperand *, SrcStatus>>
43814389
retOpRegStat(const MachineOperand *Op, SrcStatus Stat,
43824390
std::pair<const MachineOperand *, SrcStatus> &Curr) {
@@ -4413,6 +4421,14 @@ static TypeClass isVectorOfTwoOrScalar(const MachineOperand *Op,
44134421
return TypeClass::NONE_OF_LISTED;
44144422
}
44154423

4424+
static bool isSameOperand(const MachineOperand *Op1,
4425+
const MachineOperand *Op2) {
4426+
if (Op1->isReg())
4427+
return Op2->isReg() && Op1->getReg() == Op2->getReg();
4428+
4429+
return Op1->isIdenticalTo(*Op2);
4430+
}
4431+
44164432
static SrcStatus getNegStatus(const MachineOperand *Op, SrcStatus S,
44174433
const MachineRegisterInfo &MRI) {
44184434
TypeClass NegType = isVectorOfTwoOrScalar(Op, MRI);
@@ -4599,6 +4615,12 @@ calcNextStatus(std::pair<const MachineOperand *, SrcStatus> Curr,
45994615
case SrcStatus::IS_SAME:
46004616
if (isTruncHalf(MI, MRI))
46014617
return retOpRegStat(&MI->getOperand(1), SrcStatus::IS_LOWER_HALF, Curr);
4618+
else if (isUnmergeHalf(MI, MRI)) {
4619+
if (isSameOperand(Curr.first, &MI->getOperand(0)))
4620+
return retOpRegStat(&MI->getOperand(2), SrcStatus::IS_LOWER_HALF, Curr);
4621+
else
4622+
return retOpRegStat(&MI->getOperand(2), SrcStatus::IS_UPPER_HALF, Curr);
4623+
}
46024624
break;
46034625
case SrcStatus::IS_HI_NEG:
46044626
if (isTruncHalf(MI, MRI)) {
@@ -4610,6 +4632,13 @@ calcNextStatus(std::pair<const MachineOperand *, SrcStatus> Curr,
46104632
// = -OpLower
46114633
return retOpRegStat(&MI->getOperand(1), SrcStatus::IS_LOWER_HALF_NEG,
46124634
Curr);
4635+
} else if (isUnmergeHalf(MI, MRI)) {
4636+
if (isSameOperand(Curr.first, &MI->getOperand(0)))
4637+
return retOpRegStat(&MI->getOperand(2), SrcStatus::IS_LOWER_HALF_NEG,
4638+
Curr);
4639+
else
4640+
return retOpRegStat(&MI->getOperand(2), SrcStatus::IS_UPPER_HALF_NEG,
4641+
Curr);
46134642
}
46144643
break;
46154644
case SrcStatus::IS_UPPER_HALF:
@@ -4717,14 +4746,6 @@ static bool isSameBitWidth(const MachineOperand *Op1, const MachineOperand *Op2,
47174746
return Width1 == Width2;
47184747
}
47194748

4720-
static bool isSameOperand(const MachineOperand *Op1,
4721-
const MachineOperand *Op2) {
4722-
if (Op1->isReg())
4723-
return Op2->isReg() && Op1->getReg() == Op2->getReg();
4724-
4725-
return Op1->isIdenticalTo(*Op2);
4726-
}
4727-
47284749
static unsigned updateMods(SrcStatus HiStat, SrcStatus LoStat, unsigned Mods) {
47294750
// SrcStatus::IS_LOWER_HALF remain 0.
47304751
if (HiStat == SrcStatus::IS_UPPER_HALF_NEG) {

llvm/test/CodeGen/AMDGPU/packed-fp32.ll

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1920,9 +1920,7 @@ define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspa
19201920
; PACKED-GISEL-NEXT: ds_read_b32 v5, v5 offset:8
19211921
; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
19221922
; PACKED-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
1923-
; PACKED-GISEL-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
1924-
; PACKED-GISEL-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
1925-
; PACKED-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], v[2:3], v[4:5]
1923+
; PACKED-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] neg_hi:[0,0,1]
19261924
; PACKED-GISEL-NEXT: v_mov_b32_e32 v2, 0
19271925
; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
19281926
; PACKED-GISEL-NEXT: s_endpgm
@@ -1984,9 +1982,7 @@ define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(
19841982
; PACKED-GISEL-NEXT: ds_read_b64 v[2:3], v2 offset:8
19851983
; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
19861984
; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1987-
; PACKED-GISEL-NEXT: v_mov_b32_e32 v4, v3
1988-
; PACKED-GISEL-NEXT: v_mov_b32_e32 v5, v2
1989-
; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[4:5]
1985+
; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[2:3] op_sel:[0,1] op_sel_hi:[1,0]
19901986
; PACKED-GISEL-NEXT: v_mov_b32_e32 v2, 0
19911987
; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
19921988
; PACKED-GISEL-NEXT: s_endpgm
@@ -2048,12 +2044,8 @@ define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrsp
20482044
; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
20492045
; PACKED-GISEL-NEXT: ds_read_b64 v[2:3], v2 offset:8
20502046
; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2051-
; PACKED-GISEL-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
2052-
; PACKED-GISEL-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
2053-
; PACKED-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
2054-
; PACKED-GISEL-NEXT: v_mov_b32_e32 v4, v3
2055-
; PACKED-GISEL-NEXT: v_mov_b32_e32 v5, v2
2056-
; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[4:5]
2047+
; PACKED-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1] neg_lo:[0,1] neg_hi:[0,1]
2048+
; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[2:3] op_sel:[0,1] op_sel_hi:[1,0]
20572049
; PACKED-GISEL-NEXT: v_mov_b32_e32 v2, 0
20582050
; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
20592051
; PACKED-GISEL-NEXT: s_endpgm
@@ -2283,9 +2275,7 @@ define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
22832275
; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
22842276
; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
22852277
; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0)
2286-
; PACKED-GISEL-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
2287-
; PACKED-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
2288-
; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
2278+
; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1] neg_lo:[0,1] neg_hi:[0,1]
22892279
; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
22902280
; PACKED-GISEL-NEXT: s_endpgm
22912281
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -2326,9 +2316,7 @@ define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x
23262316
; PACKED-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
23272317
; PACKED-GISEL-NEXT: v_mov_b32_e32 v2, 0
23282318
; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2329-
; PACKED-GISEL-NEXT: s_xor_b32 s2, s2, 0x80000000
2330-
; PACKED-GISEL-NEXT: s_xor_b32 s3, s3, 0x80000000
2331-
; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, s[2:3] op_sel_hi:[0,1]
2319+
; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, s[2:3] op_sel_hi:[0,1] neg_lo:[0,1] neg_hi:[0,1]
23322320
; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
23332321
; PACKED-GISEL-NEXT: s_endpgm
23342322
%fneg = fsub <2 x float> <float -0.0, float -0.0>, %x

0 commit comments

Comments
 (0)