Skip to content

Commit 9971bf8

Browse files
committed
[AMDGPU] Support arbitrary types in amdgcn.dead
Legalize the amdgcn.dead intrinsic so that it works with types other than i32. It still generates IMPLICIT_DEFs. Remove the previous code that selected it and assigned its register bank for 32-bit types only, since everything is done in the legalizer now.
1 parent 83fbe67 commit 9971bf8

File tree

7 files changed

+430
-16
lines changed

7 files changed

+430
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,12 +1191,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
11911191
case Intrinsic::amdgcn_permlane16_swap:
11921192
case Intrinsic::amdgcn_permlane32_swap:
11931193
return selectPermlaneSwapIntrin(I, IntrinsicID);
1194-
case Intrinsic::amdgcn_dead: {
1195-
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
1196-
I.removeOperand(1); // drop intrinsic ID
1197-
return RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1198-
AMDGPU::VGPR_32RegClass, *MRI);
1199-
}
12001194
default:
12011195
return selectImpl(I, *CoverageInfo);
12021196
}

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7651,6 +7651,12 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
76517651
return legalizeLaneOp(Helper, MI, IntrID);
76527652
case Intrinsic::amdgcn_s_buffer_prefetch_data:
76537653
return legalizeSBufferPrefetch(Helper, MI);
7654+
case Intrinsic::amdgcn_dead: {
7655+
for (const MachineOperand &Def : MI.defs())
7656+
B.buildUndef(Def);
7657+
MI.eraseFromParent();
7658+
return true;
7659+
}
76547660
default: {
76557661
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
76567662
AMDGPU::getImageDimIntrinsicInfo(IntrID))

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4701,7 +4701,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
47014701
case Intrinsic::amdgcn_set_inactive_chain_arg:
47024702
case Intrinsic::amdgcn_permlane64:
47034703
case Intrinsic::amdgcn_ds_bpermute_fi_b32:
4704-
case Intrinsic::amdgcn_dead:
47054704
return getDefaultMappingAllVGPR(MI);
47064705
case Intrinsic::amdgcn_cvt_pkrtz:
47074706
if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6629,6 +6629,11 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
66296629
Results.push_back(LoadVal);
66306630
return;
66316631
}
6632+
case Intrinsic::amdgcn_dead: {
6633+
for (unsigned I = 0, E = N->getNumValues(); I < E; ++I)
6634+
Results.push_back(DAG.getUNDEF(N->getValueType(I)));
6635+
return;
6636+
}
66326637
}
66336638
break;
66346639
}
@@ -9116,6 +9121,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
91169121
case Intrinsic::amdgcn_mov_dpp8:
91179122
case Intrinsic::amdgcn_update_dpp:
91189123
return lowerLaneOp(*this, Op.getNode(), DAG);
9124+
case Intrinsic::amdgcn_dead: {
9125+
SmallVector<SDValue, 8> Undefs;
9126+
for (unsigned I = 0, E = Op.getNode()->getNumValues(); I != E; ++I)
9127+
Undefs.push_back(DAG.getUNDEF(Op.getNode()->getValueType(I)));
9128+
return DAG.getMergeValues(Undefs, SDLoc(Op));
9129+
}
91199130
default:
91209131
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
91219132
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4484,9 +4484,3 @@ def V_ILLEGAL : Enc32, InstSI<(outs), (ins), "v_illegal"> {
44844484
let hasSideEffects = 1;
44854485
let SubtargetPredicate = isGFX10Plus;
44864486
}
4487-
4488-
// FIXME: Would be nice if we could set the register class for the destination
4489-
// register too.
4490-
def IMP_DEF_FROM_INTRINSIC: Pat<
4491-
(i32 (int_amdgcn_dead)), (IMPLICIT_DEF)>;
4492-
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amdpal -mcpu=gfx1200 -run-pass=legalizer %s -o - | FileCheck %s
3+
4+
---
5+
name: test_struct
6+
body: |
7+
bb.1.entry:
8+
9+
; CHECK-LABEL: name: test_struct
10+
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
11+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
12+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
13+
; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
14+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<3 x s32>)
15+
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
16+
; CHECK-NEXT: $vgpr1 = COPY [[UV]](s32)
17+
; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32)
18+
; CHECK-NEXT: $vgpr3 = COPY [[UV2]](s32)
19+
; CHECK-NEXT: $vgpr4_vgpr5 = COPY [[DEF2]](s64)
20+
; CHECK-NEXT: $vgpr6 = COPY [[DEF3]](<2 x s16>)
21+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
22+
%0:_(s32), %1:_(<3 x s32>), %2:_(s64), %3:_(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.dead)
23+
24+
%4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %1(<3 x s32>)
25+
$vgpr0 = COPY %0(s32)
26+
$vgpr1 = COPY %4(s32)
27+
$vgpr2 = COPY %5(s32)
28+
$vgpr3 = COPY %6(s32)
29+
$vgpr4_vgpr5 = COPY %2(s64)
30+
$vgpr6 = COPY %3(<2 x s16>)
31+
SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
32+
...

0 commit comments

Comments
 (0)