Skip to content

Commit bbc9f77

Browse files
committed
Address review comments
1 parent 9971bf8 commit bbc9f77

File tree

3 files changed

+45
-19
lines changed

3 files changed

+45
-19
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7652,6 +7652,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
76527652
case Intrinsic::amdgcn_s_buffer_prefetch_data:
76537653
return legalizeSBufferPrefetch(Helper, MI);
76547654
case Intrinsic::amdgcn_dead: {
7655+
// TODO: Use poison instead of undef
76557656
for (const MachineOperand &Def : MI.defs())
76567657
B.buildUndef(Def);
76577658
MI.eraseFromParent();

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6631,7 +6631,7 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
66316631
}
66326632
case Intrinsic::amdgcn_dead: {
66336633
for (unsigned I = 0, E = N->getNumValues(); I < E; ++I)
6634-
Results.push_back(DAG.getUNDEF(N->getValueType(I)));
6634+
Results.push_back(DAG.getPOISON(N->getValueType(I)));
66356635
return;
66366636
}
66376637
}
@@ -9122,10 +9122,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
91229122
case Intrinsic::amdgcn_update_dpp:
91239123
return lowerLaneOp(*this, Op.getNode(), DAG);
91249124
case Intrinsic::amdgcn_dead: {
9125-
SmallVector<SDValue, 8> Undefs;
9125+
SmallVector<SDValue, 8> Poisons;
91269126
for (unsigned I = 0, E = Op.getNode()->getNumValues(); I != E; ++I)
9127-
Undefs.push_back(DAG.getUNDEF(Op.getNode()->getValueType(I)));
9128-
return DAG.getMergeValues(Undefs, SDLoc(Op));
9127+
Poisons.push_back(DAG.getPOISON(Op.getNode()->getValueType(I)));
9128+
return DAG.getMergeValues(Poisons, SDLoc(Op));
91299129
}
91309130
default:
91319131
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll

Lines changed: 40 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -341,7 +341,7 @@ if.end:
341341
ret [32 x i32] %res
342342
}
343343

344-
%non_trivial_types = type { i8, i16, half, bfloat, <2 x i16>, <2 x half>, <2 x bfloat>, <5 x i32>, i128}
344+
%non_trivial_types = type { i8, i16, half, bfloat, <2 x i16>, <2 x half>, <2 x bfloat>, <5 x i32>, i128, <7 x i16>}
345345

346346
define %non_trivial_types @dead_non_trivial(i1 %cond, %non_trivial_types %x, ptr addrspace(1) %ptr1, i32 %v) #0 {
347347
; ASM-DAG-LABEL: dead_non_trivial:
@@ -351,15 +351,15 @@ define %non_trivial_types @dead_non_trivial(i1 %cond, %non_trivial_types %x, ptr
351351
; ASM-DAG-NEXT: s_wait_samplecnt 0x0
352352
; ASM-DAG-NEXT: s_wait_bvhcnt 0x0
353353
; ASM-DAG-NEXT: s_wait_kmcnt 0x0
354-
; ASM-DAG-NEXT: v_mov_b32_e32 v20, v0
354+
; ASM-DAG-NEXT: v_mov_b32_e32 v24, v0
355355
; ASM-DAG-NEXT: v_mov_b32_e32 v0, v1
356356
; ASM-DAG-NEXT: s_mov_b32 s0, exec_lo
357357
; ASM-DAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
358-
; ASM-DAG-NEXT: v_and_b32_e32 v1, 1, v20
358+
; ASM-DAG-NEXT: v_and_b32_e32 v1, 1, v24
359359
; ASM-DAG-NEXT: v_cmpx_eq_u32_e32 1, v1
360360
; ASM-DAG-NEXT: s_cbranch_execz .LBB3_2
361361
; ASM-DAG-NEXT: ; %bb.1: ; %if.then
362-
; ASM-DAG-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_add_nc_u32 v0, 15, v19
362+
; ASM-DAG-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_add_nc_u32 v0, 15, v23
363363
; ASM-DAG-NEXT: v_mov_b32_e32 v3, 0x3e00
364364
; ASM-DAG-NEXT: ; implicit-def: $vgpr2
365365
; ASM-DAG-NEXT: ; implicit-def: $vgpr4
@@ -368,7 +368,8 @@ define %non_trivial_types @dead_non_trivial(i1 %cond, %non_trivial_types %x, ptr
368368
; ASM-DAG-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12
369369
; ASM-DAG-NEXT: ; implicit-def: $vgpr13_vgpr14
370370
; ASM-DAG-NEXT: ; implicit-def: $vgpr15_vgpr16
371-
; ASM-DAG-NEXT: global_store_b32 v[17:18], v0, off
371+
; ASM-DAG-NEXT: ; implicit-def: $vgpr17_vgpr18_vgpr19_vgpr20
372+
; ASM-DAG-NEXT: global_store_b32 v[21:22], v0, off
372373
; ASM-DAG-NEXT: ; implicit-def: $vgpr0
373374
; ASM-DAG-NEXT: .LBB3_2: ; %if.end
374375
; ASM-DAG-NEXT: s_wait_alu 0xfffe
@@ -380,7 +381,9 @@ define %non_trivial_types @dead_non_trivial(i1 %cond, %non_trivial_types %x, ptr
380381
; ASM-DAG-NEXT: v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
381382
; ASM-DAG-NEXT: v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
382383
; ASM-DAG-NEXT: v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
383-
; ASM-DAG-NEXT: v_mov_b32_e32 v15, v16
384+
; ASM-DAG-NEXT: v_dual_mov_b32 v15, v16 :: v_dual_mov_b32 v16, v17
385+
; ASM-DAG-NEXT: v_dual_mov_b32 v17, v18 :: v_dual_mov_b32 v18, v19
386+
; ASM-DAG-NEXT: v_mov_b32_e32 v19, v20
384387
; ASM-DAG-NEXT: s_setpc_b64 s[30:31]
385388
;
386389
; ASM-GISEL-LABEL: dead_non_trivial:
@@ -390,38 +393,60 @@ define %non_trivial_types @dead_non_trivial(i1 %cond, %non_trivial_types %x, ptr
390393
; ASM-GISEL-NEXT: s_wait_samplecnt 0x0
391394
; ASM-GISEL-NEXT: s_wait_bvhcnt 0x0
392395
; ASM-GISEL-NEXT: s_wait_kmcnt 0x0
393-
; ASM-GISEL-NEXT: v_mov_b32_e32 v20, v0
396+
; ASM-GISEL-NEXT: v_mov_b32_e32 v24, v0
394397
; ASM-GISEL-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v2
395398
; ASM-GISEL-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
396399
; ASM-GISEL-NEXT: v_dual_mov_b32 v4, v5 :: v_dual_mov_b32 v5, v6
397-
; ASM-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_1)
398-
; ASM-GISEL-NEXT: v_dual_mov_b32 v6, v7 :: v_dual_and_b32 v7, 1, v20
399-
; ASM-GISEL-NEXT: s_mov_b32 s0, exec_lo
400-
; ASM-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v7
400+
; ASM-GISEL-NEXT: v_dual_mov_b32 v6, v7 :: v_dual_mov_b32 v7, v19
401+
; ASM-GISEL-NEXT: v_dual_mov_b32 v19, v20 :: v_dual_and_b32 v20, 1, v24
402+
; ASM-GISEL-NEXT: v_lshrrev_b32_e32 v24, 16, v18
403+
; ASM-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
404+
; ASM-GISEL-NEXT: v_lshrrev_b32_e32 v25, 16, v7
405+
; ASM-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v20
406+
; ASM-GISEL-NEXT: v_lshrrev_b32_e32 v20, 16, v17
407+
; ASM-GISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
401408
; ASM-GISEL-NEXT: s_cbranch_execz .LBB3_2
402409
; ASM-GISEL-NEXT: ; %bb.1: ; %if.then
403410
; ASM-GISEL-NEXT: s_movk_i32 s1, 0x3e00
404411
; ASM-GISEL-NEXT: s_mov_b32 s2, 0
405-
; ASM-GISEL-NEXT: v_add_nc_u32_e32 v0, 15, v19
406412
; ASM-GISEL-NEXT: s_wait_alu 0xfffe
413+
; ASM-GISEL-NEXT: s_lshr_b32 s3, s0, 16
414+
; ASM-GISEL-NEXT: s_lshr_b32 s4, s0, 16
415+
; ASM-GISEL-NEXT: s_lshr_b32 s5, s0, 16
416+
; ASM-GISEL-NEXT: s_wait_alu 0xfffe
417+
; ASM-GISEL-NEXT: v_dual_mov_b32 v25, s5 :: v_dual_add_nc_u32 v0, 15, v23
407418
; ASM-GISEL-NEXT: v_mov_b32_e32 v2, s1
408419
; ASM-GISEL-NEXT: v_mov_b32_e32 v6, s2
420+
; ASM-GISEL-NEXT: v_mov_b32_e32 v20, s3
421+
; ASM-GISEL-NEXT: v_mov_b32_e32 v24, s4
422+
; ASM-GISEL-NEXT: global_store_b32 v[21:22], v0, off
423+
; ASM-GISEL-NEXT: ; implicit-def: $vgpr0
409424
; ASM-GISEL-NEXT: ; implicit-def: $vgpr1
410425
; ASM-GISEL-NEXT: ; implicit-def: $vgpr3
411426
; ASM-GISEL-NEXT: ; implicit-def: $vgpr4
412427
; ASM-GISEL-NEXT: ; implicit-def: $vgpr5
413428
; ASM-GISEL-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12
414429
; ASM-GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14_vgpr15_vgpr16
415-
; ASM-GISEL-NEXT: global_store_b32 v[17:18], v0, off
416-
; ASM-GISEL-NEXT: ; implicit-def: $vgpr0
430+
; ASM-GISEL-NEXT: ; implicit-def: $vgpr17
431+
; ASM-GISEL-NEXT: ; implicit-def: $vgpr18
432+
; ASM-GISEL-NEXT: ; implicit-def: $vgpr7
433+
; ASM-GISEL-NEXT: ; implicit-def: $vgpr19
417434
; ASM-GISEL-NEXT: .LBB3_2: ; %if.end
418435
; ASM-GISEL-NEXT: s_wait_alu 0xfffe
419436
; ASM-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
437+
; ASM-GISEL-NEXT: v_and_b32_e32 v17, 0xffff, v17
438+
; ASM-GISEL-NEXT: v_and_b32_e32 v18, 0xffff, v18
439+
; ASM-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
440+
; ASM-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
441+
; ASM-GISEL-NEXT: v_lshl_or_b32 v20, v20, 16, v17
442+
; ASM-GISEL-NEXT: v_lshl_or_b32 v17, v24, 16, v18
443+
; ASM-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3)
444+
; ASM-GISEL-NEXT: v_lshl_or_b32 v18, v25, 16, v7
420445
; ASM-GISEL-NEXT: v_dual_mov_b32 v7, v8 :: v_dual_mov_b32 v8, v9
421446
; ASM-GISEL-NEXT: v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
422447
; ASM-GISEL-NEXT: v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
423448
; ASM-GISEL-NEXT: v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
424-
; ASM-GISEL-NEXT: v_mov_b32_e32 v15, v16
449+
; ASM-GISEL-NEXT: v_dual_mov_b32 v15, v16 :: v_dual_mov_b32 v16, v20
425450
; ASM-GISEL-NEXT: s_setpc_b64 s[30:31]
426451
entry:
427452
br i1 %cond, label %if.then, label %if.end

0 commit comments

Comments
 (0)