Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 6 additions & 83 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16803,15 +16803,11 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
// creating a cycle in a DAG. Let's undo that by mutating the freeze.
assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
DAG.UpdateNodeOperands(N, N0);
// Revisit the node.
AddToWorklist(N);
return FrozenN0;
}

// We currently avoid folding freeze over SRA/SRL, due to the problems seen
// with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
// example https://reviews.llvm.org/D136529#4120959.
if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
return SDValue();

// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
// Try to push freeze through instructions that propagate but don't produce
// poison as far as possible. If an operand of freeze follows three
Expand All @@ -16824,18 +16820,6 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
N0->getNumValues() != 1 || !N0->hasOneUse())
return SDValue();

// TOOD: we should always allow multiple operands, however this increases the
// likelihood of infinite loops due to the ReplaceAllUsesOfValueWith call
// below causing later nodes that share frozen operands to fold again and no
// longer being able to confirm other operands are not poison due to recursion
// depth limits on isGuaranteedNotToBeUndefOrPoison.
bool AllowMultipleMaybePoisonOperands =
N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
N0.getOpcode() == ISD::BUILD_VECTOR ||
N0.getOpcode() == ISD::BUILD_PAIR ||
N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;

// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
// ones" or "constant" into something that depends on FrozenUndef. We can
// instead pick undef values to keep those properties, while at the same time
Expand All @@ -16856,74 +16840,13 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
}
}

SmallSet<SDValue, 8> MaybePoisonOperands;
SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
for (auto [OpNo, Op] : enumerate(N0->ops())) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
continue;
bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
if (IsNewMaybePoisonOperand)
MaybePoisonOperandNumbers.push_back(OpNo);
if (!HadMaybePoisonOperands)
continue;
if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
// Multiple maybe-poison ops when not allowed - bail out.
return SDValue();
}
}
// NOTE: the whole op may be not guaranteed to not be undef or poison because
// it could create undef or poison due to it's poison-generating flags.
// So not finding any maybe-poison operands is fine.

for (unsigned OpNo : MaybePoisonOperandNumbers) {
// N0 can mutate during iteration, so make sure to refetch the maybe poison
// operands via the operand numbers. The typical scenario is that we have
// something like this
// t262: i32 = freeze t181
// t150: i32 = ctlz_zero_undef t262
// t184: i32 = ctlz_zero_undef t181
// t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
// When freezing the t181 operand we get t262 back, and then the
// ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
// also recursively replace t184 by t150.
SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
// Don't replace every single UNDEF everywhere with frozen UNDEF, though.
if (MaybePoisonOperand.isUndef())
continue;
// First, freeze each offending operand.
SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
// Then, change all other uses of unfrozen operand to use frozen operand.
DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
// But, that also updated the use in the freeze we just created, thus
// creating a cycle in a DAG. Let's undo that by mutating the freeze.
DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
MaybePoisonOperand);
}

// This node has been merged with another.
if (N->getOpcode() == ISD::DELETED_NODE)
return SDValue(N, 0);
}

assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");

// The whole node may have been updated, so the value we were holding
// may no longer be valid. Re-fetch the operand we're `freeze`ing.
N0 = N->getOperand(0);
// Collect and freeze all operands.
SmallVector<SDValue> Ops(N0->ops());
for (auto &Op : Ops)
Op = DAG.getFreeze(Op);

// Finally, recreate the node, it's operands were updated to use
// frozen operands, so we just need to use it's "original" operands.
SmallVector<SDValue> Ops(N0->ops());
// TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
// leave for a future patch.
for (SDValue &Op : Ops) {
if (Op.isUndef())
Op = DAG.getFreeze(Op);
}

SDLoc DL(N0);

// Special case handling for ShuffleVectorSDNode nodes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) {
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -211,7 +211,7 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) {
; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v7
; GFX-950-NEXT: v_cndmask_b32_e64 v2, -1, 1, s[2:3]
; GFX-950-NEXT: v_add_u32_e32 v2, v6, v2
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
; GFX-950-NEXT: s_or_b64 vcc, vcc, s[0:1]
; GFX-950-NEXT: v_cvt_f32_f64_e32 v5, v[0:1]
; GFX-950-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[2:3], v5
Expand Down Expand Up @@ -225,7 +225,7 @@ define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) {
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_add_nc_u32_e32 v0, v9, v0
; GFX1250-NEXT: v_cmp_eq_u32_e64 s2, 1, v11
; GFX1250-NEXT: s_or_b32 vcc_lo, s1, vcc_lo
; GFX1250-NEXT: s_or_b32 vcc_lo, vcc_lo, s1
; GFX1250-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo
; GFX1250-NEXT: s_or_b32 vcc_lo, s2, s0
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo
Expand Down
62 changes: 33 additions & 29 deletions llvm/test/CodeGen/AMDGPU/div_i128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -437,30 +437,33 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
; GFX9-O0-NEXT: s_mov_b64 s[14:15], -1
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
; GFX9-O0-NEXT: s_mov_b32 s14, s13
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
Expand All @@ -474,17 +477,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[12:13]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[12:13]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9]
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[12:13]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9]
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
Expand Down Expand Up @@ -977,10 +977,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
; GFX9-O0-NEXT: s_mov_b32 s5, s6
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
Expand Down Expand Up @@ -2564,17 +2564,20 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
Expand All @@ -2587,6 +2590,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
; GFX9-O0-NEXT: s_mov_b32 s14, s13
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
Expand Down Expand Up @@ -3100,10 +3104,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
; GFX9-O0-NEXT: s_mov_b32 s5, s6
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,13 @@ define amdgpu_kernel void @uniform_trunc_i16_to_i1(ptr addrspace(1) %out, i16 %x
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3
; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_LOAD_DWORD_IMM]]
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_2]], implicit-def dead $scc
; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LSHR_B32_]]
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY3]], implicit-def dead $scc
; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]]
; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY3]]
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 65536, [[COPY3]], implicit-def dead $scc
; GCN-NEXT: S_CMP_LG_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $scc
; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN-NEXT: S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_3]], implicit-def $scc
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN-NEXT: S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_2]], implicit-def $scc
; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc
; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY5]], killed [[COPY4]], implicit-def dead $scc
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
Expand Down
28 changes: 9 additions & 19 deletions llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1435,25 +1435,15 @@ define i128 @fptoui_f32_to_i128(float %x) {
}

define i128 @fptosi_f16_to_i128(half %x) {
; SDAG-LABEL: fptosi_f16_to_i128:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0
; SDAG-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; SDAG-NEXT: v_mov_b32_e32 v3, v2
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: fptosi_f16_to_i128:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0
; GISEL-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GISEL-NEXT: v_mov_b32_e32 v2, v1
; GISEL-NEXT: v_mov_b32_e32 v3, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: fptosi_f16_to_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
; GCN-NEXT: v_cvt_i32_f32_e32 v0, v0
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: v_mov_b32_e32 v3, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
%cvt = fptosi half %x to i128
ret i128 %cvt
}
Expand Down
Loading