Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -4192,6 +4192,15 @@ class TargetLowering : public TargetLoweringBase {
/// results of this function, because simply replacing TLO.Old
/// with TLO.New will be incorrect when this parameter is true and TLO.Old
/// has multiple uses.
/// Vector elements that aren't demanded can be turned into poison unless the
/// corresponding bit in the \p DoNotPoisonEltMask is set.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// corresponding bi in the \p DoNotPoisonEltMask is set.
/// corresponding bit in the \p DoNotPoisonEltMask is set.

bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
const APInt &DoNotPoisonEltMask,
APInt &KnownUndef, APInt &KnownZero,
TargetLoweringOpt &TLO, unsigned Depth = 0,
bool AssumeSingleUse = false) const;
/// Version of SimplifyDemandedVectorElts without the DoNotPoisonEltMask
/// argument. All undemanded elements can be turned into poison.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
APInt &KnownUndef, APInt &KnownZero,
TargetLoweringOpt &TLO, unsigned Depth = 0,
Expand Down Expand Up @@ -4269,7 +4278,8 @@ class TargetLowering : public TargetLoweringBase {
/// (used to simplify the caller). The KnownUndef/Zero elements may only be
/// accurate for those bits in the DemandedMask.
virtual bool SimplifyDemandedVectorEltsForTargetNode(
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
SDValue Op, const APInt &DemandedElts, const APInt &DoNotPoisonEltMask,
APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;

/// Attempt to simplify any target nodes based on the demanded bits/elts,
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,9 @@ bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownUndef, KnownZero;
if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
APInt DoNotPoisonElts = APInt::getZero(DemandedElts.getBitWidth());
if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, DoNotPoisonElts,
KnownUndef, KnownZero,
TLO, 0, AssumeSingleUse))
return false;

Expand Down
190 changes: 129 additions & 61 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Large diffs are not rendered by default.

116 changes: 77 additions & 39 deletions llvm/lib/Target/X86/X86ISelLowering.cpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1302,6 +1302,7 @@ namespace llvm {

bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
const APInt &DemandedElts,
const APInt &DoNotPoisonElts,
APInt &KnownUndef,
APInt &KnownZero,
TargetLoweringOpt &TLO,
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/dagcomb-mullohi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,9 @@ define i32 @mul_one_bit_hi_hi_u32_lshr_ashr(i32 %arg, i32 %arg1, ptr %arg2) {
; CHECK-LABEL: mul_one_bit_hi_hi_u32_lshr_ashr:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v0, 0
; CHECK-NEXT: v_mul_hi_u32 v6, v1, v0
; CHECK-NEXT: v_ashrrev_i64 v[0:1], 33, v[4:5]
; CHECK-NEXT: flat_store_dword v[2:3], v6
; CHECK-NEXT: v_mul_hi_u32 v4, v1, v0
; CHECK-NEXT: v_ashrrev_i64 v[0:1], 33, v[3:4]
; CHECK-NEXT: flat_store_dword v[2:3], v4
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
Expand Down
Loading