Skip to content

Commit b327962

Browse files
committed
[SelectionDAG] Add DoNotPoisonEltMask to SimplifyDemandedVectorElts
The fix for #138513 resulted in a number of regressions due to the need to demand elements corresponding to bits used by bitcasts even if those bits weren't used. The problem was that if we did not demand those elements, the calls to SimplifyDemandedVectorElts could end up turning those unused elements into poison, making the bitcast result poison. This patch tries to avoid such regressions by adding a new element mask ('DoNotPoisonEltMask') to SimplifyDemandedVectorElts that identifies elements that aren't really demanded but must not be made more poisonous during simplifications.
1 parent 5c0bd57 commit b327962

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+3316
-3680
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4263,6 +4263,15 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
42634263
/// results of this function, because simply replacing TLO.Old
42644264
/// with TLO.New will be incorrect when this parameter is true and TLO.Old
42654265
/// has multiple uses.
4266+
/// Vector elements that aren't demanded can be turned into poison unless the
4267+
/// corresponding bit in \p DoNotPoisonEltMask is set.
4268+
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
4269+
const APInt &DoNotPoisonEltMask,
4270+
APInt &KnownUndef, APInt &KnownZero,
4271+
TargetLoweringOpt &TLO, unsigned Depth = 0,
4272+
bool AssumeSingleUse = false) const;
4273+
/// Version of SimplifyDemandedVectorElts without the DoNotPoisonEltMask
4274+
/// argument. All undemanded elements can be turned into poison.
42664275
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
42674276
APInt &KnownUndef, APInt &KnownZero,
42684277
TargetLoweringOpt &TLO, unsigned Depth = 0,

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,8 +1466,10 @@ bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
14661466
bool AssumeSingleUse) {
14671467
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
14681468
APInt KnownUndef, KnownZero;
1469-
if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1470-
TLO, 0, AssumeSingleUse))
1469+
APInt DoNotPoisonElts = APInt::getZero(DemandedElts.getBitWidth());
1470+
if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, DoNotPoisonElts,
1471+
KnownUndef, KnownZero, TLO, 0,
1472+
AssumeSingleUse))
14711473
return false;
14721474

14731475
// Revisit the node.

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 171 additions & 101 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.make.buffer.rsrc.ll

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -188,29 +188,33 @@ define amdgpu_ps ptr addrspace(8) @variable_top_half(ptr inreg %p, i64 inreg %nu
188188
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 25
189189
; CHECK45-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 killed [[COPY5]], killed [[S_MOV_B32_]], implicit-def dead $scc
190190
; CHECK45-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
191-
; CHECK45-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, killed [[S_LSHL_B32_]], %subreg.sub1
191+
; CHECK45-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_LSHL_B32_]], %subreg.sub1
192192
; CHECK45-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[REG_SEQUENCE]], killed [[REG_SEQUENCE2]], implicit-def dead $scc
193193
; CHECK45-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1
194194
; CHECK45-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 28
195195
; CHECK45-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], killed [[S_MOV_B32_2]], implicit-def dead $scc
196-
; CHECK45-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, killed [[S_LSHL_B32_1]], %subreg.sub1
196+
; CHECK45-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
197+
; CHECK45-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
198+
; CHECK45-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[DEF]], %subreg.sub0, killed [[S_LSHL_B32_1]], %subreg.sub1
197199
; CHECK45-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 7
198200
; CHECK45-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[REG_SEQUENCE1]], killed [[S_MOV_B32_3]], implicit-def dead $scc
199201
; CHECK45-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_LSHR_B64_]], killed [[REG_SEQUENCE3]], implicit-def dead $scc
200-
; CHECK45-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 70368744177664
201-
; CHECK45-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY killed [[S_MOV_B]]
202-
; CHECK45-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_OR_B64_1]], killed [[COPY7]], implicit-def dead $scc
203-
; CHECK45-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_2]].sub1
204-
; CHECK45-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
205-
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
206-
; CHECK45-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
207-
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
208-
; CHECK45-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
209-
; CHECK45-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
210-
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
211-
; CHECK45-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_LSHR_B64_]].sub0
212-
; CHECK45-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
213-
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
202+
; CHECK45-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16384
203+
; CHECK45-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
204+
; CHECK45-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
205+
; CHECK45-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[DEF2]], %subreg.sub0, killed [[S_MOV_B32_4]], %subreg.sub1
206+
; CHECK45-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_OR_B64_1]], killed [[REG_SEQUENCE4]], implicit-def dead $scc
207+
; CHECK45-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_2]].sub1
208+
; CHECK45-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
209+
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY8]], implicit $exec
210+
; CHECK45-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
211+
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
212+
; CHECK45-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
213+
; CHECK45-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
214+
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY11]], implicit $exec
215+
; CHECK45-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_LSHR_B64_]].sub0
216+
; CHECK45-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
217+
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
214218
; CHECK45-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
215219
; CHECK45-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
216220
; CHECK45-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_3]]

0 commit comments

Comments
 (0)