Skip to content

Commit 0dc732d

Browse files
[LLVM][InstCombine] Extend masked_gather's demanded elt analysis.
Add support for other Constant types for the mask operand.
1 parent 20293eb commit 0dc732d

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1834,14 +1834,22 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
18341834
// segfaults which didn't exist in the original program.
18351835
APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
18361836
DemandedPassThrough(DemandedElts);
1837-
if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
1838-
for (unsigned i = 0; i < VWidth; i++) {
1839-
Constant *CElt = CV->getAggregateElement(i);
1840-
if (CElt->isNullValue())
1841-
DemandedPtrs.clearBit(i);
1842-
else if (CElt->isAllOnesValue())
1843-
DemandedPassThrough.clearBit(i);
1837+
if (auto *CMask = dyn_cast<Constant>(II->getOperand(2))) {
1838+
if (CMask->isNullValue())
1839+
DemandedPtrs.clearAllBits();
1840+
else if (CMask->isAllOnesValue())
1841+
DemandedPassThrough.clearAllBits();
1842+
else if (auto *CV = dyn_cast<ConstantVector>(CMask)) {
1843+
for (unsigned i = 0; i < VWidth; i++) {
1844+
Constant *CElt = CV->getAggregateElement(i);
1845+
if (CElt->isNullValue())
1846+
DemandedPtrs.clearBit(i);
1847+
else if (CElt->isAllOnesValue())
1848+
DemandedPassThrough.clearBit(i);
1849+
}
18441850
}
1851+
}
1852+
18451853
if (II->getIntrinsicID() == Intrinsic::masked_gather)
18461854
simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
18471855
simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3);

llvm/test/Transforms/InstCombine/masked_intrinsics.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
3+
; RUN: opt -passes=instcombine -use-constant-int-for-fixed-length-splat -S < %s | FileCheck %s
34

45
declare <2 x double> @llvm.masked.load.v2f64.p0(ptr %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
56
declare void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptrs, i32, <2 x i1> %mask)

0 commit comments

Comments
 (0)