-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[InstCombine] Extend bitmask mul combine to handle independent operands #142503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms Author: Jeffrey Byrnes (jrbyrnes) ChangesThis extends #136013 to capture cases where the combineable bitmask muls are nested under multiple or-disjoints. This PR is meant for commits starting at 8c403c912046505ffc10378560c2fc48f214af6a op1 = or-disjoint mul(and (X, C1), D) , reg1 -> temp1 = or-disjoint reg1, reg2 Case1: https://alive2.llvm.org/ce/z/dHApyV Full diff: https://github.com/llvm/llvm-project/pull/142503.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 59b46ebdb72e2..231b980506744 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3593,6 +3593,111 @@ static Value *foldOrOfInversions(BinaryOperator &I,
return nullptr;
}
+// A decomposition of ((A & N) ? 0 : N * C) . Where X = A, Factor = C, Mask = N.
+// The NUW / NSW bools
+// Note that we can decompose equivalent forms of this expression (e.g. ((A & N)
+// * C))
+struct DecomposedBitMaskMul {
+ Value *X;
+ APInt Factor;
+ APInt Mask;
+ bool NUW;
+ bool NSW;
+
+ bool isCombineableWith(DecomposedBitMaskMul Other) {
+ return X == Other.X && (Mask & Other.Mask).isZero() &&
+ Factor == Other.Factor;
+ }
+};
+
+static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
+ Instruction *Op = dyn_cast<Instruction>(V);
+ if (!Op)
+ return std::nullopt;
+
+ Value *MulOp = nullptr;
+ const APInt *MulConst = nullptr;
+
+ // Decompose (A & N) * C) into BitMaskMul
+ if (match(Op, m_Mul(m_Value(MulOp), m_APInt(MulConst)))) {
+ Value *Original = nullptr;
+ const APInt *Mask = nullptr;
+ if (MulConst->isZero())
+ return std::nullopt;
+
+ if (match(MulOp, m_And(m_Value(Original), m_APInt(Mask)))) {
+ if (Mask->isZero())
+ return std::nullopt;
+ return std::optional<DecomposedBitMaskMul>(
+ {Original, *MulConst, *Mask,
+ cast<BinaryOperator>(Op)->hasNoUnsignedWrap(),
+ cast<BinaryOperator>(Op)->hasNoSignedWrap()});
+ }
+ return std::nullopt;
+ }
+
+ Value *Cond = nullptr;
+ const APInt *EqZero = nullptr, *NeZero = nullptr;
+
+ // Decompose ((A & N) ? 0 : N * C) into BitMaskMul
+ if (match(Op, m_Select(m_Value(Cond), m_APInt(EqZero), m_APInt(NeZero)))) {
+ auto ICmpDecompose =
+ decomposeBitTest(Cond, /*LookThruTrunc=*/true,
+ /*AllowNonZeroC=*/false, /*DecomposeBitMask=*/true);
+ if (!ICmpDecompose.has_value())
+ return std::nullopt;
+
+ if (ICmpDecompose->Pred == ICmpInst::ICMP_NE)
+ std::swap(EqZero, NeZero);
+
+ if (!EqZero->isZero() || NeZero->isZero())
+ return std::nullopt;
+
+ if (!ICmpInst::isEquality(ICmpDecompose->Pred) ||
+ !ICmpDecompose->C.isZero() || !ICmpDecompose->Mask.isPowerOf2() ||
+ ICmpDecompose->Mask.isZero() ||
+ NeZero->getBitWidth() != ICmpDecompose->Mask.getBitWidth())
+ return std::nullopt;
+
+ if (!NeZero->urem(ICmpDecompose->Mask).isZero())
+ return std::nullopt;
+
+ return std::optional<DecomposedBitMaskMul>(
+ {ICmpDecompose->X, NeZero->udiv(ICmpDecompose->Mask),
+ ICmpDecompose->Mask, /*NUW=*/false, /*NSW=*/false});
+ }
+
+ return std::nullopt;
+}
+
+using CombinedBitmaskMul =
+ std::pair<std::optional<DecomposedBitMaskMul>, Value *>;
+
+static CombinedBitmaskMul matchCombinedBitmaskMul(Value *V) {
+ auto DecompBitMaskMul = matchBitmaskMul(V);
+ if (DecompBitMaskMul)
+ return {DecompBitMaskMul, nullptr};
+
+ // Otherwise, check the operands of V for bitmaskmul pattern
+ auto BOp = dyn_cast<BinaryOperator>(V);
+ if (!BOp)
+ return {std::nullopt, nullptr};
+
+ auto Disj = dyn_cast<PossiblyDisjointInst>(BOp);
+ if (!Disj || !Disj->isDisjoint())
+ return {std::nullopt, nullptr};
+
+ auto DecompBitMaskMul0 = matchBitmaskMul(BOp->getOperand(0));
+ if (DecompBitMaskMul0)
+ return {DecompBitMaskMul0, BOp->getOperand(1)};
+
+ auto DecompBitMaskMul1 = matchBitmaskMul(BOp->getOperand(1));
+ if (DecompBitMaskMul1)
+ return {DecompBitMaskMul1, BOp->getOperand(0)};
+
+ return {std::nullopt, nullptr};
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -3675,49 +3780,47 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
/*NSW=*/true, /*NUW=*/true))
return R;
- Value *Cond0 = nullptr, *Cond1 = nullptr;
- const APInt *Op0Eq = nullptr, *Op0Ne = nullptr;
- const APInt *Op1Eq = nullptr, *Op1Ne = nullptr;
-
- // (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
- if (match(I.getOperand(0),
- m_Select(m_Value(Cond0), m_APInt(Op0Eq), m_APInt(Op0Ne))) &&
- match(I.getOperand(1),
- m_Select(m_Value(Cond1), m_APInt(Op1Eq), m_APInt(Op1Ne)))) {
-
- auto LHSDecompose =
- decomposeBitTest(Cond0, /*LookThruTrunc=*/true,
- /*AllowNonZeroC=*/false, /*DecomposeAnd=*/true);
- auto RHSDecompose =
- decomposeBitTest(Cond1, /*LookThruTrunc=*/true,
- /*AllowNonZeroC=*/false, /*DecomposeAnd=*/true);
-
- if (LHSDecompose && RHSDecompose && LHSDecompose->X == RHSDecompose->X &&
- RHSDecompose->Mask.isPowerOf2() && LHSDecompose->Mask.isPowerOf2() &&
- LHSDecompose->Mask != RHSDecompose->Mask &&
- LHSDecompose->Mask.getBitWidth() == Op0Ne->getBitWidth() &&
- RHSDecompose->Mask.getBitWidth() == Op1Ne->getBitWidth()) {
- assert(Op0Ne->getBitWidth() == Op1Ne->getBitWidth());
- assert(ICmpInst::isEquality(LHSDecompose->Pred));
- if (LHSDecompose->Pred == ICmpInst::ICMP_NE)
- std::swap(Op0Eq, Op0Ne);
- if (RHSDecompose->Pred == ICmpInst::ICMP_NE)
- std::swap(Op1Eq, Op1Ne);
-
- if (!Op0Ne->isZero() && !Op1Ne->isZero() && Op0Eq->isZero() &&
- Op1Eq->isZero() && Op0Ne->urem(LHSDecompose->Mask).isZero() &&
- Op1Ne->urem(RHSDecompose->Mask).isZero() &&
- Op0Ne->udiv(LHSDecompose->Mask) ==
- Op1Ne->udiv(RHSDecompose->Mask)) {
- auto NewAnd = Builder.CreateAnd(
- LHSDecompose->X,
- ConstantInt::get(LHSDecompose->X->getType(),
- (LHSDecompose->Mask + RHSDecompose->Mask)));
-
- return BinaryOperator::CreateMul(
- NewAnd, ConstantInt::get(NewAnd->getType(),
- Op0Ne->udiv(LHSDecompose->Mask)));
+ // (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
+ // This also accepts the equivalent mul form of (A & N) ? 0 : N * C)
+ // expressions i.e. (A & N) * C
+ CombinedBitmaskMul Decomp1 = matchCombinedBitmaskMul(I.getOperand(1));
+ auto BMDecomp1 = Decomp1.first;
+
+ if (BMDecomp1) {
+ CombinedBitmaskMul Decomp0 = matchCombinedBitmaskMul(I.getOperand(0));
+ auto BMDecomp0 = Decomp0.first;
+
+ if (BMDecomp0 && BMDecomp0->isCombineableWith(*BMDecomp1)) {
+ auto NewAnd = Builder.CreateAnd(
+ BMDecomp0->X,
+ ConstantInt::get(BMDecomp0->X->getType(),
+ (BMDecomp0->Mask + BMDecomp1->Mask)));
+
+ BinaryOperator *Combined = cast<BinaryOperator>(Builder.CreateMul(
+ NewAnd, ConstantInt::get(NewAnd->getType(), BMDecomp1->Factor)));
+
+ Combined->setHasNoUnsignedWrap(BMDecomp0->NUW && BMDecomp1->NUW);
+ Combined->setHasNoSignedWrap(BMDecomp0->NSW && BMDecomp1->NSW);
+
+ // If our tree has indepdent or-disjoint operands, bring them in.
+ auto OtherOp0 = Decomp0.second;
+ auto OtherOp1 = Decomp1.second;
+
+ if (OtherOp0 || OtherOp1) {
+ Value *OtherOp;
+ if (OtherOp0 && OtherOp1) {
+ OtherOp = Builder.CreateOr(OtherOp0, OtherOp1);
+ cast<PossiblyDisjointInst>(OtherOp)->setIsDisjoint(true);
+ } else {
+ OtherOp = OtherOp0 ? OtherOp0 : OtherOp1;
+ }
+ Combined = cast<BinaryOperator>(Builder.CreateOr(Combined, OtherOp));
+ cast<PossiblyDisjointInst>(Combined)->setIsDisjoint(true);
}
+
+ // Caller expects detached instruction
+ Combined->removeFromParent();
+ return Combined;
}
}
}
diff --git a/llvm/test/Transforms/InstCombine/or-bitmask.ll b/llvm/test/Transforms/InstCombine/or-bitmask.ll
index 3b482dc1794db..8a2e328fa95cb 100644
--- a/llvm/test/Transforms/InstCombine/or-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/or-bitmask.ll
@@ -36,13 +36,9 @@ define i32 @add_select_cmp_and2(i32 %in) {
define i32 @add_select_cmp_and3(i32 %in) {
; CHECK-LABEL: @add_select_cmp_and3(
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
-; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT: [[BITOP2:%.*]] = and i32 [[IN]], 4
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[BITOP2]], 0
-; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
-; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[SEL2]]
-; CHECK-NEXT: ret i32 [[OUT]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 7
+; CHECK-NEXT: [[TEMP1:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: ret i32 [[TEMP1]]
;
%bitop0 = and i32 %in, 1
%cmp0 = icmp eq i32 %bitop0, 0
@@ -60,12 +56,9 @@ define i32 @add_select_cmp_and3(i32 %in) {
define i32 @add_select_cmp_and4(i32 %in) {
; CHECK-LABEL: @add_select_cmp_and4(
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
-; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN]], 12
-; CHECK-NEXT: [[TEMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
-; CHECK-NEXT: [[OUT1:%.*]] = or disjoint i32 [[OUT]], [[TEMP3]]
-; CHECK-NEXT: ret i32 [[OUT1]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TEMP2:%.*]] = mul nuw nsw i32 [[TMP2]], 72
+; CHECK-NEXT: ret i32 [[TEMP2]]
;
%bitop0 = and i32 %in, 1
%cmp0 = icmp eq i32 %bitop0, 0
@@ -361,6 +354,136 @@ define i64 @mask_select_types_1(i64 %in) {
ret i64 %out
}
+define i32 @add_select_cmp_mixed1(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_mixed1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %mask = and i32 %in, 1
+ %sel0 = mul i32 %mask, 72
+ %bitop1 = and i32 %in, 2
+ %cmp1 = icmp eq i32 %bitop1, 0
+ %sel1 = select i1 %cmp1, i32 0, i32 144
+ %out = or disjoint i32 %sel0, %sel1
+ ret i32 %out
+}
+
+define i32 @add_select_cmp_mixed2(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_mixed2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %bitop0 = and i32 %in, 1
+ %cmp0 = icmp eq i32 %bitop0, 0
+ %mask = and i32 %in, 2
+ %sel0 = select i1 %cmp0, i32 0, i32 72
+ %sel1 = mul i32 %mask, 72
+ %out = or disjoint i32 %sel0, %sel1
+ ret i32 %out
+}
+
+define i32 @add_select_cmp_and_mul(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_mul(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %mask0 = and i32 %in, 1
+ %sel0 = mul i32 %mask0, 72
+ %mask1 = and i32 %in, 2
+ %sel1 = mul i32 %mask1, 72
+ %out = or disjoint i32 %sel0, %sel1
+ ret i32 %out
+}
+
+define i32 @add_select_cmp_mixed2_mismatch(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_mixed2_mismatch(
+; CHECK-NEXT: [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
+; CHECK-NEXT: [[MASK:%.*]] = and i32 [[IN]], 2
+; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 73
+; CHECK-NEXT: [[SEL1:%.*]] = mul nuw nsw i32 [[MASK]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %bitop0 = and i32 %in, 1
+ %cmp0 = icmp eq i32 %bitop0, 0
+ %mask = and i32 %in, 2
+ %sel0 = select i1 %cmp0, i32 0, i32 73
+ %sel1 = mul i32 %mask, 72
+ %out = or disjoint i32 %sel0, %sel1
+ ret i32 %out
+}
+
+define i32 @add_select_cmp_and_mul_mismatch(i32 %in) {
+; CHECK-LABEL: @add_select_cmp_and_mul_mismatch(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[IN:%.*]] to i1
+; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[TMP1]], i32 73, i32 0
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[IN]], 2
+; CHECK-NEXT: [[SEL1:%.*]] = mul nuw nsw i32 [[MASK1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %mask0 = and i32 %in, 1
+ %sel0 = mul i32 %mask0, 73
+ %mask1 = and i32 %in, 2
+ %sel1 = mul i32 %mask1, 72
+ %out = or disjoint i32 %sel0, %sel1
+ ret i32 %out
+}
+
+define i32 @unrelated_ops(i32 %in, i32 %in2) {
+; CHECK-LABEL: @unrelated_ops(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp3 = or disjoint i32 %in2, %temp2
+ %out = or disjoint i32 %temp, %temp3
+ ret i32 %out
+}
+
+define i32 @unrelated_ops1(i32 %in, i32 %in2) {
+; CHECK-LABEL: @unrelated_ops1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[IN2:%.*]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp3 = or disjoint i32 %in2, %temp
+ %out = or disjoint i32 %temp3, %temp2
+ ret i32 %out
+}
+
+define i32 @unrelated_ops2(i32 %in, i32 %in2, i32 %in3) {
+; CHECK-LABEL: @unrelated_ops2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], 72
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[IN3:%.*]], [[IN2:%.*]]
+; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[OUT]]
+;
+ %1 = and i32 %in, 3
+ %temp = mul nuw nsw i32 %1, 72
+ %temp3 = or disjoint i32 %temp, %in3
+ %2 = and i32 %in, 12
+ %temp2 = mul nuw nsw i32 %2, 72
+ %temp4 = or disjoint i32 %in2, %temp2
+ %out = or disjoint i32 %temp3, %temp4
+ ret i32 %out
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CONSTSPLAT: {{.*}}
; CONSTVEC: {{.*}}
|
8c403c9 to
e9996d1
Compare
|
Rebase for #135274 |
|
Ping -- any concerns? |
Can we perform the reassociation first in Then this pattern should be handled by #136013. |
|
Okay -- the latest version reassociates chains with independent operands into a more combineable pattern (rather than doing the combine itself). We should only be doing this reassociation if we can find a bitmask mul operand on both sides, otherwise we may end up infinitely reassociating. Since we require the DecomposedBitMaskMul analysis, I've decided to do the reassocation in InstCombineAndOrXor where the analysis already exists. The implementation does seem a little simpler, but we now rely on future iterations of InstCombine to perform the optimization. Moreover, we still need to do the analysis to check for the conditions of reassociation. Thus, it is somewhat unclear if this change is a net positive. |
I don't think it is necessary to check the specific pattern. As these two muls share the same constant factor, it is always profitable to reassociate the tree. |
Sorry, did not see your comment originally. I think we can extend the reassociation pass to handle cases like https://godbolt.org/z/7vWePe1xK , but I don't think this will be sufficient for my usecase. The problem is #133139 (comment) In other words: is the canonical form of The premise of the work starting at #136013 is that if we are combining multiple of these forms under an or-disjoint, it is preferable to have a single and->mul. This PR and subsequent PRs (including the PR I'm currently commenting on) have transformed the select form into and->mul given conditions which make the transform unarguably beneficial, however, it is still very much possible that we have the select form. Thus, to be sufficient for my usecase, it seems that reassociation must be able to handle the case where one or more of the leaf forms are and->icmp->select (instead of and->mul). Presently, this is not done even for the add case https://godbolt.org/z/W1YxhrWa9 . Potentially, we could make this extension to the add case, then extend the or-disjoint to support all forms. However, if possible I would prefer to bring in something along the lines of what this PR is currently doing, potentially to be deleted after we improve reassociation -- this work has already been stalled for some time, and going the reassociation route seems like it would open a whole new body of work. |
I see that I did not make this explicitly clear in the description, nor did I cover it in any tests. Sorry about that. |
I wonder whether we should reconsider that decision. The transform is reversible, so if necessary we can have an undo transform from and+mul to and+icmp+select in the backend. On this PR, I have two notes:
The nice thing about this is that if we have more folds where we think it is worthwhile to perform them via reassociation, it's just a matter of adding them to tryFold(), we don't need to invent special handling for each one of them. |
Looking into the details -- turning out to be a larger refactor than I had thought. How do you feel about providing the functionality using the current implementation, then refactoring into reassociateBooleanAndOr as an extension? EDIT: posted what I have -- can revert / split-up as needed. |
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
You don't need to refactor |
|
Yes, I just meant the general code pattern, not to reuse that specific helper. |
|
Thanks for the clarification. The latest introduces These permutations are checked for I've tried to follow the implementation and caller pattern for Thus, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a parameter IsDisjoint = false to IRBuilder::CreateOr?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
|
Some test churn after the update as it was previously missing |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this needed for your motivating case? This is an unusual degree of ad-hoc reassociation, and I think more than your original implementation allowed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My original implementation captured this case. matchCombinedBitmaskMul would take an operand from an or-disjoint and try to matchBitmaskMul on it. If there was no such match, and the operand was itself an or-disjoint, it would try to find a matchBitmaskMul in either of the child operands. Thus, we could end up combining two grandchildren from the original or-disjoint.
However, rerunning the motivating workload through its default pipeline indicates that this set of permutations / reassociations is not necessary, and that we provide the optimization with the standard reassociations.
I think it's fine to just include the standard reassociations, and potentially extend something if we find we have left some on the table.
Change-Id: Ife1a010d2ae6df40549a6c73f7b893948befa3be
Change-Id: I879acdf0b17a7110286c6c375410300611c468eb
Change-Id: Ib86e8ed347ef60948c3e4cb44c5fab1c3667afc6
Change-Id: I950ee32ec053430fd51c7fd52645fe52e9e6ecff
Change-Id: I875b9fac4749b3f391efce47f8d3b9e2004de8c2
Change-Id: Ie86d0e58f7fdb2c0489d3dee3a41ef3911f9477b
Change-Id: I2c418b8e5bf7fed050ee77515a73fa4368a1ea7d
Change-Id: I172be21fe78361f4520a893d6c97c422accbf13f
Change-Id: I4937190c734c5adcdd73d81278d82a53aec4c4c9
Change-Id: I72b20a2695eb8de9ac24578729e77c0f3b3cd8aa
972262d to
8587a7c
Compare
|
Force push for rebase |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
Change-Id: I24f6316b71fffaadac0e1cd4a9158d985f184664
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Change-Id: I296804c2599419d4121ae4c88724ef09ddb5b4fd
This extends #136013 to capture cases where the combineable bitmask muls are nested under multiple or-disjoints.
This PR is meant for commits starting at 8c403c912046505ffc10378560c2fc48f214af6a
op1 = or-disjoint mul(and (X, C1), D) , reg1
op2 = or-disjoint mul(and (X, C2), D) , reg2
out = or-disjoint op1, op2
->
temp1 = or-disjoint reg1, reg2
out = or-disjoint mul(and (X, (C1 + C2)), D), temp1
Case1: https://alive2.llvm.org/ce/z/dHApyV
Case2: https://alive2.llvm.org/ce/z/Jz-Nag
Case3: https://alive2.llvm.org/ce/z/3xBnEV