Skip to content

Commit f20619c

Browse files
authored
[RISCV] More explicitly check that combineOp_VLToVWOp_VL removes the extends it is supposed to. (llvm#166710)
If we visit multiple root nodes, make sure the strategy chosen for other nodes includes the nodes we've already committed to remove. This can occur if we have add/sub nodes where one operand is a zext and the other is a sext. We might see that the nodes share a common extension but pick a strategy that doesn't share it.
1 parent 52e8f3c commit f20619c

File tree

2 files changed

+32
-20
lines changed

2 files changed

+32
-20
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17867,6 +17867,7 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
1786717867

1786817868
SmallVector<SDNode *> Worklist;
1786917869
SmallPtrSet<SDNode *, 8> Inserted;
17870+
SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
1787017871
Worklist.push_back(N);
1787117872
Inserted.insert(N);
1787217873
SmallVector<CombineResult> CombinesToApply;
@@ -17876,22 +17877,25 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
1787617877

1787717878
NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
1787817879
NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17879-
auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17880-
&Inserted](const NodeExtensionHelper &Op) {
17881-
if (Op.needToPromoteOtherUsers()) {
17882-
for (SDUse &Use : Op.OrigOperand->uses()) {
17883-
SDNode *TheUser = Use.getUser();
17884-
if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17885-
return false;
17886-
// We only support the first 2 operands of FMA.
17887-
if (Use.getOperandNo() >= 2)
17888-
return false;
17889-
if (Inserted.insert(TheUser).second)
17890-
Worklist.push_back(TheUser);
17891-
}
17892-
}
17893-
return true;
17894-
};
17880+
auto AppendUsersIfNeeded =
17881+
[&Worklist, &Subtarget, &Inserted,
17882+
&ExtensionsToRemove](const NodeExtensionHelper &Op) {
17883+
if (Op.needToPromoteOtherUsers()) {
17884+
// Remember that we're supposed to remove this extension.
17885+
ExtensionsToRemove.insert(Op.OrigOperand.getNode());
17886+
for (SDUse &Use : Op.OrigOperand->uses()) {
17887+
SDNode *TheUser = Use.getUser();
17888+
if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17889+
return false;
17890+
// We only support the first 2 operands of FMA.
17891+
if (Use.getOperandNo() >= 2)
17892+
return false;
17893+
if (Inserted.insert(TheUser).second)
17894+
Worklist.push_back(TheUser);
17895+
}
17896+
}
17897+
return true;
17898+
};
1789517899

1789617900
// Control the compile time by limiting the number of node we look at in
1789717901
// total.
@@ -17912,6 +17916,15 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
1791217916
std::optional<CombineResult> Res =
1791317917
FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
1791417918
if (Res) {
17919+
// If this strategy wouldn't remove an extension we're supposed to
17920+
// remove, reject it.
17921+
if (!Res->LHSExt.has_value() &&
17922+
ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
17923+
continue;
17924+
if (!Res->RHSExt.has_value() &&
17925+
ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
17926+
continue;
17927+
1791517928
Matched = true;
1791617929
CombinesToApply.push_back(*Res);
1791717930
// All the inputs that are extended need to be folded, otherwise

llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -595,12 +595,11 @@ define <vscale x 4 x i32> @mismatched_extend_sub_add_commuted(<vscale x 4 x i16>
595595
; FOLDING: # %bb.0:
596596
; FOLDING-NEXT: vsetvli a0, zero, e32, m2, ta, ma
597597
; FOLDING-NEXT: vzext.vf2 v10, v8
598-
; FOLDING-NEXT: vsext.vf2 v12, v9
599598
; FOLDING-NEXT: vsetvli zero, zero, e16, m1, ta, ma
600-
; FOLDING-NEXT: vwsub.wv v10, v10, v9
601-
; FOLDING-NEXT: vwaddu.wv v12, v12, v8
599+
; FOLDING-NEXT: vwsub.wv v12, v10, v9
600+
; FOLDING-NEXT: vwadd.wv v10, v10, v9
602601
; FOLDING-NEXT: vsetvli zero, zero, e32, m2, ta, ma
603-
; FOLDING-NEXT: vmul.vv v8, v10, v12
602+
; FOLDING-NEXT: vmul.vv v8, v12, v10
604603
; FOLDING-NEXT: ret
605604
%a = zext <vscale x 4 x i16> %x to <vscale x 4 x i32>
606605
%b = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>

0 commit comments

Comments
 (0)