File tree (expand/collapse): 2 files changed, +18 -3 lines changed
test/Transforms/SLPVectorizer/RISCV (expand/collapse file tree): 2 files changed, +18 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -15529,7 +15529,23 @@ void BoUpSLP::computeMinimumValueSizes() {
15529 15529 // Check if the root is trunc and the next node is gather/buildvector, then
15530 15530 // keep trunc in scalars, which is free in most cases.
15531 15531 if (E.isGather() && IsTruncRoot && E.UserTreeIndices.size() == 1 &&
15532- E.Idx > (IsStoreOrInsertElt ? 2 : 1)) {
15532+ E.Idx > (IsStoreOrInsertElt ? 2 : 1) &&
15533+ all_of(E.Scalars, [&](Value *V) {
15534+ return V->hasOneUse() || isa<Constant>(V) ||
15535+ (!V->hasNUsesOrMore(UsesLimit) &&
15536+ none_of(V->users(), [&](User *U) {
15537+ const TreeEntry *TE = getTreeEntry(U);
15538+ const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
15539+ if (TE == UserTE || !TE)
15540+ return false;
15541+ unsigned UserTESz = DL->getTypeSizeInBits(
15542+ UserTE->Scalars.front()->getType());
15543+ auto It = MinBWs.find(TE);
15544+ if (It != MinBWs.end() && It->second.first > UserTESz)
15545+ return true;
15546+ return DL->getTypeSizeInBits(U->getType()) > UserTESz;
15547+ }));
15548+ })) {
15533 15549 ToDemote.push_back(E.Idx);
15534 15550 const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
15535 15551 auto It = MinBWs.find(UserTE);
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,7 @@ define i32 @test(i64 %v1, i64 %v2) {
8 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
9 9 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[V2]], i32 1
10 10 ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
11- ; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
12- ; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP3]], <i64 32, i64 32>
11+ ; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP1]], <i64 32, i64 32>
13 12 ; CHECK-NEXT: [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i32>
14 13 ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP2]], [[TMP5]]
15 14 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
You can’t perform that action at this time.
0 commit comments