Skip to content

Commit f9ddb2e

Browse files
authored
Merge branch 'main' into feat/55474
2 parents 986922f + 7d01a8f commit f9ddb2e

File tree

2 files changed

+114
-4
lines changed

2 files changed

+114
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13205,9 +13205,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
13205 13205
VTE = *MIt;
13206 13206
}
13207 13207
}
13208-
Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
13209-
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
13210-
continue;
13208+
if (none_of(TE->CombinedEntriesWithIndices,
13209+
[&](const auto &P) { return P.first == VTE->Idx; })) {
13210+
Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
13211+
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
13212+
continue;
13213+
}
13211 13214
VToTEs.insert(VTE);
13212 13215
}
13213 13216
if (VToTEs.empty())
@@ -14497,7 +14500,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
14497 14500
break;
14498 14501
}
14499 14502
}
14500-
int VF = getVF(V1);
14503+
unsigned VF = 0;
14504+
for (Value *V : InVectors)
14505+
VF = std::max(VF, getVF(V));
14501 14506
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
14502 14507
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
14503 14508
CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF);
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -pass-remarks-output=%t < %s | FileCheck %s
3+
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
4+
5+
; YAML-LABEL: --- !Passed
6+
; YAML-NEXT: Pass: slp-vectorizer
7+
; YAML-NEXT: Name: VectorizedHorizontalReduction
8+
; YAML-NEXT: Function: test
9+
; YAML-NEXT: Args:
10+
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
11+
; YAML-NEXT: - Cost: '-41'
12+
; YAML-NEXT: - String: ' and with tree size '
13+
; YAML-NEXT: - TreeSize: '7'
14+
; YAML-NEXT: ...
15+
16+
define i64 @test() {
17+
; CHECK-LABEL: define i64 @test(
18+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
19+
; CHECK-NEXT: [[ENTRY:.*:]]
20+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 1, i32 0>, i32 0, i32 6
21+
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8)
22+
; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> <i32 0, i32 0, i32 0, i32 1>, i64 24)
23+
; CHECK-NEXT: [[TMP3:%.*]] = sub <32 x i32> zeroinitializer, [[TMP2]]
24+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP3]])
25+
; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP4]], 0
26+
; CHECK-NEXT: [[RES:%.*]] = sext i32 [[OP_RDX]] to i64
27+
; CHECK-NEXT: ret i64 [[RES]]
28+
;
29+
entry:
30+
%.neg15 = sub i32 0, 0
31+
%0 = trunc i64 1 to i32
32+
%.neg.1 = sub i32 0, 0
33+
%.neg15.1 = sub i32 0, %0
34+
%.neg16.1 = add i32 %.neg.1, %.neg15.1
35+
%1 = add i32 %.neg16.1, %.neg15
36+
%2 = trunc i64 0 to i32
37+
%.neg.2 = sub i32 0, %0
38+
%.neg15.2 = sub i32 0, %2
39+
%.neg16.2 = add i32 %.neg.2, %.neg15.2
40+
%3 = add i32 %.neg16.2, %1
41+
%4 = trunc i64 0 to i32
42+
%.neg.3 = sub i32 0, %2
43+
%.neg15.3 = sub i32 0, %4
44+
%.neg16.3 = add i32 %.neg.3, %.neg15.3
45+
%5 = add i32 %.neg16.3, %3
46+
%6 = trunc i64 0 to i32
47+
%.neg.4 = sub i32 0, %4
48+
%.neg15.4 = sub i32 0, %6
49+
%.neg16.4 = add i32 %.neg.4, %.neg15.4
50+
%7 = add i32 %.neg16.4, %5
51+
%.neg.5 = sub i32 0, %6
52+
%.neg15.5 = sub i32 0, 0
53+
%.neg16.5 = add i32 %.neg.5, %.neg15.5
54+
%8 = add i32 %.neg16.5, %7
55+
%.neg15.6 = sub i32 0, 0
56+
%.neg16.6 = add i32 0, %.neg15.6
57+
%9 = add i32 %.neg16.6, %8
58+
%.neg.7 = sub i32 0, 0
59+
%.neg15.7 = sub i32 0, 0
60+
%.neg16.7 = add i32 %.neg.7, %.neg15.7
61+
%10 = add i32 %.neg16.7, %9
62+
%11 = trunc i64 0 to i32
63+
%.neg.8 = sub i32 0, 0
64+
%.neg15.8 = sub i32 0, %11
65+
%.neg16.8 = add i32 %.neg.8, %.neg15.8
66+
%12 = add i32 %.neg16.8, %10
67+
%13 = trunc i64 0 to i32
68+
%.neg.9 = sub i32 0, %11
69+
%.neg15.9 = sub i32 0, %13
70+
%.neg16.9 = add i32 %.neg.9, %.neg15.9
71+
%14 = add i32 %.neg16.9, %12
72+
%15 = trunc i64 0 to i32
73+
%.neg.10 = sub i32 0, %13
74+
%.neg15.10 = sub i32 0, %15
75+
%.neg16.10 = add i32 %.neg.10, %.neg15.10
76+
%16 = add i32 %.neg16.10, %14
77+
%17 = trunc i64 0 to i32
78+
%.neg.11 = sub i32 0, %15
79+
%.neg15.11 = sub i32 0, %17
80+
%.neg16.11 = add i32 %.neg.11, %.neg15.11
81+
%18 = add i32 %.neg16.11, %16
82+
%19 = trunc i64 0 to i32
83+
%.neg.12 = sub i32 0, %17
84+
%.neg15.12 = sub i32 0, %19
85+
%.neg16.12 = add i32 %.neg.12, %.neg15.12
86+
%20 = add i32 %.neg16.12, %18
87+
%.neg.13 = sub i32 0, %19
88+
%.neg15.13 = sub i32 0, 0
89+
%.neg16.13 = add i32 %.neg.13, %.neg15.13
90+
%21 = add i32 %.neg16.13, %20
91+
%.neg.14 = sub i32 0, 0
92+
%.neg15.14 = sub i32 0, 0
93+
%.neg16.14 = add i32 %.neg.14, %.neg15.14
94+
%22 = add i32 %.neg16.14, %21
95+
%.neg.15 = sub i32 0, 0
96+
%.neg15.15 = sub i32 0, 0
97+
%.neg16.15 = add i32 %.neg.15, %.neg15.15
98+
%23 = add i32 %.neg16.15, %22
99+
%.neg.16 = sub i32 0, 0
100+
%.neg15.16 = sub i32 0, 0
101+
%.neg16.16 = add i32 %.neg.16, %.neg15.16
102+
%24 = add i32 %.neg16.16, %23
103+
%res = sext i32 %24 to i64
104+
ret i64 %res
105+
}

0 commit comments

Comments
 (0)