Skip to content

Commit 57bbdbd

Browse files
committed
[SLP] Relax assertion in mask combine for a non-power-of-2 number of elements
The nodes may contain a non-power-of-2 number of elements. The assertion needs to be relaxed to avoid a possible compiler crash. Fixes #117517.
1 parent 7e3187e commit 57bbdbd

File tree

2 files changed

+58
-3
lines changed

2 files changed

+58
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,9 +1143,7 @@ static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
11431143
assert(
11441144
(!ExtendingManyInputs || SubMask.size() > Mask.size() ||
11451145
// Check if input scalars were extended to match the size of other node.
1146-
(SubMask.size() == Mask.size() &&
1147-
std::all_of(std::next(Mask.begin(), Mask.size() / 2), Mask.end(),
1148-
[](int Idx) { return Idx == PoisonMaskElem; }))) &&
1146+
(SubMask.size() == Mask.size() && Mask.back() == PoisonMaskElem)) &&
11491147
"SubMask with many inputs support must be larger than the mask.");
11501148
if (Mask.empty()) {
11511149
Mask.append(SubMask.begin(), SubMask.end());
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s
3+
4+
define i64 @test() {
5+
; CHECK-LABEL: define i64 @test() {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: [[OR54_I_I_6:%.*]] = or i32 0, 0
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[OR54_I_I_6]], i32 8
9+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 0)
10+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 7, i32 7, i32 8>
11+
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i32> [[TMP2]] to <16 x i64>
12+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP3]])
13+
; CHECK-NEXT: ret i64 [[TMP4]]
14+
;
15+
entry:
16+
%xor148.2.i = xor i32 0, 0
17+
%conv193.i = zext i32 %xor148.2.i to i64
18+
%conv193.1.i = zext i32 %xor148.2.i to i64
19+
%or194.1.i = or i64 %conv193.i, %conv193.1.i
20+
%xor148.2.i.1 = xor i32 0, 0
21+
%conv193.i.1 = zext i32 %xor148.2.i.1 to i64
22+
%or194.i.1 = or i64 %or194.1.i, %conv193.i.1
23+
%conv193.1.i.1 = zext i32 %xor148.2.i.1 to i64
24+
%or194.1.i.1 = or i64 %or194.i.1, %conv193.1.i.1
25+
%xor148.2.i.2 = xor i32 0, 0
26+
%conv193.i.2 = zext i32 %xor148.2.i.2 to i64
27+
%or194.i.2 = or i64 %or194.1.i.1, %conv193.i.2
28+
%conv193.1.i.2 = zext i32 %xor148.2.i.2 to i64
29+
%or194.1.i.2 = or i64 %or194.i.2, %conv193.1.i.2
30+
%xor148.2.i.3 = xor i32 0, 0
31+
%conv193.i.3 = zext i32 %xor148.2.i.3 to i64
32+
%or194.i.3 = or i64 %or194.1.i.2, %conv193.i.3
33+
%conv193.1.i.3 = zext i32 %xor148.2.i.3 to i64
34+
%or194.1.i.3 = or i64 %or194.i.3, %conv193.1.i.3
35+
%xor148.2.i.4 = xor i32 0, 0
36+
%conv193.i.4 = zext i32 %xor148.2.i.4 to i64
37+
%or194.i.4 = or i64 %or194.1.i.3, %conv193.i.4
38+
%conv193.1.i.4 = zext i32 %xor148.2.i.4 to i64
39+
%or194.1.i.4 = or i64 %or194.i.4, %conv193.1.i.4
40+
%xor148.2.i.5 = xor i32 0, 0
41+
%conv193.i.5 = zext i32 %xor148.2.i.5 to i64
42+
%or194.i.5 = or i64 %or194.1.i.4, %conv193.i.5
43+
%conv193.1.i.5 = zext i32 %xor148.2.i.5 to i64
44+
%or194.1.i.5 = or i64 %or194.i.5, %conv193.1.i.5
45+
%xor148.2.i.6 = xor i32 0, 0
46+
%conv193.i.6 = zext i32 %xor148.2.i.6 to i64
47+
%or194.i.6 = or i64 %or194.1.i.5, %conv193.i.6
48+
%or54.i.i.6 = or i32 %xor148.2.i.6, 0
49+
%conv193.1.i.6 = zext i32 %or54.i.i.6 to i64
50+
%xor148.2.i.7 = xor i32 0, 0
51+
%conv193.i.7 = zext i32 %xor148.2.i.7 to i64
52+
%0 = or i64 %or194.i.6, %conv193.i.7
53+
%conv193.1.i.7 = zext i32 %xor148.2.i.7 to i64
54+
%1 = or i64 %0, %conv193.1.i.7
55+
%or194.1.i.7 = or i64 %1, %conv193.1.i.6
56+
ret i64 %or194.1.i.7
57+
}

0 commit comments

Comments
 (0)