Skip to content

Commit f2301be

Browse files
committed
[SLP]Add a check if the user itself is commutable
If a commutable instruction can be represented as a non-commutable vector instruction (for example, add 0, %v can be represented as part of a sub node as sub %v, 0), its operands might still be reordered, and this must be accounted for when checking for copyables in operands. Fixes #158293
1 parent 6512bf0 commit f2301be

File tree

2 files changed

+25
-0
lines changed

2 files changed

+25
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5253,6 +5253,7 @@ class BoUpSLP {
52535253
// Same applies even for non-commutative cmps, because we can invert
52545254
// their predicate potentially and, thus, reorder the operands.
52555255
bool IsCommutativeUser =
5256+
::isCommutative(User) ||
52565257
::isCommutative(TE->getMatchingMainOpOrAltOp(User), User);
52575258
EdgeInfo EI(TE, U.getOperandNo());
52585259
if (!IsCommutativeUser && !isa<CmpInst>(User)) {
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt --passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s
3+
4+
define i64 @test(i32 %arg) {
5+
; CHECK-LABEL: define i64 @test(
6+
; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[BB:.*:]]
8+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 896), align 4
9+
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], zeroinitializer
10+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
11+
; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP1]], [[TMP2]]
12+
; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 896), align 4
13+
; CHECK-NEXT: ret i64 0
14+
;
15+
bb:
16+
%load = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 900), align 4
17+
%add = add i32 0, %load
18+
store i32 %add, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 900), align 4
19+
%load1 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 896), align 4
20+
%add2 = add i32 %load1, 0
21+
%sub = sub i32 %add2, %arg
22+
store i32 %sub, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 896), align 4
23+
ret i64 0
24+
}

0 commit comments

Comments
 (0)