
Commit 0dddfab

[SLP]Recalculate deps if the original instruction is scheduled after being copyable
If the original instruction is going to be scheduled after the same instruction has already been scheduled as a copyable element, the dependencies need to be recalculated. Otherwise, they may be computed incorrectly.
1 parent 675b01a commit 0dddfab
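
To make the guard added in the SLPVectorizer.cpp hunk below easier to follow, here is a minimal, self-contained C++ sketch of the decision it encodes. This is not the actual BoUpSLP scheduling API: the struct, its fields, and mustRecalculateDeps are hypothetical stand-ins for the real queries (hasValidDependencies(), isCopyableElement(), getScheduleCopyableData(), and the user tree-entry checks).

// Hypothetical paraphrase of the added guard; the real pass queries scheduling
// state on BoUpSLP, while this sketch reduces each query to a plain flag.
#include <cstdio>

struct SchedNodeState {
  bool HasValidDependencies;    // dependencies were already computed for this node
  bool CopyableInCurrentBundle; // the current bundle treats the instruction as copyable
  bool HasCopyableSchedData;    // the instruction was already scheduled as copyable elsewhere
  bool UserTreatsAsCopyable;    // the user tree entry also models it as copyable
};

// True when previously computed direct dependencies may be stale and must be
// dropped (clearDirectDependencies() in the patch) so they get recalculated.
static bool mustRecalculateDeps(const SchedNodeState &S) {
  return S.HasValidDependencies && !S.CopyableInCurrentBundle &&
         S.HasCopyableSchedData && !S.UserTreatsAsCopyable;
}

int main() {
  // The scenario from the commit message: deps were computed while the same
  // instruction was scheduled as copyable, and now it is scheduled as itself.
  SchedNodeState S{true, false, true, false};
  std::printf("recalculate deps: %s\n", mustRecalculateDeps(S) ? "yes" : "no");
  return 0;
}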

2 files changed: +97 -0 lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 8 additions & 0 deletions
@@ -20788,6 +20788,14 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
       continue;
     }
     auto *SD = cast<ScheduleData>(SE);
+    if (SD->hasValidDependencies() &&
+        (!S.areInstructionsWithCopyableElements() ||
+         !S.isCopyableElement(SD->getInst())) &&
+        !getScheduleCopyableData(SD->getInst()).empty() && EI.UserTE &&
+        EI.UserTE->hasState() &&
+        (!EI.UserTE->hasCopyableElements() ||
+         !EI.UserTE->isCopyableElement(SD->getInst())))
+      SD->clearDirectDependencies();
     for (const Use &U : SD->getInst()->operands()) {
       unsigned &NumOps =
           UserOpToNumOps
Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s
+
+define void @test(ptr %0, i32 %1, i32 %2) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 48
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 56
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[ADD_NARROWED_I_I:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP7]], 1
+; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[ADD_NARROWED_I_I]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], -1
+; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP19]] to i32
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP28]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP11]], splat (i32 -2)
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> <i32 poison, i32 -2>, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], [[TMP12]]
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP13]], [[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP5]], align 8
+; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> <i32 1, i32 poison>, i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP33:%.*]] = and <2 x i32> [[TMP17]], [[TMP32]]
+; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr null)
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[TMP33]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = zext <2 x i32> [[TMP34]] to <2 x i64>
+; CHECK-NEXT: [[TMP23:%.*]] = zext <2 x i32> [[TMP33]] to <2 x i64>
+; CHECK-NEXT: [[TMP35:%.*]] = shl <2 x i64> [[TMP23]], splat (i64 1)
+; CHECK-NEXT: [[TMP25:%.*]] = or <2 x i64> [[TMP35]], [[TMP22]]
+; CHECK-NEXT: [[TMP26:%.*]] = trunc <2 x i64> [[TMP25]] to <2 x i32>
+; CHECK-NEXT: [[TMP27:%.*]] = trunc <2 x i64> [[TMP25]] to <2 x i32>
+; CHECK-NEXT: [[TMP24:%.*]] = tail call i32 asm sideeffect "", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 0)
+; CHECK-NEXT: store <2 x i32> [[TMP16]], ptr [[TMP3]], align 16
+; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <2 x i32> [[TMP32]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP30:%.*]] = and <2 x i32> [[TMP29]], [[TMP26]]
+; CHECK-NEXT: [[TMP31:%.*]] = or <2 x i32> [[TMP30]], [[TMP27]]
+; CHECK-NEXT: store <2 x i32> [[TMP31]], ptr [[TMP5]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+  %3 = getelementptr i8, ptr %0, i64 48
+  %4 = getelementptr i8, ptr %0, i64 52
+  %5 = getelementptr i8, ptr %0, i64 56
+  %6 = getelementptr i8, ptr %0, i64 60
+  %.pre21.i = load i32, ptr %5, align 8
+  %.pre23.i = load i32, ptr %6, align 4
+  %7 = and i32 %2, %1
+  %8 = and i32 %.pre21.i, 1
+  %9 = and i32 %1, %.pre23.i
+  call void @llvm.stackrestore.p0(ptr null)
+  %add.narrowed.i.i = shl i32 %1, 1
+  %10 = lshr i32 %7, 1
+  %11 = zext i32 %10 to i64
+  %12 = zext i32 %8 to i64
+  %reass.add1.i = shl i64 %12, 1
+  %13 = or i64 %reass.add1.i, %11
+  %14 = trunc i64 %13 to i32
+  %15 = zext i32 %9 to i64
+  %reass.add2.i = shl i64 %15, 1
+  %16 = or i64 %reass.add2.i, %12
+  %17 = trunc i64 %16 to i32
+  %18 = zext i32 %add.narrowed.i.i to i64
+  %19 = add i64 %18, -1
+  %20 = trunc i64 %19 to i32
+  %21 = trunc i64 %19 to i32
+  %22 = trunc i64 %13 to i32
+  %23 = trunc i64 %16 to i32
+  %24 = tail call i32 asm sideeffect "", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 0)
+  %25 = and i32 %20, -2
+  %26 = or i32 %1, %25
+  store i32 %26, ptr %3, align 16
+  %27 = and i32 %21, -2
+  %28 = xor i32 %27, -2
+  store i32 %28, ptr %4, align 4
+  %29 = and i32 %1, %14
+  %30 = or i32 %29, %22
+  store i32 %30, ptr %5, align 8
+  %31 = and i32 %1, %17
+  %32 = or i32 %31, %23
+  store i32 %32, ptr %6, align 4
+  ret void
+}
+
+declare void @llvm.stackrestore.p0(ptr) #0
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn }
