Skip to content

Commit a64f0e9

Browse files
committed
[HEXAGON] [MachinePipeliner] Fix the DAG in case of dependent phis.
%3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 %7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 %27:intregs = A2_zxtb %3:intregs - SU2 %13:intregs = C2_muxri %45:predregs, 0, %46:intreg If we have dependent phis, SU0 should be the successor of SU1 not the other way around.
1 parent 98534ee commit a64f0e9

File tree

3 files changed

+119
-3
lines changed

3 files changed

+119
-3
lines changed

llvm/lib/CodeGen/MachinePipeliner.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -962,8 +962,26 @@ void SwingSchedulerDAG::updatePhiDependences() {
962962
HasPhiDef = Reg;
963963
// Add a chain edge to a dependent Phi that isn't an existing
964964
// predecessor.
965+
966+
// %3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0
967+
// %7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1
968+
// %27:intregs = A2_zxtb %3:intregs - SU2
969+
// %13:intregs = C2_muxri %45:predregs, 0, %46:intreg
970+
// If we have dependent phis, SU0 should be the successor of SU1
971+
// not the other way around. (it used to be SU1 is the successor
972+
// of SU0). In some cases, SU0 is scheduled earlier than SU1
973+
// resulting in bad IR as we do not have a value that can be used
974+
// by SU2.
975+
976+
// Reachability check is to ensure that we do not violate DAG.
977+
// %1:intregs = PHI %10:intregs, %bb.0, %3:intregs, %bb.1 - SU0
978+
// %2:intregs = PHI %10:intregs, %bb.0, %1:intregs, %bb.1 - SU1
979+
// %3:intregs = PHI %11:intregs, %bb.0, %2:intregs, %bb.1 - SU2
980+
// S2_storerb_io %0:intregs, 0, %2:intregs
981+
// Make sure we do not create an edge between SU2 and SU0.
982+
965983
if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
966-
I.addPred(SDep(SU, SDep::Barrier));
984+
SU->addPred(SDep(&I, SDep::Barrier));
967985
}
968986
}
969987
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
;RUN: llc -march=hexagon -mv71t -O2 < %s -o - 2>&1 > /dev/null
2+
3+
; Validate that we do not crash while running this test.
4+
;%3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0
5+
;%7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1
6+
;%27:intregs = A2_zxtb %3:intregs - SU2
7+
;%13:intregs = C2_muxri %45:predregs, 0, %46:intreg
8+
;If we have dependent phis, SU0 should be the successor of SU1 not
9+
;the other way around. (it used to be SU1 is the successor of SU0).
10+
;In some cases, SU0 is scheduled earlier than SU1 resulting in bad
11+
;IR as we do not have a value that can be used by SU2.
12+
13+
@global = common dso_local local_unnamed_addr global ptr null, align 4
14+
@global.1 = common dso_local local_unnamed_addr global i32 0, align 4
15+
@global.2 = common dso_local local_unnamed_addr global i16 0, align 2
16+
@global.3 = common dso_local local_unnamed_addr global i16 0, align 2
17+
@global.4 = common dso_local local_unnamed_addr global i32 0, align 4
18+
19+
; Function Attrs: nofree norecurse nosync nounwind
20+
define dso_local i32 @wombat(i8 zeroext %arg) local_unnamed_addr #0 {
21+
bb:
22+
%load = load ptr, ptr @global, align 4
23+
%load1 = load i32, ptr @global.1, align 4
24+
%add2 = add nsw i32 %load1, -1
25+
store i32 %add2, ptr @global.1, align 4
26+
%icmp = icmp eq i32 %load1, 0
27+
br i1 %icmp, label %bb36, label %bb3
28+
29+
bb3: ; preds = %bb3, %bb
30+
%phi = phi i32 [ %add30, %bb3 ], [ %add2, %bb ]
31+
%phi4 = phi i8 [ %phi8, %bb3 ], [ %arg, %bb ]
32+
%phi5 = phi i16 [ %select23, %bb3 ], [ undef, %bb ]
33+
%phi6 = phi i16 [ %select26, %bb3 ], [ undef, %bb ]
34+
%phi7 = phi i16 [ %select, %bb3 ], [ undef, %bb ]
35+
%phi8 = phi i8 [ %select29, %bb3 ], [ %arg, %bb ]
36+
%zext = zext i8 %phi4 to i32
37+
%getelementptr = getelementptr inbounds i32, ptr %load, i32 %zext
38+
%getelementptr9 = getelementptr inbounds i32, ptr %getelementptr, i32 2
39+
%ptrtoint = ptrtoint ptr %getelementptr9 to i32
40+
%trunc = trunc i32 %ptrtoint to i16
41+
%sext10 = sext i16 %phi7 to i32
42+
%shl11 = shl i32 %ptrtoint, 16
43+
%ashr = ashr exact i32 %shl11, 16
44+
%icmp12 = icmp slt i32 %ashr, %sext10
45+
%select = select i1 %icmp12, i16 %trunc, i16 %phi7
46+
%getelementptr13 = getelementptr inbounds i32, ptr %getelementptr, i32 3
47+
%load14 = load i32, ptr %getelementptr13, align 4
48+
%shl = shl i32 %load14, 8
49+
%getelementptr15 = getelementptr inbounds i32, ptr %getelementptr, i32 1
50+
%load16 = load i32, ptr %getelementptr15, align 4
51+
%shl17 = shl i32 %load16, 16
52+
%ashr18 = ashr exact i32 %shl17, 16
53+
%add = add nsw i32 %ashr18, %load14
54+
%lshr = lshr i32 %add, 8
55+
%or = or i32 %lshr, %shl
56+
%sub = sub i32 %or, %load16
57+
%trunc19 = trunc i32 %sub to i16
58+
%sext = sext i16 %phi5 to i32
59+
%shl20 = shl i32 %sub, 16
60+
%ashr21 = ashr exact i32 %shl20, 16
61+
%icmp22 = icmp sgt i32 %ashr21, %sext
62+
%select23 = select i1 %icmp22, i16 %trunc19, i16 %phi5
63+
%sext24 = sext i16 %phi6 to i32
64+
%icmp25 = icmp slt i32 %ashr21, %sext24
65+
%select26 = select i1 %icmp25, i16 %trunc19, i16 %phi6
66+
%icmp27 = icmp eq i8 %phi8, 0
67+
%add28 = add i8 %phi8, -1
68+
%select29 = select i1 %icmp27, i8 0, i8 %add28
69+
%add30 = add nsw i32 %phi, -1
70+
%icmp31 = icmp eq i32 %phi, 0
71+
br i1 %icmp31, label %bb32, label %bb3
72+
73+
bb32: ; preds = %bb3
74+
store i16 %trunc, ptr @global.2, align 2
75+
store i16 %trunc19, ptr @global.3, align 2
76+
store i32 -1, ptr @global.1, align 4
77+
%sext33 = sext i16 %select to i32
78+
%sext34 = sext i16 %select23 to i32
79+
%sext35 = sext i16 %select26 to i32
80+
br label %bb36
81+
82+
bb36: ; preds = %bb32, %bb
83+
%phi37 = phi i32 [ %sext33, %bb32 ], [ 0, %bb ]
84+
%phi38 = phi i32 [ %sext35, %bb32 ], [ 0, %bb ]
85+
%phi39 = phi i32 [ %sext34, %bb32 ], [ 0, %bb ]
86+
%sub40 = sub nsw i32 %phi39, %phi38
87+
%icmp41 = icmp slt i32 %sub40, %phi37
88+
br i1 %icmp41, label %bb43, label %bb42
89+
90+
bb42: ; preds = %bb36
91+
store i32 0, ptr @global.4, align 4
92+
br label %bb43
93+
94+
bb43: ; preds = %bb42, %bb36
95+
ret i32 undef
96+
}

llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55
; 2, so the computation for the number of Phis needs to be adjusted when
66
; the incoming prolog block is from prolog 0 or prolog 1.
77
; Note: the pipeliner no longer generates a 3 stage pipeline for this test.
8+
; Note: the pipeliner has been generating a 4-stage pipelined loop.
89

910
; CHECK: loop0
1011
; CHECK: [[REG0:r([0-9]+)]] = add(r{{[0-8]+}},#8)
12+
; CHECK: r{{[0-9]+}} = [[REG0]]
1113
; CHECK: endloop0
12-
; CHECK: [[REG0]] = add(r{{[0-9]+}},#8)
14+
; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},#8)
1315

1416
; Function Attrs: nounwind
1517
define void @f0(ptr nocapture readonly %a0, i32 %a1) #0 {
@@ -50,6 +52,6 @@ declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32) #1
5052
; Function Attrs: nounwind readnone
5153
declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32) #1
5254

53-
attributes #0 = { nounwind "target-cpu"="hexagonv60" }
55+
attributes #0 = { nounwind "target-cpu"="hexagonv68" }
5456
attributes #1 = { nounwind readnone }
5557
attributes #2 = { nounwind }

0 commit comments

Comments
 (0)