-
Notifications
You must be signed in to change notification settings - Fork 15.4k
Open
Labels
Description
| Bugzilla Link | 44461 |
| Version | trunk |
| OS | Linux |
| CC | @alex,@efriedma-quic,@RKSimon,@rotateright |
Extended Description
For backwards compatibility reasons, Rust generates some pretty ridiculous code for its inclusive range iterator.
In LLVM 10, as a result of improvements to other optimizations, SimplifyCFG ends up converting code along the lines of
; Reduced input, before SimplifyCFG. The loop header %bb10 compares the i8
; value %iter1.sroa.9.0 against 2 and against 0 with two icmp eq instructions,
; ORs the results, and leaves the loop through an ordinary conditional branch.
define fastcc i64 @test() {
start:
br label %bb10
bb10: ; preds = %bb3.i.i, %start
%iter1.sroa.5.0 = phi i64 [ 100000, %start ], [ %spec.select, %bb3.i.i ]
; Starts at 2 on entry from %start; afterwards it is %tmp3 from the loop body.
%iter1.sroa.9.0 = phi i8 [ 2, %start ], [ %tmp3, %bb3.i.i ]
%count.1 = phi i64 [ 0, %start ], [ %tmp4, %bb3.i.i ]
%c1 = icmp eq i8 %iter1.sroa.9.0, 2
%c2 = icmp eq i8 %iter1.sroa.9.0, 0
%c3 = or i1 %c1, %c2
; Continue with the loop body when the value is 2 or 0, otherwise exit to %bb12.
br i1 %c3, label %bb3.i.i, label %bb12
bb3.i.i: ; preds = %bb10
%tmp2 = icmp eq i64 %iter1.sroa.5.0, 0
%tmp3 = zext i1 %tmp2 to i8
%_5.0.i.i.i.i = add i64 %iter1.sroa.5.0, -1
%spec.select = select i1 %tmp2, i64 0, i64 %_5.0.i.i.i.i
%tmp4 = add i64 %count.1, %iter1.sroa.5.0
br label %bb10
bb12: ; preds = %bb10
ret i64 %count.1
}
into
; Same function as reported after SimplifyCFG: the header %bb10 no longer ends
; in a conditional branch but in a switch on %iter1.sroa.9.0.
define fastcc i64 @test() {
start:
br label %bb10
bb10: ; preds = %bb3.i.i, %start
%iter1.sroa.5.0 = phi i64 [ 100000, %start ], [ %spec.select, %bb3.i.i ]
%iter1.sroa.9.0 = phi i8 [ 2, %start ], [ %tmp3, %bb3.i.i ]
%count.1 = phi i64 [ 0, %start ], [ %tmp4, %bb3.i.i ]
; Cases 2 and 0 both continue into the loop body; the default target %bb12 is
; the loop exit.
switch i8 %iter1.sroa.9.0, label %bb12 [
i8 2, label %bb3.i.i
i8 0, label %bb3.i.i
]
; %bb10 is listed twice below because both switch cases branch here.
bb3.i.i: ; preds = %bb10, %bb10
%tmp2 = icmp eq i64 %iter1.sroa.5.0, 0
%tmp3 = zext i1 %tmp2 to i8
%_5.0.i.i.i.i = add i64 %iter1.sroa.5.0, -1
%spec.select = select i1 %tmp2, i64 0, i64 %_5.0.i.i.i.i
%tmp4 = add i64 %count.1, %iter1.sroa.5.0
br label %bb10
bb12: ; preds = %bb10
%count.1.lcssa = phi i64 [ %count.1, %bb10 ]
ret i64 %count.1.lcssa
}
Unfortunately, this means that the loop can no longer be rotated (more generally, switches are essentially opaque to loop optimizations).
For the original code, -loop-rotate gives:
; Reported -loop-rotate output for the icmp/or/br form: the header and body are
; now a single block %bb3.i.i, and the exit test sits at the bottom of it.
define fastcc i64 @test() {
start:
br label %bb3.i.i
bb3.i.i: ; preds = %start, %bb3.i.i
%count.12 = phi i64 [ 0, %start ], [ %tmp4, %bb3.i.i ]
%iter1.sroa.5.01 = phi i64 [ 100000, %start ], [ %spec.select, %bb3.i.i ]
%tmp2 = icmp eq i64 %iter1.sroa.5.01, 0
%tmp3 = zext i1 %tmp2 to i8
%_5.0.i.i.i.i = add i64 %iter1.sroa.5.01, -1
%spec.select = select i1 %tmp2, i64 0, i64 %_5.0.i.i.i.i
%tmp4 = add i64 %count.12, %iter1.sroa.5.01
; The exit test now reads %tmp3 directly. %tmp3 is a zext of an i1, so it is
; 0 or 1 and the comparison against 2 below cannot be true.
%c1 = icmp eq i8 %tmp3, 2
%c2 = icmp eq i8 %tmp3, 0
%c3 = or i1 %c1, %c2
; Backedge to %bb3.i.i while %c3 holds, otherwise exit to %bb12.
br i1 %c3, label %bb3.i.i, label %bb12
bb12: ; preds = %bb3.i.i
%count.1.lcssa = phi i64 [ %tmp4, %bb3.i.i ]
ret i64 %count.1.lcssa
}
while for the new one, it doesn't do anything.
I'm wondering whether it would make sense to tweak the SimplifyCFG heuristics to not create a switch when there are only two conditions, or alternatively to restrict this switch formation to the aggressive SimplifyCFG pass that is performed after the loop transformations have run.