Skip to content

Commit be9e287

Browse files
authored
[AArch64][test] Improve pr166870.ll test case (#168194)
As pointed out in the post-commit discussion on #167336 (see the linked comment there), although the test case succeeds in showing a codegen difference now that the faulty MachineCopyPropagation logic has been removed, the example was reduced so much that it would actually have been legal to remove the seemingly redundant mov. This is a re-reduction of that test case, which should now demonstrate a mov that can't safely be removed (mov w9, w9): without it, the upper bits of x9 would no longer be zeroed, which may alter the program logic.
1 parent 5673305 commit be9e287

File tree

1 file changed

+77
-46
lines changed

1 file changed

+77
-46
lines changed
Lines changed: 77 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,99 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
22
; RUN: llc -O3 < %s -mtriple=aarch64 | FileCheck %s
33

4-
; The seemingly redundant mov where src_reg == dst_reg shouldn't be removed,
5-
; because it has the effect of zeroing the upper bits in x8.
4+
; The seemingly redundant wreg mov where src_reg == dst_reg shouldn't be
5+
; removed, because it has the effect of zeroing the upper bits in the matching
6+
; xreg.
67

7-
define i32 @ham(i32 %arg, i1 %arg1, i1 %arg2, ptr %arg3) nounwind {
8-
; CHECK-LABEL: ham:
8+
define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwind {
9+
; CHECK-LABEL: widget:
910
; CHECK: // %bb.0: // %bb
10-
; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
11-
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
12-
; CHECK-NEXT: tbnz w1, #0, .LBB0_3
13-
; CHECK-NEXT: // %bb.1: // %bb4
14-
; CHECK-NEXT: tbnz w2, #0, .LBB0_3
15-
; CHECK-NEXT: // %bb.2: // %bb5
16-
; CHECK-NEXT: mov x19, x3
17-
; CHECK-NEXT: mov w21, w1
18-
; CHECK-NEXT: mov w20, w0
19-
; CHECK-NEXT: bl zot
20-
; CHECK-NEXT: tbz w21, #0, .LBB0_4
21-
; CHECK-NEXT: .LBB0_3: // %bb6
22-
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
23-
; CHECK-NEXT: mov w0, wzr
24-
; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
11+
; CHECK-NEXT: tbz w2, #0, .LBB0_2
12+
; CHECK-NEXT: // %bb.1:
13+
; CHECK-NEXT: mov w0, #1 // =0x1
14+
; CHECK-NEXT: ret
15+
; CHECK-NEXT: .LBB0_2: // %bb5
16+
; CHECK-NEXT: tbz w4, #0, .LBB0_4
17+
; CHECK-NEXT: // %bb.3:
18+
; CHECK-NEXT: mov w0, #0 // =0x0
2519
; CHECK-NEXT: ret
26-
; CHECK-NEXT: .LBB0_4:
27-
; CHECK-NEXT: mov w8, w20
28-
; CHECK-NEXT: mov w20, wzr
20+
; CHECK-NEXT: .LBB0_4: // %bb6
21+
; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
22+
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
23+
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
24+
; CHECK-NEXT: mov x19, x3
25+
; CHECK-NEXT: mov x20, x0
26+
; CHECK-NEXT: mov x21, x1
27+
; CHECK-NEXT: bl baz
28+
; CHECK-NEXT: mov w0, #0 // =0x0
29+
; CHECK-NEXT: cbnz wzr, .LBB0_11
30+
; CHECK-NEXT: // %bb.5: // %bb6
31+
; CHECK-NEXT: mov w10, #1 // =0x1
32+
; CHECK-NEXT: cbnz w10, .LBB0_11
33+
; CHECK-NEXT: // %bb.6: // %bb7
34+
; CHECK-NEXT: cbnz w10, .LBB0_10
35+
; CHECK-NEXT: // %bb.7: // %bb8
36+
; CHECK-NEXT: mov x8, x21
37+
; CHECK-NEXT: mov x9, x20
38+
; CHECK-NEXT: mov w20, #0 // =0x0
39+
; CHECK-NEXT: mov w9, w9
40+
; CHECK-NEXT: mov x21, x9
2941
; CHECK-NEXT: mov w8, w8
30-
; CHECK-NEXT: mov w21, w8
31-
; CHECK-NEXT: .LBB0_5: // %bb7
42+
; CHECK-NEXT: mov x22, x8
43+
; CHECK-NEXT: .LBB0_8: // %bb10
3244
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
3345
; CHECK-NEXT: strb w20, [x19]
34-
; CHECK-NEXT: cbnz x21, .LBB0_5
35-
; CHECK-NEXT: // %bb.6: // %bb8
36-
; CHECK-NEXT: // in Loop: Header=BB0_5 Depth=1
37-
; CHECK-NEXT: bl quux
38-
; CHECK-NEXT: b .LBB0_5
46+
; CHECK-NEXT: cbnz x21, .LBB0_8
47+
; CHECK-NEXT: // %bb.9: // %bb12
48+
; CHECK-NEXT: // in Loop: Header=BB0_8 Depth=1
49+
; CHECK-NEXT: bl snork
50+
; CHECK-NEXT: cbnz x22, .LBB0_8
51+
; CHECK-NEXT: .LBB0_10:
52+
; CHECK-NEXT: mov w0, #0 // =0x0
53+
; CHECK-NEXT: .LBB0_11:
54+
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
55+
; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
56+
; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
57+
; CHECK-NEXT: ret
3958
bb:
40-
br i1 %arg1, label %bb6, label %bb4
41-
42-
bb4:
43-
%load = load ptr, ptr null, align 8
44-
br i1 %arg2, label %bb6, label %bb5
59+
br i1 %arg2, label %bb14, label %bb5
4560

4661
bb5:
47-
%call = call i32 @zot() #0
48-
%zext = zext i32 %arg to i64
49-
br i1 %arg1, label %bb6, label %bb7
62+
%load = load ptr, ptr null, align 8
63+
br i1 %arg4, label %bb14, label %bb6
5064

5165
bb6:
52-
ret i32 0
66+
%call = call i32 @baz() #1
67+
%or = or i1 false, true
68+
br i1 %or, label %bb14, label %bb7
5369

5470
bb7:
55-
store i8 0, ptr %arg3, align 1
56-
%icmp = icmp eq i64 %zext, 0
57-
br i1 %icmp, label %bb8, label %bb7
71+
%icmp = icmp eq i32 0, 0
72+
%zext = zext i32 %arg to i64
73+
br i1 %icmp, label %bb14, label %bb8
5874

5975
bb8:
60-
call void @quux()
61-
br label %bb7
76+
%zext9 = zext i32 %arg1 to i64
77+
br label %bb10
78+
79+
bb10:
80+
store i8 0, ptr %arg3, align 1
81+
%icmp11 = icmp eq i64 %zext, 0
82+
br i1 %icmp11, label %bb12, label %bb10
83+
84+
bb12:
85+
call void @snork()
86+
%icmp13 = icmp eq i64 0, %zext9
87+
br i1 %icmp13, label %bb14, label %bb10
88+
89+
bb14:
90+
%phi = phi i32 [ 0, %bb6 ], [ 0, %bb7 ], [ 0, %bb12 ], [ 1, %bb ], [ 0, %bb5 ]
91+
ret i32 %phi
6292
}
6393

64-
declare i32 @zot()
94+
declare i32 @baz()
6595

66-
declare void @quux()
96+
declare void @snork()
6797

68-
attributes #0 = { returns_twice }
98+
attributes #0 = { "target-cpu"="apple-m1" }
99+
attributes #1 = { returns_twice }

0 commit comments

Comments
 (0)