|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
2 | 2 | ; RUN: llc -O3 < %s -mtriple=aarch64 | FileCheck %s |
3 | 3 |
|
4 | | -; The seemingly redundant mov where src_reg == dst_reg shouldn't be removed, |
5 | | -; because it has the effect of zeroing the upper bits in x8. |
| 4 | +; The seemingly redundant W-register mov where src_reg == dst_reg (e.g. |
| 5 | +; `mov w8, w8`) must not be removed, because writing a W register zeroes the |
| 6 | +; upper 32 bits of the matching X register. |
6 | 7 |
|
7 | | -define i32 @ham(i32 %arg, i1 %arg1, i1 %arg2, ptr %arg3) nounwind { |
8 | | -; CHECK-LABEL: ham: |
| 8 | +define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwind { |
| 9 | +; CHECK-LABEL: widget: |
9 | 10 | ; CHECK: // %bb.0: // %bb |
10 | | -; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill |
11 | | -; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill |
12 | | -; CHECK-NEXT: tbnz w1, #0, .LBB0_3 |
13 | | -; CHECK-NEXT: // %bb.1: // %bb4 |
14 | | -; CHECK-NEXT: tbnz w2, #0, .LBB0_3 |
15 | | -; CHECK-NEXT: // %bb.2: // %bb5 |
16 | | -; CHECK-NEXT: mov x19, x3 |
17 | | -; CHECK-NEXT: mov w21, w1 |
18 | | -; CHECK-NEXT: mov w20, w0 |
19 | | -; CHECK-NEXT: bl zot |
20 | | -; CHECK-NEXT: tbz w21, #0, .LBB0_4 |
21 | | -; CHECK-NEXT: .LBB0_3: // %bb6 |
22 | | -; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload |
23 | | -; CHECK-NEXT: mov w0, wzr |
24 | | -; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload |
| 11 | +; CHECK-NEXT: tbz w2, #0, .LBB0_2 |
| 12 | +; CHECK-NEXT: // %bb.1: |
| 13 | +; CHECK-NEXT: mov w0, #1 // =0x1 |
| 14 | +; CHECK-NEXT: ret |
| 15 | +; CHECK-NEXT: .LBB0_2: // %bb5 |
| 16 | +; CHECK-NEXT: tbz w4, #0, .LBB0_4 |
| 17 | +; CHECK-NEXT: // %bb.3: |
| 18 | +; CHECK-NEXT: mov w0, #0 // =0x0 |
25 | 19 | ; CHECK-NEXT: ret |
26 | | -; CHECK-NEXT: .LBB0_4: |
27 | | -; CHECK-NEXT: mov w8, w20 |
28 | | -; CHECK-NEXT: mov w20, wzr |
| 20 | +; CHECK-NEXT: .LBB0_4: // %bb6 |
| 21 | +; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill |
| 22 | +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill |
| 23 | +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill |
| 24 | +; CHECK-NEXT: mov x19, x3 |
| 25 | +; CHECK-NEXT: mov x20, x0 |
| 26 | +; CHECK-NEXT: mov x21, x1 |
| 27 | +; CHECK-NEXT: bl baz |
| 28 | +; CHECK-NEXT: mov w0, #0 // =0x0 |
| 29 | +; CHECK-NEXT: cbnz wzr, .LBB0_11 |
| 30 | +; CHECK-NEXT: // %bb.5: // %bb6 |
| 31 | +; CHECK-NEXT: mov w10, #1 // =0x1 |
| 32 | +; CHECK-NEXT: cbnz w10, .LBB0_11 |
| 33 | +; CHECK-NEXT: // %bb.6: // %bb7 |
| 34 | +; CHECK-NEXT: cbnz w10, .LBB0_10 |
| 35 | +; CHECK-NEXT: // %bb.7: // %bb8 |
| 36 | +; CHECK-NEXT: mov x8, x21 |
| 37 | +; CHECK-NEXT: mov x9, x20 |
| 38 | +; CHECK-NEXT: mov w20, #0 // =0x0 |
| 39 | +; CHECK-NEXT: mov w9, w9 |
| 40 | +; CHECK-NEXT: mov x21, x9 |
29 | 41 | ; CHECK-NEXT: mov w8, w8 |
30 | | -; CHECK-NEXT: mov w21, w8 |
31 | | -; CHECK-NEXT: .LBB0_5: // %bb7 |
| 42 | +; CHECK-NEXT: mov x22, x8 |
| 43 | +; CHECK-NEXT: .LBB0_8: // %bb10 |
32 | 44 | ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
33 | 45 | ; CHECK-NEXT: strb w20, [x19] |
34 | | -; CHECK-NEXT: cbnz x21, .LBB0_5 |
35 | | -; CHECK-NEXT: // %bb.6: // %bb8 |
36 | | -; CHECK-NEXT: // in Loop: Header=BB0_5 Depth=1 |
37 | | -; CHECK-NEXT: bl quux |
38 | | -; CHECK-NEXT: b .LBB0_5 |
| 46 | +; CHECK-NEXT: cbnz x21, .LBB0_8 |
| 47 | +; CHECK-NEXT: // %bb.9: // %bb12 |
| 48 | +; CHECK-NEXT: // in Loop: Header=BB0_8 Depth=1 |
| 49 | +; CHECK-NEXT: bl snork |
| 50 | +; CHECK-NEXT: cbnz x22, .LBB0_8 |
| 51 | +; CHECK-NEXT: .LBB0_10: |
| 52 | +; CHECK-NEXT: mov w0, #0 // =0x0 |
| 53 | +; CHECK-NEXT: .LBB0_11: |
| 54 | +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload |
| 55 | +; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload |
| 56 | +; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload |
| 57 | +; CHECK-NEXT: ret |
39 | 58 | bb: |
40 | | - br i1 %arg1, label %bb6, label %bb4 |
41 | | - |
42 | | -bb4: |
43 | | - %load = load ptr, ptr null, align 8 |
44 | | - br i1 %arg2, label %bb6, label %bb5 |
| 59 | + br i1 %arg2, label %bb14, label %bb5 |
45 | 60 |
|
46 | 61 | bb5: |
47 | | - %call = call i32 @zot() #0 |
48 | | - %zext = zext i32 %arg to i64 |
49 | | - br i1 %arg1, label %bb6, label %bb7 |
| 62 | + %load = load ptr, ptr null, align 8 |
| 63 | + br i1 %arg4, label %bb14, label %bb6 |
50 | 64 |
|
51 | 65 | bb6: |
52 | | - ret i32 0 |
| 66 | + %call = call i32 @baz() #1 |
| 67 | + %or = or i1 false, true |
| 68 | + br i1 %or, label %bb14, label %bb7 |
53 | 69 |
|
54 | 70 | bb7: |
55 | | - store i8 0, ptr %arg3, align 1 |
56 | | - %icmp = icmp eq i64 %zext, 0 |
57 | | - br i1 %icmp, label %bb8, label %bb7 |
| 71 | + %icmp = icmp eq i32 0, 0 |
| 72 | + %zext = zext i32 %arg to i64 |
| 73 | + br i1 %icmp, label %bb14, label %bb8 |
58 | 74 |
|
59 | 75 | bb8: |
60 | | - call void @quux() |
61 | | - br label %bb7 |
| 76 | + %zext9 = zext i32 %arg1 to i64 |
| 77 | + br label %bb10 |
| 78 | + |
| 79 | +bb10: |
| 80 | + store i8 0, ptr %arg3, align 1 |
| 81 | + %icmp11 = icmp eq i64 %zext, 0 |
| 82 | + br i1 %icmp11, label %bb12, label %bb10 |
| 83 | + |
| 84 | +bb12: |
| 85 | + call void @snork() |
| 86 | + %icmp13 = icmp eq i64 0, %zext9 |
| 87 | + br i1 %icmp13, label %bb14, label %bb10 |
| 88 | + |
| 89 | +bb14: |
| 90 | + %phi = phi i32 [ 0, %bb6 ], [ 0, %bb7 ], [ 0, %bb12 ], [ 1, %bb ], [ 0, %bb5 ] |
| 91 | + ret i32 %phi |
62 | 92 | } |
63 | 93 |
|
64 | | -declare i32 @zot() |
| 94 | +declare i32 @baz() |
65 | 95 |
|
66 | | -declare void @quux() |
| 96 | +declare void @snork() |
67 | 97 |
|
68 | | -attributes #0 = { returns_twice } |
| 98 | +attributes #0 = { "target-cpu"="apple-m1" } |
| 99 | +attributes #1 = { returns_twice } |
0 commit comments