-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][test] Improve pr166870.ll test case #168194
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
As pointed out in post-commit discussion on llvm#167336 <llvm#167336 (comment)>, although the test case succeeds in showing a codegen difference now the faulty MachineCopyPropagation logic was removed, the example was reduced so much that it actually would have been legal to remove the seemingly redundant mov. This is a re-reduction of that test case which should now demonstrate a mov that can't safely be removed (mov w9, w9) because the upper bits no longer being zeroed may alter the program logic.
|
@llvm/pr-subscribers-backend-aarch64 Author: Alex Bradbury (asb) ChangesAs pointed out in post-commit discussion on #167336 <#167336 (comment)>, although the test case succeeds in showing a codegen difference now the faulty MachineCopyPropagation logic was removed, the example was reduced so much that it actually would have been legal to remove the seemingly redundant mov. This is a re-reduction of that test case which should now demonstrate a mov that can't safely be removed (mov w9, w9) because the upper bits no longer being zeroed may alter the program logic. Full diff: https://github.com/llvm/llvm-project/pull/168194.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/AArch64/pr166870.ll b/llvm/test/CodeGen/AArch64/pr166870.ll
index dc23f51987635..d6f99c67a01ff 100644
--- a/llvm/test/CodeGen/AArch64/pr166870.ll
+++ b/llvm/test/CodeGen/AArch64/pr166870.ll
@@ -1,68 +1,99 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -O3 < %s -mtriple=aarch64 | FileCheck %s
-; The seemingly redundant mov where src_reg == dst_reg shouldn't be removed,
-; because it has the effect of zeroing the upper bits in x8.
+; The seemingly redundant wreg mov where src_reg == dst_reg shouldn't be
+; removed, because it has the effect of zeroing the upper bits in the matching
+; xreg.
-define i32 @ham(i32 %arg, i1 %arg1, i1 %arg2, ptr %arg3) nounwind {
-; CHECK-LABEL: ham:
+define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwind {
+; CHECK-LABEL: widget:
; CHECK: // %bb.0: // %bb
-; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: tbnz w1, #0, .LBB0_3
-; CHECK-NEXT: // %bb.1: // %bb4
-; CHECK-NEXT: tbnz w2, #0, .LBB0_3
-; CHECK-NEXT: // %bb.2: // %bb5
-; CHECK-NEXT: mov x19, x3
-; CHECK-NEXT: mov w21, w1
-; CHECK-NEXT: mov w20, w0
-; CHECK-NEXT: bl zot
-; CHECK-NEXT: tbz w21, #0, .LBB0_4
-; CHECK-NEXT: .LBB0_3: // %bb6
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: tbz w2, #0, .LBB0_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: // %bb5
+; CHECK-NEXT: tbz w4, #0, .LBB0_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: mov w0, #0 // =0x0
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: mov w8, w20
-; CHECK-NEXT: mov w20, wzr
+; CHECK-NEXT: .LBB0_4: // %bb6
+; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x3
+; CHECK-NEXT: mov x20, x0
+; CHECK-NEXT: mov x21, x1
+; CHECK-NEXT: bl baz
+; CHECK-NEXT: mov w0, #0 // =0x0
+; CHECK-NEXT: cbnz wzr, .LBB0_11
+; CHECK-NEXT: // %bb.5: // %bb6
+; CHECK-NEXT: mov w10, #1 // =0x1
+; CHECK-NEXT: cbnz w10, .LBB0_11
+; CHECK-NEXT: // %bb.6: // %bb7
+; CHECK-NEXT: cbnz w10, .LBB0_10
+; CHECK-NEXT: // %bb.7: // %bb8
+; CHECK-NEXT: mov x8, x21
+; CHECK-NEXT: mov x9, x20
+; CHECK-NEXT: mov w20, #0 // =0x0
+; CHECK-NEXT: mov w9, w9
+; CHECK-NEXT: mov x21, x9
; CHECK-NEXT: mov w8, w8
-; CHECK-NEXT: mov w21, w8
-; CHECK-NEXT: .LBB0_5: // %bb7
+; CHECK-NEXT: mov x22, x8
+; CHECK-NEXT: .LBB0_8: // %bb10
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: strb w20, [x19]
-; CHECK-NEXT: cbnz x21, .LBB0_5
-; CHECK-NEXT: // %bb.6: // %bb8
-; CHECK-NEXT: // in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: bl quux
-; CHECK-NEXT: b .LBB0_5
+; CHECK-NEXT: cbnz x21, .LBB0_8
+; CHECK-NEXT: // %bb.9: // %bb12
+; CHECK-NEXT: // in Loop: Header=BB0_8 Depth=1
+; CHECK-NEXT: bl snork
+; CHECK-NEXT: cbnz x22, .LBB0_8
+; CHECK-NEXT: .LBB0_10:
+; CHECK-NEXT: mov w0, #0 // =0x0
+; CHECK-NEXT: .LBB0_11:
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK-NEXT: ret
bb:
- br i1 %arg1, label %bb6, label %bb4
-
-bb4:
- %load = load ptr, ptr null, align 8
- br i1 %arg2, label %bb6, label %bb5
+ br i1 %arg2, label %bb14, label %bb5
bb5:
- %call = call i32 @zot() #0
- %zext = zext i32 %arg to i64
- br i1 %arg1, label %bb6, label %bb7
+ %load = load ptr, ptr null, align 8
+ br i1 %arg4, label %bb14, label %bb6
bb6:
- ret i32 0
+ %call = call i32 @baz() #1
+ %or = or i1 false, true
+ br i1 %or, label %bb14, label %bb7
bb7:
- store i8 0, ptr %arg3, align 1
- %icmp = icmp eq i64 %zext, 0
- br i1 %icmp, label %bb8, label %bb7
+ %icmp = icmp eq i32 0, 0
+ %zext = zext i32 %arg to i64
+ br i1 %icmp, label %bb14, label %bb8
bb8:
- call void @quux()
- br label %bb7
+ %zext9 = zext i32 %arg1 to i64
+ br label %bb10
+
+bb10:
+ store i8 0, ptr %arg3, align 1
+ %icmp11 = icmp eq i64 %zext, 0
+ br i1 %icmp11, label %bb12, label %bb10
+
+bb12:
+ call void @snork()
+ %icmp13 = icmp eq i64 0, %zext9
+ br i1 %icmp13, label %bb14, label %bb10
+
+bb14:
+ %phi = phi i32 [ 0, %bb6 ], [ 0, %bb7 ], [ 0, %bb12 ], [ 1, %bb ], [ 0, %bb5 ]
+ ret i32 %phi
}
-declare i32 @zot()
+declare i32 @baz()
-declare void @quux()
+declare void @snork()
-attributes #0 = { returns_twice }
+attributes #0 = { "target-cpu"="apple-m1" }
+attributes #1 = { returns_twice }
|
davemgreen
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for doing this - LGTM. You even managed to find two examples of it going wrong.
(I think the reason we see it being incorrect here and not in other cases it due to mcpu=apple using +zcm-gpr64 but not +zcm-gpr32, so the grp32 COPYs prefer to get lowered to a mov x, not a mov w as we would usually use. It means the top bits are not cleared by other movs).
As pointed out in post-commit discussion on #167336 #167336 (comment), although the test case succeeds in showing a codegen difference now the faulty MachineCopyPropagation logic was removed, the example was reduced so much that it actually would have been legal to remove the seemingly redundant mov.
This is a re-reduction of that test case which should now demonstrate a mov that can't safely be removed (mov w9, w9) because the upper bits no longer being zeroed may alter the program logic.