[SystemZ] Allow forming overflow op for i128 #153557
Conversation
@llvm/pr-subscribers-backend-systemz
Author: Nikita Popov (nikic)
Changes: Allow matching the i128 overflow pattern into UADDO, which then allows use of vaccq (a rough IR sketch of the matched pattern follows the diff).
Full diff: https://github.com/llvm/llvm-project/pull/153557.diff
3 Files Affected:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 1866962e17587..707887c59bd65 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -523,7 +523,7 @@ class SystemZTargetLowering : public TargetLowering {
bool MathUsed) const override {
// Form add and sub with overflow intrinsics regardless of any extra
// users of the math result.
- return VT == MVT::i32 || VT == MVT::i64;
+ return VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i128;
}
bool shouldConsiderGEPOffsetSplit() const override { return true; }
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll
index c088f6d862e7c..9271dc73e2725 100644
--- a/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll
@@ -363,10 +363,11 @@ define i128 @atomicrmw_uinc_wrap(ptr %src, i128 %b) {
define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) {
; CHECK-LABEL: atomicrmw_udec_wrap:
; CHECK: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI12_0
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v3, 0(%r3), 4
-; CHECK-NEXT: vgbm %v1, 65535
-; CHECK-NEXT: vgbm %v2, 0
+; CHECK-NEXT: vl %v1, 0(%r1), 3
+; CHECK-NEXT: vgbm %v2, 65535
; CHECK-NEXT: j .LBB12_2
; CHECK-NEXT: .LBB12_1: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
@@ -379,6 +380,9 @@ define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) {
; CHECK-NEXT: je .LBB12_8
; CHECK-NEXT: .LBB12_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vscbiq %v4, %v3, %v1
+; CHECK-NEXT: vlgvf %r0, %v4, 3
+; CHECK-NEXT: xilf %r0, 1
; CHECK-NEXT: veclg %v0, %v3
; CHECK-NEXT: jlh .LBB12_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
@@ -390,12 +394,11 @@ define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) {
; CHECK-NEXT: jl .LBB12_6
; CHECK-NEXT: # %bb.5: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
-; CHECK-NEXT: vaq %v4, %v3, %v1
+; CHECK-NEXT: vaq %v4, %v3, %v2
; CHECK-NEXT: .LBB12_6: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
-; CHECK-NEXT: vceqgs %v5, %v3, %v2
; CHECK-NEXT: vlr %v5, %v0
-; CHECK-NEXT: je .LBB12_1
+; CHECK-NEXT: cijlh %r0, 0, .LBB12_1
; CHECK-NEXT: # %bb.7: # %atomicrmw.start
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
; CHECK-NEXT: vlr %v5, %v4
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-65.ll b/llvm/test/CodeGen/SystemZ/int-cmp-65.ll
index b06ab3c1fa3d3..29c918a061e07 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-65.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-65.ll
@@ -85,3 +85,31 @@ define i128 @i128_addc_4(i128 %a, i128 %b) {
ret i128 %ext
}
+define i128 @i128_addc_xor(i128 %a, i128 %b) {
+; CHECK-LABEL: i128_addc_xor:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vaccq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %b.not = xor i128 %b, -1
+ %cmp = icmp ugt i128 %a, %b.not
+ %ext = zext i1 %cmp to i128
+ ret i128 %ext
+}
+
+define i128 @i128_addc_xor_inv(i128 %a, i128 %b) {
+; CHECK-LABEL: i128_addc_xor_inv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vno %v1, %v1, %v1
+; CHECK-NEXT: vscbiq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %b.not = xor i128 %b, -1
+ %cmp = icmp ule i128 %a, %b.not
+ %ext = zext i1 %cmp to i128
+ ret i128 %ext
+}
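For context, the pattern exercised by the new i128_addc_xor test is the standard carry-out idiom: a + b overflows iff a > ~b. With this change, CodeGenPrepare is allowed to rewrite that comparison into the overflow intrinsic for i128 as well. A rough before/after sketch in IR, with illustrative function names not taken from the patch:

  ; before CodeGenPrepare: the xor/icmp form from the test above
  define i1 @carry_out(i128 %a, i128 %b) {
    %not.b = xor i128 %b, -1
    %cmp = icmp ugt i128 %a, %not.b        ; a + b carries out iff a > ~b
    ret i1 %cmp
  }

  ; roughly what CodeGenPrepare now forms for i128
  define i1 @carry_out_uaddo(i128 %a, i128 %b) {
    %pair = call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %a, i128 %b)
    %ov = extractvalue { i128, i1 } %pair, 1
    ret i1 %ov
  }
  declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128)

On z13 and later, the overflow bit of the i128 uaddo can then be selected as vaccq, which is what the i128_addc_xor check lines show.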
Review comment on the changed hook:
     // Form add and sub with overflow intrinsics regardless of any extra
     // users of the math result.
-    return VT == MVT::i32 || VT == MVT::i64;
+    return VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i128;
That looks fine for z13 and higher, but on older machines i128 is not actually a legal type, and we have no instructions for 128-bit overflowing ops. The common-code shouldFormOverflowOp has a !isOperationExpand(Opcode, VT) check; I think we need that here for the i128 case.
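A minimal sketch of what that extra guard might look like in the SystemZ override, assuming the hook takes the same (Opcode, VT, MathUsed) parameters as the common TargetLowering hook; this spells out the suggestion rather than the patch as posted:

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Form add and sub with overflow intrinsics regardless of any extra
    // users of the math result.
    if (VT == MVT::i32 || VT == MVT::i64)
      return true;
    // Only form the i128 overflow op when it would not be expanded by the
    // generic legalizer (i.e. on z13 and newer, where i128 is legal).
    return VT == MVT::i128 && !isOperationExpand(Opcode, VT);
  }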
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I pushed additional check lines without z13. And here is the diff on top if we only do this for legal types: https://gist.github.com/nikic/6f84e1c7090a8c356f93e27fe0023a62
It looks like forming uaddo still results in a much better expansion even if we don't have legal i128. For usubo we get the same expansion either way (possibly there is a missed optimization opportunity in there?).
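One way to see why forming uaddo still expands well even without a legal i128: the carry out of an unsigned add can be recovered from the sum alone, so the overflow check folds into the addition instead of needing a separate wide compare against the inverted operand. A rough IR-level statement of that identity (illustrative, not literal backend output):

  %sum = add i128 %a, %b
  %ov  = icmp ult i128 %sum, %a    ; unsigned add overflowed iff the sum wrapped below %a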
Huh, OK. That looks good then. Thanks!
uweigand left a comment:
LGTM