-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64] Allow usubo and uaddo to happen for any legal type #162907
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
AZero13
wants to merge
1
commit into
llvm:main
Choose a base branch
from
AZero13:timely
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-aarch64
Author: AZero13 (AZero13)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/162907.diff
9 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index e472e7d565d9b..ab518be9178ef 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -329,9 +329,9 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool MathUsed) const override {
- // Using overflow ops for overflow checks only should beneficial on
- // AArch64.
- return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
+ if (VT.isVector())
+ return false;
+ return !isOperationExpand(Opcode, VT);
}
Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 269cbf03f32a0..606162ade272b 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -355,7 +355,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, hs
+; CHECK-NEXT: cneg x0, x8, hi
; CHECK-NEXT: ret
%cmp = icmp ult i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
index b58f6ba96a5b8..53d72bada8754 100644
--- a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
@@ -2,20 +2,18 @@
; RUN: llc -mtriple=aarch64-linux-gnu -O3 < %s | FileCheck %s
; This used to miscompile:
-; The 16-bit -1 should not become 32-bit -1 (sub w8, w8, #1).
@g = global i16 0, align 4
define i32 @srl_and() {
; CHECK-LABEL: srl_and:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:g
-; CHECK-NEXT: mov w9, #50
; CHECK-NEXT: ldr x8, [x8, :got_lo12:g]
; CHECK-NEXT: ldrh w8, [x8]
-; CHECK-NEXT: eor w8, w8, w9
-; CHECK-NEXT: mov w9, #65535
-; CHECK-NEXT: add w8, w8, w9
-; CHECK-NEXT: and w0, w8, w8, lsr #16
+; CHECK-NEXT: cmp w8, #50
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: cset w9, ne
+; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr @g, align 4
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
index 66fea3535b1ec..908da2ee61abc 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
@@ -75,10 +75,11 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: .LBB4_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxrb w8, [x0]
-; CHECK-NEXT: cmp w8, w1, uxtb
; CHECK-NEXT: sub w9, w8, #1
-; CHECK-NEXT: ccmp w8, #0, #4, ls
-; CHECK-NEXT: csel w9, w1, w9, eq
+; CHECK-NEXT: cmp w8, w1, uxtb
+; CHECK-NEXT: and w10, w9, #0xffffff00
+; CHECK-NEXT: ccmp w10, #0, #0, ls
+; CHECK-NEXT: csel w9, w1, w9, ne
; CHECK-NEXT: stlxrb w10, w9, [x0]
; CHECK-NEXT: cbnz w10, .LBB4_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -94,10 +95,11 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-NEXT: .LBB5_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxrh w8, [x0]
-; CHECK-NEXT: cmp w8, w1, uxth
; CHECK-NEXT: sub w9, w8, #1
-; CHECK-NEXT: ccmp w8, #0, #4, ls
-; CHECK-NEXT: csel w9, w1, w9, eq
+; CHECK-NEXT: cmp w8, w1, uxth
+; CHECK-NEXT: and w10, w9, #0xffff0000
+; CHECK-NEXT: ccmp w10, #0, #0, ls
+; CHECK-NEXT: csel w9, w1, w9, ne
; CHECK-NEXT: stlxrh w10, w9, [x0]
; CHECK-NEXT: cbnz w10, .LBB5_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -113,10 +115,12 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: .LBB6_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxr w8, [x0]
+; CHECK-NEXT: subs w9, w8, #1
+; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: cmp w8, w1
-; CHECK-NEXT: sub w9, w8, #1
-; CHECK-NEXT: ccmp w8, #0, #4, ls
-; CHECK-NEXT: csel w9, w1, w9, eq
+; CHECK-NEXT: csinc w10, w10, wzr, ls
+; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: csel w9, w1, w9, ne
; CHECK-NEXT: stlxr w10, w9, [x0]
; CHECK-NEXT: cbnz w10, .LBB6_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -133,10 +137,12 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: .LBB7_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxr x0, [x8]
+; CHECK-NEXT: subs x9, x0, #1
+; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: sub x9, x0, #1
-; CHECK-NEXT: ccmp x0, #0, #4, ls
-; CHECK-NEXT: csel x9, x1, x9, eq
+; CHECK-NEXT: csinc w10, w10, wzr, ls
+; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: csel x9, x1, x9, ne
; CHECK-NEXT: stlxr w10, x9, [x8]
; CHECK-NEXT: cbnz w10, .LBB7_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
diff --git a/llvm/test/CodeGen/AArch64/cgp-usubo.ll b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
index d307107fc07ee..e49e8e86561c7 100644
--- a/llvm/test/CodeGen/AArch64/cgp-usubo.ll
+++ b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
@@ -108,11 +108,9 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) nounwind {
define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) nounwind {
; CHECK-LABEL: usubo_eq_constant1_op1_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: sub w9, w0, #1
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: str w9, [x1]
-; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: subs w8, w0, #1
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%s = add i32 %x, -1
%ov = icmp eq i32 %x, 0
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index 3f4dd116d91f8..7917be5728591 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -192,12 +192,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
; CHECK-NEXT: mov w22, #2 ; =0x2
; CHECK-NEXT: LBB3_5: ; %for.cond
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cbz w22, LBB3_8
+; CHECK-NEXT: subs w22, w22, #1
+; CHECK-NEXT: b.lo LBB3_8
; CHECK-NEXT: ; %bb.6: ; %for.body
; CHECK-NEXT: ; in Loop: Header=BB3_5 Depth=1
-; CHECK-NEXT: sub w22, w22, #1
-; CHECK-NEXT: orr w9, w21, w20
; CHECK-NEXT: ldr w10, [x19, w22, sxtw #2]
+; CHECK-NEXT: orr w9, w21, w20
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: b.eq LBB3_5
; CHECK-NEXT: ; %bb.7: ; %if.then
@@ -238,12 +238,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
; OUTLINE-ATOMICS-NEXT: cset w8, eq
; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %for.cond
; OUTLINE-ATOMICS-NEXT: ; =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: cbz w22, LBB3_4
+; OUTLINE-ATOMICS-NEXT: subs w22, w22, #1
+; OUTLINE-ATOMICS-NEXT: b.lo LBB3_4
; OUTLINE-ATOMICS-NEXT: ; %bb.2: ; %for.body
; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_1 Depth=1
-; OUTLINE-ATOMICS-NEXT: sub w22, w22, #1
-; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20
; OUTLINE-ATOMICS-NEXT: ldr w10, [x19, w22, sxtw #2]
+; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20
; OUTLINE-ATOMICS-NEXT: cmp w9, w10
; OUTLINE-ATOMICS-NEXT: b.eq LBB3_1
; OUTLINE-ATOMICS-NEXT: ; %bb.3: ; %if.then
diff --git a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
index 1207eaa2612a3..f2c84006910c5 100644
--- a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
+++ b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
@@ -17,24 +17,22 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
; CHECK-ASM-NEXT: .cfi_remember_state
; CHECK-ASM-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-ASM-NEXT: sxtw x8, w0
+; CHECK-ASM-NEXT: mov w9, #10 // =0xa
; CHECK-ASM-NEXT: stp w1, w0, [sp, #8]
-; CHECK-ASM-NEXT: cmp x8, #10
-; CHECK-ASM-NEXT: b.hi .LBB0_5
+; CHECK-ASM-NEXT: subs x9, x9, x8
+; CHECK-ASM-NEXT: b.lo .LBB0_5
; CHECK-ASM-NEXT: // %bb.1: // %entry
-; CHECK-ASM-NEXT: mov w9, #10 // =0xa
-; CHECK-ASM-NEXT: sub x9, x9, x8
; CHECK-ASM-NEXT: cbz x9, .LBB0_5
; CHECK-ASM-NEXT: // %bb.2:
; CHECK-ASM-NEXT: ldrsw x9, [sp, #8]
+; CHECK-ASM-NEXT: mov w10, #10 // =0xa
+; CHECK-ASM-NEXT: subs x11, x10, x9
; CHECK-ASM-NEXT: adrp x10, .L_MergedGlobals
; CHECK-ASM-NEXT: add x10, x10, :lo12:.L_MergedGlobals
; CHECK-ASM-NEXT: strb wzr, [x10, x8]
-; CHECK-ASM-NEXT: cmp x9, #10
-; CHECK-ASM-NEXT: b.hi .LBB0_6
+; CHECK-ASM-NEXT: b.lo .LBB0_6
; CHECK-ASM-NEXT: // %bb.3:
-; CHECK-ASM-NEXT: mov w8, #10 // =0xa
-; CHECK-ASM-NEXT: sub x8, x8, x9
-; CHECK-ASM-NEXT: cbz x8, .LBB0_6
+; CHECK-ASM-NEXT: cbz x11, .LBB0_6
; CHECK-ASM-NEXT: // %bb.4:
; CHECK-ASM-NEXT: add x8, x10, x9
; CHECK-ASM-NEXT: strb wzr, [x8, #10]
diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll
index ecd48d6b7c65b..12044ebe20fa1 100644
--- a/llvm/test/CodeGen/AArch64/sat-add.ll
+++ b/llvm/test/CodeGen/AArch64/sat-add.ll
@@ -25,9 +25,9 @@ define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: add w8, w8, #42
-; CHECK-NEXT: tst w8, #0x100
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, #42
+; CHECK-NEXT: cmp w8, w9, uxtb
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i8 %x, 42
%c = icmp ugt i8 %x, %a
@@ -68,9 +68,9 @@ define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, #42
-; CHECK-NEXT: tst w8, #0x10000
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, #42
+; CHECK-NEXT: cmp w8, w9, uxth
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i16 %x, 42
%c = icmp ugt i16 %x, %a
@@ -188,9 +188,9 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: add w8, w8, w1, uxtb
-; CHECK-NEXT: tst w8, #0x100
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxtb
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i8 %x, %y
%c = icmp ugt i8 %x, %a
@@ -201,11 +201,11 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xff
-; CHECK-NEXT: add w9, w0, w1
-; CHECK-NEXT: add w8, w8, w0, uxtb
-; CHECK-NEXT: tst w8, #0x100
-; CHECK-NEXT: csinv w0, w9, wzr, eq
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: add w10, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxtb
+; CHECK-NEXT: csinv w0, w10, wzr, ls
; CHECK-NEXT: ret
%noty = xor i8 %y, -1
%a = add i8 %x, %y
@@ -234,9 +234,9 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, w1, uxth
-; CHECK-NEXT: tst w8, #0x10000
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxth
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i16 %x, %y
%c = icmp ugt i16 %x, %a
@@ -247,11 +247,11 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: add w9, w0, w1
-; CHECK-NEXT: add w8, w8, w0, uxth
-; CHECK-NEXT: tst w8, #0x10000
-; CHECK-NEXT: csinv w0, w9, wzr, eq
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: add w10, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxth
+; CHECK-NEXT: csinv w0, w10, wzr, ls
; CHECK-NEXT: ret
%noty = xor i16 %y, -1
%a = add i16 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
index 7c80f9320faec..0720a7f72bd8c 100644
--- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -313,9 +313,9 @@ define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_bad_i8_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, #128
-; CHECK-NEXT: lsr w0, w8, #16
+; CHECK-NEXT: add w8, w0, #128
+; CHECK-NEXT: tst w8, #0xff80
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 128 ; 1U << (8-1)
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.