From 6417c988efef63cdb08ddf69e21b820e3913751c Mon Sep 17 00:00:00 2001 From: Marius Kamp Date: Sat, 21 Dec 2024 07:57:29 +0100 Subject: [PATCH 1/2] [AArch64] Add Tests for CSEL with Common Subexpression after Reassociation; NFC --- llvm/test/CodeGen/AArch64/csel-cmp-cse.ll | 799 ++++++++++++++++++++++ 1 file changed, 799 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/csel-cmp-cse.ll diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll new file mode 100644 index 0000000000000..3224cf5638a0e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll @@ -0,0 +1,799 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s + +declare void @use_i1(i1 %x) +declare void @use_i32(i32 %x) + +; Based on the IR generated for the `last` method of the type `slice` in Rust +define ptr @test_last_elem_from_ptr(ptr noundef readnone %x0, i64 noundef %x1) { +; CHECK-LABEL: test_last_elem_from_ptr: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, x1 +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: sub x8, x8, #1 +; CHECK-NEXT: csel x0, xzr, x8, eq +; CHECK-NEXT: ret + %cmp = icmp eq i64 %x1, 0 + %add.ptr = getelementptr inbounds nuw i8, ptr %x0, i64 %x1 + %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 -1 + %retval.0 = select i1 %cmp, ptr null, ptr %add.ptr1 + ret ptr %retval.0 +} + +define i32 @test_eq0_sub_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq0_sub_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 0 + %add = add nuw i32 %x0, %x1 + %sub = sub i32 %add, 1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_eq7_sub_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq7_sub_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #7 +; CHECK-NEXT: sub w8, w8, #7 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 7 + %add = add nuw i32 %x0, %x1 + %sub = sub i32 %add, 7 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_ule7_sub7_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ule7_sub7_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #8 +; CHECK-NEXT: sub w8, w8, #7 +; CHECK-NEXT: csel w0, wzr, w8, lo +; CHECK-NEXT: ret + %cmp = icmp ule i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 7 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_ule7_sub8_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ule7_sub8_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #8 +; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, lo +; CHECK-NEXT: ret + %cmp = icmp ule i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_ule0_sub1_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ule0_sub1_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %cmp = icmp ule i32 %x1, 0 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_ultminus2_subminus2_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: 
test_ultminus2_subminus2_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmn w1, #2 +; CHECK-NEXT: add w8, w8, #2 +; CHECK-NEXT: csel w0, wzr, w8, lo +; CHECK-NEXT: ret + %cmp = icmp ult i32 %x1, -2 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, -2 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_ultminus2_subminus3_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ultminus2_subminus3_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmn w1, #2 +; CHECK-NEXT: add w8, w8, #3 +; CHECK-NEXT: csel w0, wzr, w8, lo +; CHECK-NEXT: ret + %cmp = icmp ult i32 %x1, -2 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, -3 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_ne0_sub_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ne0_sub_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: csel w0, w8, wzr, ne +; CHECK-NEXT: ret + %cmp = icmp ne i32 %x1, 0 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 1 + %ret = select i1 %cmp, i32 %sub, i32 0 + ret i32 %ret +} + +define i32 @test_ne7_sub_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ne7_sub_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #7 +; CHECK-NEXT: sub w8, w8, #7 +; CHECK-NEXT: csel w0, w8, wzr, ne +; CHECK-NEXT: ret + %cmp = icmp ne i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 7 + %ret = select i1 %cmp, i32 %sub, i32 0 + ret i32 %ret +} + +define i32 @test_ultminus1_sub_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ultminus1_sub_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmn w1, #1 +; CHECK-NEXT: csinc w0, wzr, w8, ne +; CHECK-NEXT: ret + %cmp = icmp ult i32 %x1, -1 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, -1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_ugt7_sub7_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ugt7_sub7_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #7 +; CHECK-NEXT: sub w8, w8, #7 +; CHECK-NEXT: csel w0, wzr, w8, hi +; CHECK-NEXT: ret + %cmp = icmp ugt i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 7 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_ugt7_sub8_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ugt7_sub8_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #7 +; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, hi +; CHECK-NEXT: ret + %cmp = icmp ugt i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_sle7_sub7_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sle7_sub7_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #8 +; CHECK-NEXT: sub w8, w8, #7 +; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: ret + %cmp = icmp sle i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 7 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_sle7_sub8_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sle7_sub8_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #8 +; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: ret + %cmp = icmp sle i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_slt8_sub8_add_i32(i32 
%x0, i32 %x1) { +; CHECK-LABEL: test_slt8_sub8_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #8 +; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: ret + %cmp = icmp slt i32 %x1, 8 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_slt8_sub7_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_slt8_sub7_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #8 +; CHECK-NEXT: sub w8, w8, #7 +; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: ret + %cmp = icmp slt i32 %x1, 8 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 7 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_sltminus8_subminus8_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sltminus8_subminus8_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmn w1, #8 +; CHECK-NEXT: add w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: ret + %cmp = icmp slt i32 %x1, -8 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, -8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_sgtminus8_subminus8_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sgtminus8_subminus8_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmn w1, #8 +; CHECK-NEXT: add w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, gt +; CHECK-NEXT: ret + %cmp = icmp sgt i32 %x1, -8 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, -8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_sgtminus8_subminus7_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sgtminus8_subminus7_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmn w1, #8 +; CHECK-NEXT: add w8, w8, #7 +; CHECK-NEXT: csel w0, wzr, w8, gt +; CHECK-NEXT: ret + %cmp = icmp sgt i32 %x1, -8 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, -7 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_eq0_sub_addcomm_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq0_sub_addcomm_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w1, w0 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 0 + %add = add i32 %x1, %x0 + %sub = sub i32 %add, 1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_eq0_subcomm_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq0_subcomm_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 0 + %add = add i32 %x0, %x1 + %sub = add i32 -1, %add + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq0_multi_use_sub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: sub w20, w8, #1 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: bl use_i32 +; CHECK-NEXT: cmp w19, #0 +; CHECK-NEXT: csel w0, wzr, w20, eq +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 0 + %add = add nuw i32 %x0, %x1 + %sub = sub i32 %add, 1 + tail call void @use_i32(i32 %sub) + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_eq0_multi_use_cmp_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq0_multi_use_cmp_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: csel w19, wzr, w8, eq +; CHECK-NEXT: bl use_i1 +; CHECK-NEXT: mov w0, w19 +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 0 + tail call void @use_i1(i1 %cmp) + %add = add nuw i32 %x0, %x1 + %sub = sub i32 %add, 1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_eq0_multi_use_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq0_multi_use_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: add w20, w0, w1 +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: bl use_i32 +; CHECK-NEXT: sub w8, w20, #1 +; CHECK-NEXT: cmp w19, #0 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 0 + %add = add nuw i32 %x0, %x1 + tail call void @use_i32(i32 %add) + %sub = sub i32 %add, 1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_eq1_sub_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq1_sub_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #1 +; CHECK-NEXT: sub w8, w8, #2 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 1 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 2 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_ugtsmax_sub_add_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ugtsmax_sub_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-NEXT: add w9, w0, w1 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: add w8, w9, w8 +; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: ret + %cmp = icmp ugt i32 %x1, 2147483647 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 2147483648 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) { +; CHECK-LABEL: test_ult_nonconst_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: 
cmp w1, w2 +; CHECK-NEXT: sub w8, w8, w2 +; CHECK-NEXT: csel w0, wzr, w8, lo +; CHECK-NEXT: ret + %cmp = icmp ult i32 %x1, %x2 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, %x2 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_eq_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: sub w8, w8, #2 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 0 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 2 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_ne_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ne_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: sub w8, w8, #2 +; CHECK-NEXT: csel w0, w8, wzr, ne +; CHECK-NEXT: ret + %cmp = icmp ne i32 %x1, 0 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 2 + %ret = select i1 %cmp, i32 %sub, i32 0 + ret i32 %ret +} + +; Negative test +define i32 @test_ult7_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ult7_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #7 +; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, lo +; CHECK-NEXT: ret + %cmp = icmp ult i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_ule7_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ule7_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #8 +; CHECK-NEXT: sub w8, w8, #6 +; CHECK-NEXT: csel w0, wzr, w8, lo +; CHECK-NEXT: ret + %cmp = icmp ule i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 6 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_ugt7_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ugt7_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #7 +; CHECK-NEXT: sub w8, w8, #6 +; CHECK-NEXT: csel w0, wzr, w8, hi +; CHECK-NEXT: ret + %cmp = icmp ugt i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 6 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_uge7_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_uge7_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #6 +; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, hi +; CHECK-NEXT: ret + %cmp = icmp uge i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_slt7_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_slt7_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #7 +; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: ret + %cmp = icmp slt i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_sle7_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sle7_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #8 +; CHECK-NEXT: sub w8, w8, #6 +; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: ret + %cmp = icmp sle i32 %x1, 7 + %add = add i32 
%x0, %x1 + %sub = sub i32 %add, 6 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_sgt7_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sgt7_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #7 +; CHECK-NEXT: sub w8, w8, #6 +; CHECK-NEXT: csel w0, wzr, w8, gt +; CHECK-NEXT: ret + %cmp = icmp sgt i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 6 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_sge7_const_mismatch_i32(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sge7_const_mismatch_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w1, #6 +; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: csel w0, wzr, w8, gt +; CHECK-NEXT: ret + %cmp = icmp sge i32 %x1, 7 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 8 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_unrelated_add_i32(i32 %x0, i32 %x1, i32 %x2) { +; CHECK-LABEL: test_unrelated_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w2 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %cmp = icmp eq i32 %x1, 0 + %add = add nuw i32 %x0, %x2 + %sub = sub i32 %add, 1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i16 @test_eq0_sub_add_i16(i16 %x0, i16 %x1) { +; CHECK-LABEL: test_eq0_sub_add_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: tst w1, #0xffff +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %cmp = icmp eq i16 %x1, 0 + %add = add nuw i16 %x0, %x1 + %sub = sub i16 %add, 1 + %ret = select i1 %cmp, i16 0, i16 %sub + ret i16 %ret +} + +; Negative test +define i32 @test_ule_unsigned_overflow(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ule_unsigned_overflow: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %cmp = icmp ule i32 %x1, -1 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 0 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_ugt_unsigned_overflow(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ugt_unsigned_overflow: +; CHECK: // %bb.0: +; CHECK-NEXT: add w0, w0, w1 +; CHECK-NEXT: ret + %cmp = icmp ugt i32 %x1, -1 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 0 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_ult_unsigned_overflow(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_ult_unsigned_overflow: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: add w0, w8, #1 +; CHECK-NEXT: ret + %cmp = icmp ult i32 %x1, 0 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, -1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_uge_unsigned_overflow(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_uge_unsigned_overflow: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %cmp = icmp uge i32 %x1, 0 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, -1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_slt_signed_overflow(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_slt_signed_overflow: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-2147483647 // =0x80000001 +; CHECK-NEXT: add w9, w0, w1 +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret + %cmp = icmp slt i32 %x1, 2147483648 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 2147483647 + %ret = select i1 %cmp, i32 0, i32 %sub + 
ret i32 %ret +} + +; Negative test +define i32 @test_sle_signed_overflow(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sle_signed_overflow: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %cmp = icmp sle i32 %x1, 2147483647 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 2147483648 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_sgt_signed_overflow(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sgt_signed_overflow: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-NEXT: add w9, w0, w1 +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret + %cmp = icmp sgt i32 %x1, 2147483647 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 2147483648 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_sge_signed_overflow(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_sge_signed_overflow: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %cmp = icmp sge i32 %x1, 2147483648 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 2147483647 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_eq0_bitwidth_mismatch(i32 %x0, i32 %x1) { +; CHECK-LABEL: test_eq0_bitwidth_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: tst w1, #0xffff +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %x1t = trunc i32 %x1 to i16 + %cmp = icmp eq i16 %x1t, 0 + %add = add i32 %x0, %x1 + %sub = sub i32 %add, 1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} + +; Negative test +define i32 @test_eq0_bitwidth_mismatch_2(i32 %x0, i64 %x1) { +; CHECK-LABEL: test_eq0_bitwidth_mismatch_2: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: ret + %x1t = trunc i64 %x1 to i32 + %cmp = icmp eq i64 %x1, 0 + %add = add i32 %x0, %x1t + %sub = sub i32 %add, 1 + %ret = select i1 %cmp, i32 0, i32 %sub + ret i32 %ret +} From 43f2da4ebe854378ed24e82b03e4fd965d885606 Mon Sep 17 00:00:00 2001 From: Marius Kamp Date: Mon, 23 Dec 2024 15:16:46 +0100 Subject: [PATCH 2/2] [AArch64] Eliminate Common Subexpression of CSEL by Reassociation If we have a CSEL instruction that depends on the flags set by a (SUBS x c) instruction and the true and/or false expression is (add (add x y) -c), we can reassociate the latter expression to (add (SUBS x c) y) and save one instruction. Proof for the basic transformation: https://alive2.llvm.org/ce/z/-337Pb We can extend this transformation for slightly different constants. For example, if we have (add (add x y) -(c-1)) and a comparison x <u c, we can change the comparison to x <=u c-1, after which the same reassociation applies. Analogous condition-code adjustments work for the other comparison types, provided that adjusting the constant by one does not cause signed/unsigned wrap. --- diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) { + SDValue SubsNode = N->getOperand(3); + if (SubsNode.getOpcode() != AArch64ISD::SUBS || !SubsNode.hasOneUse()) + return SDValue(); + auto *CmpOpConst = dyn_cast<ConstantSDNode>(SubsNode.getOperand(1)); + if (!CmpOpConst) + return SDValue(); + + SDValue CmpOpOther = SubsNode.getOperand(0); + EVT VT = N->getValueType(0); + + // Get the operand that can be reassociated with the SUBS instruction. 
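+ // A reassociation candidate has the form (add (add CmpOpOther Y) -NewCmpConst), where CmpOpOther is the value compared by the SUBS node. + // The inner ADD must have no other users because the reassociated expression (add (SUBS CmpOpOther NewCmpConst) Y) no longer computes the intermediate sum CmpOpOther+Y. + // On success, the lambda returns Y; an empty SDValue means Op is not a candidate.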
+ auto GetReassociationOp = [&](SDValue Op, APInt ExpectedConst) { + if (Op.getOpcode() != ISD::ADD) + return SDValue(); + if (Op.getOperand(0).getOpcode() != ISD::ADD || + !Op.getOperand(0).hasOneUse()) + return SDValue(); + SDValue X = Op.getOperand(0).getOperand(0); + SDValue Y = Op.getOperand(0).getOperand(1); + if (X != CmpOpOther) + std::swap(X, Y); + if (X != CmpOpOther) + return SDValue(); + auto *AddOpConst = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!AddOpConst || AddOpConst->getAPIntValue() != ExpectedConst) + return SDValue(); + return Y; + }; + + // Try the reassociation using the given constant and condition code. + auto Fold = [&](APInt NewCmpConst, AArch64CC::CondCode NewCC) { + APInt ExpectedConst = -NewCmpConst; + SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedConst); + SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedConst); + if (!TReassocOp && !FReassocOp) + return SDValue(); + + SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode), + DAG.getVTList(VT, MVT_CC), CmpOpOther, + DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst), + CmpOpConst->getValueType(0))); + + auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) { + if (!ReassocOp) + return N->getOperand(OpNum); + SDValue Res = DAG.getNode(ISD::ADD, SDLoc(N->getOperand(OpNum)), VT, + NewCmp.getValue(0), ReassocOp); + DAG.ReplaceAllUsesWith(N->getOperand(OpNum), Res); + return Res; + }; + + SDValue TValReassoc = Reassociate(TReassocOp, 0); + SDValue FValReassoc = Reassociate(FReassocOp, 1); + return DAG.getNode(AArch64ISD::CSEL, SDLoc(N), VT, TValReassoc, FValReassoc, + DAG.getConstant(NewCC, SDLoc(N->getOperand(2)), MVT_CC), + NewCmp.getValue(1)); + }; + + auto CC = static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2)); + + // First, try to eliminate the compare instruction by searching for a + // subtraction with the same constant. + if (SDValue R = Fold(CmpOpConst->getAPIntValue(), CC)) + return R; + + if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero()) + return SDValue(); + + // Next, search for a subtraction with a slightly different constant. By + // adjusting the condition code, we can still eliminate the compare + // instruction. Adjusting the constant is only valid if it does not result + // in signed/unsigned wrap for signed/unsigned comparisons, respectively. + // Since such comparisons are trivially true/false, we should not encounter + // them here but check for them nevertheless to be on the safe side. + auto CheckedFold = [&](bool Check, APInt NewCmpConst, + AArch64CC::CondCode NewCC) { + return Check ? 
Fold(NewCmpConst, NewCC) : SDValue(); + }; + switch (CC) { + case AArch64CC::EQ: + case AArch64CC::LS: + return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(), + CmpOpConst->getAPIntValue() + 1, AArch64CC::LO); + case AArch64CC::NE: + case AArch64CC::HI: + return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(), + CmpOpConst->getAPIntValue() + 1, AArch64CC::HS); + case AArch64CC::LO: + return CheckedFold(!CmpOpConst->getAPIntValue().isZero(), + CmpOpConst->getAPIntValue() - 1, AArch64CC::LS); + case AArch64CC::HS: + return CheckedFold(!CmpOpConst->getAPIntValue().isZero(), + CmpOpConst->getAPIntValue() - 1, AArch64CC::HI); + case AArch64CC::LT: + return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(), + CmpOpConst->getAPIntValue() - 1, AArch64CC::LE); + case AArch64CC::LE: + return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(), + CmpOpConst->getAPIntValue() + 1, AArch64CC::LT); + case AArch64CC::GT: + return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(), + CmpOpConst->getAPIntValue() + 1, AArch64CC::GE); + case AArch64CC::GE: + return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(), + CmpOpConst->getAPIntValue() - 1, AArch64CC::GT); + default: + return SDValue(); + } +} + // Optimize CSEL instructions static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -24849,6 +24965,11 @@ static SDValue performCSELCombine(SDNode *N, if (SDValue R = foldCSELOfCSEL(N, DAG)) return R; + // Try to reassociate the true/false expressions so that we can do CSE with + // a SUBS instruction used to perform the comparison. + if (SDValue R = reassociateCSELOperandsForCSE(N, DAG)) + return R; + // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1 if (SDValue Folded = foldCSELofCTTZ(N, DAG)) diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll index 3224cf5638a0e..d8904cc6e35e3 100644 --- a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll +++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll @@ -8,10 +8,9 @@ declare void @use_i32(i32 %x) define ptr @test_last_elem_from_ptr(ptr noundef readnone %x0, i64 noundef %x1) { ; CHECK-LABEL: test_last_elem_from_ptr: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x1 -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: csel x0, xzr, x8, eq +; CHECK-NEXT: subs x8, x1, #1 +; CHECK-NEXT: add x8, x8, x0 +; CHECK-NEXT: csel x0, xzr, x8, lo ; CHECK-NEXT: ret %cmp = icmp eq i64 %x1, 0 %add.ptr = getelementptr inbounds nuw i8, ptr %x0, i64 %x1 @@ -23,10 +22,9 @@ define ptr @test_last_elem_from_ptr(ptr noundef readnone %x0, i64 noundef %x1) { define i32 @test_eq0_sub_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_eq0_sub_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #0 -; CHECK-NEXT: sub w8, w8, #1 -; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: subs w8, w1, #1 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, lo ; CHECK-NEXT: ret %cmp = icmp eq i32 %x1, 0 %add = add nuw i32 %x0, %x1 @@ -38,9 +36,8 @@ define i32 @test_eq0_sub_add_i32(i32 %x0, i32 %x1) { define i32 @test_eq7_sub_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_eq7_sub_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #7 -; CHECK-NEXT: sub w8, w8, #7 +; CHECK-NEXT: subs w8, w1, #7 +; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: csel w0, wzr, w8, eq ; CHECK-NEXT: ret %cmp = icmp eq i32 %x1, 7 @@ -53,10 +50,9 @@ define i32 @test_eq7_sub_add_i32(i32 %x0, 
i32 %x1) { define i32 @test_ule7_sub7_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ule7_sub7_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #8 -; CHECK-NEXT: sub w8, w8, #7 -; CHECK-NEXT: csel w0, wzr, w8, lo +; CHECK-NEXT: subs w8, w1, #7 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, ls ; CHECK-NEXT: ret %cmp = icmp ule i32 %x1, 7 %add = add i32 %x0, %x1 @@ -68,9 +64,8 @@ define i32 @test_ule7_sub7_add_i32(i32 %x0, i32 %x1) { define i32 @test_ule7_sub8_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ule7_sub8_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #8 -; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: subs w8, w1, #8 +; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: csel w0, wzr, w8, lo ; CHECK-NEXT: ret %cmp = icmp ule i32 %x1, 7 @@ -83,10 +78,9 @@ define i32 @test_ule7_sub8_add_i32(i32 %x0, i32 %x1) { define i32 @test_ule0_sub1_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ule0_sub1_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #0 -; CHECK-NEXT: sub w8, w8, #1 -; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: subs w8, w1, #1 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, lo ; CHECK-NEXT: ret %cmp = icmp ule i32 %x1, 0 %add = add i32 %x0, %x1 @@ -98,9 +92,8 @@ define i32 @test_ule0_sub1_add_i32(i32 %x0, i32 %x1) { define i32 @test_ultminus2_subminus2_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ultminus2_subminus2_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmn w1, #2 -; CHECK-NEXT: add w8, w8, #2 +; CHECK-NEXT: adds w8, w1, #2 +; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: csel w0, wzr, w8, lo ; CHECK-NEXT: ret %cmp = icmp ult i32 %x1, -2 @@ -113,10 +106,9 @@ define i32 @test_ultminus2_subminus2_add_i32(i32 %x0, i32 %x1) { define i32 @test_ultminus2_subminus3_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ultminus2_subminus3_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmn w1, #2 -; CHECK-NEXT: add w8, w8, #3 -; CHECK-NEXT: csel w0, wzr, w8, lo +; CHECK-NEXT: adds w8, w1, #3 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, ls ; CHECK-NEXT: ret %cmp = icmp ult i32 %x1, -2 %add = add i32 %x0, %x1 @@ -128,10 +120,9 @@ define i32 @test_ultminus2_subminus3_add_i32(i32 %x0, i32 %x1) { define i32 @test_ne0_sub_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ne0_sub_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #0 -; CHECK-NEXT: sub w8, w8, #1 -; CHECK-NEXT: csel w0, w8, wzr, ne +; CHECK-NEXT: subs w8, w1, #1 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, w8, wzr, hs ; CHECK-NEXT: ret %cmp = icmp ne i32 %x1, 0 %add = add i32 %x0, %x1 @@ -143,9 +134,8 @@ define i32 @test_ne0_sub_add_i32(i32 %x0, i32 %x1) { define i32 @test_ne7_sub_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ne7_sub_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #7 -; CHECK-NEXT: sub w8, w8, #7 +; CHECK-NEXT: subs w8, w1, #7 +; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: csel w0, w8, wzr, ne ; CHECK-NEXT: ret %cmp = icmp ne i32 %x1, 7 @@ -158,9 +148,9 @@ define i32 @test_ne7_sub_add_i32(i32 %x0, i32 %x1) { define i32 @test_ultminus1_sub_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ultminus1_sub_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmn w1, #1 -; CHECK-NEXT: csinc w0, wzr, w8, ne +; CHECK-NEXT: adds w8, w1, #1 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, ne ; CHECK-NEXT: ret %cmp = icmp ult i32 
%x1, -1 %add = add i32 %x0, %x1 @@ -172,9 +162,8 @@ define i32 @test_ultminus1_sub_add_i32(i32 %x0, i32 %x1) { define i32 @test_ugt7_sub7_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ugt7_sub7_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #7 -; CHECK-NEXT: sub w8, w8, #7 +; CHECK-NEXT: subs w8, w1, #7 +; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: csel w0, wzr, w8, hi ; CHECK-NEXT: ret %cmp = icmp ugt i32 %x1, 7 @@ -187,10 +176,9 @@ define i32 @test_ugt7_sub7_add_i32(i32 %x0, i32 %x1) { define i32 @test_ugt7_sub8_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_ugt7_sub8_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #7 -; CHECK-NEXT: sub w8, w8, #8 -; CHECK-NEXT: csel w0, wzr, w8, hi +; CHECK-NEXT: subs w8, w1, #8 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, hs ; CHECK-NEXT: ret %cmp = icmp ugt i32 %x1, 7 %add = add i32 %x0, %x1 @@ -202,10 +190,9 @@ define i32 @test_ugt7_sub8_add_i32(i32 %x0, i32 %x1) { define i32 @test_sle7_sub7_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_sle7_sub7_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #8 -; CHECK-NEXT: sub w8, w8, #7 -; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: subs w8, w1, #7 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, le ; CHECK-NEXT: ret %cmp = icmp sle i32 %x1, 7 %add = add i32 %x0, %x1 @@ -217,9 +204,8 @@ define i32 @test_sle7_sub7_add_i32(i32 %x0, i32 %x1) { define i32 @test_sle7_sub8_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_sle7_sub8_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #8 -; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: subs w8, w1, #8 +; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: csel w0, wzr, w8, lt ; CHECK-NEXT: ret %cmp = icmp sle i32 %x1, 7 @@ -232,9 +218,8 @@ define i32 @test_sle7_sub8_add_i32(i32 %x0, i32 %x1) { define i32 @test_slt8_sub8_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_slt8_sub8_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #8 -; CHECK-NEXT: sub w8, w8, #8 +; CHECK-NEXT: subs w8, w1, #8 +; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: csel w0, wzr, w8, lt ; CHECK-NEXT: ret %cmp = icmp slt i32 %x1, 8 @@ -247,10 +232,9 @@ define i32 @test_slt8_sub8_add_i32(i32 %x0, i32 %x1) { define i32 @test_slt8_sub7_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_slt8_sub7_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #8 -; CHECK-NEXT: sub w8, w8, #7 -; CHECK-NEXT: csel w0, wzr, w8, lt +; CHECK-NEXT: subs w8, w1, #7 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, le ; CHECK-NEXT: ret %cmp = icmp slt i32 %x1, 8 %add = add i32 %x0, %x1 @@ -262,9 +246,8 @@ define i32 @test_slt8_sub7_add_i32(i32 %x0, i32 %x1) { define i32 @test_sltminus8_subminus8_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_sltminus8_subminus8_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmn w1, #8 -; CHECK-NEXT: add w8, w8, #8 +; CHECK-NEXT: adds w8, w1, #8 +; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: csel w0, wzr, w8, lt ; CHECK-NEXT: ret %cmp = icmp slt i32 %x1, -8 @@ -277,9 +260,8 @@ define i32 @test_sltminus8_subminus8_add_i32(i32 %x0, i32 %x1) { define i32 @test_sgtminus8_subminus8_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_sgtminus8_subminus8_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmn w1, #8 -; CHECK-NEXT: add w8, w8, #8 +; CHECK-NEXT: adds w8, w1, #8 +; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: csel w0, wzr, w8, gt ; 
CHECK-NEXT: ret %cmp = icmp sgt i32 %x1, -8 @@ -292,10 +274,9 @@ define i32 @test_sgtminus8_subminus8_add_i32(i32 %x0, i32 %x1) { define i32 @test_sgtminus8_subminus7_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_sgtminus8_subminus7_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmn w1, #8 -; CHECK-NEXT: add w8, w8, #7 -; CHECK-NEXT: csel w0, wzr, w8, gt +; CHECK-NEXT: adds w8, w1, #7 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, ge ; CHECK-NEXT: ret %cmp = icmp sgt i32 %x1, -8 %add = add i32 %x0, %x1 @@ -307,10 +288,9 @@ define i32 @test_sgtminus8_subminus7_add_i32(i32 %x0, i32 %x1) { define i32 @test_eq0_sub_addcomm_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_eq0_sub_addcomm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w1, w0 -; CHECK-NEXT: cmp w1, #0 -; CHECK-NEXT: sub w8, w8, #1 -; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: subs w8, w1, #1 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, lo ; CHECK-NEXT: ret %cmp = icmp eq i32 %x1, 0 %add = add i32 %x1, %x0 @@ -322,10 +302,9 @@ define i32 @test_eq0_sub_addcomm_i32(i32 %x0, i32 %x1) { define i32 @test_eq0_subcomm_add_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_eq0_subcomm_add_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: cmp w1, #0 -; CHECK-NEXT: sub w8, w8, #1 -; CHECK-NEXT: csel w0, wzr, w8, eq +; CHECK-NEXT: subs w8, w1, #1 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: csel w0, wzr, w8, lo ; CHECK-NEXT: ret %cmp = icmp eq i32 %x1, 0 %add = add i32 %x0, %x1 @@ -337,21 +316,16 @@ define i32 @test_eq0_subcomm_add_i32(i32 %x0, i32 %x1) { define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) { ; CHECK-LABEL: test_eq0_multi_use_sub_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: mov w19, w1 -; CHECK-NEXT: sub w20, w8, #1 -; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: subs w8, w1, #1 +; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: csel w19, wzr, w0, lo ; CHECK-NEXT: bl use_i32 -; CHECK-NEXT: cmp w19, #0 -; CHECK-NEXT: csel w0, wzr, w20, eq -; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: mov w0, w19 +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %cmp = icmp eq i32 %x1, 0 %add = add nuw i32 %x0, %x1