[DAGCombiner] Fold umax/umin operations with vscale operands #154461
umax/umin operations whose operands are both vscale nodes can be constant folded.

@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-risc-v

Author: Jim Lin (tclin914)

Changes: umax/umin operations whose operands are both vscale nodes can be constant folded.

Full diff: https://github.com/llvm/llvm-project/pull/154461.diff

8 Files Affected:
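As a minimal, hypothetical sketch (not part of this patch), the kind of pattern the new combine targets looks like the IR below. The function name and constants are made up for illustration, and the fold assumes the existing mul-by-constant combine has already turned each multiply into a single ISD::VSCALE node with a constant operand:

declare i64 @llvm.vscale.i64()
declare i64 @llvm.umin.i64(i64, i64)

define i64 @umin_of_vscales() {
  ; Each multiply becomes an ISD::VSCALE node with a constant operand
  ; (8 and 16) during SelectionDAG construction/combining.
  %vs = call i64 @llvm.vscale.i64()
  %a = mul i64 %vs, 8
  %b = mul i64 %vs, 16
  ; With this combine, the umin of the two VSCALE nodes folds to vscale * 8,
  ; since vscale * 8 is always less than vscale * 16.
  %r = call i64 @llvm.umin.i64(i64 %a, i64 %b)
  ret i64 %r
}

In the updated RISC-V tests the same fold fires on the EVL computations produced by VP legalization, which is why the bltu branches comparing vscale against vscale (flagged by the removed FIXME comments) disappear.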
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6eb8468e2573e..75d61d281224a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6070,6 +6070,16 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (N0 == N1)
return N0;
+ // fold operation with vscale operands.
+ if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
+ uint64_t C0 = N0->getConstantOperandVal(0);
+ uint64_t C1 = N1->getConstantOperandVal(0);
+ if (Opcode == ISD::UMAX)
+ return C0 > C1 ? N0 : N1;
+ else if (Opcode == ISD::UMIN)
+ return C0 > C1 ? N1 : N0;
+ }
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index ecd098edb30ae..b6aa4affbb10f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -939,21 +939,17 @@ define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, p
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: csrr a1, vlenb
; CHECK-RV32-NEXT: srli a2, a1, 3
-; CHECK-RV32-NEXT: sub a3, a2, a1
-; CHECK-RV32-NEXT: sltu a4, a2, a3
-; CHECK-RV32-NEXT: addi a4, a4, -1
-; CHECK-RV32-NEXT: and a3, a4, a3
-; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
-; CHECK-RV32-NEXT: bltu a2, a1, .LBB61_2
-; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a2, a1
-; CHECK-RV32-NEXT: .LBB61_2:
+; CHECK-RV32-NEXT: sub a1, a2, a1
+; CHECK-RV32-NEXT: sltu a3, a2, a1
+; CHECK-RV32-NEXT: addi a3, a3, -1
+; CHECK-RV32-NEXT: and a1, a3, a1
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
+; CHECK-RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero
; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vadd.vv v16, v16, v24
-; CHECK-RV32-NEXT: vadd.vv v8, v8, v0
+; CHECK-RV32-NEXT: vadd.vv v8, v8, v24
+; CHECK-RV32-NEXT: vadd.vv v16, v16, v0
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
index 6e9f4d45cd6b4..a29c78270d595 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -1393,31 +1393,22 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a2, a0, 2
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: sltu a3, a0, a2
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: and a2, a3, a2
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB120_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB120_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
; CHECK-NEXT: ret
%evl = call i32 @llvm.vscale.i32()
%evl0 = mul i32 %evl, 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
index ec95e81b8dd94..dd41a0d125a8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
@@ -1026,31 +1026,22 @@ define <vscale x 32 x i32> @vmax_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
index bd1a6c6e55c70..9490bd5a30d40 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
@@ -1025,31 +1025,22 @@ define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va,
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
index 6f1f8e1298321..8a750fbe8bf70 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
@@ -1026,31 +1026,22 @@ define <vscale x 32 x i32> @vmin_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
index 47e7b7d70c07e..8290cf23f1370 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -1025,31 +1025,22 @@ define <vscale x 32 x i32> @vminu_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va,
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vminu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vminu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index 522c83fd9fa99..29e8c735654c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -397,48 +397,26 @@ define <vscale x 32 x i32> @select_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32
declare i32 @llvm.vscale.i32()
-define <vscale x 32 x i32> @select_evl_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c) {
+define <vscale x 32 x i32> @select_evl_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c) vscale_range(2,1024) {
; CHECK-LABEL: select_evl_nxv32i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: srli a4, a1, 2
-; CHECK-NEXT: add a3, a0, a3
-; CHECK-NEXT: sub a5, a1, a2
-; CHECK-NEXT: vl8re32.v v24, (a3)
-; CHECK-NEXT: sltu a3, a1, a5
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: vl8re32.v v8, (a0)
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: and a3, a3, a5
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vl8re32.v v24, (a0)
+; CHECK-NEXT: sltu a0, a1, a2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
-; CHECK-NEXT: bltu a1, a2, .LBB28_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB28_2:
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
%evl = call i32 @llvm.vscale.i32()
%evl0 = mul i32 %evl, 8
lukel97 left a comment:
LGTM
Review suggestion on the added comment line:

- // fold operation with vscale operands.
+ // Fold operation with vscale operands.
lukel97 left a comment:
Thanks, new version LGTM