Commit fd28257

[DAGCombiner] Fold umax/umin operations with vscale operands (llvm#154461)
If both operands of a umax/umin operation are vscale nodes, the operation can be constant folded.
1 parent 3c8652e commit fd28257
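
The reasoning behind the fold, as a minimal standalone sketch rather than the committed DAGCombiner code: an ISD::VSCALE node carries a constant multiplier, and both operands scale the same positive runtime vscale value, so the unsigned min/max is decided entirely by comparing the two multipliers. The enum, helper name, and multiplier values below are hypothetical.

#include <cassert>
#include <cstdint>

// Model of the fold: umax(vscale * C0, vscale * C1) keeps the operand with
// the larger multiplier, umin keeps the one with the smaller multiplier,
// because vscale is the same positive value on both sides.
enum class Op { UMax, UMin };

static bool keepFirstOperand(Op Opcode, uint64_t C0, uint64_t C1) {
  if (Opcode == Op::UMax)
    return C0 > C1;
  return C0 <= C1; // UMin: keep the smaller multiplier (first on ties).
}

int main() {
  // Check the identity against direct evaluation for a few vscale values.
  for (uint64_t VScale : {1, 2, 4, 8, 16}) {
    const uint64_t C0 = 8, C1 = 2;
    const uint64_t A = VScale * C0, B = VScale * C1;
    const uint64_t Max = keepFirstOperand(Op::UMax, C0, C1) ? A : B;
    const uint64_t Min = keepFirstOperand(Op::UMin, C0, C1) ? A : B;
    assert(Max == (A > B ? A : B));
    assert(Min == (A < B ? A : B));
  }
  return 0;
}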

8 files changed (+280, -183 lines)

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 10 additions & 0 deletions
@@ -6070,6 +6070,16 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
   if (N0 == N1)
     return N0;
 
+  // Fold operation with vscale operands.
+  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
+    uint64_t C0 = N0->getConstantOperandVal(0);
+    uint64_t C1 = N1->getConstantOperandVal(0);
+    if (Opcode == ISD::UMAX)
+      return C0 > C1 ? N0 : N1;
+    else if (Opcode == ISD::UMIN)
+      return C0 > C1 ? N1 : N0;
+  }
+
   // canonicalize constant to RHS
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
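
For context on the RISC-V test updates below: the bltu/mv sequence being removed implemented an unsigned-min clamp between two vscale multiples (the EVL, vscale * 8 in these tests, and a larger vscale-based limit for one half of the split VP operation), which the new fold now resolves at compile time. A rough model of that clamp, using assumed multipliers of 8 and 16 rather than values read out of the DAG:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // With EVL = vscale * 8 and VLMaxLo = vscale * 16, umin(EVL, VLMaxLo)
  // is always EVL, so the conditional move guarded by bltu is dead.
  for (uint64_t VScale : {1, 2, 4, 8}) {
    const uint64_t EVL = VScale * 8;
    const uint64_t VLMaxLo = VScale * 16;
    assert(std::min(EVL, VLMaxLo) == EVL);
  }
  return 0;
}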

llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll

Lines changed: 8 additions & 12 deletions
@@ -939,21 +939,17 @@ define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, p
 ; CHECK-RV32: # %bb.0:
 ; CHECK-RV32-NEXT: csrr a1, vlenb
 ; CHECK-RV32-NEXT: srli a2, a1, 3
-; CHECK-RV32-NEXT: sub a3, a2, a1
-; CHECK-RV32-NEXT: sltu a4, a2, a3
-; CHECK-RV32-NEXT: addi a4, a4, -1
-; CHECK-RV32-NEXT: and a3, a4, a3
-; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
-; CHECK-RV32-NEXT: bltu a2, a1, .LBB61_2
-; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a2, a1
-; CHECK-RV32-NEXT: .LBB61_2:
+; CHECK-RV32-NEXT: sub a1, a2, a1
+; CHECK-RV32-NEXT: sltu a3, a2, a1
+; CHECK-RV32-NEXT: addi a3, a3, -1
+; CHECK-RV32-NEXT: and a1, a3, a1
 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
+; CHECK-RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero
 ; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vadd.vv v16, v16, v24
-; CHECK-RV32-NEXT: vadd.vv v8, v8, v0
+; CHECK-RV32-NEXT: vadd.vv v8, v8, v24
+; CHECK-RV32-NEXT: vadd.vv v16, v16, v0
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:

llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll

Lines changed: 39 additions & 25 deletions
@@ -1393,32 +1393,46 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
 
 declare i32 @llvm.vscale.i32()
 
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
 define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) {
-; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a2, a0, 2
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: sltu a3, a0, a2
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: and a2, a3, a2
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB120_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB120_2:
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
-; CHECK-NEXT: ret
+; RV32-LABEL: vadd_vi_nxv32i32_evl_nx8:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vadd.vi v8, v8, -1, v0.t
+; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a1
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: sub a1, a0, a1
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vadd.vi v16, v16, -1, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vi_nxv32i32_evl_nx8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a2, a0, 2
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: vslidedown.vx v0, v0, a2
+; RV64-NEXT: sub a2, a0, a1
+; RV64-NEXT: sltu a3, a0, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
+; RV64-NEXT: bltu a0, a1, .LBB120_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB120_2:
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
+; RV64-NEXT: ret
 %evl = call i32 @llvm.vscale.i32()
 %evl0 = mul i32 %evl, 8
 %v = call <vscale x 32 x i32> @llvm.vp.add.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> splat (i32 -1), <vscale x 32 x i1> %m, i32 %evl0)

llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll

Lines changed: 39 additions & 25 deletions
@@ -1026,32 +1026,46 @@ define <vscale x 32 x i32> @vmax_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
 
 declare i32 @llvm.vscale.i32()
 
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
 define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
-; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
-; CHECK-NEXT: ret
+; RV32-LABEL: vmax_vx_nxv32i32_evl_nx8:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a2, a1, 2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmax.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a2
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: sub a2, a1, a2
+; RV32-NEXT: sltu a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmax.vx v16, v16, a0, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmax_vx_nxv32i32_evl_nx8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a3, a1, 2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: vslidedown.vx v0, v0, a3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vmax.vx v16, v16, a0, v0.t
+; RV64-NEXT: bltu a1, a2, .LBB82_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: .LBB82_2:
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vmax.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
 %evl = call i32 @llvm.vscale.i32()

llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll

Lines changed: 39 additions & 25 deletions
@@ -1025,32 +1025,46 @@ define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va,
 
 declare i32 @llvm.vscale.i32()
 
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
 define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
-; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
-; CHECK-NEXT: ret
+; RV32-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a2, a1, 2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a2
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: sub a2, a1, a2
+; RV32-NEXT: sltu a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a3, a1, 2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: vslidedown.vx v0, v0, a3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; RV64-NEXT: bltu a1, a2, .LBB82_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: .LBB82_2:
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
 %evl = call i32 @llvm.vscale.i32()

llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll

Lines changed: 39 additions & 25 deletions
@@ -1026,32 +1026,46 @@ define <vscale x 32 x i32> @vmin_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
 
 declare i32 @llvm.vscale.i32()
 
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
 define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
-; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
-; CHECK-NEXT: ret
+; RV32-LABEL: vmin_vx_nxv32i32_evl_nx8:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a2, a1, 2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmin.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a2
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: sub a2, a1, a2
+; RV32-NEXT: sltu a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmin.vx v16, v16, a0, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmin_vx_nxv32i32_evl_nx8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a3, a1, 2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: vslidedown.vx v0, v0, a3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vmin.vx v16, v16, a0, v0.t
+; RV64-NEXT: bltu a1, a2, .LBB82_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: .LBB82_2:
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vmin.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
 %evl = call i32 @llvm.vscale.i32()
