@@ -1025,32 +1025,46 @@ define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va,
1025
1025
1026
1026
; Declaration of the llvm.vscale.i32 intrinsic: takes no arguments and returns
; an i32. The test function below calls it to produce the %evl value.
declare i32 @llvm.vscale.i32 ()
1027
1027
1028
- ; FIXME: The upper half of the operation is doing nothing.
1029
- ; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
1030
-
1031
1028
define <vscale x 32 x i32 > @vmaxu_vx_nxv32i32_evl_nx8 (<vscale x 32 x i32 > %va , i32 %b , <vscale x 32 x i1 > %m ) {
1032
- ; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
1033
- ; CHECK: # %bb.0:
1034
- ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
1035
- ; CHECK-NEXT: vmv1r.v v24, v0
1036
- ; CHECK-NEXT: csrr a1, vlenb
1037
- ; CHECK-NEXT: srli a3, a1, 2
1038
- ; CHECK-NEXT: slli a2, a1, 1
1039
- ; CHECK-NEXT: vslidedown.vx v0, v0, a3
1040
- ; CHECK-NEXT: sub a3, a1, a2
1041
- ; CHECK-NEXT: sltu a4, a1, a3
1042
- ; CHECK-NEXT: addi a4, a4, -1
1043
- ; CHECK-NEXT: and a3, a4, a3
1044
- ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
1045
- ; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
1046
- ; CHECK-NEXT: bltu a1, a2, .LBB82_2
1047
- ; CHECK-NEXT: # %bb.1:
1048
- ; CHECK-NEXT: mv a1, a2
1049
- ; CHECK-NEXT: .LBB82_2:
1050
- ; CHECK-NEXT: vmv1r.v v0, v24
1051
- ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1052
- ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
1053
- ; CHECK-NEXT: ret
1029
+ ; RV32-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
1030
+ ; RV32: # %bb.0:
1031
+ ; RV32-NEXT: csrr a1, vlenb
1032
+ ; RV32-NEXT: srli a2, a1, 2
1033
+ ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1034
+ ; RV32-NEXT: vmaxu.vx v8, v8, a0, v0.t
1035
+ ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
1036
+ ; RV32-NEXT: vslidedown.vx v0, v0, a2
1037
+ ; RV32-NEXT: slli a2, a1, 1
1038
+ ; RV32-NEXT: sub a2, a1, a2
1039
+ ; RV32-NEXT: sltu a1, a1, a2
1040
+ ; RV32-NEXT: addi a1, a1, -1
1041
+ ; RV32-NEXT: and a1, a1, a2
1042
+ ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1043
+ ; RV32-NEXT: vmaxu.vx v16, v16, a0, v0.t
1044
+ ; RV32-NEXT: ret
1045
+ ;
1046
+ ; RV64-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
1047
+ ; RV64: # %bb.0:
1048
+ ; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
1049
+ ; RV64-NEXT: vmv1r.v v24, v0
1050
+ ; RV64-NEXT: csrr a1, vlenb
1051
+ ; RV64-NEXT: srli a3, a1, 2
1052
+ ; RV64-NEXT: slli a2, a1, 1
1053
+ ; RV64-NEXT: vslidedown.vx v0, v0, a3
1054
+ ; RV64-NEXT: sub a3, a1, a2
1055
+ ; RV64-NEXT: sltu a4, a1, a3
1056
+ ; RV64-NEXT: addi a4, a4, -1
1057
+ ; RV64-NEXT: and a3, a4, a3
1058
+ ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
1059
+ ; RV64-NEXT: vmaxu.vx v16, v16, a0, v0.t
1060
+ ; RV64-NEXT: bltu a1, a2, .LBB82_2
1061
+ ; RV64-NEXT: # %bb.1:
1062
+ ; RV64-NEXT: mv a1, a2
1063
+ ; RV64-NEXT: .LBB82_2:
1064
+ ; RV64-NEXT: vmv1r.v v0, v24
1065
+ ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1066
+ ; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t
1067
+ ; RV64-NEXT: ret
1054
1068
%elt.head = insertelement <vscale x 32 x i32 > poison, i32 %b , i32 0
1055
1069
%vb = shufflevector <vscale x 32 x i32 > %elt.head , <vscale x 32 x i32 > poison, <vscale x 32 x i32 > zeroinitializer
1056
1070
%evl = call i32 @llvm.vscale.i32 ()
0 commit comments