Skip to content

Commit 6a79441

Browse files
committed
[RISCV] Optimize i64 insertelt on RV32.
We can use a tail-undisturbed vslide1down to insert into the vector. This should make D136640 unneeded.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D136738
1 parent 4ea6ffb commit 6a79441

File tree

5 files changed

+40
-43
lines changed

5 files changed

+40
-43
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5008,6 +5008,25 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
50085008
getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
50095009
// Limit the active VL to two.
50105010
SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
5011+
// If the Idx is 0 we can insert directly into the vector.
5012+
if (isNullConstant(Idx)) {
5013+
// First slide in the lo value, then the hi in above it. We use slide1down
5014+
// to avoid the register group overlap constraint of vslide1up.
5015+
ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
5016+
Vec, Vec, ValLo, I32Mask, InsertI64VL);
5017+
// If the source vector is undef don't pass along the tail elements from
5018+
// the previous slide1down.
5019+
SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
5020+
ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
5021+
Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
5022+
// Bitcast back to the right container type.
5023+
ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
5024+
5025+
if (!VecVT.isFixedLengthVector())
5026+
return ValInVec;
5027+
return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
5028+
}
5029+
50115030
// First slide in the lo value, then the hi in above it. We use slide1down
50125031
// to avoid the register group overlap constraint of vslide1up.
50135032
ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -509,9 +509,7 @@ define <4 x i16> @bitcast_i64_v4i16(i64 %a) {
509509
; RV32: # %bb.0:
510510
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
511511
; RV32-NEXT: vslide1down.vx v8, v8, a0
512-
; RV32-NEXT: vslide1down.vx v9, v8, a1
513-
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
514-
; RV32-NEXT: vslideup.vi v8, v9, 0
512+
; RV32-NEXT: vslide1down.vx v8, v8, a1
515513
; RV32-NEXT: ret
516514
;
517515
; RV64-LABEL: bitcast_i64_v4i16:
@@ -547,9 +545,7 @@ define <2 x i32> @bitcast_i64_v2i32(i64 %a) {
547545
; RV32: # %bb.0:
548546
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
549547
; RV32-NEXT: vslide1down.vx v8, v8, a0
550-
; RV32-NEXT: vslide1down.vx v9, v8, a1
551-
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
552-
; RV32-NEXT: vslideup.vi v8, v9, 0
548+
; RV32-NEXT: vslide1down.vx v8, v8, a1
553549
; RV32-NEXT: ret
554550
;
555551
; RV64-LABEL: bitcast_i64_v2i32:
@@ -585,9 +581,7 @@ define <1 x i64> @bitcast_i64_v1i64(i64 %a) {
585581
; RV32: # %bb.0:
586582
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
587583
; RV32-NEXT: vslide1down.vx v8, v8, a0
588-
; RV32-NEXT: vslide1down.vx v9, v8, a1
589-
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
590-
; RV32-NEXT: vslideup.vi v8, v9, 0
584+
; RV32-NEXT: vslide1down.vx v8, v8, a1
591585
; RV32-NEXT: ret
592586
;
593587
; RV64-LABEL: bitcast_i64_v1i64:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -200,9 +200,7 @@ define <4 x half> @bitcast_i64_v4f16(i64 %a) {
200200
; RV32-FP: # %bb.0:
201201
; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
202202
; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
203-
; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
204-
; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
205-
; RV32-FP-NEXT: vslideup.vi v8, v9, 0
203+
; RV32-FP-NEXT: vslide1down.vx v8, v8, a1
206204
; RV32-FP-NEXT: ret
207205
;
208206
; RV64-FP-LABEL: bitcast_i64_v4f16:
@@ -219,9 +217,7 @@ define <2 x float> @bitcast_i64_v2f32(i64 %a) {
219217
; RV32-FP: # %bb.0:
220218
; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
221219
; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
222-
; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
223-
; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
224-
; RV32-FP-NEXT: vslideup.vi v8, v9, 0
220+
; RV32-FP-NEXT: vslide1down.vx v8, v8, a1
225221
; RV32-FP-NEXT: ret
226222
;
227223
; RV64-FP-LABEL: bitcast_i64_v2f32:
@@ -238,9 +234,7 @@ define <1 x double> @bitcast_i64_v1f64(i64 %a) {
238234
; RV32-FP: # %bb.0:
239235
; RV32-FP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
240236
; RV32-FP-NEXT: vslide1down.vx v8, v8, a0
241-
; RV32-FP-NEXT: vslide1down.vx v9, v8, a1
242-
; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, ma
243-
; RV32-FP-NEXT: vslideup.vi v8, v9, 0
237+
; RV32-FP-NEXT: vslide1down.vx v8, v8, a1
244238
; RV32-FP-NEXT: ret
245239
;
246240
; RV64-FP-LABEL: bitcast_i64_v1f64:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,9 @@ define <2 x i64> @mgather_v2i64_align4(<2 x i64*> %ptrs, <2 x i1> %m, <2 x i64>
144144
; RV32-NEXT: vmv.x.s a1, v8
145145
; RV32-NEXT: lw a2, 0(a1)
146146
; RV32-NEXT: lw a1, 4(a1)
147-
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
148-
; RV32-NEXT: vslide1down.vx v10, v8, a2
149-
; RV32-NEXT: vslide1down.vx v10, v10, a1
150-
; RV32-NEXT: vsetivli zero, 1, e64, m1, tu, ma
151-
; RV32-NEXT: vslideup.vi v9, v10, 0
147+
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
148+
; RV32-NEXT: vslide1down.vx v9, v9, a2
149+
; RV32-NEXT: vslide1down.vx v9, v9, a1
152150
; RV32-NEXT: andi a0, a0, 2
153151
; RV32-NEXT: beqz a0, .LBB5_2
154152
; RV32-NEXT: .LBB5_4: # %cond.load1

llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -635,11 +635,9 @@ define <vscale x 16 x i32> @insertelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 %
635635
define <vscale x 1 x i64> @insertelt_nxv1i64_0(<vscale x 1 x i64> %v, i64 %elt) {
636636
; CHECK-LABEL: insertelt_nxv1i64_0:
637637
; CHECK: # %bb.0:
638-
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
639-
; CHECK-NEXT: vslide1down.vx v9, v8, a0
640-
; CHECK-NEXT: vslide1down.vx v9, v9, a1
641-
; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, ma
642-
; CHECK-NEXT: vslideup.vi v8, v9, 0
638+
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
639+
; CHECK-NEXT: vslide1down.vx v8, v8, a0
640+
; CHECK-NEXT: vslide1down.vx v8, v8, a1
643641
; CHECK-NEXT: ret
644642
%r = insertelement <vscale x 1 x i64> %v, i64 %elt, i32 0
645643
ret <vscale x 1 x i64> %r
@@ -675,11 +673,9 @@ define <vscale x 1 x i64> @insertelt_nxv1i64_idx(<vscale x 1 x i64> %v, i64 %elt
675673
define <vscale x 2 x i64> @insertelt_nxv2i64_0(<vscale x 2 x i64> %v, i64 %elt) {
676674
; CHECK-LABEL: insertelt_nxv2i64_0:
677675
; CHECK: # %bb.0:
678-
; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
679-
; CHECK-NEXT: vslide1down.vx v10, v8, a0
680-
; CHECK-NEXT: vslide1down.vx v10, v10, a1
681-
; CHECK-NEXT: vsetivli zero, 1, e64, m2, tu, ma
682-
; CHECK-NEXT: vslideup.vi v8, v10, 0
676+
; CHECK-NEXT: vsetivli zero, 2, e32, m2, tu, ma
677+
; CHECK-NEXT: vslide1down.vx v8, v8, a0
678+
; CHECK-NEXT: vslide1down.vx v8, v8, a1
683679
; CHECK-NEXT: ret
684680
%r = insertelement <vscale x 2 x i64> %v, i64 %elt, i32 0
685681
ret <vscale x 2 x i64> %r
@@ -715,11 +711,9 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_idx(<vscale x 2 x i64> %v, i64 %elt
715711
define <vscale x 4 x i64> @insertelt_nxv4i64_0(<vscale x 4 x i64> %v, i64 %elt) {
716712
; CHECK-LABEL: insertelt_nxv4i64_0:
717713
; CHECK: # %bb.0:
718-
; CHECK-NEXT: vsetivli zero, 2, e32, m4, ta, ma
719-
; CHECK-NEXT: vslide1down.vx v12, v8, a0
720-
; CHECK-NEXT: vslide1down.vx v12, v12, a1
721-
; CHECK-NEXT: vsetivli zero, 1, e64, m4, tu, ma
722-
; CHECK-NEXT: vslideup.vi v8, v12, 0
714+
; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma
715+
; CHECK-NEXT: vslide1down.vx v8, v8, a0
716+
; CHECK-NEXT: vslide1down.vx v8, v8, a1
723717
; CHECK-NEXT: ret
724718
%r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 0
725719
ret <vscale x 4 x i64> %r
@@ -755,11 +749,9 @@ define <vscale x 4 x i64> @insertelt_nxv4i64_idx(<vscale x 4 x i64> %v, i64 %elt
755749
define <vscale x 8 x i64> @insertelt_nxv8i64_0(<vscale x 8 x i64> %v, i64 %elt) {
756750
; CHECK-LABEL: insertelt_nxv8i64_0:
757751
; CHECK: # %bb.0:
758-
; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma
759-
; CHECK-NEXT: vslide1down.vx v16, v8, a0
760-
; CHECK-NEXT: vslide1down.vx v16, v16, a1
761-
; CHECK-NEXT: vsetivli zero, 1, e64, m8, tu, ma
762-
; CHECK-NEXT: vslideup.vi v8, v16, 0
752+
; CHECK-NEXT: vsetivli zero, 2, e32, m8, tu, ma
753+
; CHECK-NEXT: vslide1down.vx v8, v8, a0
754+
; CHECK-NEXT: vslide1down.vx v8, v8, a1
763755
; CHECK-NEXT: ret
764756
%r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 0
765757
ret <vscale x 8 x i64> %r

0 commit comments

Comments
 (0)