48 changes: 48 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20764,6 +20764,54 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
case RISCVISD::TUPLE_EXTRACT: {
EVT VT = N->getValueType(0);
SDValue Tuple = N->getOperand(0);
unsigned Idx = N->getConstantOperandVal(1);
if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
break;

unsigned NF = 0;
switch (Tuple.getConstantOperandVal(1)) {
default: break;
case Intrinsic::riscv_vlseg2_mask: NF = 2; break;
case Intrinsic::riscv_vlseg3_mask: NF = 3; break;
case Intrinsic::riscv_vlseg4_mask: NF = 4; break;
case Intrinsic::riscv_vlseg5_mask: NF = 5; break;
case Intrinsic::riscv_vlseg6_mask: NF = 6; break;
case Intrinsic::riscv_vlseg7_mask: NF = 7; break;
case Intrinsic::riscv_vlseg8_mask: NF = 8; break;
}
if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
break;

// @REVIEWERS - What's the right value to use for the mem size here?
unsigned SEW = VT.getScalarSizeInBits();
if (Log2_64(SEW) != Tuple.getConstantOperandVal(7))
break;
unsigned Stride = SEW/8 * NF;
SDValue Offset = DAG.getConstant(SEW/8 * Idx, DL, XLenVT);

SDValue Ops[] = {
/*Chain=*/Tuple.getOperand(0),
/*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
/*Passthru=*/Tuple.getOperand(2),
/*Ptr=*/DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3), Offset),
/*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
/*Mask=*/Tuple.getOperand(4),
/*VL=*/Tuple.getOperand(5),
/*Policy=*/Tuple.getOperand(6)
};

SDVTList VTs = DAG.getVTList({VT, MVT::Other});
// @REVIEWERS - What's the right MemVT and MMO to use here?
SDValue Result =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
cast<MemIntrinsicSDNode>(Tuple)->getMemoryVT(),
cast<MemIntrinsicSDNode>(Tuple)->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
return Result.getValue(0);
}
}

return SDValue();
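The combine above rewrites "extract one field from a masked segment load" into a single strided load of that field, matching the heuristic removed from RISCVInterleavedAccess.cpp further down. As a minimal standalone sketch of the address arithmetic involved (hypothetical helper, not part of this patch): the stride is the segment size in bytes and the offset selects the field within the first segment.

```cpp
#include <cstdint>

// Sketch only: for a factor-NF segment load of SEW-bit elements, extracting
// field Idx is equivalent to a strided load whose stride is the segment size
// (NF * SEW/8 bytes) and whose base pointer is advanced by Idx * SEW/8 bytes.
struct StridedLoadParams {
  uint64_t StrideBytes;
  uint64_t OffsetBytes;
};

static StridedLoadParams segmentFieldToStridedLoad(unsigned NF, unsigned SEWBits,
                                                   unsigned Idx) {
  unsigned ElemBytes = SEWBits / 8;
  return {uint64_t(NF) * ElemBytes, uint64_t(Idx) * ElemBytes};
}
```

For example, NF = 3, SEW = 8, Idx = 2 gives a stride of 3 and an offset of 2, which is exactly the `addi a0, a0, 2` / `li a1, 3` / `vlse8.v v8, (a0), a1` sequence in the updated tests below.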
23 changes: 0 additions & 23 deletions llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -216,29 +216,6 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
return false;

// If the segment load is going to be performed segment at a time anyways
// and there's only one element used, use a strided load instead. This
// will be equally fast, and create less vector register pressure.
if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
// Note: Same VL as above, but i32 not xlen due to signature of
// vp.strided.load
VL = Builder.CreateElementCount(Builder.getInt32Ty(),
VTy->getElementCount());
CallInst *CI =
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
{VTy, BasePtr->getType(), Stride->getType()},
{BasePtr, Stride, Mask, VL});
Alignment = commonAlignment(Alignment, Indices[0] * ScalarSizeInBytes);
CI->addParamAttr(0,
Attribute::getWithAlignment(CI->getContext(), Alignment));
Shuffles[0]->replaceAllUsesWith(CI);
return true;
};

CallInst *VlsegN = Builder.CreateIntrinsic(
FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL});

@@ -260,8 +260,10 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_load_v2f64_v4f64(ptr %p
define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor3:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 2
; CHECK-NEXT: li a1, 3
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg3e8.v v6, (a0)
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <24 x i8>, ptr %p
%d0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec)
24 changes: 18 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll
@@ -16,8 +16,10 @@ define <8 x i8> @load_factor2(ptr %ptr) {
define <8 x i8> @load_factor3(ptr %ptr) {
; CHECK-LABEL: load_factor3:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 2
; CHECK-NEXT: li a1, 3
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg3e8.v v6, (a0)
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -29,8 +31,10 @@ define <8 x i8> @load_factor3(ptr %ptr) {
define <8 x i8> @load_factor4(ptr %ptr) {
; CHECK-LABEL: load_factor4:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 3
; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg4e8.v v5, (a0)
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -43,8 +47,10 @@ define <8 x i8> @load_factor4(ptr %ptr) {
define <8 x i8> @load_factor5(ptr %ptr) {
; CHECK-LABEL: load_factor5:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 4
; CHECK-NEXT: li a1, 5
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg5e8.v v4, (a0)
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -58,8 +64,10 @@ define <8 x i8> @load_factor5(ptr %ptr) {
define <8 x i8> @load_factor6(ptr %ptr) {
; CHECK-LABEL: load_factor6:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 5
; CHECK-NEXT: li a1, 6
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg6e8.v v3, (a0)
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -74,8 +82,10 @@ define <8 x i8> @load_factor6(ptr %ptr) {
define <8 x i8> @load_factor7(ptr %ptr) {
; CHECK-LABEL: load_factor7:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 6
; CHECK-NEXT: li a1, 7
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg7e8.v v2, (a0)
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
@@ -91,8 +101,10 @@ define <8 x i8> @load_factor7(ptr %ptr) {
define <8 x i8> @load_factor8(ptr %ptr) {
; CHECK-LABEL: load_factor8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 7
; CHECK-NEXT: li a1, 8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg8e8.v v1, (a0)
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.mask.v8i8.i64(ptr %ptr, <8 x i1> splat (i1 true), i64 8)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
18 changes: 10 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/pr141907.ll
@@ -9,27 +9,29 @@ define void @pr141907(ptr %0) nounwind {
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmclr.m v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: addi a3, sp, 20
; CHECK-NEXT: li a4, 12
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vs4r.v v8, (a2)
; CHECK-NEXT: vsetvli a1, a1, e8, mf8, ta, ma
; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v11, v9, 0, v0.t
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; CHECK-NEXT: vlseg3e32.v v8, (a2)
; CHECK-NEXT: vnsrl.wi v9, v8, 0, v0.t
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a3), a4
; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; CHECK-NEXT: vsseg2e16.v v11, (zero)
; CHECK-NEXT: vsseg2e16.v v9, (zero)
; CHECK-NEXT: bnez a1, .LBB0_1
; CHECK-NEXT: .LBB0_2: # %while.body5
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v9, (a0)
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: j .LBB0_2
entry:
br label %vector.body
17 changes: 11 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -371,8 +371,10 @@ define {<vscale x 2 x ptr>, <vscale x 2 x ptr>} @vector_deinterleave_load_nxv2p0
define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deinterleave_load_factor3(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vlseg3e8.v v6, (a0)
; CHECK-NEXT: addi a0, a0, 2
; CHECK-NEXT: li a1, 3
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 24 x i8>, ptr %p
%d0 = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave3(<vscale x 24 x i8> %vec)
@@ -407,8 +409,9 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
@@ -419,8 +422,10 @@ define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vlseg4e8.v v5, (a0)
; CHECK-NEXT: addi a0, a0, 3
; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
11 changes: 7 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -3712,8 +3712,9 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive(<vsca
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs4r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0
@@ -3732,9 +3733,11 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive2(<vsc
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: addi a1, sp, 36
; CHECK-NEXT: vs4r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlseg8e32.v v3, (a0)
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a1), a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0