56 changes: 56 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20843,6 +20843,62 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
case RISCVISD::TUPLE_EXTRACT: {
EVT VT = N->getValueType(0);
SDValue Tuple = N->getOperand(0);
unsigned Idx = N->getConstantOperandVal(1);
if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
break;

unsigned NF = 0;
switch (Tuple.getConstantOperandVal(1)) {
default:
break;
case Intrinsic::riscv_vlseg2_mask:
case Intrinsic::riscv_vlseg3_mask:
case Intrinsic::riscv_vlseg4_mask:
case Intrinsic::riscv_vlseg5_mask:
case Intrinsic::riscv_vlseg6_mask:
case Intrinsic::riscv_vlseg7_mask:
case Intrinsic::riscv_vlseg8_mask:
NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
break;
}

if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
break;

unsigned SEW = VT.getScalarSizeInBits();
assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
"Type mismatch without bitcast?");
unsigned Stride = SEW / 8 * NF;
unsigned Offset = SEW / 8 * Idx;
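// For example, extracting field 2 from a vlseg4e8 result (NF = 4, SEW = 8)
// yields Stride = 4 and Offset = 2: every 4th byte, starting at byte 2.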

SDValue Ops[] = {
/*Chain=*/Tuple.getOperand(0),
/*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
/*Passthru=*/Tuple.getOperand(2),
/*Ptr=*/
DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
DAG.getConstant(Offset, DL, XLenVT)),
/*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
/*Mask=*/Tuple.getOperand(4),
/*VL=*/Tuple.getOperand(5),
/*Policy=*/Tuple.getOperand(6)};

auto TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
// Match getTgtMemIntrinsic for the non-unit-stride case.
EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);

SDVTList VTs = DAG.getVTList({VT, MVT::Other});
SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
Ops, MemVT, MMO);
DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
return Result.getValue(0);
}
}

return SDValue();
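For illustration, here is a minimal sketch of an IR pattern that ends up triggering this combine (modeled on the updated tests below; the function name is hypothetical). The interleaved-access path lowers the single-field deinterleave to a masked vlseg4, and the combine then rewrites the lone TUPLE_EXTRACT into a strided load:

; Factor-4 deinterleave with only field 1 live. With NF = 4 and SEW = 8,
; the combine computes Stride = 4 bytes and Offset = 1 byte.
define <vscale x 8 x i8> @one_active_sketch(ptr %p) {
  %vec = load <vscale x 32 x i8>, ptr %p
  %d = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
  %f1 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d, 1
  ; Expected codegen (cf. vector_deinterleave_load_factor4_oneactive2 below):
  ;   addi a0, a0, 1 ; li a1, 4 ; vlse8.v v8, (a0), a1
  ret <vscale x 8 x i8> %f1
}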
23 changes: 0 additions & 23 deletions llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -216,29 +216,6 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
return false;

// If the segment load is going to be performed one segment at a time anyway
// and there's only one element used, use a strided load instead. This
// will be equally fast, and create less vector register pressure.
if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
// For rv64, we need to truncate the i64 VL to i32 to match the signature. As
// VL is at most the number of active lanes (which is bounded by i32), this is
// safe.
VL = Builder.CreateTrunc(VL, Builder.getInt32Ty());

CallInst *CI =
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
{VTy, BasePtr->getType(), Stride->getType()},
{BasePtr, Stride, Mask, VL});
Alignment = commonAlignment(Alignment, Indices[0] * ScalarSizeInBytes);
CI->addParamAttr(0,
Attribute::getWithAlignment(CI->getContext(), Alignment));
Shuffles[0]->replaceAllUsesWith(CI);
return true;
};

CallInst *VlsegN = Builder.CreateIntrinsic(
FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL});

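Previously this IR-level path performed the same rewrite. A sketch of roughly what the deleted block emitted, assuming a fixed-length factor-4 load of element type i8 with Indices = {1} on rv64 (value names illustrative):

%offset.ptr = getelementptr i8, ptr %ptr, i64 1   ; Indices[0] * ScalarSizeInBytes
%v = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr align 1 %offset.ptr, i64 4, <16 x i1> %mask, i32 %evl)

Doing the rewrite as a DAG combine instead means it also applies to masked vlseg intrinsics that reach instruction selection by other routes.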
18 changes: 10 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/pr141907.ll
@@ -9,27 +9,29 @@ define void @pr141907(ptr %0) nounwind {
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmclr.m v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: addi a3, sp, 20
; CHECK-NEXT: li a4, 12
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vs4r.v v8, (a2)
; CHECK-NEXT: vsetvli a1, a1, e8, mf8, ta, ma
; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v11, v9, 0, v0.t
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; CHECK-NEXT: vlseg3e32.v v8, (a2)
; CHECK-NEXT: vnsrl.wi v9, v8, 0, v0.t
; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a3), a4
; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; CHECK-NEXT: vsseg2e16.v v11, (zero)
; CHECK-NEXT: vsseg2e16.v v9, (zero)
; CHECK-NEXT: bnez a1, .LBB0_1
; CHECK-NEXT: .LBB0_2: # %while.body5
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v9, (a0)
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: j .LBB0_2
entry:
br label %vector.body
11 changes: 7 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -407,8 +407,9 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
@@ -419,8 +420,10 @@ define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vlseg4e8.v v5, (a0)
; CHECK-NEXT: addi a0, a0, 3
; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
11 changes: 7 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
Original file line number Diff line number Diff line change
@@ -3712,8 +3712,9 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive(<vsca
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs4r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0
@@ -3732,9 +3733,11 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive2(<vsc
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: addi a1, sp, 36
; CHECK-NEXT: vs4r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlseg8e32.v v3, (a0)
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vlse32.v v8, (a1), a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add sp, sp, a0
8 changes: 6 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -674,16 +674,20 @@ define <vscale x 2 x i32> @load_factor2_oneactive(ptr %ptr, i32 %evl) {
define <vscale x 2 x i32> @load_factor5_oneactive(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor5_oneactive:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, 12
; RV32-NEXT: li a2, 20
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg5e32.v v5, (a0)
; RV32-NEXT: vlse32.v v8, (a0), a2
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor5_oneactive:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: addi a0, a0, 12
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: li a2, 20
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg5e32.v v5, (a0)
; RV64-NEXT: vlse32.v v8, (a0), a2
; RV64-NEXT: ret
%rvl = mul nuw i32 %evl, 5
%wide.masked.load = call <vscale x 10 x i32> @llvm.vp.load(ptr %ptr, <vscale x 10 x i1> splat (i1 true), i32 %rvl)
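For this factor-5, e32 case the combine's arithmetic gives Stride = 5 * 4 = 20 bytes and, for field 3, Offset = 3 * 4 = 12 bytes, which is exactly the li a2, 20 and addi a0, a0, 12 in the updated CHECK lines.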