Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2621,8 +2621,40 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
if (isa<ConstantSDNode>(Op->getOperand(2)))
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
SDLoc DL(Op);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);

if (isa<ConstantSDNode>(Op2)) {
return Op;
} else {
MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);

if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
return SDValue();

SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);

SmallVector<SDValue, 32> RawIndices;
for (unsigned i = 0; i < NumElts; ++i)
RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
RawIndices.push_back(DAG.getConstant(i, DL, IdxTy));

I'm not sure...

  /// Return an ISD::BUILD_VECTOR node. The number of elements in VT,
  /// which must be a vector type, must match the number of operands in Ops.
  /// The operands must have the same type as (or, for integers, a type wider
  /// than) VT's element type.
  SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef<SDValue> Ops) {
    // VerifySDNode (via InsertNode) checks BUILD_VECTOR later.
    return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
  }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getBuildVector() allows "The operands must have the same type as (or, for integers, a type wider than) VT's element type.". And we must ensure the elements of BUILD_VECTOR we built are type legal (ie. i64. i8/i16/i32 are illegal for loongarch64) at this point.

When I tried to use IdxTy here, assertion occurs:

Legalizing: t38: v16i8 = BUILD_VECTOR Constant:i8<0>, Constant:i8<1>, ... , Constant:i8<15>
llc: /.../llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:991: void (anonymous namespace)::SelectionDAGLegalize::LegalizeOp(SDNode *): Assertion `(TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || Op.getOpcode() == ISD::TargetConstant || Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"' failed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about loongarch32 if we use Subtarget.getGRLenVT() when the vector is v{2,4}i64 or v{2,4}f64?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v2i64/v4i64 is okay, because of the split of i64 in the initial selectiondag.
v2f64/v4f64 occur an assertion error when verifyNode().

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Other tests may also have issues with loongarch32 + lsx/lasx. I think @heiher will handle them later.

SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);

// insert vec, elt, idx
// =>
// select (splatidx == {0,1,2...}) ? splatelt : vec
SDValue SelectCC =
DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
}

return SDValue();
}

Expand Down
140 changes: 50 additions & 90 deletions llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -114,22 +114,15 @@ define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_32xi8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -96
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 96
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 32
; CHECK-NEXT: addi.d $a0, $sp, 32
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0
; CHECK-NEXT: st.b $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 32
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI8_0)
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
; CHECK-NEXT: xvseq.b $xr0, $xr2, $xr0
; CHECK-NEXT: xvreplgr2vr.b $xr2, $a2
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -96
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx
Expand All @@ -140,22 +133,15 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_16xi16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -96
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 96
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 32
; CHECK-NEXT: addi.d $a0, $sp, 32
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1
; CHECK-NEXT: st.h $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 32
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI9_0)
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: xvreplgr2vr.h $xr2, $a0
; CHECK-NEXT: xvseq.h $xr0, $xr2, $xr0
; CHECK-NEXT: xvreplgr2vr.h $xr2, $a2
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -96
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx
Expand All @@ -166,22 +152,15 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xi32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -96
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 96
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 32
; CHECK-NEXT: addi.d $a0, $sp, 32
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2
; CHECK-NEXT: st.w $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 32
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI10_0)
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI10_0)
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: xvreplgr2vr.w $xr2, $a0
; CHECK-NEXT: xvseq.w $xr0, $xr2, $xr0
; CHECK-NEXT: xvreplgr2vr.w $xr2, $a2
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -96
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx
Expand All @@ -192,22 +171,15 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xi64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -96
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 96
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 32
; CHECK-NEXT: addi.d $a0, $sp, 32
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3
; CHECK-NEXT: st.d $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 32
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI11_0)
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI11_0)
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a0
; CHECK-NEXT: xvseq.d $xr0, $xr2, $xr0
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a2
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -96
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx
Expand All @@ -218,22 +190,16 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xfloat_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -96
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 96
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: xvst $xr1, $sp, 32
; CHECK-NEXT: addi.d $a0, $sp, 32
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
; CHECK-NEXT: fst.s $fa0, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 32
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI12_0)
; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI12_0)
; CHECK-NEXT: xvld $xr2, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: xvreplgr2vr.w $xr3, $a0
; CHECK-NEXT: xvseq.w $xr1, $xr3, $xr1
; CHECK-NEXT: xvreplve0.w $xr0, $xr0
; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -96
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <8 x float>, ptr %src
%v_new = insertelement <8 x float> %v, float %in, i32 %idx
Expand All @@ -244,22 +210,16 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin
define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xdouble_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -96
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 96
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: xvst $xr1, $sp, 32
; CHECK-NEXT: addi.d $a0, $sp, 32
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
; CHECK-NEXT: fst.d $fa0, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 32
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI13_0)
; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI13_0)
; CHECK-NEXT: xvld $xr2, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: xvreplgr2vr.d $xr3, $a0
; CHECK-NEXT: xvseq.d $xr1, $xr3, $xr1
; CHECK-NEXT: xvreplve0.d $xr0, $xr0
; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -96
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%v_new = insertelement <4 x double> %v, double %in, i32 %idx
Expand Down
98 changes: 50 additions & 48 deletions llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,15 @@ define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind {
define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_16xi8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0
; CHECK-NEXT: st.b $a2, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI6_0)
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI6_0)
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: vreplgr2vr.b $vr2, $a0
; CHECK-NEXT: vseq.b $vr0, $vr2, $vr0
; CHECK-NEXT: vreplgr2vr.b $vr2, $a2
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx
Expand All @@ -103,15 +103,15 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xi16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1
; CHECK-NEXT: st.h $a2, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI7_0)
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI7_0)
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: vreplgr2vr.h $vr2, $a0
; CHECK-NEXT: vseq.h $vr0, $vr2, $vr0
; CHECK-NEXT: vreplgr2vr.h $vr2, $a2
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx
Expand All @@ -122,15 +122,15 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xi32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2
; CHECK-NEXT: st.w $a2, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI8_0)
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: vreplgr2vr.w $vr2, $a0
; CHECK-NEXT: vseq.w $vr0, $vr2, $vr0
; CHECK-NEXT: vreplgr2vr.w $vr2, $a2
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx
Expand All @@ -141,15 +141,15 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_2xi64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3
; CHECK-NEXT: st.d $a2, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI9_0)
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: vreplgr2vr.d $vr2, $a0
; CHECK-NEXT: vseq.d $vr0, $vr2, $vr0
; CHECK-NEXT: vreplgr2vr.d $vr2, $a2
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <2 x i64>, ptr %src
%v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx
Expand All @@ -160,15 +160,16 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xfloat_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: vst $vr1, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2
; CHECK-NEXT: fst.s $fa0, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI10_0)
; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI10_0)
; CHECK-NEXT: vld $vr2, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: vreplgr2vr.w $vr3, $a0
; CHECK-NEXT: vseq.w $vr1, $vr3, $vr1
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <4 x float>, ptr %src
%v_new = insertelement <4 x float> %v, float %ins, i32 %idx
Expand All @@ -179,15 +180,16 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi
define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind {
; CHECK-LABEL: insert_2xdouble_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: vst $vr1, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3
; CHECK-NEXT: fst.d $fa0, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI11_0)
; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI11_0)
; CHECK-NEXT: vld $vr2, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: vreplgr2vr.d $vr3, $a0
; CHECK-NEXT: vseq.d $vr1, $vr3, $vr1
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%v = load volatile <2 x double>, ptr %src
%v_new = insertelement <2 x double> %v, double %ins, i32 %idx
Expand Down