diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 7e37bbd652114..d4e1d9c6f3ca6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -1876,6 +1876,51 @@ static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { return false; } +// Lower BUILD_VECTOR as broadcast load (if possible). +// For example: +// %a = load i8, ptr %ptr +// %b = build_vector %a, %a, %a, %a +// is lowered to : +// (VLDREPL_B $a0, 0) +static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, + const SDLoc &DL, + SelectionDAG &DAG) { + MVT VT = BVOp->getSimpleValueType(0); + int NumOps = BVOp->getNumOperands(); + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for broadcast."); + + SDValue IdentitySrc; + bool IsIdeneity = true; + + for (int i = 0; i != NumOps; i++) { + SDValue Op = BVOp->getOperand(i); + if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) { + IsIdeneity = false; + break; + } + IdentitySrc = BVOp->getOperand(0); + } + + // make sure that this load is valid and only has one user. + if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode())) + return SDValue(); + + if (IsIdeneity) { + auto *LN = cast<LoadSDNode>(IdentitySrc); + SDVTList Tys = + LN->isIndexed() + ? 
DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other) + : DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()}; + SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1)); + return BCast; + } + return SDValue(); +} + SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); @@ -1891,6 +1936,9 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, (!Subtarget.hasExtLASX() || !Is256Vec)) return SDValue(); + if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG)) + return Result; + if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, /*MinSplatBits=*/8) && SplatBitSize <= 64) { @@ -5326,6 +5374,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VSRLI) NODE_NAME_CASE(VBSLL) NODE_NAME_CASE(VBSRL) + NODE_NAME_CASE(VLDREPL) } #undef NODE_NAME_CASE return nullptr; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 8c2d2597a26ec..40a36b653b1b3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -155,7 +155,10 @@ enum NodeType : unsigned { // Vector byte logicial left / right shift VBSLL, - VBSRL + VBSRL, + + // Scalar load broadcast to vector + VLDREPL // Intrinsic operations end ============================================= }; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index e4feaa600c57d..775d9289af7c4 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -307,7 +307,8 @@ def simm8_lsl # I : Operand<GRLenVT> { } } -def simm9_lsl3 : Operand<GRLenVT> { +def simm9_lsl3 : Operand<GRLenVT>, + ImmLeaf<GRLenVT, [{return isShiftedInt<9, 3>(Imm);}]> { let ParserMatchClass = 
SImmAsmOperand<9, "lsl3">; let EncoderMethod = "getImmOpValueAsr<3>"; let DecoderMethod = "decodeSImmOperand<9, 3>"; } @@ -317,13 +318,15 @@ def simm10 : Operand<GRLenVT> { let ParserMatchClass = SImmAsmOperand<10>; } -def simm10_lsl2 : Operand<GRLenVT> { +def simm10_lsl2 : Operand<GRLenVT>, + ImmLeaf<GRLenVT, [{return isShiftedInt<10, 2>(Imm);}]> { let ParserMatchClass = SImmAsmOperand<10, "lsl2">; let EncoderMethod = "getImmOpValueAsr<2>"; let DecoderMethod = "decodeSImmOperand<10, 2>"; } -def simm11_lsl1 : Operand<GRLenVT> { +def simm11_lsl1 : Operand<GRLenVT>, + ImmLeaf<GRLenVT, [{return isShiftedInt<11, 1>(Imm);}]> { let ParserMatchClass = SImmAsmOperand<11, "lsl1">; let EncoderMethod = "getImmOpValueAsr<1>"; let DecoderMethod = "decodeSImmOperand<11, 1>"; } diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index aaa4b94b6e994..e4268920e0b27 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -2165,6 +2165,7 @@ def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), (XVLDX GPR:$rj, GPR:$rk)>; +// xvldrepl def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), (XVLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), @@ -2174,6 +2175,13 @@ def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), (XVLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; +defm : VldreplPat<v32i8, XVLDREPL_B, simm12>; +defm : VldreplPat<v16i16, XVLDREPL_H, simm11_lsl1>; +defm : VldreplPat<v8i32, XVLDREPL_W, simm10_lsl2>; +defm : VldreplPat<v4i64, XVLDREPL_D, simm9_lsl3>; +defm : VldreplPat<v8f32, XVLDREPL_W, simm10_lsl2>; +defm : VldreplPat<v4f64, XVLDREPL_D, simm9_lsl3>; + // store def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm), (XVST LASX256:$xd, GPR:$rj, (to_valid_timm timm:$imm))>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index e7327ce7461f7..1ffc5f8056b96 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td 
@@ -26,6 +26,7 @@ def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>, def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>; def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>; def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>; +def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>; // Target nodes. def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>; @@ -64,6 +65,10 @@ def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>; def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>; def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>; +def loongarch_vldrepl + : SDNode<"LoongArchISD::VLDREPL", + SDT_LoongArchVLDREPL, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>; def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>; def immZExt3 : ImmLeaf<i64, [{return isUInt<3>(Imm);}]>; @@ -1433,6 +1438,14 @@ multiclass PatCCVrVrF<CondCode CC, string Inst> { (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>; } +multiclass VldreplPat<ValueType vt, LAInst Inst, Operand ImmOpnd> { + def : Pat<(vt(loongarch_vldrepl BaseAddr:$rj)), (Inst BaseAddr:$rj, 0)>; + def : Pat<(vt(loongarch_vldrepl(AddrConstant GPR:$rj, ImmOpnd:$imm))), + (Inst GPR:$rj, ImmOpnd:$imm)>; + def : Pat<(vt(loongarch_vldrepl(AddLike BaseAddr:$rj, ImmOpnd:$imm))), + (Inst BaseAddr:$rj, ImmOpnd:$imm)>; +} + let Predicates = [HasExtLSX] in { // VADD_{B/H/W/D} @@ -2342,6 +2355,7 @@ def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), (VLDX GPR:$rj, GPR:$rk)>; +// vldrepl def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), (VLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), @@ -2351,6 +2365,13 @@ def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), (VLDREPL_D GPR:$rj, 
(to_valid_timm timm:$imm))>; +defm : VldreplPat<v16i8, VLDREPL_B, simm12>; +defm : VldreplPat<v8i16, VLDREPL_H, simm11_lsl1>; +defm : VldreplPat<v4i32, VLDREPL_W, simm10_lsl2>; +defm : VldreplPat<v2i64, VLDREPL_D, simm9_lsl3>; +defm : VldreplPat<v4f32, VLDREPL_W, simm10_lsl2>; +defm : VldreplPat<v2f64, VLDREPL_D, simm9_lsl3>; + // store def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), (VST LSX128:$vd, GPR:$rj, (to_valid_timm timm:$imm))>; diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll index 4fcf016376d09..976924bdca686 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll @@ -21,8 +21,8 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) { define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) { ; CHECK-LABEL: xvldrepl_d_unaligned_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 4 -; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 +; CHECK-NEXT: addi.d $a0, $a0, 4 +; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0 ; CHECK-NEXT: ret %p = getelementptr i32, ptr %ptr, i32 1 %tmp = load i64, ptr %p @@ -34,8 +34,7 @@ define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) { define <32 x i8> @xvldrepl_b(ptr %ptr) { ; CHECK-LABEL: xvldrepl_b: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.b $a0, $a0, 0 -; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0 +; CHECK-NEXT: xvldrepl.b $xr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load i8, ptr %ptr %tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0 @@ -46,8 +45,7 @@ define <32 x i8> @xvldrepl_b(ptr %ptr) { define <32 x i8> @xvldrepl_b_offset(ptr %ptr) { ; CHECK-LABEL: xvldrepl_b_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.b $a0, $a0, 33 -; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0 +; CHECK-NEXT: xvldrepl.b $xr0, $a0, 33 ; CHECK-NEXT: ret %p = getelementptr i8, ptr %ptr, i64 33 %tmp = load i8, ptr %p @@ -60,8 +58,7 @@ define <32 x i8> @xvldrepl_b_offset(ptr %ptr) { define <16 x i16> @xvldrepl_h(ptr %ptr) { ; CHECK-LABEL: xvldrepl_h: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.h $a0, $a0, 0 -; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 +; CHECK-NEXT: xvldrepl.h $xr0, $a0, 0 ; CHECK-NEXT: ret %tmp 
= load i16, ptr %ptr %tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0 @@ -72,8 +69,7 @@ define <16 x i16> @xvldrepl_h(ptr %ptr) { define <16 x i16> @xvldrepl_h_offset(ptr %ptr) { ; CHECK-LABEL: xvldrepl_h_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.h $a0, $a0, 66 -; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 +; CHECK-NEXT: xvldrepl.h $xr0, $a0, 66 ; CHECK-NEXT: ret %p = getelementptr i16, ptr %ptr, i64 33 %tmp = load i16, ptr %p @@ -85,8 +81,7 @@ define <16 x i16> @xvldrepl_h_offset(ptr %ptr) { define <8 x i32> @xvldrepl_w(ptr %ptr) { ; CHECK-LABEL: xvldrepl_w: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.w $a0, $a0, 0 -; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0 +; CHECK-NEXT: xvldrepl.w $xr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load i32, ptr %ptr %tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -97,8 +92,7 @@ define <8 x i32> @xvldrepl_w(ptr %ptr) { define <8 x i32> @xvldrepl_w_offset(ptr %ptr) { ; CHECK-LABEL: xvldrepl_w_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.w $a0, $a0, 132 -; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0 +; CHECK-NEXT: xvldrepl.w $xr0, $a0, 132 ; CHECK-NEXT: ret %p = getelementptr i32, ptr %ptr, i64 33 %tmp = load i32, ptr %p @@ -111,8 +105,7 @@ define <8 x i32> @xvldrepl_w_offset(ptr %ptr) { define <4 x i64> @xvldrepl_d(ptr %ptr) { ; CHECK-LABEL: xvldrepl_d: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 +; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load i64, ptr %ptr %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0 @@ -123,8 +116,7 @@ define <4 x i64> @xvldrepl_d(ptr %ptr) { define <4 x i64> @xvldrepl_d_offset(ptr %ptr) { ; CHECK-LABEL: xvldrepl_d_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 264 -; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 +; CHECK-NEXT: xvldrepl.d $xr0, $a0, 264 ; CHECK-NEXT: ret %p = getelementptr i64, ptr %ptr, i64 33 %tmp = load i64, ptr %p @@ -136,8 +128,7 @@ define <4 x i64> @xvldrepl_d_offset(ptr %ptr) { define <8 x float> 
@vldrepl_w_flt(ptr %ptr) { ; CHECK-LABEL: vldrepl_w_flt: ; CHECK: # %bb.0: -; CHECK-NEXT: fld.s $fa0, $a0, 0 -; CHECK-NEXT: xvreplve0.w $xr0, $xr0 +; CHECK-NEXT: xvldrepl.w $xr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load float, ptr %ptr %tmp1 = insertelement <8 x float> zeroinitializer, float %tmp, i32 0 @@ -148,8 +139,7 @@ define <8 x float> @vldrepl_w_flt(ptr %ptr) { define <8 x float> @vldrepl_w_flt_offset(ptr %ptr) { ; CHECK-LABEL: vldrepl_w_flt_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: fld.s $fa0, $a0, 264 -; CHECK-NEXT: xvreplve0.w $xr0, $xr0 +; CHECK-NEXT: xvldrepl.w $xr0, $a0, 264 ; CHECK-NEXT: ret %p = getelementptr i64, ptr %ptr, i64 33 %tmp = load float, ptr %p @@ -161,8 +151,7 @@ define <8 x float> @vldrepl_w_flt_offset(ptr %ptr) { define <4 x double> @vldrepl_d_dbl(ptr %ptr) { ; CHECK-LABEL: vldrepl_d_dbl: ; CHECK: # %bb.0: -; CHECK-NEXT: fld.d $fa0, $a0, 0 -; CHECK-NEXT: xvreplve0.d $xr0, $xr0 +; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load double, ptr %ptr %tmp1 = insertelement <4 x double> zeroinitializer, double %tmp, i32 0 @@ -173,8 +162,7 @@ define <4 x double> @vldrepl_d_dbl(ptr %ptr) { define <4 x double> @vldrepl_d_dbl_offset(ptr %ptr) { ; CHECK-LABEL: vldrepl_d_dbl_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: fld.d $fa0, $a0, 264 -; CHECK-NEXT: xvreplve0.d $xr0, $xr0 +; CHECK-NEXT: xvldrepl.d $xr0, $a0, 264 ; CHECK-NEXT: ret %p = getelementptr i64, ptr %ptr, i64 33 %tmp = load double, ptr %p diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll index 02b68725687dd..c46747ef30509 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll @@ -21,8 +21,8 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){ define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) { ; CHECK-LABEL: vldrepl_d_unaligned_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 4 -; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 +; 
CHECK-NEXT: addi.d $a0, $a0, 4 +; CHECK-NEXT: vldrepl.d $vr0, $a0, 0 ; CHECK-NEXT: ret %p = getelementptr i32, ptr %ptr, i32 1 %tmp = load i64, ptr %p @@ -34,8 +34,7 @@ define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) { define <16 x i8> @vldrepl_b(ptr %ptr) { ; CHECK-LABEL: vldrepl_b: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.b $a0, $a0, 0 -; CHECK-NEXT: vreplgr2vr.b $vr0, $a0 +; CHECK-NEXT: vldrepl.b $vr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load i8, ptr %ptr %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %tmp, i32 0 @@ -46,8 +45,7 @@ define <16 x i8> @vldrepl_b(ptr %ptr) { define <16 x i8> @vldrepl_b_offset(ptr %ptr) { ; CHECK-LABEL: vldrepl_b_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.b $a0, $a0, 33 -; CHECK-NEXT: vreplgr2vr.b $vr0, $a0 +; CHECK-NEXT: vldrepl.b $vr0, $a0, 33 ; CHECK-NEXT: ret %p = getelementptr i8, ptr %ptr, i64 33 %tmp = load i8, ptr %p @@ -60,8 +58,7 @@ define <16 x i8> @vldrepl_b_offset(ptr %ptr) { define <8 x i16> @vldrepl_h(ptr %ptr) { ; CHECK-LABEL: vldrepl_h: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.h $a0, $a0, 0 -; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 +; CHECK-NEXT: vldrepl.h $vr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load i16, ptr %ptr %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %tmp, i32 0 @@ -72,8 +69,7 @@ define <8 x i16> @vldrepl_h(ptr %ptr) { define <8 x i16> @vldrepl_h_offset(ptr %ptr) { ; CHECK-LABEL: vldrepl_h_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.h $a0, $a0, 66 -; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 +; CHECK-NEXT: vldrepl.h $vr0, $a0, 66 ; CHECK-NEXT: ret %p = getelementptr i16, ptr %ptr, i64 33 %tmp = load i16, ptr %p @@ -85,8 +81,7 @@ define <8 x i16> @vldrepl_h_offset(ptr %ptr) { define <4 x i32> @vldrepl_w(ptr %ptr) { ; CHECK-LABEL: vldrepl_w: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.w $a0, $a0, 0 -; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 +; CHECK-NEXT: vldrepl.w $vr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load i32, ptr %ptr %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -97,8 +92,7 @@ define <4 x i32> 
@vldrepl_w(ptr %ptr) { define <4 x i32> @vldrepl_w_offset(ptr %ptr) { ; CHECK-LABEL: vldrepl_w_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.w $a0, $a0, 132 -; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 +; CHECK-NEXT: vldrepl.w $vr0, $a0, 132 ; CHECK-NEXT: ret %p = getelementptr i32, ptr %ptr, i64 33 %tmp = load i32, ptr %p @@ -110,8 +104,7 @@ define <4 x i32> @vldrepl_w_offset(ptr %ptr) { define <2 x i64> @vldrepl_d(ptr %ptr) { ; CHECK-LABEL: vldrepl_d: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 +; CHECK-NEXT: vldrepl.d $vr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load i64, ptr %ptr %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0 @@ -122,8 +115,7 @@ define <2 x i64> @vldrepl_d(ptr %ptr) { define <2 x i64> @vldrepl_d_offset(ptr %ptr) { ; CHECK-LABEL: vldrepl_d_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 264 -; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 +; CHECK-NEXT: vldrepl.d $vr0, $a0, 264 ; CHECK-NEXT: ret %p = getelementptr i64, ptr %ptr, i64 33 %tmp = load i64, ptr %p @@ -135,8 +127,7 @@ define <2 x i64> @vldrepl_d_offset(ptr %ptr) { define <4 x float> @vldrepl_w_flt(ptr %ptr) { ; CHECK-LABEL: vldrepl_w_flt: ; CHECK: # %bb.0: -; CHECK-NEXT: fld.s $fa0, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: vldrepl.w $vr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load float, ptr %ptr %tmp1 = insertelement <4 x float> zeroinitializer, float %tmp, i32 0 @@ -147,8 +138,7 @@ define <4 x float> @vldrepl_w_flt(ptr %ptr) { define <4 x float> @vldrepl_w_flt_offset(ptr %ptr) { ; CHECK-LABEL: vldrepl_w_flt_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: fld.s $fa0, $a0, 264 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: vldrepl.w $vr0, $a0, 264 ; CHECK-NEXT: ret %p = getelementptr i64, ptr %ptr, i64 33 %tmp = load float, ptr %p @@ -160,8 +150,7 @@ define <4 x float> @vldrepl_w_flt_offset(ptr %ptr) { define <2 x double> @vldrepl_d_dbl(ptr %ptr) { ; CHECK-LABEL: vldrepl_d_dbl: ; CHECK: # %bb.0: -; CHECK-NEXT: fld.d $fa0, $a0, 
0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: vldrepl.d $vr0, $a0, 0 ; CHECK-NEXT: ret %tmp = load double, ptr %ptr %tmp1 = insertelement <2 x double> zeroinitializer, double %tmp, i32 0 @@ -172,8 +161,7 @@ define <2 x double> @vldrepl_d_dbl(ptr %ptr) { define <2 x double> @vldrepl_d_dbl_offset(ptr %ptr) { ; CHECK-LABEL: vldrepl_d_dbl_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: fld.d $fa0, $a0, 264 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: vldrepl.d $vr0, $a0, 264 ; CHECK-NEXT: ret %p = getelementptr i64, ptr %ptr, i64 33 %tmp = load double, ptr %p