// Patch summary (commentary only; the diff text that follows is unchanged).
//
// LoongArchISelLowering.cpp: adds
//   LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const
// which returns TypeWidenVector when all three conditions hold:
//   * VT is not a scalable vector,
//   * VT.getVectorNumElements() != 1,
//   * VT.getVectorElementType() != MVT::i1.
// Any other VT is forwarded unchanged to
// TargetLoweringBase::getPreferredVectorAction(VT).
//
// LoongArchISelLowering.h: declares the method with `override` inside
// class LoongArchTargetLowering, directly after isFPImmVLDILegal and before
// the `private:` section.
//
// NOTE(review): the .cpp hunk is marked "\ No newline at end of file", i.e. the
// patched file ends without a trailing newline — consider adding one.
// NOTE(review): presumably widening is preferred so that short fixed-length
// vectors are legalized into full-width vector registers instead of being
// handled element by element — confirm against the target's legal types.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 2282dc8955613..dceb3c682d2df 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -6570,3 +6570,12 @@ bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI, return true; } + +TargetLoweringBase::LegalizeTypeAction +LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const { + if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && + VT.getVectorElementType() != MVT::i1) + return TypeWidenVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); +} \ No newline at end of file diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index a215ab523874b..f8d4cef76b955 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -281,6 +281,7 @@ class LoongArchTargetLowering : public TargetLowering { Align &PrefAlign) const override; bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const; + LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; private: /// Target-specific function used to lower LoongArch calling conventions. 
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll index ea9da6aa60c4a..75639ae090661 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll @@ -5,10 +5,13 @@ define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_2i8_to_2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.b $a2, $a0, 0 -; CHECK-NEXT: ld.b $a0, $a0, 1 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; CHECK-NEXT: ld.h $a0, $a0, 0 +; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0 +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0 +; CHECK-NEXT: vslli.d $vr0, $vr0, 56 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -21,14 +24,13 @@ entry: define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_4i8_to_4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.b $a2, $a0, 0 -; CHECK-NEXT: ld.b $a3, $a0, 1 -; CHECK-NEXT: ld.b $a4, $a0, 2 -; CHECK-NEXT: ld.b $a0, $a0, 3 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 1 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) +; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI1_0) +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0 +; CHECK-NEXT: vslli.w $vr0, $vr0, 24 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -41,22 +43,11 @@ entry: define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_8i8_to_8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.b $a2, $a0, 0 -; CHECK-NEXT: ld.b $a3, $a0, 1 -; CHECK-NEXT: ld.b $a4, $a0, 2 -; CHECK-NEXT: ld.b $a5, $a0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 0 -; CHECK-NEXT: 
vinsgr2vr.h $vr0, $a3, 1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 3 -; CHECK-NEXT: ld.b $a2, $a0, 4 -; CHECK-NEXT: ld.b $a3, $a0, 5 -; CHECK-NEXT: ld.b $a4, $a0, 6 -; CHECK-NEXT: ld.b $a0, $a0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 5 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 +; CHECK-NEXT: vslli.h $vr0, $vr0, 8 +; CHECK-NEXT: vsrai.h $vr0, $vr0, 8 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -69,10 +60,13 @@ entry: define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_2i16_to_2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.h $a2, $a0, 0 -; CHECK-NEXT: ld.h $a0, $a0, 2 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; CHECK-NEXT: vshuf.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vslli.d $vr0, $vr0, 48 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -85,14 +79,11 @@ entry: define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_4i16_to_4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.h $a2, $a0, 0 -; CHECK-NEXT: ld.h $a3, $a0, 2 -; CHECK-NEXT: ld.h $a4, $a0, 4 -; CHECK-NEXT: ld.h $a0, $a0, 6 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 1 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vslli.w $vr0, $vr0, 16 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -105,10 +96,11 @@ entry: define void 
@load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_2i32_to_2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.w $a2, $a0, 0 -; CHECK-NEXT: ld.w $a0, $a0, 4 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16 +; CHECK-NEXT: vslli.d $vr0, $vr0, 32 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -122,44 +114,17 @@ define void @load_sext_16i8_to_16i16(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_16i8_to_16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 7 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1 +; CHECK-NEXT: vilvl.b $vr1, $vr1, $vr1 ; CHECK-NEXT: vslli.h $vr1, $vr1, 8 ; CHECK-NEXT: vsrai.h $vr1, $vr1, 8 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4 -; 
CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7 -; CHECK-NEXT: vslli.h $vr0, $vr2, 8 +; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 +; CHECK-NEXT: vslli.h $vr0, $vr0, 8 ; CHECK-NEXT: vsrai.h $vr0, $vr0, 8 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -172,50 +137,33 @@ define void @load_sext_16i8_to_16i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_16i8_to_16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_1) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI7_1) +; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_2) +; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI7_2) +; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr1, $vr2 ; CHECK-NEXT: vslli.w $vr1, $vr1, 24 ; CHECK-NEXT: vsrai.w $vr1, $vr1, 24 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; CHECK-NEXT: vslli.w $vr2, $vr2, 24 -; CHECK-NEXT: vsrai.w $vr2, $vr2, 24 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8 -; CHECK-NEXT: 
vinsgr2vr.w $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3 +; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr0, $vr3 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_3) +; CHECK-NEXT: vld $vr4, $a0, %pc_lo12(.LCPI7_3) +; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr3, $vr2 ; CHECK-NEXT: vslli.w $vr3, $vr3, 24 ; CHECK-NEXT: vsrai.w $vr3, $vr3, 24 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 -; CHECK-NEXT: vinsgr2vr.w $vr4, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13 -; CHECK-NEXT: vinsgr2vr.w $vr4, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 -; CHECK-NEXT: vinsgr2vr.w $vr4, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -; CHECK-NEXT: vinsgr2vr.w $vr4, $a0, 3 -; CHECK-NEXT: vslli.w $vr0, $vr4, 24 +; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr0, $vr4 +; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr4, $vr2 +; CHECK-NEXT: vslli.w $vr4, $vr4, 24 +; CHECK-NEXT: vsrai.w $vr4, $vr4, 24 +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr2 +; CHECK-NEXT: vslli.w $vr0, $vr0, 24 ; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 -; CHECK-NEXT: vst $vr0, $a1, 48 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr4, $a1, 48 ; CHECK-NEXT: vst $vr3, $a1, 32 -; CHECK-NEXT: vst $vr2, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -228,62 +176,59 @@ define void @load_sext_16i8_to_16i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_16i8_to_16i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; CHECK-NEXT: vslli.d $vr1, $vr1, 56 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 56 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.b 
$a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI8_0) +; CHECK-NEXT: vshuf4i.b $vr2, $vr0, 14 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_1) +; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI8_1) +; CHECK-NEXT: vshuf.b $vr2, $vr0, $vr2, $vr1 ; CHECK-NEXT: vslli.d $vr2, $vr2, 56 ; CHECK-NEXT: vsrai.d $vr2, $vr2, 56 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.d $vr3, $a0, 1 +; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr0, $vr3 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_2) +; CHECK-NEXT: vld $vr4, $a0, %pc_lo12(.LCPI8_2) +; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr3, $vr1 ; CHECK-NEXT: vslli.d $vr3, $vr3, 56 ; CHECK-NEXT: vsrai.d $vr3, $vr3, 56 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.d $vr4, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.d $vr4, $a0, 1 +; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr0, $vr4 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_3) +; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI8_3) +; CHECK-NEXT: vshuf.b $vr4, $vr0, $vr4, $vr1 ; CHECK-NEXT: vslli.d $vr4, $vr4, 56 ; CHECK-NEXT: vsrai.d $vr4, $vr4, 56 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8 -; CHECK-NEXT: vinsgr2vr.d $vr5, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 -; CHECK-NEXT: vinsgr2vr.d $vr5, $a0, 1 +; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr0, $vr5 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_4) +; CHECK-NEXT: vld $vr6, $a0, %pc_lo12(.LCPI8_4) +; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr5, $vr1 ; CHECK-NEXT: vslli.d $vr5, $vr5, 56 ; CHECK-NEXT: vsrai.d $vr5, $vr5, 56 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 -; CHECK-NEXT: vinsgr2vr.d $vr6, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 -; CHECK-NEXT: vinsgr2vr.d $vr6, $a0, 1 +; CHECK-NEXT: vshuf.b $vr6, $vr0, $vr0, $vr6 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_5) +; CHECK-NEXT: vld $vr7, $a0, %pc_lo12(.LCPI8_5) +; 
CHECK-NEXT: vshuf.b $vr6, $vr0, $vr6, $vr1 ; CHECK-NEXT: vslli.d $vr6, $vr6, 56 ; CHECK-NEXT: vsrai.d $vr6, $vr6, 56 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 -; CHECK-NEXT: vinsgr2vr.d $vr7, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13 -; CHECK-NEXT: vinsgr2vr.d $vr7, $a0, 1 +; CHECK-NEXT: vshuf.b $vr7, $vr0, $vr0, $vr7 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_6) +; CHECK-NEXT: vld $vr8, $a0, %pc_lo12(.LCPI8_6) +; CHECK-NEXT: vshuf.b $vr7, $vr0, $vr7, $vr1 ; CHECK-NEXT: vslli.d $vr7, $vr7, 56 ; CHECK-NEXT: vsrai.d $vr7, $vr7, 56 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 -; CHECK-NEXT: vinsgr2vr.d $vr8, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -; CHECK-NEXT: vinsgr2vr.d $vr8, $a0, 1 -; CHECK-NEXT: vslli.d $vr0, $vr8, 56 +; CHECK-NEXT: vshuf.b $vr8, $vr0, $vr0, $vr8 +; CHECK-NEXT: vshuf.b $vr8, $vr0, $vr8, $vr1 +; CHECK-NEXT: vslli.d $vr8, $vr8, 56 +; CHECK-NEXT: vsrai.d $vr8, $vr8, 56 +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1 +; CHECK-NEXT: vslli.d $vr0, $vr0, 56 ; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 -; CHECK-NEXT: vst $vr0, $a1, 112 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr8, $a1, 112 ; CHECK-NEXT: vst $vr7, $a1, 96 ; CHECK-NEXT: vst $vr6, $a1, 80 ; CHECK-NEXT: vst $vr5, $a1, 64 ; CHECK-NEXT: vst $vr4, $a1, 48 ; CHECK-NEXT: vst $vr3, $a1, 32 ; CHECK-NEXT: vst $vr2, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -296,28 +241,17 @@ define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_8i16_to_8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: vld $vr1, $a0, 
%pc_lo12(.LCPI9_0) +; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0 +; CHECK-NEXT: vilvl.h $vr1, $vr1, $vr1 ; CHECK-NEXT: vslli.w $vr1, $vr1, 16 ; CHECK-NEXT: vsrai.w $vr1, $vr1, 16 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; CHECK-NEXT: vslli.w $vr0, $vr2, 16 +; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vslli.w $vr0, $vr0, 16 ; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <8 x i16>, ptr %ptr @@ -330,34 +264,34 @@ define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_8i16_to_8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; CHECK-NEXT: vslli.d $vr1, $vr1, 48 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 48 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; CHECK-NEXT: vslli.d $vr2, $vr2, 48 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI10_0) +; CHECK-NEXT: vshuf4i.h $vr2, $vr0, 14 +; CHECK-NEXT: vori.b $vr3, $vr1, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1) +; CHECK-NEXT: vld $vr4, $a0, %pc_lo12(.LCPI10_1) +; CHECK-NEXT: vshuf.h $vr3, $vr0, $vr2 +; CHECK-NEXT: vslli.d $vr2, $vr3, 48 ; CHECK-NEXT: vsrai.d $vr2, $vr2, 48 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.d $vr3, 
$a0, 1 +; CHECK-NEXT: vshuf.h $vr4, $vr0, $vr0 +; CHECK-NEXT: vori.b $vr3, $vr1, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_2) +; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI10_2) +; CHECK-NEXT: vshuf.h $vr3, $vr0, $vr4 ; CHECK-NEXT: vslli.d $vr3, $vr3, 48 ; CHECK-NEXT: vsrai.d $vr3, $vr3, 48 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.d $vr4, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.d $vr4, $a0, 1 -; CHECK-NEXT: vslli.d $vr0, $vr4, 48 +; CHECK-NEXT: vshuf.h $vr5, $vr0, $vr0 +; CHECK-NEXT: vori.b $vr4, $vr1, 0 +; CHECK-NEXT: vshuf.h $vr4, $vr0, $vr5 +; CHECK-NEXT: vslli.d $vr4, $vr4, 48 +; CHECK-NEXT: vsrai.d $vr4, $vr4, 48 +; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0 +; CHECK-NEXT: vslli.d $vr0, $vr1, 48 ; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 -; CHECK-NEXT: vst $vr0, $a1, 48 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr4, $a1, 48 ; CHECK-NEXT: vst $vr3, $a1, 32 ; CHECK-NEXT: vst $vr2, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 ; CHECK-NEXT: ret entry: %A = load <8 x i16>, ptr %ptr @@ -370,20 +304,15 @@ define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_sext_4i32_to_4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 +; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 +; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 16 ; CHECK-NEXT: vslli.d $vr1, $vr1, 32 ; CHECK-NEXT: vsrai.d $vr1, $vr1, 32 -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1 -; CHECK-NEXT: vslli.d $vr0, $vr2, 32 +; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16 +; CHECK-NEXT: vslli.d $vr0, $vr0, 32 ; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; 
CHECK-NEXT: ret entry: %A = load <4 x i32>, ptr %ptr @@ -391,4 +320,3 @@ entry: store <4 x i64> %B, ptr %dst ret void } - diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll index 43cf68c880ff3..3cc9b62d76736 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll @@ -5,10 +5,12 @@ define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_2i8_to_2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.bu $a2, $a0, 0 -; CHECK-NEXT: ld.bu $a0, $a0, 1 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; CHECK-NEXT: ld.h $a0, $a0, 0 +; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0 +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr2, $vr0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -21,14 +23,12 @@ entry: define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_4i8_to_4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.bu $a2, $a0, 0 -; CHECK-NEXT: ld.bu $a3, $a0, 1 -; CHECK-NEXT: ld.bu $a4, $a0, 2 -; CHECK-NEXT: ld.bu $a0, $a0, 3 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 1 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) +; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI1_0) +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr2, $vr0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -41,22 +41,12 @@ entry: define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_8i8_to_8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.bu $a2, $a0, 0 -; CHECK-NEXT: ld.bu $a3, $a0, 1 -; CHECK-NEXT: ld.bu $a4, $a0, 2 -; CHECK-NEXT: ld.bu $a5, $a0, 3 -; CHECK-NEXT: 
vinsgr2vr.h $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 3 -; CHECK-NEXT: ld.bu $a2, $a0, 4 -; CHECK-NEXT: ld.bu $a3, $a0, 5 -; CHECK-NEXT: ld.bu $a4, $a0, 6 -; CHECK-NEXT: ld.bu $a0, $a0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 5 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr2, $vr0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -69,10 +59,12 @@ entry: define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_2i16_to_2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.hu $a2, $a0, 0 -; CHECK-NEXT: ld.hu $a0, $a0, 2 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vshuf.h $vr0, $vr1, $vr2 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -85,14 +77,12 @@ entry: define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_4i16_to_4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.hu $a2, $a0, 0 -; CHECK-NEXT: ld.hu $a3, $a0, 2 -; CHECK-NEXT: ld.hu $a4, $a0, 4 -; CHECK-NEXT: ld.hu $a0, $a0, 6 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 1 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0) +; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: 
vshuf.h $vr0, $vr1, $vr2 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -105,10 +95,12 @@ entry: define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_2i32_to_2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.wu $a2, $a0, 0 -; CHECK-NEXT: ld.wu $a0, $a0, 4 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0) +; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vshuf.w $vr0, $vr1, $vr2 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: @@ -122,43 +114,18 @@ define void @load_zext_16i8_to_16i16(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_16i8_to_16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 7 -; CHECK-NEXT: vrepli.h $vr2, 255 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a0, 4 -; CHECK-NEXT: 
vpickve2gr.b $a0, $vr0, 13 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a0, 7 -; CHECK-NEXT: vand.v $vr0, $vr3, $vr2 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_1) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI6_1) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_2) +; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI6_2) +; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1 +; CHECK-NEXT: vrepli.b $vr4, 0 +; CHECK-NEXT: vshuf.b $vr1, $vr4, $vr1, $vr2 +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr4, $vr3 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -171,47 +138,28 @@ define void @load_zext_16i8_to_16i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_16i8_to_16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3 -; CHECK-NEXT: vrepli.w $vr2, 255 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3 -; CHECK-NEXT: vand.v $vr3, $vr3, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8 -; CHECK-NEXT: vinsgr2vr.w $vr4, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 -; CHECK-NEXT: 
vinsgr2vr.w $vr4, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 -; CHECK-NEXT: vinsgr2vr.w $vr4, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 -; CHECK-NEXT: vinsgr2vr.w $vr4, $a0, 3 -; CHECK-NEXT: vand.v $vr4, $vr4, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 -; CHECK-NEXT: vinsgr2vr.w $vr5, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13 -; CHECK-NEXT: vinsgr2vr.w $vr5, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 -; CHECK-NEXT: vinsgr2vr.w $vr5, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -; CHECK-NEXT: vinsgr2vr.w $vr5, $a0, 3 -; CHECK-NEXT: vand.v $vr0, $vr5, $vr2 -; CHECK-NEXT: vst $vr0, $a1, 48 -; CHECK-NEXT: vst $vr4, $a1, 32 -; CHECK-NEXT: vst $vr3, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_1) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI7_1) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_2) +; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI7_2) +; CHECK-NEXT: vshuf.b $vr1, $vr0, $vr0, $vr1 +; CHECK-NEXT: vrepli.b $vr4, 0 +; CHECK-NEXT: vshuf.b $vr1, $vr4, $vr1, $vr2 +; CHECK-NEXT: vshuf.b $vr3, $vr0, $vr0, $vr3 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_3) +; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI7_3) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_4) +; CHECK-NEXT: vld $vr6, $a0, %pc_lo12(.LCPI7_4) +; CHECK-NEXT: vshuf.b $vr3, $vr4, $vr3, $vr2 +; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr0, $vr5 +; CHECK-NEXT: vshuf.b $vr2, $vr4, $vr5, $vr2 +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr4, $vr6 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr2, $a1, 48 +; CHECK-NEXT: vst $vr3, $a1, 32 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -224,55 +172,46 @@ define void @load_zext_16i8_to_16i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_16i8_to_16i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 -; CHECK-NEXT: 
vinsgr2vr.d $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; CHECK-NEXT: vrepli.d $vr2, 255 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.d $vr3, $a0, 1 -; CHECK-NEXT: vand.v $vr3, $vr3, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.d $vr4, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.d $vr4, $a0, 1 -; CHECK-NEXT: vand.v $vr4, $vr4, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.d $vr5, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.d $vr5, $a0, 1 -; CHECK-NEXT: vand.v $vr5, $vr5, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8 -; CHECK-NEXT: vinsgr2vr.d $vr6, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 -; CHECK-NEXT: vinsgr2vr.d $vr6, $a0, 1 -; CHECK-NEXT: vand.v $vr6, $vr6, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 -; CHECK-NEXT: vinsgr2vr.d $vr7, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 -; CHECK-NEXT: vinsgr2vr.d $vr7, $a0, 1 -; CHECK-NEXT: vand.v $vr7, $vr7, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 -; CHECK-NEXT: vinsgr2vr.d $vr8, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13 -; CHECK-NEXT: vinsgr2vr.d $vr8, $a0, 1 -; CHECK-NEXT: vand.v $vr8, $vr8, $vr2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 -; CHECK-NEXT: vinsgr2vr.d $vr9, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -; CHECK-NEXT: vinsgr2vr.d $vr9, $a0, 1 -; CHECK-NEXT: vand.v $vr0, $vr9, $vr2 -; CHECK-NEXT: vst $vr0, $a1, 112 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI8_0) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_1) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI8_1) +; CHECK-NEXT: vshuf4i.b $vr3, $vr0, 14 +; CHECK-NEXT: vrepli.b $vr4, 0 +; CHECK-NEXT: vshuf.b $vr3, $vr4, $vr3, $vr1 +; CHECK-NEXT: vshuf.b $vr2, $vr0, $vr0, $vr2 +; 
CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_2) +; CHECK-NEXT: vld $vr5, $a0, %pc_lo12(.LCPI8_2) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_3) +; CHECK-NEXT: vld $vr6, $a0, %pc_lo12(.LCPI8_3) +; CHECK-NEXT: vshuf.b $vr2, $vr4, $vr2, $vr1 +; CHECK-NEXT: vshuf.b $vr5, $vr0, $vr0, $vr5 +; CHECK-NEXT: vshuf.b $vr5, $vr4, $vr5, $vr1 +; CHECK-NEXT: vshuf.b $vr6, $vr0, $vr0, $vr6 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_4) +; CHECK-NEXT: vld $vr7, $a0, %pc_lo12(.LCPI8_4) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_5) +; CHECK-NEXT: vld $vr8, $a0, %pc_lo12(.LCPI8_5) +; CHECK-NEXT: vshuf.b $vr6, $vr4, $vr6, $vr1 +; CHECK-NEXT: vshuf.b $vr7, $vr0, $vr0, $vr7 +; CHECK-NEXT: vshuf.b $vr7, $vr4, $vr7, $vr1 +; CHECK-NEXT: vshuf.b $vr8, $vr0, $vr0, $vr8 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_6) +; CHECK-NEXT: vld $vr9, $a0, %pc_lo12(.LCPI8_6) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_7) +; CHECK-NEXT: vld $vr10, $a0, %pc_lo12(.LCPI8_7) +; CHECK-NEXT: vshuf.b $vr8, $vr4, $vr8, $vr1 +; CHECK-NEXT: vshuf.b $vr9, $vr0, $vr0, $vr9 +; CHECK-NEXT: vshuf.b $vr1, $vr4, $vr9, $vr1 +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr4, $vr10 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 112 ; CHECK-NEXT: vst $vr8, $a1, 96 ; CHECK-NEXT: vst $vr7, $a1, 80 ; CHECK-NEXT: vst $vr6, $a1, 64 ; CHECK-NEXT: vst $vr5, $a1, 48 -; CHECK-NEXT: vst $vr4, $a1, 32 +; CHECK-NEXT: vst $vr2, $a1, 32 ; CHECK-NEXT: vst $vr3, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 ; CHECK-NEXT: ret entry: %A = load <16 x i8>, ptr %ptr @@ -285,29 +224,18 @@ define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_8i16_to_8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: 
vinsgr2vr.w $vr1, $a0, 3 -; CHECK-NEXT: lu12i.w $a0, 15 -; CHECK-NEXT: ori $a0, $a0, 4095 -; CHECK-NEXT: vreplgr2vr.w $vr2, $a0 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3 -; CHECK-NEXT: vand.v $vr0, $vr3, $vr2 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI9_0) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_1) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI9_1) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_2) +; CHECK-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI9_2) +; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0 +; CHECK-NEXT: vrepli.b $vr4, 0 +; CHECK-NEXT: vshuf.h $vr2, $vr4, $vr1 +; CHECK-NEXT: vshuf.h $vr3, $vr0, $vr4 +; CHECK-NEXT: vst $vr3, $a1, 0 +; CHECK-NEXT: vst $vr2, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <8 x i16>, ptr %ptr @@ -320,33 +248,28 @@ define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_8i16_to_8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; CHECK-NEXT: lu12i.w $a0, 15 -; CHECK-NEXT: ori $a0, $a0, 4095 -; CHECK-NEXT: vreplgr2vr.d $vr2, $a0 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.d $vr3, $a0, 1 -; CHECK-NEXT: vand.v $vr3, $vr3, $vr2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4 -; CHECK-NEXT: vinsgr2vr.d $vr4, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.d $vr4, 
$a0, 1 -; CHECK-NEXT: vand.v $vr4, $vr4, $vr2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6 -; CHECK-NEXT: vinsgr2vr.d $vr5, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.d $vr5, $a0, 1 -; CHECK-NEXT: vand.v $vr0, $vr5, $vr2 -; CHECK-NEXT: vst $vr0, $a1, 48 -; CHECK-NEXT: vst $vr4, $a1, 32 -; CHECK-NEXT: vst $vr3, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI10_0) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI10_1) +; CHECK-NEXT: vshuf4i.h $vr3, $vr0, 14 +; CHECK-NEXT: vrepli.b $vr4, 0 +; CHECK-NEXT: vori.b $vr5, $vr1, 0 +; CHECK-NEXT: vshuf.h $vr5, $vr4, $vr3 +; CHECK-NEXT: vshuf.h $vr2, $vr0, $vr0 +; CHECK-NEXT: vori.b $vr3, $vr1, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_2) +; CHECK-NEXT: vld $vr6, $a0, %pc_lo12(.LCPI10_2) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_3) +; CHECK-NEXT: vld $vr7, $a0, %pc_lo12(.LCPI10_3) +; CHECK-NEXT: vshuf.h $vr3, $vr4, $vr2 +; CHECK-NEXT: vshuf.h $vr6, $vr0, $vr0 +; CHECK-NEXT: vshuf.h $vr1, $vr4, $vr6 +; CHECK-NEXT: vshuf.h $vr7, $vr0, $vr4 +; CHECK-NEXT: vst $vr7, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 48 +; CHECK-NEXT: vst $vr3, $a1, 32 +; CHECK-NEXT: vst $vr5, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <8 x i16>, ptr %ptr @@ -359,21 +282,16 @@ define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_4i32_to_4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 -; CHECK-NEXT: addi.w $a0, $zero, -1 -; CHECK-NEXT: lu32i.d $a0, 0 -; CHECK-NEXT: vreplgr2vr.d $vr2, $a0 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2 -; CHECK-NEXT: vinsgr2vr.d $vr3, $a0, 0 -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.d $vr3, $a0, 1 -; 
CHECK-NEXT: vand.v $vr0, $vr3, $vr2 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0) +; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI11_0) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_1) +; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI11_1) +; CHECK-NEXT: vshuf4i.w $vr3, $vr0, 14 +; CHECK-NEXT: vrepli.b $vr4, 0 +; CHECK-NEXT: vshuf.w $vr1, $vr4, $vr3 +; CHECK-NEXT: vshuf.w $vr2, $vr0, $vr4 +; CHECK-NEXT: vst $vr2, $a1, 0 +; CHECK-NEXT: vst $vr1, $a1, 16 ; CHECK-NEXT: ret entry: %A = load <4 x i32>, ptr %ptr @@ -381,4 +299,3 @@ entry: store <4 x i64> %B, ptr %dst ret void } -