diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d4e1d9c6f3ca6..1383302059910 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Analysis/VectorUtils.h"
 
 using namespace llvm;
 
@@ -543,6 +544,37 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
   }
 }
 
+// Widen element type to get a new mask value (if possible).
+// For example:
+//  shufflevector <4 x i32> %a, <4 x i32> %b,
+//                <4 x i32> <i32 6, i32 7, i32 2, i32 3>
+// is equivalent to:
+//  shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
+// can be lowered to:
+//  VPACKOD_D vr0, vr0, vr1
+static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                                SDValue V1, SDValue V2, SelectionDAG &DAG) {
+  unsigned EltBits = VT.getScalarSizeInBits();
+
+  if (EltBits > 32 || EltBits == 1)
+    return SDValue();
+
+  SmallVector<int> NewMask;
+  if (widenShuffleMaskElts(Mask, NewMask)) {
+    MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
+                                        : MVT::getIntegerVT(EltBits * 2);
+    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
+    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
+      SDValue NewV1 = DAG.getBitcast(NewVT, V1);
+      SDValue NewV2 = DAG.getBitcast(NewVT, V2);
+      return DAG.getBitcast(
+          VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
+    }
+  }
+
+  return SDValue();
+}
+
 /// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
 /// instruction.
 // The function matches elements from one of the input vectors shuffled to the
@@ -1365,6 +1397,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     return Result;
   if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG)))
     return Result;
+  if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
+    return NewShuffle;
   if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
     return Result;
   return SDValue();
@@ -1803,6 +1837,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     return Result;
   if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG)))
     return Result;
+  if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
+    return NewShuffle;
   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
     return Result;
 
diff --git a/llvm/test/CodeGen/LoongArch/lasx/widen-shuffle-mask.ll b/llvm/test/CodeGen/LoongArch/lasx/widen-shuffle-mask.ll
index c32a60622f2a1..df639cb78cd1f 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/widen-shuffle-mask.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/widen-shuffle-mask.ll
@@ -6,7 +6,8 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_v16i16(<32 x i8> %a, <32 x i8> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_0)
 ; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvshuf.h $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %r
@@ -17,7 +18,8 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_v8i32(<32 x i8> %a, <32 x i8> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI1_0)
 ; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvshuf.w $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %r
@@ -28,7 +30,8 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_v4i64(<32 x i8> %a, <32 x i8> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
 ; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvshuf.d $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %r
@@ -39,7 +42,7 @@ define <16 x i16> @widen_shuffle_mask_v16i16_to_v8i32(<16 x i16> %a, <16 x i16>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_0)
 ; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT:    xvshuf.h $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvshuf.w $xr2, $xr1, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
@@ -51,7 +54,7 @@ define <16 x i16> @widen_shuffle_mask_v16i16_to_v4i64(<16 x i16> %a, <16 x i16>
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
 ; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT:    xvshuf.h $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvshuf.d $xr2, $xr1, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
@@ -63,7 +66,7 @@ define <8 x i32> @widen_shuffle_mask_v8i32_to_v4i64(<8 x i32> %a, <8 x i32> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
 ; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT:    xvshuf.w $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvshuf.d $xr2, $xr1, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
@@ -73,9 +76,7 @@ define <8 x i32> @widen_shuffle_mask_v8i32_to_v4i64(<8 x i32> %a, <8 x i32> %b)
 define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpackev_h(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvpackev_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvpackev.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %r
@@ -84,9 +85,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpackev_h(<32 x i8>
 define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpackod_h(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvpackod_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvpackod.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %r
@@ -95,9 +94,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpackod_h(<32 x i8>
 define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpickev_h(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvpickev_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvpickev.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %r
@@ -106,9 +103,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpickev_h(<32 x i8>
 define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpickod_h(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvpickod_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvpickod.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %r
@@ -117,9 +112,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpickod_h(<32 x i8>
 define <32 x i8> @widen_shuffle_mask_v32i8_to_xvilvl_h(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvilvl_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvilvl.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %r
@@ -128,9 +121,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvilvl_h(<32 x i8> %
 define <32 x i8> @widen_shuffle_mask_v32i8_to_xvilvh_h(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvilvh_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvilvh.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %r
diff --git a/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll b/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll
index 35457ffa59586..bd3b7d0951565 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll
@@ -6,7 +6,8 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v8i16(<16 x i8> %a, <16 x i8> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_0)
 ; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vshuf.h $vr2, $vr1, $vr0
+; CHECK-NEXT:    vori.b $vr0, $vr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %r
@@ -17,7 +18,8 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v4i32(<16 x i8> %a, <16 x i8> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI1_0)
 ; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vshuf.w $vr2, $vr1, $vr0
+; CHECK-NEXT:    vori.b $vr0, $vr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %r
@@ -28,7 +30,8 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v2i64(<16 x i8> %a, <16 x i8> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
 ; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vshuf.d $vr2, $vr1, $vr0
+; CHECK-NEXT:    vori.b $vr0, $vr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %r
@@ -39,7 +42,7 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v4i32(<8 x i16> %a, <8 x i16> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_0)
 ; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT:    vshuf.h $vr2, $vr1, $vr0
+; CHECK-NEXT:    vshuf.w $vr2, $vr1, $vr0
 ; CHECK-NEXT:    vori.b $vr0, $vr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
@@ -51,7 +54,7 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
 ; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT:    vshuf.h $vr2, $vr1, $vr0
+; CHECK-NEXT:    vshuf.d $vr2, $vr1, $vr0
 ; CHECK-NEXT:    vori.b $vr0, $vr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
@@ -63,7 +66,7 @@ define <4 x i32> @widen_shuffle_mask_v4i32_to_v2i64(<4 x i32> %a, <4 x i32> %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
 ; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT:    vshuf.w $vr2, $vr1, $vr0
+; CHECK-NEXT:    vshuf.d $vr2, $vr1, $vr0
 ; CHECK-NEXT:    vori.b $vr0, $vr2, 0
 ; CHECK-NEXT:    ret
   %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
@@ -73,9 +76,7 @@ define <4 x i32> @widen_shuffle_mask_v4i32_to_v2i64(<4 x i32> %a, <4 x i32> %b)
 define <16 x i8> @widen_shuffle_mask_v16i8_to_vpackev_h(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vpackev_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vpackev.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %r
@@ -84,9 +85,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vpackev_h(<16 x i8>
 define <16 x i8> @widen_shuffle_mask_v16i8_to_vpackod_h(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vpackod_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vpackod.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %r
@@ -95,9 +94,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vpackod_h(<16 x i8>
 define <16 x i8> @widen_shuffle_mask_v16i8_to_vpickev_h(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vpickev_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vpickev.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %r
@@ -106,9 +103,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vpickev_h(<16 x i8>
 define <16 x i8> @widen_shuffle_mask_v16i8_to_vpickod_h(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vpickod_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vpickod.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %r
@@ -117,9 +112,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vpickod_h(<16 x i8>
 define <16 x i8> @widen_shuffle_mask_v16i8_to_vilvl_h(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vilvl_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %r
@@ -128,9 +121,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vilvl_h(<16 x i8> %b
 define <16 x i8> @widen_shuffle_mask_v16i8_to_vilvh_h(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vilvh_h:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    ret
   %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %r
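
The transform above fires only when widenShuffleMaskElts can pair up adjacent mask elements: each pair must address the low and high half of a single element at twice the width. Below is a minimal standalone sketch of that pairing rule; widenMaskSketch is a hypothetical helper for illustration, not the LLVM implementation, and it simply rejects undef (-1) lanes, which the real helper can fold pairwise.

#include <cstdio>
#include <vector>

// Pair adjacent mask elements; succeed only if each pair (Lo, Hi) is the
// low/high half of one wider element, i.e. Lo is even and Hi == Lo + 1.
static bool widenMaskSketch(const std::vector<int> &Mask,
                            std::vector<int> &NewMask) {
  if (Mask.size() % 2 != 0)
    return false;
  NewMask.clear();
  for (size_t I = 0; I != Mask.size(); I += 2) {
    int Lo = Mask[I], Hi = Mask[I + 1];
    if (Lo < 0 || Lo % 2 != 0 || Hi != Lo + 1)
      return false; // pair straddles an element boundary; cannot widen
    NewMask.push_back(Lo / 2);
  }
  return true;
}

int main() {
  // The <4 x i32> mask <6,7,2,3> from the widenShuffleMask comment widens
  // to the <2 x i64> mask <3,1>, which the backend then matches as VPACKOD_D.
  std::vector<int> Widened;
  if (widenMaskSketch({6, 7, 2, 3}, Widened))
    std::printf("widened mask: %d %d\n", Widened[0], Widened[1]); // 3 1
  // A mask such as <1,2,5,6> pairs halves of different elements; rejected.
  std::vector<int> Rejected;
  std::printf("can widen <1,2,5,6>: %s\n",
              widenMaskSketch({1, 2, 5, 6}, Rejected) ? "yes" : "no"); // no
  return 0;
}

As the test updates show, the payoff is that a byte shuffle that previously needed vshuf.b plus a constant-pool control vector either becomes an h/w/d-granularity vshuf or, when the widened mask matches a fixed pattern, a single vpackev/vpackod/vpickev/vpickod/vilvl/vilvh with no mask load at all.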