From 465803bf93120b968340a2d3b5e61d2e4983eba6 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Fri, 29 Nov 2024 13:15:51 +0800 Subject: [PATCH 1/2] [LoongArch] Pre-commit tests for vector type llvm.bitreverse. NFC A later commit will optimize this. --- .../test/CodeGen/LoongArch/lasx/bitreverse.ll | 107 ++++++++++++++++++ llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll | 101 +++++++++++++++++ 2 files changed, 208 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll new file mode 100644 index 0000000000000..3d0d232fcca68 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 -mattr=+lasx --verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>) + +define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind { +; CHECK-LABEL: test_bitreverse_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvslli.b $xr1, $xr0, 4 +; CHECK-NEXT: xvsrli.b $xr0, $xr0, 4 +; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvandi.b $xr1, $xr0, 51 +; CHECK-NEXT: xvslli.b $xr1, $xr1, 2 +; CHECK-NEXT: xvsrli.b $xr0, $xr0, 2 +; CHECK-NEXT: xvandi.b $xr0, $xr0, 51 +; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvandi.b $xr1, $xr0, 85 +; CHECK-NEXT: xvslli.b $xr1, $xr1, 1 +; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT: xvandi.b $xr0, $xr0, 85 +; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret + %b = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) + ret <32 x i8> %b +} + +declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>) + +define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind { +; CHECK-LABEL: test_bitreverse_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
xvshuf4i.b $xr0, $xr0, 177 +; CHECK-NEXT: xvsrli.h $xr1, $xr0, 4 +; CHECK-NEXT: xvrepli.b $xr2, 15 +; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 +; CHECK-NEXT: xvslli.h $xr0, $xr0, 4 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvsrli.h $xr1, $xr0, 2 +; CHECK-NEXT: xvrepli.b $xr2, 51 +; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 +; CHECK-NEXT: xvslli.h $xr0, $xr0, 2 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvsrli.h $xr1, $xr0, 1 +; CHECK-NEXT: xvrepli.b $xr2, 85 +; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 +; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %b = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) + ret <16 x i16> %b +} + +declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>) + +define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind { +; CHECK-LABEL: test_bitreverse_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27 +; CHECK-NEXT: xvsrli.w $xr1, $xr0, 4 +; CHECK-NEXT: xvrepli.b $xr2, 15 +; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 +; CHECK-NEXT: xvslli.w $xr0, $xr0, 4 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvsrli.w $xr1, $xr0, 2 +; CHECK-NEXT: xvrepli.b $xr2, 51 +; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 +; CHECK-NEXT: xvslli.w $xr0, $xr0, 2 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvsrli.w $xr1, $xr0, 1 +; CHECK-NEXT: xvrepli.b $xr2, 85 +; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 +; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %b = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) + ret <8 x i32> %b +} + +declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) + +define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind { +; CHECK-LABEL: test_bitreverse_v4i64: +; CHECK: 
# %bb.0: +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3 +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: ret + %b = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) + ret <4 x i64> %b +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll new file mode 100644 index 0000000000000..93624c8dd6a96 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 -mattr=+lsx --verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) + +define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind { +; CHECK-LABEL: test_bitreverse_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vslli.b $vr1, $vr0, 4 +; CHECK-NEXT: vsrli.b $vr0, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vandi.b $vr1, $vr0, 51 +; CHECK-NEXT: vslli.b $vr1, $vr1, 2 +; CHECK-NEXT: vsrli.b $vr0, $vr0, 2 +; CHECK-NEXT: vandi.b $vr0, $vr0, 51 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vandi.b $vr1, $vr0, 85 +; CHECK-NEXT: vslli.b $vr1, $vr1, 1 +; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT: vandi.b $vr0, $vr0, 85 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret + %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) + ret <16 x i8> %b +} + +declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) + +define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind { +; CHECK-LABEL: test_bitreverse_v8i16: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 177 +; CHECK-NEXT: vsrli.h $vr1, $vr0, 4 +; CHECK-NEXT: vrepli.b $vr2, 15 +; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 +; CHECK-NEXT: vslli.h $vr0, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vsrli.h $vr1, $vr0, 2 +; CHECK-NEXT: vrepli.b $vr2, 51 +; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 +; CHECK-NEXT: vslli.h $vr0, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vsrli.h $vr1, $vr0, 1 +; CHECK-NEXT: vrepli.b $vr2, 85 +; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 +; CHECK-NEXT: vslli.h $vr0, $vr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) + ret <8 x i16> %b +} + +declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) + +define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind { +; CHECK-LABEL: test_bitreverse_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27 +; CHECK-NEXT: vsrli.w $vr1, $vr0, 4 +; CHECK-NEXT: vrepli.b $vr2, 15 +; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 +; CHECK-NEXT: vslli.w $vr0, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vsrli.w $vr1, $vr0, 2 +; CHECK-NEXT: vrepli.b $vr2, 51 +; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 +; CHECK-NEXT: vslli.w $vr0, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vsrli.w $vr1, $vr0, 1 +; CHECK-NEXT: vrepli.b $vr2, 85 +; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 +; CHECK-NEXT: vslli.w $vr0, $vr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) + ret <4 x i32> %b +} + +declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) + +define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind { +; CHECK-LABEL: test_bitreverse_v2i64: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret + %b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) + ret <2 x i64> %b +} From f3065bddf7a325f1bbaefd9c1895a24669e3beda Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Fri, 29 Nov 2024 13:19:41 +0800 Subject: [PATCH 2/2] [LoongArch] Optimize vector bitreverse using scalar bitrev and vshuf4i Custom lower vector type bitreverse to scalar bitrev and vshuf4i instructions. Keep `v2i64` and `v4i64` bitreverse `Expand`, it's good enough. --- .../LoongArch/LoongArchISelLowering.cpp | 51 +++++++++++ .../Target/LoongArch/LoongArchISelLowering.h | 2 + .../Target/LoongArch/LoongArchInstrInfo.td | 2 + .../test/CodeGen/LoongArch/lasx/bitreverse.ll | 90 ++++++++----------- llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll | 72 +++++---------- 5 files changed, 115 insertions(+), 102 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 16bceacfaa222..e10f122b38121 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -270,6 +270,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, Expand); } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) + setOperationAction(ISD::BITREVERSE, VT, Custom); for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) setOperationAction(ISD::BSWAP, VT, Legal); for (MVT VT : {MVT::v4i32, MVT::v2i64}) { @@ -324,6 +326,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, Expand); } + for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32}) + 
setOperationAction(ISD::BITREVERSE, VT, Custom);
     for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
       setOperationAction(ISD::BSWAP, VT, Legal);
     for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
@@ -440,10 +444,71 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerBUILD_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
     return lowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::BITREVERSE:
+    return lowerBITREVERSE(Op, DAG);
   }
   return SDValue();
 }
 
+/// Custom lowering of ISD::BITREVERSE for the v16i8/v8i16/v4i32 and
+/// v32i8/v16i16/v8i32 vector types.
+///
+/// The source is bitcast to a vector of i64 lanes (v2i64 or v4i64) and every
+/// lane is reversed with a scalar node: LoongArchISD::BITREV_8B for byte
+/// elements, ISD::BITREVERSE otherwise. Reversing a whole 64-bit lane also
+/// reverses the order of the i16/i32 elements it holds, so for those types a
+/// final shuffle reverses the elements within each lane again.
+///
+/// Returns an empty SDValue for any other result type.
+SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  EVT ResTy = Op->getValueType(0);
+  SDValue Src = Op->getOperand(0);
+  SDLoc DL(Op);
+
+  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+  unsigned int OrigEltNum = ResTy.getVectorNumElements();
+  unsigned int NewEltNum = NewVT.getVectorNumElements();
+
+  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
+
+  // LoongArchISD::BITREV_8B and ISD::BITREVERSE are enumerators of different
+  // enum types; cast both so the conditional has a single operand type.
+  unsigned RevOpc = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
+                        ? static_cast<unsigned>(LoongArchISD::BITREV_8B)
+                        : static_cast<unsigned>(ISD::BITREVERSE);
+
+  SmallVector<SDValue, 4> Ops;
+  for (unsigned int i = 0; i < NewEltNum; i++) {
+    // Named Elt, not Op, so the function parameter is not shadowed.
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
+                              DAG.getConstant(i, DL, MVT::i64));
+    Ops.push_back(DAG.getNode(RevOpc, DL, MVT::i64, Elt));
+  }
+  SDValue Res =
+      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
+
+  switch (ResTy.getSimpleVT().SimpleTy) {
+  default:
+    return SDValue();
+  case MVT::v16i8:
+  case MVT::v32i8:
+    return Res;
+  case MVT::v8i16:
+  case MVT::v16i16:
+  case MVT::v4i32:
+  case MVT::v8i32: {
+    // Build a mask that reverses the element order inside each 64-bit lane.
+    unsigned int EltsPerLane = OrigEltNum / NewEltNum;
+    SmallVector<int, 32> Mask;
+    for (unsigned int i = 0; i < NewEltNum; i++)
+      for (int j = EltsPerLane - 1; j >= 0; j--)
+        Mask.push_back(j + EltsPerLane * i);
+    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
+  }
+  }
+}
+
 /// Determine whether a range fits a regular pattern of values.
 /// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType> @@ -4680,6 +4730,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(REVB_2H) NODE_NAME_CASE(REVB_2W) NODE_NAME_CASE(BITREV_4B) + NODE_NAME_CASE(BITREV_8B) NODE_NAME_CASE(BITREV_W) NODE_NAME_CASE(ROTR_W) NODE_NAME_CASE(ROTL_W) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 605093b01476d..a3bcc7599efc3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -68,6 +68,7 @@ enum NodeType : unsigned { REVB_2H, REVB_2W, BITREV_4B, + BITREV_8B, BITREV_W, // Intrinsic operations start ============================================ @@ -334,6 +335,7 @@ class LoongArchTargetLowering : public TargetLowering { SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBITREVERSE(SDValue Op, SelectionDAG &DAG) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 6134daf2fbe63..2101aa058305f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -112,6 +112,7 @@ def loongarch_bstrpick def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>; def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>; def loongarch_bitrev_4b : SDNode<"LoongArchISD::BITREV_4B", SDTUnaryOp>; +def loongarch_bitrev_8b : SDNode<"LoongArchISD::BITREV_8B", SDTUnaryOp>; def loongarch_bitrev_w : SDNode<"LoongArchISD::BITREV_W", SDTUnaryOp>; def loongarch_clzw : SDNode<"LoongArchISD::CLZ_W", SDTIntBitCountUnaryOp>; def loongarch_ctzw : SDNode<"LoongArchISD::CTZ_W", SDTIntBitCountUnaryOp>; @@ -1765,6 +1766,7 @@ def : Pat<(bitreverse
(bswap GPR:$rj)), (BITREV_4B GPR:$rj)>; let Predicates = [IsLA64] in { def : Pat<(loongarch_revb_2w GPR:$rj), (REVB_2W GPR:$rj)>; def : Pat<(bswap GPR:$rj), (REVB_D GPR:$rj)>; +def : Pat<(loongarch_bitrev_8b GPR:$rj), (BITREV_8B GPR:$rj)>; def : Pat<(loongarch_bitrev_w GPR:$rj), (BITREV_W GPR:$rj)>; def : Pat<(bitreverse GPR:$rj), (BITREV_D GPR:$rj)>; def : Pat<(bswap (bitreverse GPR:$rj)), (BITREV_8B GPR:$rj)>; diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll index 3d0d232fcca68..11f1bce55fad6 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll @@ -7,19 +7,19 @@ declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>) define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind { ; CHECK-LABEL: test_bitreverse_v32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: xvslli.b $xr1, $xr0, 4 -; CHECK-NEXT: xvsrli.b $xr0, $xr0, 4 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvandi.b $xr1, $xr0, 51 -; CHECK-NEXT: xvslli.b $xr1, $xr1, 2 -; CHECK-NEXT: xvsrli.b $xr0, $xr0, 2 -; CHECK-NEXT: xvandi.b $xr0, $xr0, 51 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvandi.b $xr1, $xr0, 85 -; CHECK-NEXT: xvslli.b $xr1, $xr1, 1 -; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 -; CHECK-NEXT: xvandi.b $xr0, $xr0, 85 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: bitrev.8b $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: bitrev.8b $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2 +; CHECK-NEXT: bitrev.8b $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: bitrev.8b $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3 +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 ; CHECK-NEXT: ret %b = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) ret <32 x i8> %b @@ -30,25 +30,19 @@ declare <16 x 
i16> @llvm.bitreverse.v16i16(<16 x i16>) define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind { ; CHECK-LABEL: test_bitreverse_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 177 -; CHECK-NEXT: xvsrli.h $xr1, $xr0, 4 -; CHECK-NEXT: xvrepli.b $xr2, 15 -; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 -; CHECK-NEXT: xvslli.h $xr0, $xr0, 4 -; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -; CHECK-NEXT: xvsrli.h $xr1, $xr0, 2 -; CHECK-NEXT: xvrepli.b $xr2, 51 -; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 -; CHECK-NEXT: xvslli.h $xr0, $xr0, 2 -; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -; CHECK-NEXT: xvsrli.h $xr1, $xr0, 1 -; CHECK-NEXT: xvrepli.b $xr2, 85 -; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 -; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 -; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3 +; CHECK-NEXT: xvshuf4i.h $xr0, $xr1, 27 ; CHECK-NEXT: ret %b = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ret <16 x i16> %b @@ -59,25 +53,19 @@ declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>) define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind { ; CHECK-LABEL: test_bitreverse_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27 -; CHECK-NEXT: xvsrli.w $xr1, $xr0, 4 -; CHECK-NEXT: xvrepli.b $xr2, 15 -; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 -; CHECK-NEXT: xvslli.w $xr0, $xr0, 4 -; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -; CHECK-NEXT: xvsrli.w $xr1, $xr0, 2 -; 
CHECK-NEXT: xvrepli.b $xr2, 51 -; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 -; CHECK-NEXT: xvslli.w $xr0, $xr0, 2 -; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -; CHECK-NEXT: xvsrli.w $xr1, $xr0, 1 -; CHECK-NEXT: xvrepli.b $xr2, 85 -; CHECK-NEXT: xvand.v $xr1, $xr1, $xr2 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2 -; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 -; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3 +; CHECK-NEXT: xvshuf4i.w $xr0, $xr1, 177 ; CHECK-NEXT: ret %b = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ret <8 x i32> %b diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll index 93624c8dd6a96..4c17d3fd8d7b2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll @@ -7,19 +7,13 @@ declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind { ; CHECK-LABEL: test_bitreverse_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vslli.b $vr1, $vr0, 4 -; CHECK-NEXT: vsrli.b $vr0, $vr0, 4 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vandi.b $vr1, $vr0, 51 -; CHECK-NEXT: vslli.b $vr1, $vr1, 2 -; CHECK-NEXT: vsrli.b $vr0, $vr0, 2 -; CHECK-NEXT: vandi.b $vr0, $vr0, 51 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vandi.b $vr1, $vr0, 85 -; CHECK-NEXT: vslli.b $vr1, $vr1, 1 -; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 -; CHECK-NEXT: vandi.b $vr0, $vr0, 85 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 +; 
CHECK-NEXT: bitrev.8b $a0, $a0 +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 +; CHECK-NEXT: bitrev.8b $a0, $a0 +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 +; CHECK-NEXT: vori.b $vr0, $vr1, 0 ; CHECK-NEXT: ret %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) ret <16 x i8> %b @@ -30,25 +24,13 @@ declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind { ; CHECK-LABEL: test_bitreverse_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 177 -; CHECK-NEXT: vsrli.h $vr1, $vr0, 4 -; CHECK-NEXT: vrepli.b $vr2, 15 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 -; CHECK-NEXT: vslli.h $vr0, $vr0, 4 -; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -; CHECK-NEXT: vsrli.h $vr1, $vr0, 2 -; CHECK-NEXT: vrepli.b $vr2, 51 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 -; CHECK-NEXT: vslli.h $vr0, $vr0, 2 -; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -; CHECK-NEXT: vsrli.h $vr1, $vr0, 1 -; CHECK-NEXT: vrepli.b $vr2, 85 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 -; CHECK-NEXT: vslli.h $vr0, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 +; CHECK-NEXT: vshuf4i.h $vr0, $vr1, 27 ; CHECK-NEXT: ret %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) ret <8 x i16> %b @@ -59,25 +41,13 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind { ; CHECK-LABEL: test_bitreverse_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27 -; CHECK-NEXT: vsrli.w $vr1, $vr0, 4 -; CHECK-NEXT: vrepli.b $vr2, 15 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 -; CHECK-NEXT: vslli.w $vr0, $vr0, 4 -; 
CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -; CHECK-NEXT: vsrli.w $vr1, $vr0, 2 -; CHECK-NEXT: vrepli.b $vr2, 51 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 -; CHECK-NEXT: vslli.w $vr0, $vr0, 2 -; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -; CHECK-NEXT: vsrli.w $vr1, $vr0, 1 -; CHECK-NEXT: vrepli.b $vr2, 85 -; CHECK-NEXT: vand.v $vr1, $vr1, $vr2 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 -; CHECK-NEXT: vslli.w $vr0, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 +; CHECK-NEXT: bitrev.d $a0, $a0 +; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1 +; CHECK-NEXT: vshuf4i.w $vr0, $vr1, 177 ; CHECK-NEXT: ret %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) ret <4 x i32> %b