Add llc codegen tests for the llvm.bswap intrinsic on LoongArch vectors:
one file run with --mattr=+lasx (<16 x i16>, <8 x i32>, <4 x i64>) and one
file run with --mattr=+lsx (<8 x i16>, <4 x i32>, <2 x i64>).

Each test function loads a vector from %src, applies llvm.bswap to it, and
stores the result to %dst. The recorded CHECK lines show the byte swap being
emitted as per-element shifts combined with ORs (plus AND masks for the
32-bit and 64-bit element cases).

The CHECK lines are autogenerated (see the NOTE line at the top of each
file); refresh them with utils/update_llc_test_checks.py instead of editing
them by hand.

diff --git a/llvm/test/CodeGen/LoongArch/lasx/bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll
new file mode 100644
index 0000000000000..4f6d49c7a79db
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @bswap_v16i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvsrli.h $xr1, $xr0, 8
+; CHECK-NEXT:    xvslli.h $xr0, $xr0, 8
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <16 x i16>, ptr %src
+  %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
+  store <16 x i16> %res, ptr %dst
+  ret void
+}
+
+define void @bswap_v8i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    lu12i.w $a0, 15
+; CHECK-NEXT:    ori $a0, $a0, 3840
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a0
+; CHECK-NEXT:    xvsrli.w $xr2, $xr0, 8
+; CHECK-NEXT:    xvand.v $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsrli.w $xr3, $xr0, 24
+; CHECK-NEXT:    xvor.v $xr2, $xr2, $xr3
+; CHECK-NEXT:    xvand.v $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvslli.w $xr1, $xr1, 8
+; CHECK-NEXT:    xvslli.w $xr0, $xr0, 24
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <8 x i32>, ptr %src
+  %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
+  store <8 x i32> %res, ptr %dst
+  ret void
+}
+
+define void @bswap_v4i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    lu12i.w $a0, 4080
+; CHECK-NEXT:    xvreplgr2vr.d $xr1, $a0
+; CHECK-NEXT:    xvsrli.d $xr2, $xr0, 24
+; CHECK-NEXT:    xvand.v $xr2, $xr2, $xr1
+; CHECK-NEXT:    lu12i.w $a0, -4096
+; CHECK-NEXT:    lu32i.d $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.d $xr3, $a0
+; CHECK-NEXT:    xvsrli.d $xr4, $xr0, 8
+; CHECK-NEXT:    xvand.v $xr4, $xr4, $xr3
+; CHECK-NEXT:    xvor.v $xr2, $xr4, $xr2
+; CHECK-NEXT:    lu12i.w $a0, 15
+; CHECK-NEXT:    ori $a0, $a0, 3840
+; CHECK-NEXT:    xvreplgr2vr.d $xr4, $a0
+; CHECK-NEXT:    xvsrli.d $xr5, $xr0, 40
+; CHECK-NEXT:    xvand.v $xr5, $xr5, $xr4
+; CHECK-NEXT:    xvsrli.d $xr6, $xr0, 56
+; CHECK-NEXT:    xvor.v $xr5, $xr5, $xr6
+; CHECK-NEXT:    xvor.v $xr2, $xr2, $xr5
+; CHECK-NEXT:    xvand.v $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvslli.d $xr1, $xr1, 24
+; CHECK-NEXT:    xvand.v $xr3, $xr0, $xr3
+; CHECK-NEXT:    xvslli.d $xr3, $xr3, 8
+; CHECK-NEXT:    xvor.v $xr1, $xr1, $xr3
+; CHECK-NEXT:    xvand.v $xr3, $xr0, $xr4
+; CHECK-NEXT:    xvslli.d $xr3, $xr3, 40
+; CHECK-NEXT:    xvslli.d $xr0, $xr0, 56
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr3
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <4 x i64>, ptr %src
+  %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
+  store <4 x i64> %res, ptr %dst
+  ret void
+}
+
+declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
+declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
+declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll
new file mode 100644
index 0000000000000..ce7af9d33f150
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @bswap_v8i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vsrli.h $vr1, $vr0, 8
+; CHECK-NEXT:    vslli.h $vr0, $vr0, 8
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <8 x i16>, ptr %src
+  %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
+  store <8 x i16> %res, ptr %dst
+  ret void
+}
+
+define void @bswap_v4i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    lu12i.w $a0, 15
+; CHECK-NEXT:    ori $a0, $a0, 3840
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a0
+; CHECK-NEXT:    vsrli.w $vr2, $vr0, 8
+; CHECK-NEXT:    vand.v $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsrli.w $vr3, $vr0, 24
+; CHECK-NEXT:    vor.v $vr2, $vr2, $vr3
+; CHECK-NEXT:    vand.v $vr1, $vr0, $vr1
+; CHECK-NEXT:    vslli.w $vr1, $vr1, 8
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <4 x i32>, ptr %src
+  %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
+  store <4 x i32> %res, ptr %dst
+  ret void
+}
+
+define void @bswap_v2i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    lu12i.w $a0, 4080
+; CHECK-NEXT:    vreplgr2vr.d $vr1, $a0
+; CHECK-NEXT:    vsrli.d $vr2, $vr0, 24
+; CHECK-NEXT:    vand.v $vr2, $vr2, $vr1
+; CHECK-NEXT:    lu12i.w $a0, -4096
+; CHECK-NEXT:    lu32i.d $a0, 0
+; CHECK-NEXT:    vreplgr2vr.d $vr3, $a0
+; CHECK-NEXT:    vsrli.d $vr4, $vr0, 8
+; CHECK-NEXT:    vand.v $vr4, $vr4, $vr3
+; CHECK-NEXT:    vor.v $vr2, $vr4, $vr2
+; CHECK-NEXT:    lu12i.w $a0, 15
+; CHECK-NEXT:    ori $a0, $a0, 3840
+; CHECK-NEXT:    vreplgr2vr.d $vr4, $a0
+; CHECK-NEXT:    vsrli.d $vr5, $vr0, 40
+; CHECK-NEXT:    vand.v $vr5, $vr5, $vr4
+; CHECK-NEXT:    vsrli.d $vr6, $vr0, 56
+; CHECK-NEXT:    vor.v $vr5, $vr5, $vr6
+; CHECK-NEXT:    vor.v $vr2, $vr2, $vr5
+; CHECK-NEXT:    vand.v $vr1, $vr0, $vr1
+; CHECK-NEXT:    vslli.d $vr1, $vr1, 24
+; CHECK-NEXT:    vand.v $vr3, $vr0, $vr3
+; CHECK-NEXT:    vslli.d $vr3, $vr3, 8
+; CHECK-NEXT:    vor.v $vr1, $vr1, $vr3
+; CHECK-NEXT:    vand.v $vr3, $vr0, $vr4
+; CHECK-NEXT:    vslli.d $vr3, $vr3, 40
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 56
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr3
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <2 x i64>, ptr %src
+  %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
+  store <2 x i64> %res, ptr %dst
+  ret void
+}
+
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)