Skip to content

Conversation

@xen0n
Copy link
Contributor

@xen0n xen0n commented Oct 30, 2024

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Oct 30, 2024

@llvm/pr-subscribers-backend-loongarch

Author: WÁNG Xuěruì (xen0n)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/114170.diff

2 Files Affected:

  • (added) llvm/test/CodeGen/LoongArch/lasx/bswap.ll (+86)
  • (added) llvm/test/CodeGen/LoongArch/lsx/bswap.ll (+86)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll
new file mode 100644
index 00000000000000..4f6d49c7a79db5
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @bswap_v16i16(ptr %src, ptr %dst) nounwind { ; Test: byte-swap every i16 element of a <16 x i16> vector read from %src and write the result to %dst (llc is invoked with +lasx).
+; CHECK-LABEL: bswap_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvsrli.h $xr1, $xr0, 8
+; CHECK-NEXT:    xvslli.h $xr0, $xr0, 8
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <16 x i16>, ptr %src ; fetch the input vector from %src
+  %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v) ; reverse the bytes within each 16-bit lane; the assertions above expect xvsrli.h/xvslli.h by 8 merged with xvor.v
+  store <16 x i16> %res, ptr %dst ; write the swapped vector to %dst
+  ret void
+}
+
+define void @bswap_v8i32(ptr %src, ptr %dst) nounwind { ; Test: byte-swap every i32 element of an <8 x i32> vector read from %src and write the result to %dst (llc is invoked with +lasx).
+; CHECK-LABEL: bswap_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    lu12i.w $a0, 15
+; CHECK-NEXT:    ori $a0, $a0, 3840
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a0
+; CHECK-NEXT:    xvsrli.w $xr2, $xr0, 8
+; CHECK-NEXT:    xvand.v $xr2, $xr2, $xr1
+; CHECK-NEXT:    xvsrli.w $xr3, $xr0, 24
+; CHECK-NEXT:    xvor.v $xr2, $xr2, $xr3
+; CHECK-NEXT:    xvand.v $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvslli.w $xr1, $xr1, 8
+; CHECK-NEXT:    xvslli.w $xr0, $xr0, 24
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <8 x i32>, ptr %src ; fetch the input vector from %src
+  %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v) ; reverse the bytes within each 32-bit lane; the assertions above expect one replicated mask (xvreplgr2vr.w) with xvsrli.w/xvslli.w by 8 and 24, combined through xvand.v/xvor.v
+  store <8 x i32> %res, ptr %dst ; write the swapped vector to %dst
+  ret void
+}
+
+define void @bswap_v4i64(ptr %src, ptr %dst) nounwind { ; Test: byte-swap every i64 element of a <4 x i64> vector read from %src and write the result to %dst (llc is invoked with +lasx).
+; CHECK-LABEL: bswap_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    lu12i.w $a0, 4080
+; CHECK-NEXT:    xvreplgr2vr.d $xr1, $a0
+; CHECK-NEXT:    xvsrli.d $xr2, $xr0, 24
+; CHECK-NEXT:    xvand.v $xr2, $xr2, $xr1
+; CHECK-NEXT:    lu12i.w $a0, -4096
+; CHECK-NEXT:    lu32i.d $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.d $xr3, $a0
+; CHECK-NEXT:    xvsrli.d $xr4, $xr0, 8
+; CHECK-NEXT:    xvand.v $xr4, $xr4, $xr3
+; CHECK-NEXT:    xvor.v $xr2, $xr4, $xr2
+; CHECK-NEXT:    lu12i.w $a0, 15
+; CHECK-NEXT:    ori $a0, $a0, 3840
+; CHECK-NEXT:    xvreplgr2vr.d $xr4, $a0
+; CHECK-NEXT:    xvsrli.d $xr5, $xr0, 40
+; CHECK-NEXT:    xvand.v $xr5, $xr5, $xr4
+; CHECK-NEXT:    xvsrli.d $xr6, $xr0, 56
+; CHECK-NEXT:    xvor.v $xr5, $xr5, $xr6
+; CHECK-NEXT:    xvor.v $xr2, $xr2, $xr5
+; CHECK-NEXT:    xvand.v $xr1, $xr0, $xr1
+; CHECK-NEXT:    xvslli.d $xr1, $xr1, 24
+; CHECK-NEXT:    xvand.v $xr3, $xr0, $xr3
+; CHECK-NEXT:    xvslli.d $xr3, $xr3, 8
+; CHECK-NEXT:    xvor.v $xr1, $xr1, $xr3
+; CHECK-NEXT:    xvand.v $xr3, $xr0, $xr4
+; CHECK-NEXT:    xvslli.d $xr3, $xr3, 40
+; CHECK-NEXT:    xvslli.d $xr0, $xr0, 56
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr3
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <4 x i64>, ptr %src ; fetch the input vector from %src
+  %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v) ; reverse the bytes within each 64-bit lane; the assertions above expect three replicated masks (xvreplgr2vr.d) with xvsrli.d/xvslli.d by 8, 24, 40 and 56, combined through xvand.v/xvor.v
+  store <4 x i64> %res, ptr %dst ; write the swapped vector to %dst
+  ret void
+}
+
+declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
+declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
+declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll
new file mode 100644
index 00000000000000..ce7af9d33f1501
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @bswap_v8i16(ptr %src, ptr %dst) nounwind { ; Test: byte-swap every i16 element of an <8 x i16> vector read from %src and write the result to %dst (llc is invoked with +lsx).
+; CHECK-LABEL: bswap_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vsrli.h $vr1, $vr0, 8
+; CHECK-NEXT:    vslli.h $vr0, $vr0, 8
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <8 x i16>, ptr %src ; fetch the input vector from %src
+  %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v) ; reverse the bytes within each 16-bit lane; the assertions above expect vsrli.h/vslli.h by 8 merged with vor.v
+  store <8 x i16> %res, ptr %dst ; write the swapped vector to %dst
+  ret void
+}
+
+define void @bswap_v4i32(ptr %src, ptr %dst) nounwind { ; Test: byte-swap every i32 element of a <4 x i32> vector read from %src and write the result to %dst (llc is invoked with +lsx).
+; CHECK-LABEL: bswap_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    lu12i.w $a0, 15
+; CHECK-NEXT:    ori $a0, $a0, 3840
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a0
+; CHECK-NEXT:    vsrli.w $vr2, $vr0, 8
+; CHECK-NEXT:    vand.v $vr2, $vr2, $vr1
+; CHECK-NEXT:    vsrli.w $vr3, $vr0, 24
+; CHECK-NEXT:    vor.v $vr2, $vr2, $vr3
+; CHECK-NEXT:    vand.v $vr1, $vr0, $vr1
+; CHECK-NEXT:    vslli.w $vr1, $vr1, 8
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <4 x i32>, ptr %src ; fetch the input vector from %src
+  %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v) ; reverse the bytes within each 32-bit lane; the assertions above expect one replicated mask (vreplgr2vr.w) with vsrli.w/vslli.w by 8 and 24, combined through vand.v/vor.v
+  store <4 x i32> %res, ptr %dst ; write the swapped vector to %dst
+  ret void
+}
+
+define void @bswap_v2i64(ptr %src, ptr %dst) nounwind { ; Test: byte-swap every i64 element of a <2 x i64> vector read from %src and write the result to %dst (llc is invoked with +lsx).
+; CHECK-LABEL: bswap_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    lu12i.w $a0, 4080
+; CHECK-NEXT:    vreplgr2vr.d $vr1, $a0
+; CHECK-NEXT:    vsrli.d $vr2, $vr0, 24
+; CHECK-NEXT:    vand.v $vr2, $vr2, $vr1
+; CHECK-NEXT:    lu12i.w $a0, -4096
+; CHECK-NEXT:    lu32i.d $a0, 0
+; CHECK-NEXT:    vreplgr2vr.d $vr3, $a0
+; CHECK-NEXT:    vsrli.d $vr4, $vr0, 8
+; CHECK-NEXT:    vand.v $vr4, $vr4, $vr3
+; CHECK-NEXT:    vor.v $vr2, $vr4, $vr2
+; CHECK-NEXT:    lu12i.w $a0, 15
+; CHECK-NEXT:    ori $a0, $a0, 3840
+; CHECK-NEXT:    vreplgr2vr.d $vr4, $a0
+; CHECK-NEXT:    vsrli.d $vr5, $vr0, 40
+; CHECK-NEXT:    vand.v $vr5, $vr5, $vr4
+; CHECK-NEXT:    vsrli.d $vr6, $vr0, 56
+; CHECK-NEXT:    vor.v $vr5, $vr5, $vr6
+; CHECK-NEXT:    vor.v $vr2, $vr2, $vr5
+; CHECK-NEXT:    vand.v $vr1, $vr0, $vr1
+; CHECK-NEXT:    vslli.d $vr1, $vr1, 24
+; CHECK-NEXT:    vand.v $vr3, $vr0, $vr3
+; CHECK-NEXT:    vslli.d $vr3, $vr3, 8
+; CHECK-NEXT:    vor.v $vr1, $vr1, $vr3
+; CHECK-NEXT:    vand.v $vr3, $vr0, $vr4
+; CHECK-NEXT:    vslli.d $vr3, $vr3, 40
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 56
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr3
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <2 x i64>, ptr %src ; fetch the input vector from %src
+  %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v) ; reverse the bytes within each 64-bit lane; the assertions above expect three replicated masks (vreplgr2vr.d) with vsrli.d/vslli.d by 8, 24, 40 and 56, combined through vand.v/vor.v
+  store <2 x i64> %res, ptr %dst ; write the swapped vector to %dst
+  ret void
+}
+
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

Copy link
Member

@heiher heiher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thanks.

@SixWeining SixWeining merged commit 5581e43 into llvm:main Oct 31, 2024
8 of 10 checks passed
@xen0n xen0n deleted the loong-simd-bswap-1 branch October 31, 2024 13:28
heiher pushed a commit that referenced this pull request Oct 31, 2024
smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024
smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants