-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[Clang][AArch64] make bitperm intrinsics available in streaming mode #129700
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang Author: None (Lukacma) Changes: Based on recent changes in Armv9.6, BitPerm instructions, and thus the corresponding intrinsics, are available in streaming mode when FEAT_SSVE_BitPerm is available. This patch reflects this change and is based on the ACLE proposal. Patch is 29.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129700.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b51106fa56759..03051362be4f9 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1988,13 +1988,13 @@ def SVSM4E : SInst<"svsm4e[_{d}]", "ddd", "Ui", MergeNone, "aarch64_sve_sm
def SVSM4EKEY : SInst<"svsm4ekey[_{d}]", "ddd", "Ui", MergeNone, "aarch64_sve_sm4ekey", [IsOverloadNone]>;
}
-let SVETargetGuard = "sve2,sve-bitperm", SMETargetGuard = InvalidMode in {
-def SVBDEP : SInst<"svbdep[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bdep_x">;
-def SVBDEP_N : SInst<"svbdep[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bdep_x">;
-def SVBEXT : SInst<"svbext[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bext_x">;
-def SVBEXT_N : SInst<"svbext[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bext_x">;
-def SVBGRP : SInst<"svbgrp[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">;
-def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">;
+let SVETargetGuard = "sve2,sve-bitperm", SMETargetGuard = "sme,ssve-bitperm" in {
+def SVBDEP : SInst<"svbdep[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bdep_x", [VerifyRuntimeMode]>;
+def SVBDEP_N : SInst<"svbdep[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bdep_x", [VerifyRuntimeMode]>;
+def SVBEXT : SInst<"svbext[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bext_x", [VerifyRuntimeMode]>;
+def SVBEXT_N : SInst<"svbext[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bext_x", [VerifyRuntimeMode]>;
+def SVBGRP : SInst<"svbgrp[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x", [VerifyRuntimeMode]>;
+def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x", [VerifyRuntimeMode]>;
}
let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in {
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bdep.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bdep.c
index d4681394a0508..484a00af04cba 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bdep.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bdep.c
@@ -5,6 +5,8 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-bitperm -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-bitperm -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-bitperm -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +ssve-bitperm -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: not %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
@@ -15,6 +17,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
// CHECK-LABEL: @test_svbdep_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bdep.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
@@ -25,7 +33,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bdep.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svbdep_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svbdep_u8(svuint8_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbdep,_u8,,)(op1, op2);
}
@@ -40,7 +48,7 @@ svuint8_t test_svbdep_u8(svuint8_t op1, svuint8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bdep.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svuint16_t test_svbdep_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svbdep_u16(svuint16_t op1, svuint16_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbdep,_u16,,)(op1, op2);
}
@@ -55,7 +63,7 @@ svuint16_t test_svbdep_u16(svuint16_t op1, svuint16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bdep.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svuint32_t test_svbdep_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svbdep_u32(svuint32_t op1, svuint32_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbdep,_u32,,)(op1, op2);
}
@@ -70,7 +78,7 @@ svuint32_t test_svbdep_u32(svuint32_t op1, svuint32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bdep.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svbdep_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svbdep_u64(svuint64_t op1, svuint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbdep,_u64,,)(op1, op2);
}
@@ -89,7 +97,7 @@ svuint64_t test_svbdep_u64(svuint64_t op1, svuint64_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bdep.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svbdep_n_u8(svuint8_t op1, uint8_t op2)
+svuint8_t test_svbdep_n_u8(svuint8_t op1, uint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbdep,_n_u8,,)(op1, op2);
}
@@ -108,7 +116,7 @@ svuint8_t test_svbdep_n_u8(svuint8_t op1, uint8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bdep.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svuint16_t test_svbdep_n_u16(svuint16_t op1, uint16_t op2)
+svuint16_t test_svbdep_n_u16(svuint16_t op1, uint16_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbdep,_n_u16,,)(op1, op2);
}
@@ -127,7 +135,7 @@ svuint16_t test_svbdep_n_u16(svuint16_t op1, uint16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bdep.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svuint32_t test_svbdep_n_u32(svuint32_t op1, uint32_t op2)
+svuint32_t test_svbdep_n_u32(svuint32_t op1, uint32_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbdep,_n_u32,,)(op1, op2);
}
@@ -146,7 +154,7 @@ svuint32_t test_svbdep_n_u32(svuint32_t op1, uint32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bdep.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svbdep_n_u64(svuint64_t op1, uint64_t op2)
+svuint64_t test_svbdep_n_u64(svuint64_t op1, uint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbdep,_n_u64,,)(op1, op2);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bext.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bext.c
index 6d654b9353e7a..953ef4d203783 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bext.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bext.c
@@ -5,6 +5,8 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-bitperm -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-bitperm -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-bitperm -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +ssve-bitperm -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: not %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
@@ -15,6 +17,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
// CHECK-LABEL: @test_svbext_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bext.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
@@ -25,7 +33,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bext.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svbext_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svbext_u8(svuint8_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbext,_u8,,)(op1, op2);
}
@@ -40,7 +48,7 @@ svuint8_t test_svbext_u8(svuint8_t op1, svuint8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bext.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svuint16_t test_svbext_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svbext_u16(svuint16_t op1, svuint16_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbext,_u16,,)(op1, op2);
}
@@ -55,7 +63,7 @@ svuint16_t test_svbext_u16(svuint16_t op1, svuint16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bext.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svuint32_t test_svbext_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svbext_u32(svuint32_t op1, svuint32_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbext,_u32,,)(op1, op2);
}
@@ -70,7 +78,7 @@ svuint32_t test_svbext_u32(svuint32_t op1, svuint32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bext.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svbext_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svbext_u64(svuint64_t op1, svuint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbext,_u64,,)(op1, op2);
}
@@ -89,7 +97,7 @@ svuint64_t test_svbext_u64(svuint64_t op1, svuint64_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bext.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svbext_n_u8(svuint8_t op1, uint8_t op2)
+svuint8_t test_svbext_n_u8(svuint8_t op1, uint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbext,_n_u8,,)(op1, op2);
}
@@ -108,7 +116,7 @@ svuint8_t test_svbext_n_u8(svuint8_t op1, uint8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bext.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svuint16_t test_svbext_n_u16(svuint16_t op1, uint16_t op2)
+svuint16_t test_svbext_n_u16(svuint16_t op1, uint16_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbext,_n_u16,,)(op1, op2);
}
@@ -127,7 +135,7 @@ svuint16_t test_svbext_n_u16(svuint16_t op1, uint16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bext.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svuint32_t test_svbext_n_u32(svuint32_t op1, uint32_t op2)
+svuint32_t test_svbext_n_u32(svuint32_t op1, uint32_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbext,_n_u32,,)(op1, op2);
}
@@ -146,7 +154,7 @@ svuint32_t test_svbext_n_u32(svuint32_t op1, uint32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bext.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svbext_n_u64(svuint64_t op1, uint64_t op2)
+svuint64_t test_svbext_n_u64(svuint64_t op1, uint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbext,_n_u64,,)(op1, op2);
}
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bgrp.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bgrp.c
index a98d8e8a2b37c..62ea1c5278bb5 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bgrp.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_bgrp.c
@@ -5,6 +5,8 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-bitperm -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-bitperm -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-bitperm -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +ssve-bitperm -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: not %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
@@ -15,6 +17,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
// CHECK-LABEL: @test_svbgrp_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bgrp.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
@@ -25,7 +33,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bgrp.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svbgrp_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svbgrp_u8(svuint8_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbgrp,_u8,,)(op1, op2);
}
@@ -40,7 +48,7 @@ svuint8_t test_svbgrp_u8(svuint8_t op1, svuint8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bgrp.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svuint16_t test_svbgrp_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svbgrp_u16(svuint16_t op1, svuint16_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbgrp,_u16,,)(op1, op2);
}
@@ -55,7 +63,7 @@ svuint16_t test_svbgrp_u16(svuint16_t op1, svuint16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bgrp.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svuint32_t test_svbgrp_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svbgrp_u32(svuint32_t op1, svuint32_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbgrp,_u32,,)(op1, op2);
}
@@ -70,7 +78,7 @@ svuint32_t test_svbgrp_u32(svuint32_t op1, svuint32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bgrp.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svbgrp_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svbgrp_u64(svuint64_t op1, svuint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbgrp,_u64,,)(op1, op2);
}
@@ -89,7 +97,7 @@ svuint64_t test_svbgrp_u64(svuint64_t op1, svuint64_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bgrp.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svbgrp_n_u8(svuint8_t op1, uint8_t op2)
+svuint8_t test_svbgrp_n_u8(svuint8_t op1, uint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbgrp,_n_u8,,)(op1, op2);
}
@@ -108,7 +116,7 @@ svuint8_t test_svbgrp_n_u8(svuint8_t op1, uint8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bgrp.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svuint16_t test_svbgrp_n_u16(svuint16_t op1, uint16_t op2)
+svuint16_t test_svbgrp_n_u16(svuint16_t op1, uint16_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbgrp,_n_u16,,)(op1, op2);
}
@@ -127,7 +135,7 @@ svuint16_t test_svbgrp_n_u16(svuint16_t op1, uint16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bgrp.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svuint32_t test_svbgrp_n_u32(svuint32_t op1, uint32_t op2)
+svuint32_t test_svbgrp_n_u32(svuint32_t op1, uint32_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbgrp,_n_u32,,)(op1, op2);
}
@@ -146,7 +154,7 @@ svuint32_t test_svbgrp_n_u32(svuint32_t op1, uint32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bgrp.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
-svuint64_t test_svbgrp_n_u64(svuint64_t op1, uint64_t op2)
+svuint64_t test_svbgrp_n_u64(svuint64_t op1, uint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svbgrp,_n_u64,,)(op1, op2);
}
diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp
index 985ea15ac2a4e..d165b92ffc8e4 100644
--- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp
+++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp
@@ -26,61 +26,61 @@ void test(uint8_t u8, uint16_t u16, uint32_t u32, uint64_t u64)
// expected-error@+2 {{'svaesmc_u8' needs target feature sve,sve2,sve-aes}}
// overload-error@+1 {{'svaesmc' needs target feature sve,sve2,sve-aes}}
SVE_ACLE_FUNC(svaesmc,_u8,,)(svundef_u8());
- // expected-error@+2 {{'svbdep_u8' needs target feature sve,sve2,sve-bitperm}}
- // overload-error@+1 {{'svbdep' needs target feature sve,sve2,sve-bitperm}}
+ // expected-error@+2 {{'svbdep_u8' needs target feature (sve,sve2,sve-bitperm)|(sme,ssve-bitperm)}}
+ // overload-error@+1 {{'svbdep' needs target feature (sve,sve2,sve-bitperm)|(sme,ssve-bitperm)}}
SVE_ACLE_FUNC(svbdep,_u8,,)(svundef_u8(), svundef_u8());
- // expected-error@+2 {{'svbdep_n_u8' needs target feature sve,sve2,sve-bitperm}}
- // overload-error@+1 {{'svbdep' needs target feature sve,sve2,sve-bitperm}}
+ // expected-error@+2 {{'svbdep_n_u8' needs target feature (sve,sve2,sve-bitperm)|(sme,ssve-bitperm)}}
+ // overload-error@+1 {{'svbdep' needs target feature (sve,sve2,sve-bitperm)|(sme,ssve-bitperm)}}
SVE_ACLE_FUNC(svbdep,_n_u8,,)(svundef_u8(), u8);
- // expected-error@+2 {{'svbext_u8' needs target feature sve,sve2,sve-bitperm}}
- // overload-error@+1 {{'svbext' needs target feature sve,sve2,sve-bitperm}}
+ // expected-error@+2 {{'svbext_u8' needs target feature (sve,sve2,sve-bitperm)|(sme,ssve-bitperm)}}
+ // overload-error@+1 {{'svbext' needs target feature (sve,sve2,sve-bitperm)|(sme,ssve-bitperm)}}
SVE_ACLE_FUNC(svbext,_u8,,)(svundef_u8(), svundef_u8());
- // expected-error@+2 {{'svbext_n_u8' needs target feature sve,sve2,sve-bitperm}}
- // overload-error@+1 {{'svbext' needs target feature sve,sve2,sve-bitperm}}
+ // expected-error@+2 {{'svbex...
[truncated]
|
jthackray
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
CarolineConcatto
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Marian,
There are some tests in Sema, like:
test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp
It would be nice for those to be updated as well.
I am not sure I understand. I updated the tests in this file. Or did I miss something? |
…lvm#129700) Based on recent changes in armv9.6 BitPerm instructions and thus intrinsics are available in streaming mode when [FEAT_SSVE_BitPerm](https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv9-6-architecture-extension?lang=en#md463-the-armv96-architecture-extension__feat_FEAT_SSVE_BitPerm) is available. This patch reflects this change and is based on [ACLE proposal](ARM-software/acle#385).
…lvm#129700) Based on recent changes in armv9.6 BitPerm instructions and thus intrinsics are available in streaming mode when [FEAT_SSVE_BitPerm](https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv9-6-architecture-extension?lang=en#md463-the-armv96-architecture-extension__feat_FEAT_SSVE_BitPerm) is available. This patch reflects this change and is based on [ACLE proposal](ARM-software/acle#385).
Based on recent changes in Armv9.6, BitPerm instructions, and thus the corresponding intrinsics, are available in streaming mode when FEAT_SSVE_BitPerm is available. This patch reflects this change and is based on the ACLE proposal.