Commit e7a2489

[AArch64] Add intrinsics support for SVE2p2 instructions
1 parent be93399 commit e7a2489

10 files changed: +793, -12 lines


clang/include/clang/Basic/arm_sve.td

Lines changed: 11 additions & 1 deletion
@@ -984,6 +984,11 @@ let SMETargetGuard = "sme2p2" in {
 def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact", [VerifyRuntimeMode]>;
 }
 
+let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sme2p2" in {
+def SVCOMPACT_BH : SInst<"svcompact[_{d}]", "dPd", "cUcsUsmbh", MergeNone, "aarch64_sve_compact", [VerifyRuntimeMode]>;
+def SVEXPAND : SInst<"svexpand[_{d}]", "dPd", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_expand", [VerifyRuntimeMode]>;
+}
+
 // Note: svdup_lane is implemented using the intrinsic for TBL to represent a
 // splat of any possible lane. It is upto LLVM to pick a more efficient
 // instruction such as DUP (indexed) if the lane index fits the range of the
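For context, a minimal usage sketch of the builtins declared by the two new definitions: svcompact becomes available on 8- and 16-bit element types, and svexpand is added across all element types. This is a sketch only, assuming a compiler that includes this patch and a +sve2p2 target; the function names below are illustrative, while the builtin names follow the prototypes above.

#include <arm_sve.h>

// Sketch: pack the active byte lanes of 'data' into the low elements.
// svcompact on 8-bit elements is what the new SVCOMPACT_BH entry exposes.
svuint8_t pack_active_bytes(svbool_t pg, svuint8_t data) {
  return svcompact_u8(pg, data);
}

// Sketch: the new svexpand builtin performs the inverse operation,
// distributing the low packed elements of 'packed' into the lanes
// selected by 'pg' (SVE2p2 EXPAND semantics).
svfloat16_t unpack_to_active_halves(svbool_t pg, svfloat16_t packed) {
  return svexpand_f16(pg, packed);
}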
@@ -1111,6 +1116,11 @@ def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendS
 def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp", [VerifyRuntimeMode]>;
 def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfdb", MergeNone, "", [VerifyRuntimeMode]>;
 
+let SVETargetGuard = "sve2p2|sme2p2", SMETargetGuard = "sve2p2|sme2p2" in {
+def SVFIRSTP : SInst<"svfirstp_{d}", "lPP", "PcPsPiPl", MergeNone, "aarch64_sve_firstp", [VerifyRuntimeMode], []>;
+def SVLASTP : SInst<"svlastp_{d}", "lPP", "PcPsPiPl", MergeNone, "aarch64_sve_lastp", [VerifyRuntimeMode], []>;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Saturating scalar arithmetic
 
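A similar hedged sketch for the new predicate builtins: per the "lPP" prototype, svfirstp and svlastp take a governing predicate and a source predicate and return a 64-bit element index. The suffixed names below follow the svcntp_{d} naming convention and are assumptions; the semantics are those of the SVE2p2 FIRSTP/LASTP instructions.

#include <stdint.h>
#include <arm_sve.h>

// Sketch only: report the first and last active element positions of 'p'
// under the governing predicate 'pg', counted in 32-bit elements.
void active_bounds(svbool_t pg, svbool_t p, int64_t *first, int64_t *last) {
  *first = svfirstp_b32(pg, p);
  *last  = svlastp_b32(pg, p);
}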
@@ -2388,4 +2398,4 @@ let SVETargetGuard = "sve2,fp8fma", SMETargetGuard = "ssve-fp8fma" in {
 def SVFMLALLBT_LANE : SInst<"svmlallbt_lane[_f32_mf8]", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlallbt_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
 def SVFMLALLTB_LANE : SInst<"svmlalltb_lane[_f32_mf8]", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlalltb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
 def SVFMLALLTT_LANE : SInst<"svmlalltt_lane[_f32_mf8]", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlalltt_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
-}
+}

clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c

Lines changed: 6 additions & 0 deletions
@@ -14,6 +14,12 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
 // CHECK-LABEL: @test_svcompact_s32(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
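The added STREAMING macro lets the same test bodies compile both as regular SVE functions and, when __ARM_FEATURE_SME is defined, as streaming functions. A minimal sketch of the resulting pattern, mirroring the existing test_svcompact_s32 test (illustrative only, not the diff itself):

#ifdef __ARM_FEATURE_SME
#define STREAMING __arm_streaming   // function executes in streaming SVE mode
#else
#define STREAMING                   // plain (non-streaming) SVE function
#endif

svint32_t test_svcompact_s32(svbool_t pg, svint32_t op) STREAMING
{
  return svcompact_s32(pg, op);
}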
Lines changed: 142 additions & 0 deletions (new test file)
@@ -0,0 +1,142 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: @test_svcompact_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcompact_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svcompact_s8(svbool_t pg, svint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_s8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svcompact_s16(svbool_t pg, svint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_s16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcompact_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svcompact_u8(svbool_t pg, svuint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_u8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svcompact_u16(svbool_t pg, svuint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_u16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_mf8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_mf8u10__SVBool_tu13__SVMfloat8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svcompact_mf8(svbool_t pg, svmfloat8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_mf8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svcompact_f16(svbool_t pg, svfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_f16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcompact_bf16u10__SVBool_tu14__SVBfloat16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svcompact_bf16(svbool_t pg, svbfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_bf16,,)(pg, op);
+}
