Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -1841,6 +1841,23 @@ def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone,
def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
}

let SVETargetGuard = "sve-aes2", SMETargetGuard = "sve-aes2,ssve-aes" in {
def SVAESD_X2 : SInst<"svaesd_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesd_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
def SVAESIMC_X2 : SInst<"svaesdimc_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesdimc_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
def SVAESE_X2 : SInst<"svaese_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aese_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
def SVAESeMC_X2 : SInst<"svaesemc_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesemc_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;

def SVAESD_X4 : SInst<"svaesd_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesd_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
def SVAESIMC_X4 : SInst<"svaesdimc_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesdimc_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
def SVAESE_X4 : SInst<"svaese_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aese_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
def SVAESeMC_X4 : SInst<"svaesemc_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesemc_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry if I've missed something, but where does the q in _laneq come from? It doesn't appear in the intrinsic names in the acle proposal linked.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No you are right. These intrinsics are not correctly defined in ACLE and need to be fixed. The reason for q at the end is that these intrinsics operate on 128-bit elements, not really on 8bit ones and we want to make that clear to user


def SVPMULL_U64 : SInst<"svpmull[_u64_x2]", "2dd", "Ul", MergeNone, "aarch64_sve_pmull_x2", [IsOverloadNone, VerifyRuntimeMode]>;
def SVPMULL_N_U64 : SInst<"svpmull[_n_u64_x2]", "2da", "Ul", MergeNone, "aarch64_sve_pmull_x2", [IsOverloadNone, VerifyRuntimeMode]>;
def SVPMLAL_U64 : SInst<"svpmlal[_u64_x2]", "22dd", "Ul", MergeNone, "aarch64_sve_pmlal_x2", [IsOverloadNone, VerifyRuntimeMode]>;
def SVPMLAL_N_U64 : SInst<"svpmlal[_n_u64_x2]", "22da", "Ul", MergeNone, "aarch64_sve_pmlal_x2", [IsOverloadNone, VerifyRuntimeMode]>;
}

let SVETargetGuard = "sve-sha3", SMETargetGuard = "sme2p1,sve-sha3" in {
def SVRAX1 : SInst<"svrax1[_{d}]", "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>;
}
Expand Down
215 changes: 215 additions & 0 deletions clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// REQUIRES: aarch64-registered-target

#include <arm_sve.h>

#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
#else
#define SVE_ACLE_FUNC(A1,A2) A1##A2
#endif

#ifdef __ARM_FEATURE_SME
#define STREAMING __arm_streaming
#else
#define STREAMING
#endif

// CHECK-LABEL: @test_svaesd_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svaesd_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaesd_laneq,_u8_x2)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaesdimc_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svaesdimc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x2)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaese_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svaese_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaese_laneq,_u8_x2)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaesemc_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svaesemc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x2)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaesd_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x411svuint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x4_t test_svaesd_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaesd_laneq,_u8_x4)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaesdimc_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x411svuint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x4_t test_svaesdimc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x4)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaese_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x411svuint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x4_t test_svaese_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaese_laneq,_u8_x4)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaesemc_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x411svuint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x4_t test_svaesemc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
{
return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x4)(op1, op2, 0);
}

// CHECK-LABEL: @test_svpmull_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svpmull_u64_x2u12__SVUint64_tS_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svpmull_u64_x2(svuint64_t op1, svuint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svpmull,_u64_x2)(op1, op2);
}

// CHECK-LABEL: @test_svpmull_n_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svpmull_n_u64_x2u12__SVUint64_tm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svpmull_n_u64_x2(svuint64_t op1, uint64_t op2) STREAMING
{
return SVE_ACLE_FUNC(svpmull,_n_u64_x2)(op1, op2);
}

// CHECK-LABEL: @test_svpmlal_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svpmlal_u64_x212svuint64x2_tu12__SVUint64_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svpmlal_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) STREAMING
{
return SVE_ACLE_FUNC(svpmlal,_u64_x2)(op1, op2, op3);
}

// CHECK-LABEL: @test_svpmlal_n_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svpmlal_n_u64_x212svuint64x2_tu12__SVUint64_tm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svpmlal_n_u64_x2(svuint64x2_t op1, svuint64_t op2, uint64_t op3) STREAMING
{
return SVE_ACLE_FUNC(svpmlal,_n_u64_x2)(op1, op2, op3);
}
9 changes: 8 additions & 1 deletion clang/utils/TableGen/SveEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,16 @@ class Intrinsic {
Proto[I] == 'R' || Proto[I] == '@' || Proto[I] == '!')
break;

if (Proto[I] == '2')
Param += 1;
if (Proto[I] == '4')
Param += 3;

// Multivector modifier can be skipped
if (Proto[I] == '.')
if (Proto[I] == '.') {
Param -= 1; // Adjust for the increment at the top of the loop
I += 2;
}
}
assert(I != Proto.size() && "Prototype has no splat operand");
return Param;
Expand Down
27 changes: 27 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -4194,4 +4194,31 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;

// AES2
class SVE2_Crypto_LANE_X2_Intrinsic
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These intrinsic looks like they need to have this in front of them : GCCBuiltin<"__builtin_sve_svaesd_u8">,

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot find reference to GCCBuiltin anywhere in the code. What is it supposed do ?

[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<3>>, IntrNoMem]>;
class SVE2_Crypto_LANE_X4_Intrinsic
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<5>>, IntrNoMem]>;

def int_aarch64_sve_aesd_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
def int_aarch64_sve_aesdimc_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
def int_aarch64_sve_aese_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
def int_aarch64_sve_aesemc_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;

def int_aarch64_sve_aesd_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
def int_aarch64_sve_aesdimc_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
def int_aarch64_sve_aese_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
def int_aarch64_sve_aesemc_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;

def int_aarch64_sve_pmull_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
[llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
def int_aarch64_sve_pmlal_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
[llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
}

Loading