[AArch64] Add intrinsics for 9.6 crypto instructions #165545
@@ -0,0 +1,215 @@ (new test file)
```c
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// REQUIRES: aarch64-registered-target
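// The first four RUN lines cover the plain SVE build, in C and C++ and with
// and without the overloaded intrinsic forms; the next four repeat that for
// streaming SME (+sme, +ssve-aes). The final RUN line compiles to assembly
// (discarded) as a smoke test that code generation succeeds without optnone.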

#include <arm_sve.h>

#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
#else
#define SVE_ACLE_FUNC(A1,A2) A1##A2
#endif
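// For example, SVE_ACLE_FUNC(svaesd_laneq,_u8_x2)(op1, op2, 0) expands to
// the overloaded call svaesd_laneq(op1, op2, 0) when SVE_OVERLOADED_FORMS
// is defined, and to the fully suffixed svaesd_laneq_u8_x2(op1, op2, 0)
// otherwise.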
#ifdef __ARM_FEATURE_SME
#define STREAMING __arm_streaming
#else
#define STREAMING
#endif
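// Under __ARM_FEATURE_SME (the +sme RUN lines) each test function is marked
// __arm_streaming, i.e. callable in streaming mode; otherwise the macro
// expands to nothing.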
// CHECK-LABEL: @test_svaesd_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svaesd_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svaesd_laneq,_u8_x2)(op1, op2, 0);
}
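// The same lowering pattern repeats in every test below: the svuint8x2_t
// (or svuint8x4_t) tuple arrives as two (or four) coerced scalable-vector
// arguments, and the intrinsic returns a struct with one scalable vector
// per tuple element.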

// CHECK-LABEL: @test_svaesdimc_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svaesdimc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x2)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaese_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svaese_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svaese_laneq,_u8_x2)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaesemc_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svaesemc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x2)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaesd_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x411svuint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x4_t test_svaesd_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svaesd_laneq,_u8_x4)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaesdimc_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x411svuint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x4_t test_svaesdimc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x4)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaese_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x411svuint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x4_t test_svaese_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svaese_laneq,_u8_x4)(op1, op2, 0);
}

// CHECK-LABEL: @test_svaesemc_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x411svuint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x4_t test_svaesemc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x4)(op1, op2, 0);
}

// CHECK-LABEL: @test_svpmull_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svpmull_u64_x2u12__SVUint64_tS_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svpmull_u64_x2(svuint64_t op1, svuint64_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svpmull,_u64_x2)(op1, op2);
}

// CHECK-LABEL: @test_svpmull_n_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svpmull_n_u64_x2u12__SVUint64_tm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svpmull_n_u64_x2(svuint64_t op1, uint64_t op2) STREAMING
{
  return SVE_ACLE_FUNC(svpmull,_n_u64_x2)(op1, op2);
}
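// In the _n forms the scalar operand is splatted to a full vector first
// (the insertelement/shufflevector pair in the CHECK lines above), so both
// forms reach the same two-result llvm.aarch64.sve.pmull.x2 intrinsic.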

// CHECK-LABEL: @test_svpmlal_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svpmlal_u64_x212svuint64x2_tu12__SVUint64_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svpmlal_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) STREAMING
{
  return SVE_ACLE_FUNC(svpmlal,_u64_x2)(op1, op2, op3);
}

// CHECK-LABEL: @test_svpmlal_n_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svpmlal_n_u64_x212svuint64x2_tu12__SVUint64_tm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svpmlal_n_u64_x2(svuint64x2_t op1, svuint64_t op2, uint64_t op3) STREAMING
{
  return SVE_ACLE_FUNC(svpmlal,_n_u64_x2)(op1, op2, op3);
}
```
@@ -4194,4 +4194,31 @@ let TargetPrefix = "aarch64" in {
```tablegen
  def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
  def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
  def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;

  // AES2
  class SVE2_Crypto_LANE_X2_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                            [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                            [ImmArg<ArgIndex<3>>, IntrNoMem]>;
```
Contributor: These intrinsics look like they need `GCCBuiltin<"__builtin_sve_svaesd_u8">` in front of them.

Contributor (author): I cannot find any reference to GCCBuiltin anywhere in the code. What is it supposed to do?
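For context on the question above: GCCBuiltin was the TableGen class that bound an LLVM intrinsic to a frontend builtin name, and it was later renamed ClangBuiltin, which is probably why the old spelling no longer appears in the tree. A hypothetical use, if such a binding were wanted, might read `def int_aarch64_sve_aesd_laneq_x2 : ClangBuiltin<"__builtin_sve_svaesd_laneq_u8_x2">, SVE2_Crypto_LANE_X2_Intrinsic;` (the builtin name here is assumed, for illustration only). SVE builtins are normally generated through Clang's arm_sve.td emitter instead, so the annotation may well be unnecessary for these intrinsics.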
```tablegen
  class SVE2_Crypto_LANE_X4_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                            [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                             llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                            [ImmArg<ArgIndex<5>>, IntrNoMem]>;
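  // ImmArg marks the trailing i32 lane index as a required compile-time
  // immediate: operand index 3 for the x2 class, 5 for the x4 class.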

  def int_aarch64_sve_aesd_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
  def int_aarch64_sve_aesdimc_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
  def int_aarch64_sve_aese_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
  def int_aarch64_sve_aesemc_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;

  def int_aarch64_sve_aesd_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
  def int_aarch64_sve_aesdimc_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
  def int_aarch64_sve_aese_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
  def int_aarch64_sve_aesemc_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;

  def int_aarch64_sve_pmull_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
      [llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
  def int_aarch64_sve_pmlal_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
      [llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
}
```
Reviewer: Sorry if I've missed something, but where does the `q` in `_laneq` come from? It doesn't appear in the intrinsic names in the ACLE proposal linked.

Author: No, you are right: these intrinsics are not correctly defined in the ACLE and need to be fixed. The reason for the `q` at the end is that these intrinsics really operate on 128-bit elements, not on 8-bit ones, and we want to make that clear to the user.
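A scalar sketch of the semantics the author describes, under one reading of that comment (an assumption, not taken from the PR): the `laneq` immediate selects a single 128-bit quadword of the key operand, which is then applied to every 128-bit segment of the data vectors. XOR stands in for the AES round purely to illustrate the indexing.

```c
#include <stdint.h>
#include <string.h>

/* Hypothetical model of the assumed "laneq" indexing. `vl_bytes` is the
   SVE vector length in bytes (a multiple of 16). The real instructions
   perform an AES round per 128-bit segment; XOR is only a stand-in. */
static void laneq_model(uint8_t *data, size_t vl_bytes,
                        const uint8_t *key, unsigned imm) {
    uint8_t quad[16];
    memcpy(quad, key + 16u * (size_t)imm, 16);  /* select quadword `imm` */
    for (size_t seg = 0; seg + 16 <= vl_bytes; seg += 16)
        for (size_t i = 0; i < 16; i++)
            data[seg + i] ^= quad[i];           /* act on each 128-bit segment */
}
```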