Skip to content

Commit 516ed9e

Browse files
Lukacma, github-actions[bot]
authored and committed
Automerge: [AArch64] Add intrinsics for 9.6 crypto instructions (#165545)
This patch adds intrinsics for the crypto instructions defined in the ARM-software/acle#411 ACLE proposal.
2 parents f4915fe + ef86355 commit 516ed9e

File tree

8 files changed

+505
-15
lines changed

8 files changed

+505
-15
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1873,6 +1873,23 @@ def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone,
18731873
def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
18741874
}
18751875

1876+
let SVETargetGuard = "sve-aes2", SMETargetGuard = "sve-aes2,ssve-aes" in {
1877+
def SVAESD_X2 : SInst<"svaesd_lane[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesd_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
1878+
def SVAESDIMC_X2 : SInst<"svaesdimc_lane[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesdimc_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
1879+
def SVAESE_X2 : SInst<"svaese_lane[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aese_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
1880+
def SVAESEMC_X2 : SInst<"svaesemc_lane[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesemc_lane_x2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
1881+
1882+
def SVAESD_X4 : SInst<"svaesd_lane[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesd_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
1883+
def SVAESDIMC_X4 : SInst<"svaesdimc_lane[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesdimc_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
1884+
def SVAESE_X4 : SInst<"svaese_lane[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aese_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
1885+
def SVAESEMC_X4 : SInst<"svaesemc_lane[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesemc_lane_x4", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>;
1886+
1887+
def SVPMULL_PAIR_U64 : SInst<"svpmull_pair[_{d}_x2]", "2dd", "Ul", MergeNone, "aarch64_sve_pmull_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
1888+
def SVPMULL_PAIR_N_U64 : SInst<"svpmull_pair[_n_{d}_x2]", "2da", "Ul", MergeNone, "aarch64_sve_pmull_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
1889+
def SVPMLAL_PAIR_U64 : SInst<"svpmlal_pair[_{d}_x2]", "22dd", "Ul", MergeNone, "aarch64_sve_pmlal_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
1890+
def SVPMLAL_PAIR_N_U64 : SInst<"svpmlal_pair[_n_{d}_x2]", "22da", "Ul", MergeNone, "aarch64_sve_pmlal_pair_x2", [IsOverloadNone, VerifyRuntimeMode]>;
1891+
}
1892+
18761893
let SVETargetGuard = "sve-sha3", SMETargetGuard = "sme2p1,sve-sha3" in {
18771894
def SVRAX1 : SInst<"svrax1[_{d}]", "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>;
18781895
}

clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c

Lines changed: 217 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,4 +254,30 @@ void test_svdup_laneq(){
254254
svdup_laneq_f32(zn_f32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
255255
svdup_laneq_f64(zn_f64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
256256
svdup_laneq_bf16(zn_bf16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
257+
}
258+
259+
__attribute__((target("+sve-aes2")))
260+
void test_aes_x2_imm_0_3(svuint8x2_t op1, svuint8_t op2) {
261+
svaesd_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
262+
svaesdimc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
263+
svaese_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
264+
svaesemc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
265+
266+
svaesd_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
267+
svaesdimc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
268+
svaese_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
269+
svaesemc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
270+
}
271+
272+
__attribute__((target("+sve-aes2")))
273+
void test_aes_x4_imm_0_3(svuint8x4_t op1, svuint8_t op2) {
274+
svaesd_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
275+
svaesdimc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
276+
svaese_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
277+
svaesemc_lane(op1, op2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
278+
279+
svaesd_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
280+
svaesdimc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
281+
svaese_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
282+
svaesemc_lane(op1, op2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
257283
}

clang/utils/TableGen/SveEmitter.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,14 +267,21 @@ class Intrinsic {
267267
unsigned getSplatIdx() const {
268268
unsigned I = 1, Param = 0;
269269
for (; I < Proto.size(); ++I, ++Param) {
270+
assert(Proto[I] != '4' &&
271+
"Handling for '4' prototype modifier not implemented");
270272
if (Proto[I] == 'a' || Proto[I] == 'j' || Proto[I] == 'f' ||
271273
Proto[I] == 'r' || Proto[I] == 'K' || Proto[I] == 'L' ||
272274
Proto[I] == 'R' || Proto[I] == '@' || Proto[I] == '!')
273275
break;
274276

277+
if (Proto[I] == '2')
278+
Param += 1;
279+
275280
// Multivector modifier can be skipped
276-
if (Proto[I] == '.')
281+
if (Proto[I] == '.') {
282+
Param -= 1; // Adjust for the increment at the top of the loop
277283
I += 2;
284+
}
278285
}
279286
assert(I != Proto.size() && "Prototype has no splat operand");
280287
return Param;

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4241,4 +4241,31 @@ let TargetPrefix = "aarch64" in {
42414241
def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
42424242
def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
42434243
def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
4244+
4245+
// AES2
4246+
class SVE2_Crypto_LANE_X2_Intrinsic
4247+
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
4248+
[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
4249+
[ImmArg<ArgIndex<3>>, IntrNoMem]>;
4250+
class SVE2_Crypto_LANE_X4_Intrinsic
4251+
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
4252+
[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
4253+
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
4254+
[ImmArg<ArgIndex<5>>, IntrNoMem]>;
4255+
4256+
def int_aarch64_sve_aesd_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
4257+
def int_aarch64_sve_aesdimc_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
4258+
def int_aarch64_sve_aese_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
4259+
def int_aarch64_sve_aesemc_lane_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
4260+
4261+
def int_aarch64_sve_aesd_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
4262+
def int_aarch64_sve_aesdimc_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
4263+
def int_aarch64_sve_aese_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
4264+
def int_aarch64_sve_aesemc_lane_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
4265+
4266+
def int_aarch64_sve_pmull_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
4267+
[llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
4268+
def int_aarch64_sve_pmlal_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
4269+
[llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
42444270
}
4271+

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1979,27 +1979,28 @@ void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
19791979

19801980
SDLoc DL(N);
19811981
EVT VT = N->getValueType(0);
1982-
unsigned FirstVecIdx = HasPred ? 2 : 1;
1982+
SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
1983+
SmallVector<SDValue, 4> Ops;
19831984

1984-
auto GetMultiVecOperand = [=](unsigned StartIdx) {
1985-
SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1985+
auto GetMultiVecOperand = [&]() {
1986+
SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
1987+
OpsIter += NumVecs;
19861988
return createZMulTuple(Regs);
19871989
};
19881990

1989-
SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1991+
if (HasPred)
1992+
Ops.push_back(*OpsIter++);
19901993

1991-
SDValue Zm;
1994+
Ops.push_back(GetMultiVecOperand());
19921995
if (IsZmMulti)
1993-
Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1996+
Ops.push_back(GetMultiVecOperand());
19941997
else
1995-
Zm = N->getOperand(NumVecs + FirstVecIdx);
1998+
Ops.push_back(*OpsIter++);
19961999

2000+
// Append any remaining operands.
2001+
Ops.append(OpsIter, N->op_end());
19972002
SDNode *Intrinsic;
1998-
if (HasPred)
1999-
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
2000-
N->getOperand(1), Zdn, Zm);
2001-
else
2002-
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
2003+
Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
20032004
SDValue SuperReg = SDValue(Intrinsic, 0);
20042005
for (unsigned i = 0; i < NumVecs; ++i)
20052006
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
@@ -6254,6 +6255,46 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
62546255
AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
62556256
SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
62566257
return;
6258+
case Intrinsic::aarch64_sve_aese_lane_x2:
6259+
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6260+
return;
6261+
case Intrinsic::aarch64_sve_aesd_lane_x2:
6262+
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6263+
return;
6264+
case Intrinsic::aarch64_sve_aesemc_lane_x2:
6265+
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6266+
return;
6267+
case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6268+
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6269+
return;
6270+
case Intrinsic::aarch64_sve_aese_lane_x4:
6271+
SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6272+
return;
6273+
case Intrinsic::aarch64_sve_aesd_lane_x4:
6274+
SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6275+
return;
6276+
case Intrinsic::aarch64_sve_aesemc_lane_x4:
6277+
SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6278+
return;
6279+
case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6280+
SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6281+
return;
6282+
case Intrinsic::aarch64_sve_pmlal_pair_x2:
6283+
SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6284+
return;
6285+
case Intrinsic::aarch64_sve_pmull_pair_x2: {
6286+
SDLoc DL(Node);
6287+
SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6288+
SDNode *Res =
6289+
CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6290+
SDValue SuperReg = SDValue(Res, 0);
6291+
for (unsigned I = 0; I < 2; I++)
6292+
ReplaceUses(SDValue(Node, I),
6293+
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6294+
SuperReg));
6295+
CurDAG->RemoveDeadNode(Node);
6296+
return;
6297+
}
62576298
case Intrinsic::aarch64_sve_fscale_x4:
62586299
SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
62596300
return;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4271,12 +4271,12 @@ let Predicates = [HasSVEAES2, HasNonStreamingSVE_or_SSVE_AES] in {
42714271
def AESE_2ZZI_B : sve_crypto_binary_multi2<0b000, "aese">;
42724272
def AESD_2ZZI_B : sve_crypto_binary_multi2<0b010, "aesd">;
42734273
def AESEMC_2ZZI_B : sve_crypto_binary_multi2<0b100, "aesemc">;
4274-
def AESDMIC_2ZZI_B : sve_crypto_binary_multi2<0b110, "aesdimc">;
4274+
def AESDIMC_2ZZI_B : sve_crypto_binary_multi2<0b110, "aesdimc">;
42754275
// SVE_AES2 multi-vector instructions (x4)
42764276
def AESE_4ZZI_B : sve_crypto_binary_multi4<0b0000, "aese">;
42774277
def AESD_4ZZI_B : sve_crypto_binary_multi4<0b0100, "aesd">;
42784278
def AESEMC_4ZZI_B : sve_crypto_binary_multi4<0b1000, "aesemc">;
4279-
def AESDMIC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">;
4279+
def AESDIMC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">;
42804280

42814281
// SVE_AES2 multi-vector polynomial multiply
42824282
def PMLAL_2ZZZ_Q : sve_crypto_pmlal_multi<"pmlal">;

0 commit comments

Comments
 (0)