Skip to content

Commit 294ced1

Browse files
committed
[AArch64][llvm] Armv9.7-A: Add support for SVE2p3 LUTI6 operations
Add instructions for SVE2p3 LUTI6 operations: - LUTI6 (16-bit) - LUTI6 (8-bit) - LUTI6 (vector, 16-bit) - LUTI6 (table, four registers, 8-bit) - LUTI6 (table, single, 8-bit) as documented here: * https://developer.arm.com/documentation/ddi0602/2025-09/ * https://developer.arm.com/documentation/109697/2025_09/2025-Architecture-Extensions
1 parent 3bf5f7c commit 294ced1

File tree

11 files changed

+977
-4
lines changed

11 files changed

+977
-4
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,10 @@ def HasSVE_B16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSV
252252
AssemblerPredicateWithAll<(all_of FeatureSVE_B16MM), "sve-b16mm">;
253253
def HasF16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasF16MM()">,
254254
AssemblerPredicateWithAll<(all_of FeatureF16MM), "f16mm">;
255+
// Predicates for the SVE2p3 and SME2p3 features. Each pairs a subtarget query
// string with an assembler predicate that requires the matching Feature* and
// names it "sve2p3" / "sme2p3".
// NOTE(review): the neighbouring HasSVE_B16MM and HasF16MM predicates also
// require Subtarget->isSVEAvailable(); HasSVE2p3 does not - confirm that this
// is intentional.
def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">,
256+
AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
257+
def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">,
258+
AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
255259

256260
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
257261
// they should be enabled if either has been specified.

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,6 +1341,10 @@ def Z_q : RegisterOperand<ZPR, "printTypedVectorList<0,'q'>"> {
13411341
let ParserMatchClass = ZPRVectorList<128, 1>;
13421342
}
13431343

1344+
// Register operand over ZPR2 whose parser match class is a 2-element
// ZPRVectorList. Where the sized variants pass an element width and suffix
// (e.g. ZZ_b below uses ZPRVectorList<8, 2> and 'b'), this one passes 0 for
// both - presumably meaning "no element-size suffix"; confirm against
// printTypedVectorList / ZPRVectorList.
def ZZ_Any : RegisterOperand<ZPR2, "printTypedVectorList<0,0>"> {
1345+
let ParserMatchClass = ZPRVectorList<0, 2>;
1346+
}
1347+
13441348
def ZZ_b : RegisterOperand<ZPR2, "printTypedVectorList<0,'b'>"> {
13451349
let ParserMatchClass = ZPRVectorList<8, 2>;
13461350
}
@@ -1361,6 +1365,10 @@ def ZZ_q : RegisterOperand<ZPR2, "printTypedVectorList<0,'q'>"> {
13611365
let ParserMatchClass = ZPRVectorList<128, 2>;
13621366
}
13631367

1368+
// Register operand over ZPR3 whose parser match class is a 3-element
// ZPRVectorList. Where the sized variants pass an element width and suffix
// (e.g. ZZZ_b below uses ZPRVectorList<8, 3> and 'b'), this one passes 0 for
// both - presumably meaning "no element-size suffix"; confirm against
// printTypedVectorList / ZPRVectorList.
def ZZZ_Any : RegisterOperand<ZPR3, "printTypedVectorList<0,0>"> {
1369+
let ParserMatchClass = ZPRVectorList<0, 3>;
1370+
}
1371+
13641372
def ZZZ_b : RegisterOperand<ZPR3, "printTypedVectorList<0,'b'>"> {
13651373
let ParserMatchClass = ZPRVectorList<8, 3>;
13661374
}

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,3 +1173,14 @@ let Predicates = [HasSME_MOP4, HasSMEF64F64] in {
11731173
defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a", "int_aarch64_sme_mop4a">;
11741174
defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">;
11751175
}
1176+
1177+
//===----------------------------------------------------------------------===//
1178+
// SME2.3 instructions
1179+
//===----------------------------------------------------------------------===//
1180+
// LUTI6 definitions gated on HasSME2p3. All five share the "luti6" mnemonic
// and differ only in the instruction format class they instantiate.
let Predicates = [HasSME2p3] in {
1181+
def LUTI6_ZTZ : sme2_lut_single<"luti6">;
1182+
def LUTI6_4ZT3Z : sme2_luti6_zt<"luti6">;
1183+
// The _S_ definitions instantiate the *_strided variant of the format class
// used by the definition directly above them.
def LUTI6_S_4ZT3Z : sme2_luti6_zt_strided<"luti6">;
1184+
def LUTI6_4Z2Z2ZI : sme2_luti6_vector_vg4<"luti6">;
1185+
def LUTI6_S_4Z2Z2ZI : sme2_luti6_vector_vg4_strided<"luti6">;
1186+
} // [HasSME2p3]

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4659,8 +4659,17 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
46594659
defm SQSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrun", 0b100, null_frag>;
46604660
defm SQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrn", 0b000, null_frag>;
46614661
defm UQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqshrn", 0b010, null_frag>;
4662+
4663+
defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">;
46624664
} // End HasSVE2p3_or_SME2p3
46634665

4666+
//===----------------------------------------------------------------------===//
4667+
// SVE2.3 instructions
4668+
//===----------------------------------------------------------------------===//
4669+
// LUTI6 definition gated on HasSVE2p3 only. It instantiates sve2_luti6_vector
// with a ZPR8 destination, a ZZ_b source operand and opc = 0b00011.
let Predicates = [HasSVE2p3] in {
4670+
def LUTI6_Z2ZZ : sve2_luti6_vector<ZPR8, ZZ_b, 0b00011, "luti6">;
4671+
} // End HasSVE2p3
4672+
46644673
//===----------------------------------------------------------------------===//
46654674
// SVE_B16MM Instructions
46664675
//===----------------------------------------------------------------------===//

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4882,6 +4882,13 @@ ParseStatus AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
48824882
FirstReg, Count, Stride, NumElements, ElementWidth, VectorKind, S,
48834883
getLoc(), getContext()));
48844884

4885+
if (getTok().isNot(AsmToken::Comma)) {
4886+
ParseStatus Res = tryParseVectorIndex(Operands);
4887+
if (Res.isFailure())
4888+
return ParseStatus::Failure;
4889+
return ParseStatus::Success;
4890+
}
4891+
48854892
return ParseStatus::Success;
48864893
}
48874894

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3920,6 +3920,80 @@ multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
39203920
def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
39213921
}
39223922

3923+
// 8-bit Look up table
3924+
// Instruction format with a single ZPR8 destination ($Zd) and two inputs:
// a ZTR operand ($ZTt) and a ZPRAny operand ($Zn).
// Encoding: Inst{31-10} is a fixed pattern, Zn occupies Inst{9-5} and Zd
// occupies Inst{4-0}.
class sme2_lut_single<string asm>
3925+
: I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn),
3926+
asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
3927+
// ZTt is declared zero bits wide and is not assigned to any Inst field.
bits<0> ZTt;
3928+
bits<5> Zd;
3929+
bits<5> Zn;
3930+
let Inst{31-10} = 0b1100000011001000010000;
3931+
let Inst{9-5} = Zn;
3932+
let Inst{4-0} = Zd;
3933+
}
3934+
3935+
// Instruction format with a ZZZZ_b_mul_r destination ($Zd) and two inputs:
// a ZTR operand ($ZTt) and a ZZZ_Any operand ($Zn).
// Encoding: Inst{31-10} is a fixed pattern; Zn and Zd are 3-bit fields placed
// in Inst{9-7} and Inst{4-2}; Inst{6-5} and Inst{1-0} are fixed to zero.
class sme2_luti6_zt<string asm>
3936+
: I<(outs ZZZZ_b_mul_r:$Zd), (ins ZTR:$ZTt, ZZZ_Any:$Zn),
3937+
asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
3938+
// ZTt is declared zero bits wide and is not assigned to any Inst field.
bits<0> ZTt;
3939+
bits<3> Zd;
3940+
bits<3> Zn;
3941+
let Inst{31-10} = 0b1100000010001010000000;
3942+
let Inst{9-7} = Zn;
3943+
let Inst{6-5} = 0b00;
3944+
let Inst{4-2} = Zd;
3945+
let Inst{1-0} = 0b00;
3946+
}
3947+
3948+
// Instruction format with a ZZZZ_b_strided destination ($Zd) and two inputs:
// a ZTR operand ($ZTt) and a ZZZ_Any operand ($Zn).
// Encoding: Inst{31-10} is a fixed pattern and Zn is a 3-bit field in
// Inst{9-7}. The 3-bit Zd is split across the word (see below).
class sme2_luti6_zt_strided<string asm>
3949+
: I<(outs ZZZZ_b_strided:$Zd), (ins ZTR:$ZTt, ZZZ_Any:$Zn),
3950+
asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
3951+
// ZTt is declared zero bits wide and is not assigned to any Inst field.
bits<0> ZTt;
3952+
bits<3> Zd;
3953+
bits<3> Zn;
3954+
let Inst{31-10} = 0b1100000010011010000000;
3955+
let Inst{9-7} = Zn;
3956+
let Inst{6-5} = 0b00;
3957+
// Zd is not contiguous: its top bit goes to Inst{4}, its low two bits go to
// Inst{1-0}, and Inst{3-2} in between is fixed to zero.
let Inst{4} = Zd{2};
3958+
let Inst{3-2} = 0b00;
3959+
let Inst{1-0} = Zd{1-0};
3960+
}
3961+
3962+
// Instruction format with a ZZZZ_h_mul_r destination ($Zd) and three inputs:
// a ZZ_h operand ($Zn), a ZZ_Any operand ($Zm) and a VectorIndexD index ($i1).
// In the assembly string the index is written immediately after $Zm, with no
// separator ("$Zm$i1").
// Encoding: i1 in Inst{22}, Zm in Inst{20-16}, Zn in Inst{9-5}, 3-bit Zd in
// Inst{4-2}; the remaining bits are fixed.
class sme2_luti6_vector_vg4<string asm>
3963+
: I<(outs ZZZZ_h_mul_r:$Zd), (ins ZZ_h:$Zn, ZZ_Any:$Zm, VectorIndexD:$i1),
3964+
asm, "\t$Zd, $Zn, $Zm$i1", "", []>, Sched<[]> {
3965+
bits<3> Zd;
3966+
bits<5> Zn;
3967+
bits<5> Zm;
3968+
bits<1> i1;
3969+
let Inst{31-23} = 0b110000010;
3970+
let Inst{22} = i1;
3971+
let Inst{21} = 0b1;
3972+
let Inst{20-16} = Zm;
3973+
let Inst{15-10} = 0b111101;
3974+
let Inst{9-5} = Zn;
3975+
let Inst{4-2} = Zd;
3976+
let Inst{1-0} = 0b00;
3977+
}
3978+
3979+
// Instruction format with a ZZZZ_h_strided destination ($Zd) and three inputs:
// a ZZ_h operand ($Zn), a ZZ_Any operand ($Zm) and a VectorIndexD index ($i1).
// In the assembly string the index is written immediately after $Zm, with no
// separator ("$Zm$i1").
// Encoding: i1 in Inst{22}, Zm in Inst{20-16}, Zn in Inst{9-5}; the 3-bit Zd
// is split across the word (see below); the remaining bits are fixed.
class sme2_luti6_vector_vg4_strided<string asm>
3980+
: I<(outs ZZZZ_h_strided:$Zd), (ins ZZ_h:$Zn, ZZ_Any:$Zm, VectorIndexD:$i1),
3981+
asm, "\t$Zd, $Zn, $Zm$i1", "", []>, Sched<[]> {
3982+
bits<3> Zd;
3983+
bits<5> Zn;
3984+
bits<5> Zm;
3985+
bits<1> i1;
3986+
let Inst{31-23} = 0b110000010;
3987+
let Inst{22} = i1;
3988+
let Inst{21} = 0b1;
3989+
let Inst{20-16} = Zm;
3990+
let Inst{15-10} = 0b111111;
3991+
let Inst{9-5} = Zn;
3992+
// Zd is not contiguous: its top bit goes to Inst{4}, its low two bits go to
// Inst{1-0}, and Inst{3-2} in between is fixed to zero.
let Inst{4} = Zd{2};
3993+
let Inst{3-2} = 0b00;
3994+
let Inst{1-0} = Zd{1-0};
3995+
}
3996+
39233997
//===----------------------------------------------------------------------===//
39243998
// SME2 MOV
39253999
class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v,

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11192,7 +11192,7 @@ multiclass sve2_fp8_dot_indexed_s<string asm, SDPatternOperator op> {
1119211192
def : SVE_4_Op_Pat<nxv4f32, op, nxv4f32, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME)>;
1119311193
}
1119411194

11195-
// FP8 Look up table
11195+
// Look up table
1119611196
class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty,
1119711197
Operand idx_ty, bits<4>opc, string mnemonic>
1119811198
: I<(outs zd_ty:$Zd), (ins zn_ty:$Zn, ZPRAny:$Zm, idx_ty:$idx),
@@ -11211,7 +11211,7 @@ class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty,
1121111211
let Inst{4-0} = Zd;
1121211212
}
1121311213

11214-
// FP8 Look up table read with 2-bit indices
11214+
// Look up table read with 2-bit indices
1121511215
multiclass sve2_luti2_vector_index<string mnemonic> {
1121611216
def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexS32b, {?, 0b100}, mnemonic> {
1121711217
bits<2> idx;
@@ -11233,7 +11233,7 @@ multiclass sve2_luti2_vector_index<string mnemonic> {
1123311233
i32, timm32_0_7, !cast<Instruction>(NAME # _H)>;
1123411234
}
1123511235

11236-
// FP8 Look up table read with 4-bit indices
11236+
// Look up table read with 4-bit indices
1123711237
multiclass sve2_luti4_vector_index<string mnemonic> {
1123811238
def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexD32b, 0b1001, mnemonic> {
1123911239
bit idx;
@@ -11254,7 +11254,7 @@ multiclass sve2_luti4_vector_index<string mnemonic> {
1125411254
i32, timm32_0_3, !cast<Instruction>(NAME # _H)>;
1125511255
}
1125611256

11257-
// FP8 Look up table read with 4-bit indices (two contiguous registers)
11257+
// Look up table read with 4-bit indices (two contiguous registers)
1125811258
multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
1125911259
def NAME : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexS32b, {?, 0b101}, mnemonic> {
1126011260
bits<2> idx;
@@ -11278,6 +11278,33 @@ multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
1127811278
nxv16i8:$Op3, timm32_0_3:$Op4))>;
1127911279
}
1128011280

11281+
// Look up table read with 6-bit indices
11282+
// Defines a single _H record built on sve2_lut_vector_index with a ZPR16
// destination, a ZZ_h source operand, a VectorIndexD32b index and
// opc = 0b1011.
multiclass sve2_luti6_vector_index<string mnemonic> {
11283+
def _H : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexD32b, 0b1011, mnemonic> {
11284+
// The index is a single bit, encoded in Inst{23}.
bit idx;
11285+
let Inst{23} = idx;
11286+
}
11287+
}
11288+
11289+
// Look up table
11290+
// Instruction format with no index operand: destination $Zd of type zd_ty,
// inputs $Zn of type zn_ty and $Zm (ZPRAny).
// The 5-bit opc parameter is split across the encoding: opc{4-3} goes to
// Inst{23-22} and opc{2-0} goes to Inst{12-10}. Zm, Zn and Zd are 5-bit fields
// in Inst{20-16}, Inst{9-5} and Inst{4-0}; the remaining bits are fixed.
class sve2_luti6_vector<ZPRRegOp zd_ty, RegisterOperand zn_ty,
11291+
bits<5>opc, string mnemonic>
11292+
: I<(outs zd_ty:$Zd), (ins zn_ty:$Zn, ZPRAny:$Zm),
11293+
mnemonic, "\t$Zd, $Zn, $Zm",
11294+
"", []>, Sched<[]> {
11295+
bits<5> Zd;
11296+
bits<5> Zn;
11297+
bits<5> Zm;
11298+
let Inst{31-24} = 0b01000101;
11299+
let Inst{23-22} = opc{4-3};
11300+
let Inst{21} = 0b1;
11301+
let Inst{20-16} = Zm;
11302+
let Inst{15-13} = 0b101;
11303+
let Inst{12-10} = opc{2-0};
11304+
let Inst{9-5} = Zn;
11305+
let Inst{4-0} = Zd;
11306+
}
11307+
1128111308
//===----------------------------------------------------------------------===//
1128211309
// Checked Pointer Arithmetic (FEAT_CPA)
1128311310
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)