Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,33 @@ defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", int_aarch64_sme
defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme_usmops_wide>;
}

// Quarter outer-product instructions that accumulate into 32-bit ZA tiles.
// Every record defined in this block is gated on HasSME2p2.
let Predicates = [HasSME2p2] in {
// 8-bit sources: multiclass arguments are <zn_u, zm_u, subtr, mnemonic>.
// zn_u / zm_u follow the first / second sign letter of the mnemonic
// (s = 0, u = 1; "smop4" and "umop4" use the same value for both), and
// subtr is 0 for the "...a" forms and 1 for the "...s" forms.
defm SMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b0, "smop4a">;
defm SMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b0, 0b1, "smop4s">;
defm SUMOP4A : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b0, "sumop4a">;
defm SUMOP4S : sme_quarter_outer_product_i8_i32<0b0, 0b1, 0b1, "sumop4s">;
defm USMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b0, "usmop4a">;
defm USMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b0, 0b1, "usmop4s">;
defm UMOP4A : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b0, "umop4a">;
defm UMOP4S : sme_quarter_outer_product_i8_i32<0b1, 0b1, 0b1, "umop4s">;

// 16-bit sources: multiclass arguments are <unsigned, subtr, mnemonic>.
// Only same-signedness forms are instantiated here. The defm names repeat
// those above; the records stay distinct because this multiclass appends
// _HToS suffixes where the 8-bit one appends _BToS.
defm SMOP4A : sme_quarter_outer_product_i16_i32<0b0, 0b0, "smop4a">;
defm SMOP4S : sme_quarter_outer_product_i16_i32<0b0, 0b1, "smop4s">;
defm UMOP4A : sme_quarter_outer_product_i16_i32<0b1, 0b0, "umop4a">;
defm UMOP4S : sme_quarter_outer_product_i16_i32<0b1, 0b1, "umop4s">;
}

// Quarter outer-product instructions that accumulate into 64-bit ZA tiles.
// These records require both HasSME2p2 and HasSMEI16I64.
// Multiclass arguments are <zn_u, zm_u, subtr, mnemonic>, with the same
// value convention as the 8-bit forms: zn_u / zm_u follow the sign letters
// of the mnemonic (s = 0, u = 1) and subtr is 1 for the "...s" forms.
// The defm names repeat the 32-bit ones; the multiclass appends _HtoD
// suffixes, so the resulting record names do not collide.
let Predicates = [HasSME2p2, HasSMEI16I64] in {
defm SMOP4A : sme_quarter_outer_product_i64<0b0, 0b0, 0b0, "smop4a">;
defm SMOP4S : sme_quarter_outer_product_i64<0b0, 0b0, 0b1, "smop4s">;
defm SUMOP4A : sme_quarter_outer_product_i64<0b0, 0b1, 0b0, "sumop4a">;
defm SUMOP4S : sme_quarter_outer_product_i64<0b0, 0b1, 0b1, "sumop4s">;
defm UMOP4A : sme_quarter_outer_product_i64<0b1, 0b1, 0b0, "umop4a">;
defm UMOP4S : sme_quarter_outer_product_i64<0b1, 0b1, 0b1, "umop4s">;
defm USMOP4A : sme_quarter_outer_product_i64<0b1, 0b0, 0b0, "usmop4a">;
defm USMOP4S : sme_quarter_outer_product_i64<0b1, 0b0, 0b1, "usmop4s">;
}

let Predicates = [HasSME] in {
//===----------------------------------------------------------------------===//
// Loads and stores
Expand Down
110 changes: 110 additions & 0 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,116 @@ multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator
def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8f16>;
}

// Instruction format for the quarter outer products that accumulate into a
// 64-bit ZA tile (TileOp64).
//
// Template arguments:
//   zn_u_pair - {u0, N}: bit 1 is encoded in Inst{24}, bit 0 in Inst{9}.
//   zm_u_pair - {u1, M}: encoded as a unit in Inst{21-20}.
//   subtr     - encoded in Inst{4}.
//   zn_ty     - register operand class of $Zn.
//   zm_ty     - register operand class of $Zm.
//   mnemonic  - assembly mnemonic.
class sme_quarter_outer_product_i64<bits<2> zn_u_pair, bits<2> zm_u_pair,
                                    bit subtr, RegisterOperand zn_ty,
                                    RegisterOperand zm_ty, string mnemonic>
    : I<(outs TileOp64:$ZAda),
        (ins TileOp64:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  // The destination tile is tied to the tile input operand.
  let Constraints = "$ZAda = $_ZAda";

  bits<3> ZAda;
  bits<3> Zn;
  bits<3> Zm;

  // Operand fields.
  let Inst{19-17} = Zm;
  let Inst{8-6}   = Zn;
  let Inst{2-0}   = ZAda;

  // Variant selectors taken from the template arguments.
  let Inst{24}    = zn_u_pair{1}; // u0
  let Inst{9}     = zn_u_pair{0}; // N
  let Inst{21-20} = zm_u_pair;    // {u1, M}
  let Inst{4}     = subtr;

  // Fixed bits.
  let Inst{31-25} = 0b1010000;
  let Inst{23-22} = 0b11;
  let Inst{16-10} = 0b0000000;
  let Inst{5}     = 0;
  let Inst{3}     = 0b1;
}

// Instruction format for the quarter outer products that accumulate into a
// 32-bit ZA tile (TileOp32) and carry independent u0 / u1 selectors.
//
// Template arguments:
//   zn_u_pair - {u0, N}: bit 1 is encoded in Inst{24}, bit 0 in Inst{9}.
//   zm_u_pair - {u1, M}: encoded as a unit in Inst{21-20}.
//   subtr     - encoded in Inst{4}.
//   zn_ty     - register operand class of $Zn.
//   zm_ty     - register operand class of $Zm.
//   mnemonic  - assembly mnemonic.
class sme_quarter_outer_product_i8_i32<bits<2> zn_u_pair, bits<2> zm_u_pair,
                                       bit subtr, RegisterOperand zn_ty,
                                       RegisterOperand zm_ty, string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  // The destination tile is tied to the tile input operand.
  let Constraints = "$ZAda = $_ZAda";

  bits<2> ZAda;
  bits<3> Zn;
  bits<3> Zm;

  // Operand fields.
  let Inst{19-17} = Zm;
  let Inst{8-6}   = Zn;
  let Inst{1-0}   = ZAda;

  // Variant selectors taken from the template arguments.
  let Inst{24}    = zn_u_pair{1}; // u0
  let Inst{9}     = zn_u_pair{0}; // N
  let Inst{21-20} = zm_u_pair;    // {u1, M}
  let Inst{4}     = subtr;

  // Fixed bits.
  let Inst{31-25} = 0b1000000;
  let Inst{23-22} = 0b00;
  let Inst{16-10} = 0b0100000;
  let Inst{5}     = 0;
  let Inst{3-2}   = 0b00;
}

// Instruction format for the quarter outer products that accumulate into a
// 32-bit ZA tile (TileOp32) and have a single signedness selector (u0).
//
// Template arguments:
//   u0       - encoded in Inst{24}.
//   N        - encoded in Inst{9}.
//   M        - encoded in Inst{20}.
//   subtr    - encoded in Inst{4}.
//   zn_ty    - register operand class of $Zn.
//   zm_ty    - register operand class of $Zm.
//   mnemonic - assembly mnemonic.
class sme_quarter_outer_product_i16_i32<bit u0, bit N, bit M, bit subtr,
                                        RegisterOperand zn_ty,
                                        RegisterOperand zm_ty, string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  // The destination tile is tied to the tile input operand.
  let Constraints = "$ZAda = $_ZAda";

  bits<2> ZAda;
  bits<3> Zn;
  bits<3> Zm;

  // Operand fields.
  let Inst{19-17} = Zm;
  let Inst{8-6}   = Zn;
  let Inst{1-0}   = ZAda;

  // Variant selectors taken from the template arguments.
  let Inst{24}    = u0;
  let Inst{20}    = M;
  let Inst{9}     = N;
  let Inst{4}     = subtr;

  // Fixed bits.
  let Inst{31-25} = 0b1000000;
  let Inst{23-21} = 0b000;
  let Inst{16-10} = 0b0100000;
  let Inst{5}     = 0;
  let Inst{3-2}   = 0b10;
}

// Defines the four operand-shape variants of one quarter outer product that
// uses byte source operand classes and the 32-bit-tile instruction format.
//
// zn_u / zm_u become the high bit of the {u, N} / {u, M} pairs passed to the
// instruction class; the low bit is 1 exactly when that operand uses a
// ZZ_b_mul_r_* class rather than a ZPR8Mul2_* class. $Zn always takes a
// *_Lo class and $Zm a *_Hi class.
multiclass sme_quarter_outer_product_i8_i32<bit zn_u, bit zm_u, bit subtr, string mnemonic>{
// Zn: ZPR8Mul2_Lo, Zm: ZPR8Mul2_Hi (N = 0, M = 0).
def _MZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 0}, subtr,
ZPR8Mul2_Lo, ZPR8Mul2_Hi, mnemonic>;
// Zn: ZZ_b_mul_r_Lo, Zm: ZPR8Mul2_Hi (N = 1, M = 0).
def _M2ZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 0}, subtr,
ZZ_b_mul_r_Lo, ZPR8Mul2_Hi, mnemonic>;
// Zn: ZPR8Mul2_Lo, Zm: ZZ_b_mul_r_Hi (N = 0, M = 1).
def _MZ2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 1}, subtr,
ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, mnemonic>;
// Zn: ZZ_b_mul_r_Lo, Zm: ZZ_b_mul_r_Hi (N = 1, M = 1).
def _M2Z2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 1}, subtr,
ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi, mnemonic>;
}

// Defines the four operand-shape variants of one quarter outer product that
// uses halfword source operand classes and the 32-bit-tile instruction format.
//
// `unsigned` is forwarded as the class's u0 argument. The second and third
// class arguments are N and M: each is 1 exactly when the corresponding
// operand uses a ZZ_h_mul_r_* class rather than a ZPR16Mul2_* class.
// $Zn always takes a *_Lo class and $Zm a *_Hi class.
multiclass sme_quarter_outer_product_i16_i32<bit unsigned, bit subtr, string mnemonic>{
// Zn: ZPR16Mul2_Lo, Zm: ZPR16Mul2_Hi (N = 0, M = 0).
def _MZZ_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b0, 0b0, subtr,
ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>;
// Zn: ZZ_h_mul_r_Lo, Zm: ZPR16Mul2_Hi (N = 1, M = 0).
def _M2ZZ_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b1, 0b0, subtr,
ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>;
// Zn: ZPR16Mul2_Lo, Zm: ZZ_h_mul_r_Hi (N = 0, M = 1).
def _MZ2Z_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b0, 0b1, subtr,
ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>;
// Zn: ZZ_h_mul_r_Lo, Zm: ZZ_h_mul_r_Hi (N = 1, M = 1).
def _M2Z2Z_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b1, 0b1, subtr,
ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi, mnemonic>;
}

// Defines the four operand-shape variants of one quarter outer product that
// uses halfword source operand classes and the 64-bit-tile instruction format.
//
// zn_u / zm_u become the high bit of the {u, N} / {u, M} pairs passed to the
// instruction class; the low bit is 1 exactly when that operand uses a
// ZZ_h_mul_r_* class rather than a ZPR16Mul2_* class. $Zn always takes a
// *_Lo class and $Zm a *_Hi class.
// NOTE(review): the suffix here is spelled "HtoD" while the 32-bit
// multiclasses use "BToS"/"HToS"; the suffixes form record names, so any
// renaming would have to be coordinated with all users of those records.
multiclass sme_quarter_outer_product_i64<bit zn_u, bit zm_u, bit subtr, string mnemonic>{
// Zn: ZPR16Mul2_Lo, Zm: ZPR16Mul2_Hi (N = 0, M = 0).
def _MZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 0}, subtr,
ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>;
// Zn: ZZ_h_mul_r_Lo, Zm: ZPR16Mul2_Hi (N = 1, M = 0).
def _M2ZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 0}, subtr,
ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>;
// Zn: ZPR16Mul2_Lo, Zm: ZZ_h_mul_r_Hi (N = 0, M = 1).
def _MZ2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 1}, subtr,
ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>;
// Zn: ZZ_h_mul_r_Lo, Zm: ZZ_h_mul_r_Hi (N = 1, M = 1).
def _M2Z2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 1}, subtr,
ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi, mnemonic>;
}

//===----------------------------------------------------------------------===//
// SME Add Vector to Tile
//===----------------------------------------------------------------------===//
Expand Down
85 changes: 85 additions & 0 deletions llvm/test/MC/AArch64/SME2p2/smop4a-16to32.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Assembler/disassembler test for smop4a with .h sources and a .s tile.
// The invocations below, in order:
//   1. assemble with +sme2p2 and verify the printed instruction and encoding;
//   2. assemble with no extra features (expected to fail) and verify the
//      missing-feature diagnostic;
//   3. assemble to an object file and verify llvm-objdump decodes it when
//      +sme2p2 is enabled;
//   4. verify llvm-objdump prints the raw word as unknown when sme2p2 is
//      disabled;
//   5. feed the emitted encodings back through the disassembler and verify
//      they print and re-encode identically.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
// RUN: | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST

// Zn: single vector, Zm: single vector.
// The three cases use all-zero operand fields, an intermediate value, and
// all-one operand fields (za3, z14, z30).
smop4a za0.s, z0.h, z16.h // 10000000-00000000-10000000-00001000
// CHECK-INST: smop4a za0.s, z0.h, z16.h
// CHECK-ENCODING: [0x08,0x80,0x00,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 80008008 <unknown>

smop4a za3.s, z12.h, z24.h // 10000000-00001000-10000001-10001011
// CHECK-INST: smop4a za3.s, z12.h, z24.h
// CHECK-ENCODING: [0x8b,0x81,0x08,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 8008818b <unknown>

smop4a za3.s, z14.h, z30.h // 10000000-00001110-10000001-11001011
// CHECK-INST: smop4a za3.s, z14.h, z30.h
// CHECK-ENCODING: [0xcb,0x81,0x0e,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 800e81cb <unknown>

// Zn: single vector, Zm: two-register list.
// Relative to the single/single cases only bit 20 of the encoding changes.
smop4a za0.s, z0.h, {z16.h-z17.h} // 10000000-00010000-10000000-00001000
// CHECK-INST: smop4a za0.s, z0.h, { z16.h, z17.h }
// CHECK-ENCODING: [0x08,0x80,0x10,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 80108008 <unknown>

smop4a za3.s, z12.h, {z24.h-z25.h} // 10000000-00011000-10000001-10001011
// CHECK-INST: smop4a za3.s, z12.h, { z24.h, z25.h }
// CHECK-ENCODING: [0x8b,0x81,0x18,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 8018818b <unknown>

smop4a za3.s, z14.h, {z30.h-z31.h} // 10000000-00011110-10000001-11001011
// CHECK-INST: smop4a za3.s, z14.h, { z30.h, z31.h }
// CHECK-ENCODING: [0xcb,0x81,0x1e,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 801e81cb <unknown>

// Zn: two-register list, Zm: single vector.
// Relative to the single/single cases only bit 9 of the encoding changes.
smop4a za0.s, {z0.h-z1.h}, z16.h // 10000000-00000000-10000010-00001000
// CHECK-INST: smop4a za0.s, { z0.h, z1.h }, z16.h
// CHECK-ENCODING: [0x08,0x82,0x00,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 80008208 <unknown>

smop4a za3.s, {z12.h-z13.h}, z24.h // 10000000-00001000-10000011-10001011
// CHECK-INST: smop4a za3.s, { z12.h, z13.h }, z24.h
// CHECK-ENCODING: [0x8b,0x83,0x08,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 8008838b <unknown>

smop4a za3.s, {z14.h-z15.h}, z30.h // 10000000-00001110-10000011-11001011
// CHECK-INST: smop4a za3.s, { z14.h, z15.h }, z30.h
// CHECK-ENCODING: [0xcb,0x83,0x0e,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 800e83cb <unknown>

// Zn: two-register list, Zm: two-register list.
// Both bit 9 and bit 20 of the encoding are set in these cases.
smop4a za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000000-00010000-10000010-00001000
// CHECK-INST: smop4a za0.s, { z0.h, z1.h }, { z16.h, z17.h }
// CHECK-ENCODING: [0x08,0x82,0x10,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 80108208 <unknown>

smop4a za3.s, {z12.h-z13.h}, {z24.h-z25.h} // 10000000-00011000-10000011-10001011
// CHECK-INST: smop4a za3.s, { z12.h, z13.h }, { z24.h, z25.h }
// CHECK-ENCODING: [0x8b,0x83,0x18,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 8018838b <unknown>

smop4a za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 10000000-00011110-10000011-11001011
// CHECK-INST: smop4a za3.s, { z14.h, z15.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xcb,0x83,0x1e,0x80]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: 801e83cb <unknown>
85 changes: 85 additions & 0 deletions llvm/test/MC/AArch64/SME2p2/smop4a-64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Assembler/disassembler test for smop4a with .h sources and a .d tile.
// The invocations below, in order:
//   1. assemble with +sme2p2,+sme-i16i64 and verify the printed instruction
//      and encoding;
//   2. assemble with no extra features (expected to fail) and verify the
//      missing-feature diagnostic;
//   3. assemble to an object file and verify llvm-objdump decodes it when
//      both features are enabled;
//   4. verify llvm-objdump prints the raw word as unknown when sme2p2 is
//      disabled;
//   5. feed the emitted encodings back through the disassembler and verify
//      they print and re-encode identically.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-i16i64 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-i16i64 < %s \
// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-i16i64 < %s \
// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-i16i64 < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-i16i64 -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST

// Zn: single vector, Zm: single vector.
// The three cases use all-zero operand fields, an intermediate value, and
// all-one operand fields (za7, z14, z30).
smop4a za0.d, z0.h, z16.h // 10100000-11000000-00000000-00001000
// CHECK-INST: smop4a za0.d, z0.h, z16.h
// CHECK-ENCODING: [0x08,0x00,0xc0,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0c00008 <unknown>

smop4a za5.d, z10.h, z20.h // 10100000-11000100-00000001-01001101
// CHECK-INST: smop4a za5.d, z10.h, z20.h
// CHECK-ENCODING: [0x4d,0x01,0xc4,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0c4014d <unknown>

smop4a za7.d, z14.h, z30.h // 10100000-11001110-00000001-11001111
// CHECK-INST: smop4a za7.d, z14.h, z30.h
// CHECK-ENCODING: [0xcf,0x01,0xce,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0ce01cf <unknown>

// Zn: single vector, Zm: two-register list.
// Relative to the single/single cases only bit 20 of the encoding changes.
smop4a za0.d, z0.h, {z16.h-z17.h} // 10100000-11010000-00000000-00001000
// CHECK-INST: smop4a za0.d, z0.h, { z16.h, z17.h }
// CHECK-ENCODING: [0x08,0x00,0xd0,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0d00008 <unknown>

smop4a za5.d, z10.h, {z20.h-z21.h} // 10100000-11010100-00000001-01001101
// CHECK-INST: smop4a za5.d, z10.h, { z20.h, z21.h }
// CHECK-ENCODING: [0x4d,0x01,0xd4,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0d4014d <unknown>

smop4a za7.d, z14.h, {z30.h-z31.h} // 10100000-11011110-00000001-11001111
// CHECK-INST: smop4a za7.d, z14.h, { z30.h, z31.h }
// CHECK-ENCODING: [0xcf,0x01,0xde,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0de01cf <unknown>

// Zn: two-register list, Zm: single vector.
// Relative to the single/single cases only bit 9 of the encoding changes.
smop4a za0.d, {z0.h-z1.h}, z16.h // 10100000-11000000-00000010-00001000
// CHECK-INST: smop4a za0.d, { z0.h, z1.h }, z16.h
// CHECK-ENCODING: [0x08,0x02,0xc0,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0c00208 <unknown>

smop4a za5.d, {z10.h-z11.h}, z20.h // 10100000-11000100-00000011-01001101
// CHECK-INST: smop4a za5.d, { z10.h, z11.h }, z20.h
// CHECK-ENCODING: [0x4d,0x03,0xc4,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0c4034d <unknown>

smop4a za7.d, {z14.h-z15.h}, z30.h // 10100000-11001110-00000011-11001111
// CHECK-INST: smop4a za7.d, { z14.h, z15.h }, z30.h
// CHECK-ENCODING: [0xcf,0x03,0xce,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0ce03cf <unknown>

// Zn: two-register list, Zm: two-register list.
// Both bit 9 and bit 20 of the encoding are set in these cases.
smop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h} // 10100000-11010000-00000010-00001000
// CHECK-INST: smop4a za0.d, { z0.h, z1.h }, { z16.h, z17.h }
// CHECK-ENCODING: [0x08,0x02,0xd0,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0d00208 <unknown>

smop4a za5.d, {z10.h-z11.h}, {z20.h-z21.h} // 10100000-11010100-00000011-01001101
// CHECK-INST: smop4a za5.d, { z10.h, z11.h }, { z20.h, z21.h }
// CHECK-ENCODING: [0x4d,0x03,0xd4,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0d4034d <unknown>

smop4a za7.d, {z14.h-z15.h}, {z30.h-z31.h} // 10100000-11011110-00000011-11001111
// CHECK-INST: smop4a za7.d, { z14.h, z15.h }, { z30.h, z31.h }
// CHECK-ENCODING: [0xcf,0x03,0xde,0xa0]
// CHECK-ERROR: instruction requires: sme2p2
// CHECK-UNKNOWN: a0de03cf <unknown>
Loading
Loading