Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,8 @@ defm SVREADZ_VG2 : ZAReadzArray<"2">;
defm SVREADZ_VG4 : ZAReadzArray<"4">;

// Intrinsics that are only available when both sme2 and sme-lutv2 are enabled.
// Each definition range-checks operand 0 with ImmCheck0_0 (only the value 0 is
// accepted); svwrite_lane_zt additionally range-checks operand 2 with
// ImmCheck1_3 (values 1 through 3).
// NOTE(review): the lane form is tagged IsInOutZT0 while the plain write is
// tagged IsOutZT0 — confirm this difference against the ACLE specification.
let SMETargetGuard = "sme2,sme-lutv2" in {
def SVWRITE_LANE_ZT : SInst<"svwrite_lane_zt[_{d}]", "vidi", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_lane_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck1_3>]>;
def SVWRITE_ZT : SInst<"svwrite_zt[_{d}]", "vid", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_zt", [IsStreaming, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, "aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
}

Expand Down
402 changes: 402 additions & 0 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -fsyntax-only -verify %s
// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sme-lutv2 -fsyntax-only -verify %s

// REQUIRES: aarch64-registered-target

Expand Down Expand Up @@ -351,6 +351,15 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16,
svsudot_lane_za32_s8_vg1x4(slice_base, z_s8x4, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
}

// Immediate-range diagnostics for svwrite_lane_zt and svwrite_zt.
void test_write_zt() __arm_streaming __arm_inout("zt0") {
// First argument (ZT index): only the value 0 is accepted.
svwrite_lane_zt(1, svundef_s8(), 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svwrite_zt(1, svundef_s8()); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
// Third argument of svwrite_lane_zt (lane index): must lie in [1, 3], so
// both the value just below (0) and just above (4) the range are rejected.
svwrite_lane_zt(0, svundef_s8(), 0); // expected-error {{argument value 0 is outside the valid range [1, 3]}}
svwrite_lane_zt(0, svundef_s8(), 4); // expected-error {{argument value 4 is outside the valid range [1, 3]}}
}

void test_luti4_zt_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") {
// Check Zt tile 0
svluti4_zt_u8_x4(1, op); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -2961,6 +2961,15 @@ let TargetPrefix = "aarch64" in {
[llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;

// Write a vector into ZT at a lane offset. No result.
//   operand 0: i32 immediate, the ZT index
//   operand 1: the source vector (overloaded on any vector type)
//   operand 2: i32 immediate, the lane offset
// Marked IntrNoMem + IntrHasSideEffects, like the neighbouring ZT intrinsics.
def int_aarch64_sme_write_lane_zt
: DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrNoMem, IntrHasSideEffects]>;

// Variant of the intrinsic above without the lane-offset operand. No result.
//   operand 0: i32 immediate, the ZT index
//   operand 1: the source vector (overloaded on any vector type)
def int_aarch64_sme_write_zt
: DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty],
[ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;


def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;

class SME_OuterProduct_Intrinsic
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3242,6 +3242,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
return EmitZero(MI, BB);
case AArch64::ZERO_T_PSEUDO:
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
case AArch64::MOVT_TIZ_PSEUDO:
return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true);
}
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -939,8 +939,8 @@ defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
} //[HasSME2, HasFAMINMAX]

let Predicates = [HasSME2, HasSME_LUTv2] in {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated to this patch, but I think the predicates guarding the FEAT_SME_LUTv2 instructions are incorrect.

According to the latest spec, both of these instructions should require only FEAT_SME_LUTv2

which itself requires SME2:

If FEAT_SME_LUTv2 is implemented, then FEAT_SME2 is implemented.

(from the latest Arm Developer documentation)

So we could remove SME2 from this predicate and list it as a dependency of FEAT_SME_LUTv2 in AArch64Features.td.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will do this in a separate patch, because it affects more than one intrinsic.

defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>;
def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
defm MOVT_TIZ : sme2_movt_zt_to_zt<"movt", 0b0011111, int_aarch64_sme_write_lane_zt, int_aarch64_sme_write_zt>;
def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
} //[HasSME2, HasSME_LUTv2]

let Predicates = [HasSME2p1, HasSME_LUTv2] in {
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -3283,10 +3283,21 @@ class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
let Inst{4-0} = Zt;
}

multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
// Defines the MOVT instruction NAME plus the pieces needed to select it from
// intrinsics:
//   * NAME: the real instruction (class sme2_movt_zt_to_zt).
//   * NAME_PSEUDO: a pseudo with operands (ZT, offset, Zt) that is expanded by
//     the target's custom inserter (usesCustomInserter = 1).
//   * An assembly alias that omits the offset and maps to NAME with offset 0.
//   * Selection patterns, one pair per listed scalable vector type.
// Parameters:
//   mnemonic       - assembly mnemonic.
//   opc            - 7-bit opcode field passed to the instruction class.
//   intrinsic_lane - intrinsic taking (zt, vector, lane offset).
//   intrinsic      - intrinsic taking (zt, vector); selected with offset 0.
multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc, SDPatternOperator intrinsic_lane, SDPatternOperator intrinsic> {
def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;
// Pseudo used as the pattern target; it carries no selection pattern itself.
def NAME # _PSEUDO
: Pseudo<(outs), (ins ZTR:$ZT, sme_elm_idx0_3:$off2, ZPRAny:$Zt), []>, Sched<[]> {
let usesCustomInserter = 1;
}
// Offset-free spelling of the instruction: equivalent to an offset of 0.
def : InstAlias<mnemonic # "\t$ZTt, $Zt",
(!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;

foreach vt = [nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16] in {
// Lane form: forward the immediate lane offset to the pseudo.
def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), vt:$zn, sme_elm_idx0_3:$imm),
(!cast<Instruction>(NAME # _PSEUDO) $zt, $imm, $zn)>;
// Non-lane form: same pseudo with the offset fixed to 0.
def : Pat<(intrinsic (imm_to_zt untyped:$zt), vt:$zn),
(!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
}
}

//===----------------------------------------------------------------------===//
Expand Down
162 changes: 162 additions & 0 deletions llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -force-streaming < %s | FileCheck %s

target triple = "aarch64-linux"


;; LANE
; llvm.aarch64.sme.write.lane.zt with lane offset 0: the expected output is the
; offset-free form "movt zt0, z0" rather than an explicit "[0, mul vl]" offset.
define void @test_write_zt_i8_0(<vscale x 16 x i8> %zn) #0 {
; CHECK-LABEL: test_write_zt_i8_0:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0, z0
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> %zn, i32 0)
ret void
}

define void @test_write_zt_i8_1(<vscale x 16 x i8> %zn) #0 {
; CHECK-LABEL: test_write_zt_i8_1:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0[1, mul vl], z0
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> %zn, i32 1)
ret void
}

define void @test_write_zt_i16_2(<vscale x 8 x i16> %zn) #0 {
; CHECK-LABEL: test_write_zt_i16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0[2, mul vl], z0
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> %zn, i32 2)
ret void
}

define void @test_write_zt_i32_3(<vscale x 4 x i32> %zn) #0 {
; CHECK-LABEL: test_write_zt_i32_3:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0[3, mul vl], z0
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> %zn, i32 3)
ret void
}

define void @test_write_zt_i64_1(<vscale x 2 x i64> %zn) #0 {
; CHECK-LABEL: test_write_zt_i64_1:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0[1, mul vl], z0
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> %zn, i32 1)
ret void
}

define void @test_write_zt_f16_2(<vscale x 8 x half> %zn) #0 {
; CHECK-LABEL: test_write_zt_f16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0[2, mul vl], z0
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.lane.zt.nxv8f16(i32 0, <vscale x 8 x half> %zn, i32 2)
ret void
}

define void @test_write_zt_f32_3(<vscale x 4 x float> %zn) #0 {
; CHECK-LABEL: test_write_zt_f32_3:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0[3, mul vl], z0
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> %zn, i32 3)
ret void
}

define void @test_write_zt_f64_1(<vscale x 2 x double> %zn) #0 {
; CHECK-LABEL: test_write_zt_f64_1:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0[1, mul vl], z0
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.lane.zt.nxv2f64(i32 0, <vscale x 2 x double> %zn, i32 1)
ret void
}

define void @test_write_zt_bf16_2(<vscale x 8 x bfloat> %zn) #0 {
; CHECK-LABEL: test_write_zt_bf16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0[2, mul vl], z0
; CHECK-NEXT: ret
call void @llvm.aarch64.sme.write.lane.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn, i32 2)
ret void
}

;; ALIAS
; The tests below call llvm.aarch64.sme.write.zt, which has no lane operand.
; Each one is expected to produce the offset-free form "movt zt0, z0".

define void @test_write_zt_i8(<vscale x 16 x i8> %v) #0 {
; CHECK-LABEL: test_write_zt_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0, z0
; CHECK-NEXT: ret
tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> %v)
ret void
}

define void @test_write_zt_i16(<vscale x 8 x i16> %v) #0 {
; CHECK-LABEL: test_write_zt_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0, z0
; CHECK-NEXT: ret
tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> %v)
ret void
}

define void @test_write_zt_i32(<vscale x 4 x i32> %v) #0 {
; CHECK-LABEL: test_write_zt_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0, z0
; CHECK-NEXT: ret
tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> %v)
ret void
}

define void @test_write_zt_i64(<vscale x 2 x i64> %v) #0 {
; CHECK-LABEL: test_write_zt_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0, z0
; CHECK-NEXT: ret
tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> %v)
ret void
}

define void @test_write_zt_f16(<vscale x 8 x half> %v) #0 {
; CHECK-LABEL: test_write_zt_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0, z0
; CHECK-NEXT: ret
tail call void @llvm.aarch64.sme.write.zt.nxv8f16(i32 0, <vscale x 8 x half> %v)
ret void
}

define void @test_write_zt_bf16(<vscale x 8 x bfloat> %v) #0 {
; CHECK-LABEL: test_write_zt_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0, z0
; CHECK-NEXT: ret
tail call void @llvm.aarch64.sme.write.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> %v)
ret void
}

define void @test_write_zt_f32(<vscale x 4 x float> %v) #0 {
; CHECK-LABEL: test_write_zt_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0, z0
; CHECK-NEXT: ret
tail call void @llvm.aarch64.sme.write.zt.nxv4f32(i32 0, <vscale x 4 x float> %v)
ret void
}

define void @test_write_zt_f64(<vscale x 2 x double> %v) #0 {
; CHECK-LABEL: test_write_zt_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: movt zt0, z0
; CHECK-NEXT: ret
tail call void @llvm.aarch64.sme.write.zt.nxv2f64(i32 0, <vscale x 2 x double> %v)
ret void
}

attributes #0 = { "target-features"="+sme2,+sme-lutv2" }