Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}

// Generic instruction that a GINodeEquiv entry pairs with the AArch64usdot
// node. It defines a single result ($dst) and reads three sources
// ($src1, $src2, $src3); all four operands are constrained to the same type
// (type0). It is declared to have no side effects.
def G_USDOT : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
let hasSideEffects = 0;
}

// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
Expand Down Expand Up @@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>;

// Pair each dot-product generic opcode (G_UDOT, G_SDOT, G_USDOT) with its
// AArch64 node of the matching name.
// NOTE(review): presumably this lets patterns written against the AArch64*dot
// nodes apply to the generic opcodes as well -- confirm against GINodeEquiv.
def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;

def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerTriOp(AArch64::G_UDOT);
case Intrinsic::aarch64_neon_sdot:
return LowerTriOp(AArch64::G_SDOT);
case Intrinsic::aarch64_neon_usdot:
return LowerTriOp(AArch64::G_USDOT);
case Intrinsic::aarch64_neon_sqxtn:
return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
case Intrinsic::aarch64_neon_sqxtun:
Expand Down
87 changes: 59 additions & 28 deletions llvm/test/CodeGen/AArch64/aarch64-matmul.ll
Original file line number Diff line number Diff line change
@@ -1,41 +1,54 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s -o -| FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s | FileCheck %s
; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm -global-isel < %s | FileCheck %s

define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: smmla.v4i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smmla v0.4s, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: smmla.v4i32.v16i8
; CHECK: smmla v0.4s, v1.16b, v2.16b
%vmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
ret <4 x i32> %vmmla1.i
}

define <4 x i32> @ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: ummla.v4i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ummla v0.4s, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: ummla.v4i32.v16i8
; CHECK: ummla v0.4s, v1.16b, v2.16b
%vmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
ret <4 x i32> %vmmla1.i
}

define <4 x i32> @usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: usmmla.v4i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: usmmla v0.4s, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: usmmla.v4i32.v16i8
; CHECK: usmmla v0.4s, v1.16b, v2.16b
%vusmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
ret <4 x i32> %vusmmla1.i
}

define <2 x i32> @usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: usdot.v2i32.v8i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: usdot v0.2s, v1.8b, v2.8b
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: usdot.v2i32.v8i8
; CHECK: usdot v0.2s, v1.8b, v2.8b
%vusdot1.i = tail call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b)
ret <2 x i32> %vusdot1.i
}

define <2 x i32> @usdot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: usdot_lane.v2i32.v8i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: usdot v0.2s, v1.8b, v2.4b[0]
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: usdot_lane.v2i32.v8i8
; CHECK: usdot v0.2s, v1.8b, v2.4b[0]
%0 = bitcast <8 x i8> %b to <2 x i32>
%shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
%1 = bitcast <2 x i32> %shuffle to <8 x i8>
Expand All @@ -44,9 +57,12 @@ entry:
}

define <2 x i32> @sudot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: sudot_lane.v2i32.v8i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: sudot v0.2s, v1.8b, v2.4b[0]
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: sudot_lane.v2i32.v8i8
; CHECK: sudot v0.2s, v1.8b, v2.4b[0]
%0 = bitcast <8 x i8> %b to <2 x i32>
%shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
%1 = bitcast <2 x i32> %shuffle to <8 x i8>
Expand All @@ -55,9 +71,11 @@ entry:
}

define <2 x i32> @usdot_lane.v2i32.v16i8(<2 x i32> %r, <8 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: usdot_lane.v2i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: usdot v0.2s, v1.8b, v2.4b[0]
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: usdot_lane.v2i32.v16i8
; CHECK: usdot v0.2s, v1.8b, v2.4b[0]
%0 = bitcast <16 x i8> %b to <4 x i32>
%shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> zeroinitializer
%1 = bitcast <2 x i32> %shuffle to <8 x i8>
Expand All @@ -66,9 +84,11 @@ entry:
}

define <2 x i32> @sudot_lane.v2i32.v16i8(<2 x i32> %r, <8 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sudot_lane.v2i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sudot v0.2s, v1.8b, v2.4b[0]
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: sudot_lane.v2i32.v16i8
; CHECK: sudot v0.2s, v1.8b, v2.4b[0]
%0 = bitcast <16 x i8> %b to <4 x i32>
%shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> zeroinitializer
%1 = bitcast <2 x i32> %shuffle to <8 x i8>
Expand All @@ -77,17 +97,22 @@ entry:
}

define <4 x i32> @usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: usdot.v4i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: usdot v0.4s, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: usdot.v4i32.v16i8
; CHECK: usdot v0.4s, v1.16b, v2.16b
%vusdot1.i = tail call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
ret <4 x i32> %vusdot1.i
}

define <4 x i32> @usdot_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: usdot_lane.v4i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: usdot v0.4s, v1.16b, v2.4b[0]
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: usdot_lane.v4i32.v16i8
; CHECK: usdot v0.4s, v1.16b, v2.4b[0]
%0 = bitcast <8 x i8> %b to <2 x i32>
%shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
%1 = bitcast <4 x i32> %shuffle to <16 x i8>
Expand All @@ -96,9 +121,12 @@ entry:
}

define <4 x i32> @sudot_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: sudot_lane.v4i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: sudot v0.4s, v1.16b, v2.4b[0]
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: sudot_lane.v4i32.v16i8
; CHECK: sudot v0.4s, v1.16b, v2.4b[0]
%0 = bitcast <8 x i8> %b to <2 x i32>
%shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
%1 = bitcast <4 x i32> %shuffle to <16 x i8>
Expand All @@ -107,9 +135,11 @@ entry:
}

define <4 x i32> @usdot_laneq.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: usdot_laneq.v4i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: usdot v0.4s, v1.16b, v2.4b[0]
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: usdot_laneq.v4i32.v16i8
; CHECK: usdot v0.4s, v1.16b, v2.4b[0]
%0 = bitcast <16 x i8> %b to <4 x i32>
%shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
%1 = bitcast <4 x i32> %shuffle to <16 x i8>
Expand All @@ -118,9 +148,11 @@ entry:
}

define <4 x i32> @sudot_laneq.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sudot_laneq.v4i32.v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sudot v0.4s, v1.16b, v2.4b[0]
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: sudot_laneq.v4i32.v16i8
; CHECK: sudot v0.4s, v1.16b, v2.4b[0]
%0 = bitcast <16 x i8> %b to <4 x i32>
%shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
%1 = bitcast <4 x i32> %shuffle to <16 x i8>
Expand All @@ -133,4 +165,3 @@ declare <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16
declare <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
declare <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) #2
declare <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2